perplexity-webui-scraper 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +2 -13
- perplexity_webui_scraper/config.py +27 -2
- perplexity_webui_scraper/core.py +166 -9
- perplexity_webui_scraper/enums.py +34 -4
- perplexity_webui_scraper/exceptions.py +74 -0
- perplexity_webui_scraper/http.py +368 -37
- perplexity_webui_scraper/logging.py +256 -0
- perplexity_webui_scraper/mcp/__init__.py +18 -0
- perplexity_webui_scraper/mcp/__main__.py +9 -0
- perplexity_webui_scraper/mcp/server.py +181 -0
- perplexity_webui_scraper/resilience.py +179 -0
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/METADATA +98 -8
- perplexity_webui_scraper-0.3.5.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/entry_points.txt +1 -0
- perplexity_webui_scraper-0.3.4.dist-info/RECORD +0 -16
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/WHEEL +0 -0
|
@@ -4,33 +4,22 @@ from importlib import metadata
|
|
|
4
4
|
|
|
5
5
|
from .config import ClientConfig, ConversationConfig
|
|
6
6
|
from .core import Conversation, Perplexity
|
|
7
|
-
from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
|
|
8
|
-
from .exceptions import (
|
|
9
|
-
AuthenticationError,
|
|
10
|
-
FileUploadError,
|
|
11
|
-
FileValidationError,
|
|
12
|
-
PerplexityError,
|
|
13
|
-
RateLimitError,
|
|
14
|
-
)
|
|
7
|
+
from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
|
|
15
8
|
from .models import Model, Models
|
|
16
9
|
from .types import Coordinates, Response, SearchResultItem
|
|
17
10
|
|
|
18
11
|
|
|
19
12
|
__version__: str = metadata.version("perplexity-webui-scraper")
|
|
20
13
|
__all__: list[str] = [
|
|
21
|
-
"AuthenticationError",
|
|
22
14
|
"CitationMode",
|
|
23
15
|
"ClientConfig",
|
|
24
16
|
"Conversation",
|
|
25
17
|
"ConversationConfig",
|
|
26
18
|
"Coordinates",
|
|
27
|
-
"FileUploadError",
|
|
28
|
-
"FileValidationError",
|
|
19
|
+
"LogLevel",
|
|
29
20
|
"Model",
|
|
30
21
|
"Models",
|
|
31
22
|
"Perplexity",
|
|
32
|
-
"PerplexityError",
|
|
33
|
-
"RateLimitError",
|
|
34
23
|
"Response",
|
|
35
24
|
"SearchFocus",
|
|
36
25
|
"SearchResultItem",
|
|
@@ -5,10 +5,12 @@ from __future__ import annotations
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
|
-
from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
|
|
8
|
+
from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
12
14
|
from .models import Model
|
|
13
15
|
from .types import Coordinates
|
|
14
16
|
|
|
@@ -30,7 +32,30 @@ class ConversationConfig:
|
|
|
30
32
|
|
|
31
33
|
@dataclass(frozen=True, slots=True)
|
|
32
34
|
class ClientConfig:
|
|
33
|
-
"""
|
|
35
|
+
"""
|
|
36
|
+
HTTP client settings.
|
|
37
|
+
|
|
38
|
+
Attributes:
|
|
39
|
+
timeout: Request timeout in seconds.
|
|
40
|
+
impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
|
|
41
|
+
max_retries: Maximum retry attempts for failed requests.
|
|
42
|
+
retry_base_delay: Initial delay in seconds before first retry.
|
|
43
|
+
retry_max_delay: Maximum delay between retries.
|
|
44
|
+
retry_jitter: Random jitter factor (0-1) to add to delays.
|
|
45
|
+
requests_per_second: Rate limit for requests (0 to disable).
|
|
46
|
+
rotate_fingerprint: Whether to rotate browser fingerprint on retries.
|
|
47
|
+
logging_level: Logging verbosity level. Default is DISABLED.
|
|
48
|
+
log_file: Optional file path for persistent logging. If set, logs go to file only.
|
|
49
|
+
If None, logs go to console. All logs are appended.
|
|
50
|
+
"""
|
|
34
51
|
|
|
35
52
|
timeout: int = 3600
|
|
36
53
|
impersonate: str = "chrome"
|
|
54
|
+
max_retries: int = 3
|
|
55
|
+
retry_base_delay: float = 1.0
|
|
56
|
+
retry_max_delay: float = 60.0
|
|
57
|
+
retry_jitter: float = 0.5
|
|
58
|
+
requests_per_second: float = 0.5
|
|
59
|
+
rotate_fingerprint: bool = True
|
|
60
|
+
logging_level: LogLevel = LogLevel.DISABLED
|
|
61
|
+
log_file: str | Path | None = None
|
perplexity_webui_scraper/core.py
CHANGED
|
@@ -26,20 +26,25 @@ from .constants import (
|
|
|
26
26
|
USE_SCHEMATIZED_API,
|
|
27
27
|
)
|
|
28
28
|
from .enums import CitationMode
|
|
29
|
-
from .exceptions import FileUploadError, FileValidationError
|
|
29
|
+
from .exceptions import FileUploadError, FileValidationError, ResearchClarifyingQuestionsError, ResponseParsingError
|
|
30
30
|
from .http import HTTPClient
|
|
31
31
|
from .limits import MAX_FILE_SIZE, MAX_FILES
|
|
32
|
+
from .logging import configure_logging, get_logger, log_conversation_created, log_query_sent
|
|
32
33
|
from .models import Model, Models
|
|
33
34
|
from .types import Response, SearchResultItem, _FileInfo
|
|
34
35
|
|
|
35
36
|
|
|
37
|
+
logger = get_logger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
36
40
|
class Perplexity:
|
|
37
41
|
"""Web scraper for Perplexity AI conversations."""
|
|
38
42
|
|
|
39
43
|
__slots__ = ("_http",)
|
|
40
44
|
|
|
41
45
|
def __init__(self, session_token: str, config: ClientConfig | None = None) -> None:
|
|
42
|
-
"""
|
|
46
|
+
"""
|
|
47
|
+
Initialize web scraper with session token.
|
|
43
48
|
|
|
44
49
|
Args:
|
|
45
50
|
session_token: Perplexity session cookie (__Secure-next-auth.session-token).
|
|
@@ -53,17 +58,71 @@ class Perplexity:
|
|
|
53
58
|
raise ValueError("session_token cannot be empty")
|
|
54
59
|
|
|
55
60
|
cfg = config or ClientConfig()
|
|
56
|
-
|
|
61
|
+
|
|
62
|
+
# Configure logging based on config
|
|
63
|
+
configure_logging(level=cfg.logging_level, log_file=cfg.log_file)
|
|
64
|
+
|
|
65
|
+
logger.info(
|
|
66
|
+
"Perplexity client initializing | "
|
|
67
|
+
f"session_token_length={len(session_token)} "
|
|
68
|
+
f"logging_level={cfg.logging_level.value} "
|
|
69
|
+
f"log_file={cfg.log_file}"
|
|
70
|
+
)
|
|
71
|
+
logger.debug(
|
|
72
|
+
"Client configuration | "
|
|
73
|
+
f"timeout={cfg.timeout}s "
|
|
74
|
+
f"impersonate={cfg.impersonate} "
|
|
75
|
+
f"max_retries={cfg.max_retries} "
|
|
76
|
+
f"retry_base_delay={cfg.retry_base_delay}s "
|
|
77
|
+
f"retry_max_delay={cfg.retry_max_delay}s "
|
|
78
|
+
f"retry_jitter={cfg.retry_jitter} "
|
|
79
|
+
f"requests_per_second={cfg.requests_per_second} "
|
|
80
|
+
f"rotate_fingerprint={cfg.rotate_fingerprint}"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
self._http = HTTPClient(
|
|
84
|
+
session_token,
|
|
85
|
+
timeout=cfg.timeout,
|
|
86
|
+
impersonate=cfg.impersonate,
|
|
87
|
+
max_retries=cfg.max_retries,
|
|
88
|
+
retry_base_delay=cfg.retry_base_delay,
|
|
89
|
+
retry_max_delay=cfg.retry_max_delay,
|
|
90
|
+
retry_jitter=cfg.retry_jitter,
|
|
91
|
+
requests_per_second=cfg.requests_per_second,
|
|
92
|
+
rotate_fingerprint=cfg.rotate_fingerprint,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
logger.info("Perplexity client initialized successfully")
|
|
57
96
|
|
|
58
97
|
def create_conversation(self, config: ConversationConfig | None = None) -> Conversation:
|
|
59
98
|
"""Create a new conversation."""
|
|
60
99
|
|
|
61
|
-
|
|
100
|
+
cfg = config or ConversationConfig()
|
|
101
|
+
logger.debug(
|
|
102
|
+
"Creating conversation | "
|
|
103
|
+
f"model={cfg.model} "
|
|
104
|
+
f"citation_mode={cfg.citation_mode} "
|
|
105
|
+
f"save_to_library={cfg.save_to_library} "
|
|
106
|
+
f"search_focus={cfg.search_focus} "
|
|
107
|
+
f"language={cfg.language}"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
conversation = Conversation(self._http, cfg)
|
|
111
|
+
|
|
112
|
+
log_conversation_created(
|
|
113
|
+
f"model={cfg.model}, citation_mode={cfg.citation_mode}, "
|
|
114
|
+
f"search_focus={cfg.search_focus}, language={cfg.language}"
|
|
115
|
+
)
|
|
116
|
+
logger.info("Conversation created successfully")
|
|
117
|
+
|
|
118
|
+
return conversation
|
|
62
119
|
|
|
63
120
|
def close(self) -> None:
|
|
64
121
|
"""Close the client."""
|
|
65
122
|
|
|
123
|
+
logger.debug("Closing Perplexity client")
|
|
66
124
|
self._http.close()
|
|
125
|
+
logger.info("Perplexity client closed")
|
|
67
126
|
|
|
68
127
|
def __enter__(self) -> Perplexity:
|
|
69
128
|
return self
|
|
@@ -90,6 +149,13 @@ class Conversation:
|
|
|
90
149
|
)
|
|
91
150
|
|
|
92
151
|
def __init__(self, http: HTTPClient, config: ConversationConfig) -> None:
|
|
152
|
+
logger.debug(
|
|
153
|
+
"Conversation.__init__ | "
|
|
154
|
+
f"model={config.model} "
|
|
155
|
+
f"citation_mode={config.citation_mode} "
|
|
156
|
+
f"save_to_library={config.save_to_library} "
|
|
157
|
+
f"search_focus={config.search_focus}"
|
|
158
|
+
)
|
|
93
159
|
self._http = http
|
|
94
160
|
self._config = config
|
|
95
161
|
self._citation_mode = CitationMode.DEFAULT
|
|
@@ -101,6 +167,7 @@ class Conversation:
|
|
|
101
167
|
self._search_results: list[SearchResultItem] = []
|
|
102
168
|
self._raw_data: dict[str, Any] = {}
|
|
103
169
|
self._stream_generator: Generator[Response, None, None] | None = None
|
|
170
|
+
logger.debug("Conversation initialized with empty state")
|
|
104
171
|
|
|
105
172
|
@property
|
|
106
173
|
def answer(self) -> str | None:
|
|
@@ -142,11 +209,29 @@ class Conversation:
|
|
|
142
209
|
) -> Conversation:
|
|
143
210
|
"""Ask a question. Returns self for method chaining or streaming iteration."""
|
|
144
211
|
|
|
212
|
+
logger.info(
|
|
213
|
+
"Conversation.ask called | "
|
|
214
|
+
f"query_length={len(query)} "
|
|
215
|
+
f"query_preview={query[:100]}{'...' if len(query) > 100 else ''} "
|
|
216
|
+
f"model={model} "
|
|
217
|
+
f"files_count={len(files) if files else 0} "
|
|
218
|
+
f"citation_mode={citation_mode} "
|
|
219
|
+
f"stream={stream}"
|
|
220
|
+
)
|
|
221
|
+
|
|
145
222
|
effective_model = model or self._config.model or Models.BEST
|
|
146
223
|
effective_citation = citation_mode if citation_mode is not None else self._config.citation_mode
|
|
147
224
|
self._citation_mode = effective_citation
|
|
225
|
+
|
|
226
|
+
logger.debug(
|
|
227
|
+
f"Effective parameters | effective_model={effective_model} effective_citation={effective_citation}"
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
log_query_sent(query, str(effective_model), bool(files))
|
|
148
231
|
self._execute(query, effective_model, files, stream=stream)
|
|
149
232
|
|
|
233
|
+
logger.debug("Query execution completed")
|
|
234
|
+
|
|
150
235
|
return self
|
|
151
236
|
|
|
152
237
|
def _execute(
|
|
@@ -158,22 +243,49 @@ class Conversation:
|
|
|
158
243
|
) -> None:
|
|
159
244
|
"""Execute a query."""
|
|
160
245
|
|
|
246
|
+
logger.debug(
|
|
247
|
+
f"Executing query | "
|
|
248
|
+
f"query_length={len(query)} "
|
|
249
|
+
f"model={model} "
|
|
250
|
+
f"files_count={len(files) if files else 0} "
|
|
251
|
+
f"stream={stream} "
|
|
252
|
+
f"is_followup={self._backend_uuid is not None}"
|
|
253
|
+
)
|
|
254
|
+
|
|
161
255
|
self._reset_response_state()
|
|
256
|
+
logger.debug("Response state reset")
|
|
162
257
|
|
|
163
258
|
# Upload files
|
|
164
259
|
file_urls: list[str] = []
|
|
165
260
|
|
|
166
261
|
if files:
|
|
262
|
+
logger.debug(f"Validating {len(files)} files")
|
|
167
263
|
validated = self._validate_files(files)
|
|
264
|
+
logger.debug(f"Validated {len(validated)} files, uploading...")
|
|
168
265
|
file_urls = [self._upload_file(f) for f in validated]
|
|
266
|
+
logger.debug(f"Uploaded {len(file_urls)} files successfully")
|
|
169
267
|
|
|
170
268
|
payload = self._build_payload(query, model, file_urls)
|
|
269
|
+
logger.debug(
|
|
270
|
+
f"Payload built | payload_keys={list(payload.keys())} params_keys={list(payload.get('params', {}).keys())}"
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
logger.debug("Initializing search session")
|
|
171
274
|
self._http.init_search(query)
|
|
172
275
|
|
|
173
276
|
if stream:
|
|
277
|
+
logger.debug("Starting streaming mode")
|
|
174
278
|
self._stream_generator = self._stream(payload)
|
|
175
279
|
else:
|
|
280
|
+
logger.debug("Starting complete mode (non-streaming)")
|
|
176
281
|
self._complete(payload)
|
|
282
|
+
logger.debug(
|
|
283
|
+
f"Query completed | "
|
|
284
|
+
f"title={self._title} "
|
|
285
|
+
f"answer_length={len(self._answer) if self._answer else 0} "
|
|
286
|
+
f"chunks_count={len(self._chunks)} "
|
|
287
|
+
f"search_results_count={len(self._search_results)}"
|
|
288
|
+
)
|
|
177
289
|
|
|
178
290
|
def _reset_response_state(self) -> None:
|
|
179
291
|
self._title = None
|
|
@@ -237,8 +349,8 @@ class Conversation:
|
|
|
237
349
|
is_image=mimetype.startswith("image/"),
|
|
238
350
|
)
|
|
239
351
|
)
|
|
240
|
-
except FileValidationError:
|
|
241
|
-
raise
|
|
352
|
+
except FileValidationError as error:
|
|
353
|
+
raise error
|
|
242
354
|
except (FileNotFoundError, PermissionError) as error:
|
|
243
355
|
raise FileValidationError(file_path, f"Cannot access file: {error}") from error
|
|
244
356
|
except OSError as error:
|
|
@@ -356,12 +468,17 @@ class Conversation:
|
|
|
356
468
|
return None
|
|
357
469
|
|
|
358
470
|
def _process_data(self, data: dict[str, Any]) -> None:
|
|
471
|
+
"""Process SSE data chunk and update conversation state."""
|
|
472
|
+
|
|
359
473
|
if self._backend_uuid is None and "backend_uuid" in data:
|
|
360
474
|
self._backend_uuid = data["backend_uuid"]
|
|
361
475
|
|
|
362
476
|
if self._read_write_token is None and "read_write_token" in data:
|
|
363
477
|
self._read_write_token = data["read_write_token"]
|
|
364
478
|
|
|
479
|
+
if self._title is None and "thread_title" in data:
|
|
480
|
+
self._title = data["thread_title"]
|
|
481
|
+
|
|
365
482
|
if "blocks" in data:
|
|
366
483
|
for block in data["blocks"]:
|
|
367
484
|
if block.get("intended_usage") == "web_results":
|
|
@@ -385,7 +502,15 @@ class Conversation:
|
|
|
385
502
|
|
|
386
503
|
if isinstance(json_data, list):
|
|
387
504
|
for item in json_data:
|
|
388
|
-
|
|
505
|
+
step_type = item.get("step_type")
|
|
506
|
+
|
|
507
|
+
# Handle Research mode clarifying questions
|
|
508
|
+
if step_type == "RESEARCH_CLARIFYING_QUESTIONS":
|
|
509
|
+
questions = self._extract_clarifying_questions(item)
|
|
510
|
+
|
|
511
|
+
raise ResearchClarifyingQuestionsError(questions)
|
|
512
|
+
|
|
513
|
+
if step_type == "FINAL":
|
|
389
514
|
raw_content = item.get("content", {})
|
|
390
515
|
answer_content = raw_content.get("answer")
|
|
391
516
|
|
|
@@ -400,7 +525,39 @@ class Conversation:
|
|
|
400
525
|
elif isinstance(json_data, dict):
|
|
401
526
|
self._update_state(data.get("thread_title"), json_data)
|
|
402
527
|
else:
|
|
403
|
-
raise
|
|
528
|
+
raise ResponseParsingError(
|
|
529
|
+
"Unexpected JSON structure in 'text' field",
|
|
530
|
+
raw_data=str(json_data),
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
def _extract_clarifying_questions(self, item: dict[str, Any]) -> list[str]:
|
|
534
|
+
"""Extract clarifying questions from a RESEARCH_CLARIFYING_QUESTIONS step."""
|
|
535
|
+
|
|
536
|
+
questions: list[str] = []
|
|
537
|
+
content = item.get("content", {})
|
|
538
|
+
|
|
539
|
+
# Try different possible structures for questions
|
|
540
|
+
if isinstance(content, dict):
|
|
541
|
+
if "questions" in content:
|
|
542
|
+
raw_questions = content["questions"]
|
|
543
|
+
|
|
544
|
+
if isinstance(raw_questions, list):
|
|
545
|
+
questions = [str(q) for q in raw_questions if q]
|
|
546
|
+
elif "clarifying_questions" in content:
|
|
547
|
+
raw_questions = content["clarifying_questions"]
|
|
548
|
+
|
|
549
|
+
if isinstance(raw_questions, list):
|
|
550
|
+
questions = [str(q) for q in raw_questions if q]
|
|
551
|
+
elif not questions:
|
|
552
|
+
for value in content.values():
|
|
553
|
+
if isinstance(value, str) and "?" in value:
|
|
554
|
+
questions.append(value)
|
|
555
|
+
elif isinstance(content, list):
|
|
556
|
+
questions = [str(q) for q in content if q]
|
|
557
|
+
elif isinstance(content, str):
|
|
558
|
+
questions = [content]
|
|
559
|
+
|
|
560
|
+
return questions
|
|
404
561
|
|
|
405
562
|
def _update_state(self, title: str | None, answer_data: dict[str, Any]) -> None:
|
|
406
563
|
self._title = title
|
|
@@ -426,7 +583,7 @@ class Conversation:
|
|
|
426
583
|
chunks = answer_data.get("chunks", [])
|
|
427
584
|
|
|
428
585
|
if chunks:
|
|
429
|
-
self._chunks = chunks
|
|
586
|
+
self._chunks = [self._format_citations(chunk) for chunk in chunks]
|
|
430
587
|
|
|
431
588
|
self._raw_data = answer_data
|
|
432
589
|
|
|
@@ -6,7 +6,8 @@ from enum import Enum
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class CitationMode(str, Enum):
|
|
9
|
-
"""
|
|
9
|
+
"""
|
|
10
|
+
Citation formatting modes for response text.
|
|
10
11
|
|
|
11
12
|
Controls how citation markers (e.g., [1], [2]) are formatted in the response.
|
|
12
13
|
"""
|
|
@@ -22,7 +23,8 @@ class CitationMode(str, Enum):
|
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class SearchFocus(str, Enum):
|
|
25
|
-
"""
|
|
26
|
+
"""
|
|
27
|
+
Search focus types that control the type of search performed.
|
|
26
28
|
|
|
27
29
|
Determines whether to search the web or focus on writing tasks.
|
|
28
30
|
"""
|
|
@@ -35,7 +37,8 @@ class SearchFocus(str, Enum):
|
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
class SourceFocus(str, Enum):
|
|
38
|
-
"""
|
|
40
|
+
"""
|
|
41
|
+
Source focus types that control which sources to prioritize.
|
|
39
42
|
|
|
40
43
|
Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
|
|
41
44
|
"""
|
|
@@ -54,7 +57,8 @@ class SourceFocus(str, Enum):
|
|
|
54
57
|
|
|
55
58
|
|
|
56
59
|
class TimeRange(str, Enum):
|
|
57
|
-
"""
|
|
60
|
+
"""
|
|
61
|
+
Time range filters for search results.
|
|
58
62
|
|
|
59
63
|
Controls how recent the sources should be.
|
|
60
64
|
"""
|
|
@@ -73,3 +77,29 @@ class TimeRange(str, Enum):
|
|
|
73
77
|
|
|
74
78
|
LAST_YEAR = "YEAR"
|
|
75
79
|
"""Include sources from the last 365 days."""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LogLevel(str, Enum):
|
|
83
|
+
"""
|
|
84
|
+
Logging level configuration.
|
|
85
|
+
|
|
86
|
+
Controls the verbosity of logging output. DISABLED is the default.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
DISABLED = "DISABLED"
|
|
90
|
+
"""Completely disable all logging output. This is the default."""
|
|
91
|
+
|
|
92
|
+
DEBUG = "DEBUG"
|
|
93
|
+
"""Show all messages including internal debug information."""
|
|
94
|
+
|
|
95
|
+
INFO = "INFO"
|
|
96
|
+
"""Show informational messages, warnings, and errors."""
|
|
97
|
+
|
|
98
|
+
WARNING = "WARNING"
|
|
99
|
+
"""Show only warnings and errors."""
|
|
100
|
+
|
|
101
|
+
ERROR = "ERROR"
|
|
102
|
+
"""Show only error messages."""
|
|
103
|
+
|
|
104
|
+
CRITICAL = "CRITICAL"
|
|
105
|
+
"""Show only critical/fatal errors."""
|
|
@@ -3,6 +3,19 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
__all__: list[str] = [
|
|
7
|
+
"AuthenticationError",
|
|
8
|
+
"CloudflareBlockError",
|
|
9
|
+
"FileUploadError",
|
|
10
|
+
"FileValidationError",
|
|
11
|
+
"PerplexityError",
|
|
12
|
+
"RateLimitError",
|
|
13
|
+
"ResearchClarifyingQuestionsError",
|
|
14
|
+
"ResponseParsingError",
|
|
15
|
+
"StreamingError",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
6
19
|
class PerplexityError(Exception):
|
|
7
20
|
"""Base exception for all Perplexity-related errors."""
|
|
8
21
|
|
|
@@ -34,6 +47,25 @@ class RateLimitError(PerplexityError):
|
|
|
34
47
|
)
|
|
35
48
|
|
|
36
49
|
|
|
50
|
+
class CloudflareBlockError(PerplexityError):
|
|
51
|
+
"""
|
|
52
|
+
Raised when Cloudflare blocks the request with a challenge page.
|
|
53
|
+
|
|
54
|
+
This typically means the request triggered Cloudflare's bot detection.
|
|
55
|
+
The client will automatically retry with fingerprint rotation, but if
|
|
56
|
+
this exception is raised, all retry attempts have failed.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
def __init__(self, message: str | None = None) -> None:
|
|
60
|
+
super().__init__(
|
|
61
|
+
message
|
|
62
|
+
or "Cloudflare challenge detected. The request was blocked by Cloudflare's "
|
|
63
|
+
"bot protection. Try waiting a few minutes before retrying, or obtain a "
|
|
64
|
+
"fresh session token.",
|
|
65
|
+
status_code=403,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
|
|
37
69
|
class FileUploadError(PerplexityError):
|
|
38
70
|
"""Raised when file upload fails."""
|
|
39
71
|
|
|
@@ -48,3 +80,45 @@ class FileValidationError(PerplexityError):
|
|
|
48
80
|
def __init__(self, file_path: str, reason: str) -> None:
|
|
49
81
|
self.file_path = file_path
|
|
50
82
|
super().__init__(f"File validation failed for '{file_path}': {reason}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ResearchClarifyingQuestionsError(PerplexityError):
|
|
86
|
+
"""
|
|
87
|
+
Raised when Research mode requires clarifying questions.
|
|
88
|
+
|
|
89
|
+
This library does not support programmatic interaction with clarifying questions.
|
|
90
|
+
Consider rephrasing your query to be more specific.
|
|
91
|
+
|
|
92
|
+
Attributes:
|
|
93
|
+
questions: List of clarifying questions from the API.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def __init__(self, questions: list[str]) -> None:
|
|
97
|
+
self.questions = questions
|
|
98
|
+
questions_text = "\n".join(f" - {q}" for q in questions) if questions else " (no questions provided)"
|
|
99
|
+
|
|
100
|
+
super().__init__(
|
|
101
|
+
f"Research mode is asking clarifying questions:\n{questions_text}\n\n"
|
|
102
|
+
"Programmatic interaction with clarifying questions is not supported. "
|
|
103
|
+
"Please rephrase your query to be more specific."
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class ResponseParsingError(PerplexityError):
|
|
108
|
+
"""
|
|
109
|
+
Raised when the API response cannot be parsed.
|
|
110
|
+
|
|
111
|
+
Attributes:
|
|
112
|
+
raw_data: The raw data that failed to parse.
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
def __init__(self, message: str, raw_data: str | None = None) -> None:
|
|
116
|
+
self.raw_data = raw_data
|
|
117
|
+
super().__init__(f"Failed to parse API response: {message}")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class StreamingError(PerplexityError):
|
|
121
|
+
"""Raised when an error occurs during streaming."""
|
|
122
|
+
|
|
123
|
+
def __init__(self, message: str) -> None:
|
|
124
|
+
super().__init__(f"Streaming error: {message}")
|