perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +4 -15
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +216 -0
- perplexity_webui_scraper/config.py +29 -4
- perplexity_webui_scraper/constants.py +9 -35
- perplexity_webui_scraper/core.py +225 -21
- perplexity_webui_scraper/enums.py +34 -4
- perplexity_webui_scraper/exceptions.py +74 -0
- perplexity_webui_scraper/http.py +370 -36
- perplexity_webui_scraper/limits.py +2 -5
- perplexity_webui_scraper/logging.py +256 -0
- perplexity_webui_scraper/mcp/__init__.py +18 -0
- perplexity_webui_scraper/mcp/__main__.py +9 -0
- perplexity_webui_scraper/mcp/server.py +181 -0
- perplexity_webui_scraper/models.py +34 -19
- perplexity_webui_scraper/resilience.py +179 -0
- perplexity_webui_scraper-0.3.5.dist-info/METADATA +304 -0
- perplexity_webui_scraper-0.3.5.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.3.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/WHEEL +1 -1
- perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt +4 -0
- perplexity_webui_scraper-0.3.3.dist-info/METADATA +0 -166
- perplexity_webui_scraper-0.3.3.dist-info/RECORD +0 -14
perplexity_webui_scraper/core.py
CHANGED
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any
|
|
9
9
|
from uuid import uuid4
|
|
10
10
|
|
|
11
|
-
from orjson import loads
|
|
11
|
+
from orjson import JSONDecodeError, loads
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
@@ -26,20 +26,25 @@ from .constants import (
|
|
|
26
26
|
USE_SCHEMATIZED_API,
|
|
27
27
|
)
|
|
28
28
|
from .enums import CitationMode
|
|
29
|
-
from .exceptions import FileUploadError, FileValidationError
|
|
29
|
+
from .exceptions import FileUploadError, FileValidationError, ResearchClarifyingQuestionsError, ResponseParsingError
|
|
30
30
|
from .http import HTTPClient
|
|
31
31
|
from .limits import MAX_FILE_SIZE, MAX_FILES
|
|
32
|
+
from .logging import configure_logging, get_logger, log_conversation_created, log_query_sent
|
|
32
33
|
from .models import Model, Models
|
|
33
34
|
from .types import Response, SearchResultItem, _FileInfo
|
|
34
35
|
|
|
35
36
|
|
|
37
|
+
logger = get_logger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
36
40
|
class Perplexity:
|
|
37
41
|
"""Web scraper for Perplexity AI conversations."""
|
|
38
42
|
|
|
39
43
|
__slots__ = ("_http",)
|
|
40
44
|
|
|
41
45
|
def __init__(self, session_token: str, config: ClientConfig | None = None) -> None:
|
|
42
|
-
"""
|
|
46
|
+
"""
|
|
47
|
+
Initialize web scraper with session token.
|
|
43
48
|
|
|
44
49
|
Args:
|
|
45
50
|
session_token: Perplexity session cookie (__Secure-next-auth.session-token).
|
|
@@ -53,15 +58,71 @@ class Perplexity:
|
|
|
53
58
|
raise ValueError("session_token cannot be empty")
|
|
54
59
|
|
|
55
60
|
cfg = config or ClientConfig()
|
|
56
|
-
|
|
61
|
+
|
|
62
|
+
# Configure logging based on config
|
|
63
|
+
configure_logging(level=cfg.logging_level, log_file=cfg.log_file)
|
|
64
|
+
|
|
65
|
+
logger.info(
|
|
66
|
+
"Perplexity client initializing | "
|
|
67
|
+
f"session_token_length={len(session_token)} "
|
|
68
|
+
f"logging_level={cfg.logging_level.value} "
|
|
69
|
+
f"log_file={cfg.log_file}"
|
|
70
|
+
)
|
|
71
|
+
logger.debug(
|
|
72
|
+
"Client configuration | "
|
|
73
|
+
f"timeout={cfg.timeout}s "
|
|
74
|
+
f"impersonate={cfg.impersonate} "
|
|
75
|
+
f"max_retries={cfg.max_retries} "
|
|
76
|
+
f"retry_base_delay={cfg.retry_base_delay}s "
|
|
77
|
+
f"retry_max_delay={cfg.retry_max_delay}s "
|
|
78
|
+
f"retry_jitter={cfg.retry_jitter} "
|
|
79
|
+
f"requests_per_second={cfg.requests_per_second} "
|
|
80
|
+
f"rotate_fingerprint={cfg.rotate_fingerprint}"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
self._http = HTTPClient(
|
|
84
|
+
session_token,
|
|
85
|
+
timeout=cfg.timeout,
|
|
86
|
+
impersonate=cfg.impersonate,
|
|
87
|
+
max_retries=cfg.max_retries,
|
|
88
|
+
retry_base_delay=cfg.retry_base_delay,
|
|
89
|
+
retry_max_delay=cfg.retry_max_delay,
|
|
90
|
+
retry_jitter=cfg.retry_jitter,
|
|
91
|
+
requests_per_second=cfg.requests_per_second,
|
|
92
|
+
rotate_fingerprint=cfg.rotate_fingerprint,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
logger.info("Perplexity client initialized successfully")
|
|
57
96
|
|
|
58
97
|
def create_conversation(self, config: ConversationConfig | None = None) -> Conversation:
    """Build a new ``Conversation`` bound to this client's HTTP session.

    Args:
        config: Conversation settings. When omitted (or falsy) a default
            ``ConversationConfig()`` is used.

    Returns:
        The newly constructed conversation.
    """
    settings = config or ConversationConfig()

    # Verbose trace of every setting before the object is built.
    logger.debug(
        "Creating conversation | "
        f"model={settings.model} "
        f"citation_mode={settings.citation_mode} "
        f"save_to_library={settings.save_to_library} "
        f"search_focus={settings.search_focus} "
        f"language={settings.language}"
    )

    new_conversation = Conversation(self._http, settings)

    # Summary handed to the dedicated conversation-created log helper.
    summary = (
        f"model={settings.model}, citation_mode={settings.citation_mode}, "
        f"search_focus={settings.search_focus}, language={settings.language}"
    )
    log_conversation_created(summary)
    logger.info("Conversation created successfully")

    return new_conversation
|
|
61
119
|
|
|
62
120
|
def close(self) -> None:
    """Close the underlying HTTP client, logging before and after the call."""
    logger.debug("Closing Perplexity client")
    transport = self._http
    transport.close()
    logger.info("Perplexity client closed")
|
|
65
126
|
|
|
66
127
|
def __enter__(self) -> Perplexity:
|
|
67
128
|
return self
|
|
@@ -88,6 +149,13 @@ class Conversation:
|
|
|
88
149
|
)
|
|
89
150
|
|
|
90
151
|
def __init__(self, http: HTTPClient, config: ConversationConfig) -> None:
|
|
152
|
+
logger.debug(
|
|
153
|
+
"Conversation.__init__ | "
|
|
154
|
+
f"model={config.model} "
|
|
155
|
+
f"citation_mode={config.citation_mode} "
|
|
156
|
+
f"save_to_library={config.save_to_library} "
|
|
157
|
+
f"search_focus={config.search_focus}"
|
|
158
|
+
)
|
|
91
159
|
self._http = http
|
|
92
160
|
self._config = config
|
|
93
161
|
self._citation_mode = CitationMode.DEFAULT
|
|
@@ -99,70 +167,125 @@ class Conversation:
|
|
|
99
167
|
self._search_results: list[SearchResultItem] = []
|
|
100
168
|
self._raw_data: dict[str, Any] = {}
|
|
101
169
|
self._stream_generator: Generator[Response, None, None] | None = None
|
|
170
|
+
logger.debug("Conversation initialized with empty state")
|
|
102
171
|
|
|
103
172
|
@property
|
|
104
173
|
def answer(self) -> str | None:
|
|
105
174
|
"""Last response text."""
|
|
175
|
+
|
|
106
176
|
return self._answer
|
|
107
177
|
|
|
108
178
|
@property
|
|
109
179
|
def title(self) -> str | None:
|
|
110
180
|
"""Conversation title."""
|
|
181
|
+
|
|
111
182
|
return self._title
|
|
112
183
|
|
|
113
184
|
@property
|
|
114
185
|
def search_results(self) -> list[SearchResultItem]:
|
|
115
186
|
"""Search results from last response."""
|
|
187
|
+
|
|
116
188
|
return self._search_results
|
|
117
189
|
|
|
118
190
|
@property
|
|
119
191
|
def uuid(self) -> str | None:
|
|
120
192
|
"""Conversation UUID."""
|
|
193
|
+
|
|
121
194
|
return self._backend_uuid
|
|
122
195
|
|
|
123
196
|
def __iter__(self) -> Generator[Response, None, None]:
|
|
124
197
|
if self._stream_generator is not None:
|
|
125
198
|
yield from self._stream_generator
|
|
199
|
+
|
|
126
200
|
self._stream_generator = None
|
|
127
201
|
|
|
128
202
|
def ask(
    self,
    query: str,
    model: Model | None = None,
    files: list[str | PathLike] | None = None,
    citation_mode: CitationMode | None = None,
    stream: bool = False,
) -> Conversation:
    """Send ``query`` in this conversation and return the conversation itself.

    Args:
        query: The question text.
        model: Model for this call; falls back to the conversation config's
            model and then to ``Models.BEST``.
        files: Optional paths to attach.
        citation_mode: Citation formatting for this call; ``None`` means use
            the conversation config's mode.
        stream: Forwarded to ``_execute``; when true the responses are
            obtained by iterating the returned conversation.

    Returns:
        ``self``, so calls can be chained or iterated.
    """
    # First 100 characters of the query, with an ellipsis when truncated.
    preview_suffix = "..." if len(query) > 100 else ""
    attachment_count = len(files) if files else 0
    logger.info(
        "Conversation.ask called | "
        f"query_length={len(query)} "
        f"query_preview={query[:100]}{preview_suffix} "
        f"model={model} "
        f"files_count={attachment_count} "
        f"citation_mode={citation_mode} "
        f"stream={stream}"
    )

    chosen_model = model or self._config.model or Models.BEST
    if citation_mode is None:
        chosen_citation = self._config.citation_mode
    else:
        chosen_citation = citation_mode
    self._citation_mode = chosen_citation

    logger.debug(f"Effective parameters | effective_model={chosen_model} effective_citation={chosen_citation}")

    log_query_sent(query, str(chosen_model), bool(files))
    self._execute(query, chosen_model, files, stream=stream)

    logger.debug("Query execution completed")

    return self
|
|
142
236
|
|
|
143
237
|
def _execute(
    self,
    query: str,
    model: Model,
    files: list[str | PathLike] | None,
    stream: bool = False,
) -> None:
    """Run one query: reset state, upload files, build the payload, send it.

    Args:
        query: The question text.
        model: Model to use for this request.
        files: Optional paths to validate and upload first.
        stream: When true, store a response generator for later iteration
            instead of consuming the response here.
    """
    attachment_count = len(files) if files else 0
    logger.debug(
        "Executing query | "
        f"query_length={len(query)} "
        f"model={model} "
        f"files_count={attachment_count} "
        f"stream={stream} "
        f"is_followup={self._backend_uuid is not None}"
    )

    self._reset_response_state()
    logger.debug("Response state reset")

    # Upload attachments (if any) and collect the URLs the upload step returns.
    file_urls: list[str] = []
    if files:
        logger.debug(f"Validating {len(files)} files")
        validated = self._validate_files(files)
        logger.debug(f"Validated {len(validated)} files, uploading...")
        for file_info in validated:
            file_urls.append(self._upload_file(file_info))
        logger.debug(f"Uploaded {len(file_urls)} files successfully")

    payload = self._build_payload(query, model, file_urls)
    payload_keys = list(payload.keys())
    params_keys = list(payload.get('params', {}).keys())
    logger.debug(f"Payload built | payload_keys={payload_keys} params_keys={params_keys}")

    logger.debug("Initializing search session")
    self._http.init_search(query)

    if stream:
        # Nothing is consumed yet; the generator is drained via iteration.
        logger.debug("Starting streaming mode")
        self._stream_generator = self._stream(payload)
        return

    logger.debug("Starting complete mode (non-streaming)")
    self._complete(payload)

    answer_length = len(self._answer) if self._answer else 0
    logger.debug(
        "Query completed | "
        f"title={self._title} "
        f"answer_length={answer_length} "
        f"chunks_count={len(self._chunks)} "
        f"search_results_count={len(self._search_results)}"
    )
|
|
166
289
|
|
|
167
290
|
def _reset_response_state(self) -> None:
|
|
168
291
|
self._title = None
|
|
@@ -172,15 +295,17 @@ class Conversation:
|
|
|
172
295
|
self._raw_data = {}
|
|
173
296
|
self._stream_generator = None
|
|
174
297
|
|
|
175
|
-
def _validate_files(self, files: list[str | PathLike
|
|
298
|
+
def _validate_files(self, files: list[str | PathLike] | None) -> list[_FileInfo]:
|
|
176
299
|
if not files:
|
|
177
300
|
return []
|
|
178
301
|
|
|
179
302
|
seen: set[str] = set()
|
|
180
303
|
file_list: list[Path] = []
|
|
304
|
+
|
|
181
305
|
for item in files:
|
|
182
306
|
if item and isinstance(item, (str, PathLike)):
|
|
183
307
|
path = Path(item).resolve()
|
|
308
|
+
|
|
184
309
|
if path.as_posix() not in seen:
|
|
185
310
|
seen.add(path.as_posix())
|
|
186
311
|
file_list.append(path)
|
|
@@ -203,11 +328,13 @@ class Conversation:
|
|
|
203
328
|
raise FileValidationError(file_path, "Path is not a file")
|
|
204
329
|
|
|
205
330
|
file_size = path.stat().st_size
|
|
331
|
+
|
|
206
332
|
if file_size > MAX_FILE_SIZE:
|
|
207
333
|
raise FileValidationError(
|
|
208
334
|
file_path,
|
|
209
335
|
f"File exceeds 50MB limit: {file_size / (1024 * 1024):.1f}MB",
|
|
210
336
|
)
|
|
337
|
+
|
|
211
338
|
if file_size == 0:
|
|
212
339
|
raise FileValidationError(file_path, "File is empty")
|
|
213
340
|
|
|
@@ -222,12 +349,12 @@ class Conversation:
|
|
|
222
349
|
is_image=mimetype.startswith("image/"),
|
|
223
350
|
)
|
|
224
351
|
)
|
|
225
|
-
except FileValidationError:
|
|
226
|
-
raise
|
|
227
|
-
except (FileNotFoundError, PermissionError) as
|
|
228
|
-
raise FileValidationError(file_path, f"Cannot access file: {
|
|
229
|
-
except OSError as
|
|
230
|
-
raise FileValidationError(file_path, f"File system error: {
|
|
352
|
+
except FileValidationError as error:
|
|
353
|
+
raise error
|
|
354
|
+
except (FileNotFoundError, PermissionError) as error:
|
|
355
|
+
raise FileValidationError(file_path, f"Cannot access file: {error}") from error
|
|
356
|
+
except OSError as error:
|
|
357
|
+
raise FileValidationError(file_path, f"File system error: {error}") from error
|
|
231
358
|
|
|
232
359
|
return result
|
|
233
360
|
|
|
@@ -255,8 +382,8 @@ class Conversation:
|
|
|
255
382
|
raise FileUploadError(file_info.path, "No upload URL returned")
|
|
256
383
|
|
|
257
384
|
return upload_url
|
|
258
|
-
except FileUploadError:
|
|
259
|
-
raise
|
|
385
|
+
except FileUploadError as error:
|
|
386
|
+
raise error
|
|
260
387
|
except Exception as e:
|
|
261
388
|
raise FileUploadError(file_info.path, str(e)) from e
|
|
262
389
|
|
|
@@ -301,6 +428,7 @@ class Conversation:
|
|
|
301
428
|
if self._backend_uuid is not None:
|
|
302
429
|
params["last_backend_uuid"] = self._backend_uuid
|
|
303
430
|
params["query_source"] = "followup"
|
|
431
|
+
|
|
304
432
|
if self._read_write_token:
|
|
305
433
|
params["read_write_token"] = self._read_write_token
|
|
306
434
|
|
|
@@ -312,6 +440,7 @@ class Conversation:
|
|
|
312
440
|
|
|
313
441
|
def replacer(m: Match[str]) -> str:
|
|
314
442
|
num = m.group(1)
|
|
443
|
+
|
|
315
444
|
if not num.isdigit():
|
|
316
445
|
return m.group(0)
|
|
317
446
|
|
|
@@ -319,8 +448,10 @@ class Conversation:
|
|
|
319
448
|
return ""
|
|
320
449
|
|
|
321
450
|
idx = int(num) - 1
|
|
451
|
+
|
|
322
452
|
if 0 <= idx < len(self._search_results):
|
|
323
453
|
url = self._search_results[idx].url or ""
|
|
454
|
+
|
|
324
455
|
if self._citation_mode == CitationMode.MARKDOWN and url:
|
|
325
456
|
return f"[{num}]({url})"
|
|
326
457
|
|
|
@@ -330,26 +461,56 @@ class Conversation:
|
|
|
330
461
|
|
|
331
462
|
def _parse_line(self, line: str | bytes) -> dict[str, Any] | None:
|
|
332
463
|
prefix = b"data: " if isinstance(line, bytes) else "data: "
|
|
464
|
+
|
|
333
465
|
if (isinstance(line, bytes) and line.startswith(prefix)) or (isinstance(line, str) and line.startswith(prefix)):
|
|
334
466
|
return loads(line[6:])
|
|
467
|
+
|
|
335
468
|
return None
|
|
336
469
|
|
|
337
470
|
def _process_data(self, data: dict[str, Any]) -> None:
|
|
471
|
+
"""Process SSE data chunk and update conversation state."""
|
|
472
|
+
|
|
338
473
|
if self._backend_uuid is None and "backend_uuid" in data:
|
|
339
474
|
self._backend_uuid = data["backend_uuid"]
|
|
340
475
|
|
|
341
476
|
if self._read_write_token is None and "read_write_token" in data:
|
|
342
477
|
self._read_write_token = data["read_write_token"]
|
|
343
478
|
|
|
344
|
-
if "
|
|
345
|
-
|
|
479
|
+
if self._title is None and "thread_title" in data:
|
|
480
|
+
self._title = data["thread_title"]
|
|
481
|
+
|
|
482
|
+
if "blocks" in data:
|
|
483
|
+
for block in data["blocks"]:
|
|
484
|
+
if block.get("intended_usage") == "web_results":
|
|
485
|
+
diff = block.get("diff_block", {})
|
|
486
|
+
|
|
487
|
+
for patch in diff.get("patches", []):
|
|
488
|
+
if patch.get("op") == "replace" and patch.get("path") == "/web_results":
|
|
489
|
+
pass
|
|
490
|
+
|
|
491
|
+
if "text" not in data and "blocks" not in data:
|
|
492
|
+
return None
|
|
493
|
+
|
|
494
|
+
try:
|
|
495
|
+
json_data = loads(data["text"])
|
|
496
|
+
except KeyError as e:
|
|
497
|
+
raise ValueError("Missing 'text' field in data") from e
|
|
498
|
+
except JSONDecodeError as e:
|
|
499
|
+
raise ValueError("Invalid JSON in 'text' field") from e
|
|
346
500
|
|
|
347
|
-
json_data = loads(data["text"])
|
|
348
501
|
answer_data: dict[str, Any] = {}
|
|
349
502
|
|
|
350
503
|
if isinstance(json_data, list):
|
|
351
504
|
for item in json_data:
|
|
352
|
-
|
|
505
|
+
step_type = item.get("step_type")
|
|
506
|
+
|
|
507
|
+
# Handle Research mode clarifying questions
|
|
508
|
+
if step_type == "RESEARCH_CLARIFYING_QUESTIONS":
|
|
509
|
+
questions = self._extract_clarifying_questions(item)
|
|
510
|
+
|
|
511
|
+
raise ResearchClarifyingQuestionsError(questions)
|
|
512
|
+
|
|
513
|
+
if step_type == "FINAL":
|
|
353
514
|
raw_content = item.get("content", {})
|
|
354
515
|
answer_content = raw_content.get("answer")
|
|
355
516
|
|
|
@@ -359,14 +520,50 @@ class Conversation:
|
|
|
359
520
|
answer_data = raw_content
|
|
360
521
|
|
|
361
522
|
self._update_state(data.get("thread_title"), answer_data)
|
|
523
|
+
|
|
362
524
|
break
|
|
363
525
|
elif isinstance(json_data, dict):
|
|
364
526
|
self._update_state(data.get("thread_title"), json_data)
|
|
527
|
+
else:
|
|
528
|
+
raise ResponseParsingError(
|
|
529
|
+
"Unexpected JSON structure in 'text' field",
|
|
530
|
+
raw_data=str(json_data),
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
def _extract_clarifying_questions(self, item: dict[str, Any]) -> list[str]:
|
|
534
|
+
"""Extract clarifying questions from a RESEARCH_CLARIFYING_QUESTIONS step."""
|
|
535
|
+
|
|
536
|
+
questions: list[str] = []
|
|
537
|
+
content = item.get("content", {})
|
|
538
|
+
|
|
539
|
+
# Try different possible structures for questions
|
|
540
|
+
if isinstance(content, dict):
|
|
541
|
+
if "questions" in content:
|
|
542
|
+
raw_questions = content["questions"]
|
|
543
|
+
|
|
544
|
+
if isinstance(raw_questions, list):
|
|
545
|
+
questions = [str(q) for q in raw_questions if q]
|
|
546
|
+
elif "clarifying_questions" in content:
|
|
547
|
+
raw_questions = content["clarifying_questions"]
|
|
548
|
+
|
|
549
|
+
if isinstance(raw_questions, list):
|
|
550
|
+
questions = [str(q) for q in raw_questions if q]
|
|
551
|
+
elif not questions:
|
|
552
|
+
for value in content.values():
|
|
553
|
+
if isinstance(value, str) and "?" in value:
|
|
554
|
+
questions.append(value)
|
|
555
|
+
elif isinstance(content, list):
|
|
556
|
+
questions = [str(q) for q in content if q]
|
|
557
|
+
elif isinstance(content, str):
|
|
558
|
+
questions = [content]
|
|
559
|
+
|
|
560
|
+
return questions
|
|
365
561
|
|
|
366
562
|
def _update_state(self, title: str | None, answer_data: dict[str, Any]) -> None:
|
|
367
563
|
self._title = title
|
|
368
564
|
|
|
369
565
|
web_results = answer_data.get("web_results", [])
|
|
566
|
+
|
|
370
567
|
if web_results:
|
|
371
568
|
self._search_results = [
|
|
372
569
|
SearchResultItem(
|
|
@@ -379,12 +576,14 @@ class Conversation:
|
|
|
379
576
|
]
|
|
380
577
|
|
|
381
578
|
answer_text = answer_data.get("answer")
|
|
579
|
+
|
|
382
580
|
if answer_text is not None:
|
|
383
581
|
self._answer = self._format_citations(answer_text)
|
|
384
582
|
|
|
385
583
|
chunks = answer_data.get("chunks", [])
|
|
584
|
+
|
|
386
585
|
if chunks:
|
|
387
|
-
self._chunks = chunks
|
|
586
|
+
self._chunks = [self._format_citations(chunk) for chunk in chunks]
|
|
388
587
|
|
|
389
588
|
self._raw_data = answer_data
|
|
390
589
|
|
|
@@ -402,16 +601,21 @@ class Conversation:
|
|
|
402
601
|
def _complete(self, payload: dict[str, Any]) -> None:
|
|
403
602
|
for line in self._http.stream_ask(payload):
|
|
404
603
|
data = self._parse_line(line)
|
|
604
|
+
|
|
405
605
|
if data:
|
|
406
606
|
self._process_data(data)
|
|
607
|
+
|
|
407
608
|
if data.get("final"):
|
|
408
609
|
break
|
|
409
610
|
|
|
410
611
|
def _stream(self, payload: dict[str, Any]) -> Generator[Response, None, None]:
|
|
411
612
|
for line in self._http.stream_ask(payload):
|
|
412
613
|
data = self._parse_line(line)
|
|
614
|
+
|
|
413
615
|
if data:
|
|
414
616
|
self._process_data(data)
|
|
617
|
+
|
|
415
618
|
yield self._build_response()
|
|
619
|
+
|
|
416
620
|
if data.get("final"):
|
|
417
621
|
break
|
|
@@ -6,7 +6,8 @@ from enum import Enum
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class CitationMode(str, Enum):
|
|
9
|
-
"""
|
|
9
|
+
"""
|
|
10
|
+
Citation formatting modes for response text.
|
|
10
11
|
|
|
11
12
|
Controls how citation markers (e.g., [1], [2]) are formatted in the response.
|
|
12
13
|
"""
|
|
@@ -22,7 +23,8 @@ class CitationMode(str, Enum):
|
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class SearchFocus(str, Enum):
|
|
25
|
-
"""
|
|
26
|
+
"""
|
|
27
|
+
Search focus types that control the type of search performed.
|
|
26
28
|
|
|
27
29
|
Determines whether to search the web or focus on writing tasks.
|
|
28
30
|
"""
|
|
@@ -35,7 +37,8 @@ class SearchFocus(str, Enum):
|
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
class SourceFocus(str, Enum):
|
|
38
|
-
"""
|
|
40
|
+
"""
|
|
41
|
+
Source focus types that control which sources to prioritize.
|
|
39
42
|
|
|
40
43
|
Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
|
|
41
44
|
"""
|
|
@@ -54,7 +57,8 @@ class SourceFocus(str, Enum):
|
|
|
54
57
|
|
|
55
58
|
|
|
56
59
|
class TimeRange(str, Enum):
|
|
57
|
-
"""
|
|
60
|
+
"""
|
|
61
|
+
Time range filters for search results.
|
|
58
62
|
|
|
59
63
|
Controls how recent the sources should be.
|
|
60
64
|
"""
|
|
@@ -73,3 +77,29 @@ class TimeRange(str, Enum):
|
|
|
73
77
|
|
|
74
78
|
LAST_YEAR = "YEAR"
|
|
75
79
|
"""Include sources from the last 365 days."""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LogLevel(str, Enum):
    """Verbosity levels for the package's logging output.

    Every member is a string equal to its own name. ``DISABLED`` is the
    default and turns all output off.
    """

    # Completely disable all logging output. This is the default.
    DISABLED = "DISABLED"

    # Show all messages including internal debug information.
    DEBUG = "DEBUG"

    # Show informational messages, warnings, and errors.
    INFO = "INFO"

    # Show only warnings and errors.
    WARNING = "WARNING"

    # Show only error messages.
    ERROR = "ERROR"

    # Show only critical/fatal errors.
    CRITICAL = "CRITICAL"
|
|
@@ -3,6 +3,19 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
__all__: list[str] = [
|
|
7
|
+
"AuthenticationError",
|
|
8
|
+
"CloudflareBlockError",
|
|
9
|
+
"FileUploadError",
|
|
10
|
+
"FileValidationError",
|
|
11
|
+
"PerplexityError",
|
|
12
|
+
"RateLimitError",
|
|
13
|
+
"ResearchClarifyingQuestionsError",
|
|
14
|
+
"ResponseParsingError",
|
|
15
|
+
"StreamingError",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
6
19
|
class PerplexityError(Exception):
|
|
7
20
|
"""Base exception for all Perplexity-related errors."""
|
|
8
21
|
|
|
@@ -34,6 +47,25 @@ class RateLimitError(PerplexityError):
|
|
|
34
47
|
)
|
|
35
48
|
|
|
36
49
|
|
|
50
|
+
class CloudflareBlockError(PerplexityError):
    """Raised when Cloudflare answers a request with a challenge page.

    This usually means the request tripped Cloudflare's bot detection. The
    client retries automatically with fingerprint rotation, so seeing this
    exception means every retry attempt has failed.
    """

    def __init__(self, message: str | None = None) -> None:
        """Create the error with ``message``, or a default explanation.

        Args:
            message: Custom text; when omitted or empty the default
                explanation below is used. The status code is always 403.
        """
        default_message = (
            "Cloudflare challenge detected. The request was blocked by Cloudflare's "
            "bot protection. Try waiting a few minutes before retrying, or obtain a "
            "fresh session token."
        )
        text = message if message else default_message

        super().__init__(text, status_code=403)
|
|
67
|
+
|
|
68
|
+
|
|
37
69
|
class FileUploadError(PerplexityError):
|
|
38
70
|
"""Raised when file upload fails."""
|
|
39
71
|
|
|
@@ -48,3 +80,45 @@ class FileValidationError(PerplexityError):
|
|
|
48
80
|
def __init__(self, file_path: str, reason: str) -> None:
|
|
49
81
|
self.file_path = file_path
|
|
50
82
|
super().__init__(f"File validation failed for '{file_path}': {reason}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ResearchClarifyingQuestionsError(PerplexityError):
    """Raised when Research mode replies with clarifying questions.

    The library cannot answer such questions programmatically; rephrasing
    the query to be more specific is the suggested way forward.

    Attributes:
        questions: The clarifying questions returned by the API.
    """

    def __init__(self, questions: list[str]) -> None:
        """Store ``questions`` and build a message listing them.

        Args:
            questions: Questions to report; may be empty.
        """
        self.questions = questions

        # One bullet per question, or a placeholder when there are none.
        if questions:
            bullet_lines = [f" - {question}" for question in questions]
            listing = "\n".join(bullet_lines)
        else:
            listing = " (no questions provided)"

        super().__init__(
            f"Research mode is asking clarifying questions:\n{listing}\n\n"
            "Programmatic interaction with clarifying questions is not supported. "
            "Please rephrase your query to be more specific."
        )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class ResponseParsingError(PerplexityError):
    """Raised when an API response cannot be parsed.

    Attributes:
        raw_data: The raw data that could not be parsed, if supplied.
    """

    def __init__(self, message: str, raw_data: str | None = None) -> None:
        """Record ``raw_data`` and prefix ``message`` with the failure context.

        Args:
            message: Description of what went wrong.
            raw_data: Optional raw payload kept on the exception.
        """
        self.raw_data = raw_data

        detail = f"Failed to parse API response: {message}"
        super().__init__(detail)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class StreamingError(PerplexityError):
    """Signals a failure that happened while streaming a response."""

    def __init__(self, message: str) -> None:
        """Prefix ``message`` with ``"Streaming error: "``.

        Args:
            message: Description of the streaming failure.
        """
        detail = f"Streaming error: {message}"
        super().__init__(detail)
|