perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +24 -3
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +21 -53
- perplexity_webui_scraper/config.py +12 -29
- perplexity_webui_scraper/constants.py +13 -51
- perplexity_webui_scraper/core.py +18 -154
- perplexity_webui_scraper/enums.py +26 -88
- perplexity_webui_scraper/exceptions.py +29 -50
- perplexity_webui_scraper/http.py +39 -332
- perplexity_webui_scraper/limits.py +6 -16
- perplexity_webui_scraper/logging.py +23 -180
- perplexity_webui_scraper/mcp/__init__.py +2 -8
- perplexity_webui_scraper/mcp/__main__.py +1 -3
- perplexity_webui_scraper/mcp/server.py +105 -82
- perplexity_webui_scraper/models.py +27 -71
- perplexity_webui_scraper/resilience.py +17 -100
- perplexity_webui_scraper/types.py +18 -25
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/METADATA +120 -101
- perplexity_webui_scraper-0.4.0.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/WHEEL +1 -1
- perplexity_webui_scraper-0.3.7.dist-info/RECORD +0 -21
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/entry_points.txt +0 -0
perplexity_webui_scraper/core.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Core client implementation.
|
|
3
|
-
"""
|
|
1
|
+
"""Core client implementation."""
|
|
4
2
|
|
|
5
3
|
from __future__ import annotations
|
|
6
4
|
|
|
@@ -33,7 +31,7 @@ from .enums import CitationMode
|
|
|
33
31
|
from .exceptions import FileUploadError, FileValidationError, ResearchClarifyingQuestionsError, ResponseParsingError
|
|
34
32
|
from .http import HTTPClient
|
|
35
33
|
from .limits import MAX_FILE_SIZE, MAX_FILES
|
|
36
|
-
from .logging import configure_logging, get_logger
|
|
34
|
+
from .logging import configure_logging, get_logger
|
|
37
35
|
from .models import Model, Models
|
|
38
36
|
from .types import Response, SearchResultItem, _FileInfo
|
|
39
37
|
|
|
@@ -47,43 +45,14 @@ class Perplexity:
|
|
|
47
45
|
__slots__ = ("_http",)
|
|
48
46
|
|
|
49
47
|
def __init__(self, session_token: str, config: ClientConfig | None = None) -> None:
|
|
50
|
-
"""
|
|
51
|
-
Initialize web scraper with session token.
|
|
52
|
-
|
|
53
|
-
Args:
|
|
54
|
-
session_token: Perplexity session cookie (__Secure-next-auth.session-token).
|
|
55
|
-
config: Optional HTTP client configuration.
|
|
56
|
-
|
|
57
|
-
Raises:
|
|
58
|
-
ValueError: If session_token is empty or whitespace.
|
|
59
|
-
"""
|
|
48
|
+
"""Initialize with session token."""
|
|
60
49
|
|
|
61
50
|
if not session_token or not session_token.strip():
|
|
62
51
|
raise ValueError("session_token cannot be empty")
|
|
63
52
|
|
|
64
53
|
cfg = config or ClientConfig()
|
|
65
|
-
|
|
66
|
-
# Configure logging based on config
|
|
67
54
|
configure_logging(level=cfg.logging_level, log_file=cfg.log_file)
|
|
68
55
|
|
|
69
|
-
logger.info(
|
|
70
|
-
"Perplexity client initializing | "
|
|
71
|
-
f"session_token_length={len(session_token)} "
|
|
72
|
-
f"logging_level={cfg.logging_level.value} "
|
|
73
|
-
f"log_file={cfg.log_file}"
|
|
74
|
-
)
|
|
75
|
-
logger.debug(
|
|
76
|
-
"Client configuration | "
|
|
77
|
-
f"timeout={cfg.timeout}s "
|
|
78
|
-
f"impersonate={cfg.impersonate} "
|
|
79
|
-
f"max_retries={cfg.max_retries} "
|
|
80
|
-
f"retry_base_delay={cfg.retry_base_delay}s "
|
|
81
|
-
f"retry_max_delay={cfg.retry_max_delay}s "
|
|
82
|
-
f"retry_jitter={cfg.retry_jitter} "
|
|
83
|
-
f"requests_per_second={cfg.requests_per_second} "
|
|
84
|
-
f"rotate_fingerprint={cfg.rotate_fingerprint}"
|
|
85
|
-
)
|
|
86
|
-
|
|
87
56
|
self._http = HTTPClient(
|
|
88
57
|
session_token,
|
|
89
58
|
timeout=cfg.timeout,
|
|
@@ -96,37 +65,17 @@ class Perplexity:
|
|
|
96
65
|
rotate_fingerprint=cfg.rotate_fingerprint,
|
|
97
66
|
)
|
|
98
67
|
|
|
99
|
-
logger.info("Perplexity client initialized
|
|
68
|
+
logger.info("Perplexity client initialized")
|
|
100
69
|
|
|
101
70
|
def create_conversation(self, config: ConversationConfig | None = None) -> Conversation:
|
|
102
71
|
"""Create a new conversation."""
|
|
103
72
|
|
|
104
|
-
|
|
105
|
-
logger.debug(
|
|
106
|
-
"Creating conversation | "
|
|
107
|
-
f"model={cfg.model} "
|
|
108
|
-
f"citation_mode={cfg.citation_mode} "
|
|
109
|
-
f"save_to_library={cfg.save_to_library} "
|
|
110
|
-
f"search_focus={cfg.search_focus} "
|
|
111
|
-
f"language={cfg.language}"
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
conversation = Conversation(self._http, cfg)
|
|
115
|
-
|
|
116
|
-
log_conversation_created(
|
|
117
|
-
f"model={cfg.model}, citation_mode={cfg.citation_mode}, "
|
|
118
|
-
f"search_focus={cfg.search_focus}, language={cfg.language}"
|
|
119
|
-
)
|
|
120
|
-
logger.info("Conversation created successfully")
|
|
121
|
-
|
|
122
|
-
return conversation
|
|
73
|
+
return Conversation(self._http, config or ConversationConfig())
|
|
123
74
|
|
|
124
75
|
def close(self) -> None:
|
|
125
76
|
"""Close the client."""
|
|
126
77
|
|
|
127
|
-
logger.debug("Closing Perplexity client")
|
|
128
78
|
self._http.close()
|
|
129
|
-
logger.info("Perplexity client closed")
|
|
130
79
|
|
|
131
80
|
def __enter__(self) -> Perplexity:
|
|
132
81
|
return self
|
|
@@ -153,13 +102,6 @@ class Conversation:
|
|
|
153
102
|
)
|
|
154
103
|
|
|
155
104
|
def __init__(self, http: HTTPClient, config: ConversationConfig) -> None:
|
|
156
|
-
logger.debug(
|
|
157
|
-
"Conversation.__init__ | "
|
|
158
|
-
f"model={config.model} "
|
|
159
|
-
f"citation_mode={config.citation_mode} "
|
|
160
|
-
f"save_to_library={config.save_to_library} "
|
|
161
|
-
f"search_focus={config.search_focus}"
|
|
162
|
-
)
|
|
163
105
|
self._http = http
|
|
164
106
|
self._config = config
|
|
165
107
|
self._citation_mode = CitationMode.DEFAULT
|
|
@@ -171,7 +113,6 @@ class Conversation:
|
|
|
171
113
|
self._search_results: list[SearchResultItem] = []
|
|
172
114
|
self._raw_data: dict[str, Any] = {}
|
|
173
115
|
self._stream_generator: Generator[Response, None, None] | None = None
|
|
174
|
-
logger.debug("Conversation initialized with empty state")
|
|
175
116
|
|
|
176
117
|
@property
|
|
177
118
|
def answer(self) -> str | None:
|
|
@@ -200,7 +141,6 @@ class Conversation:
|
|
|
200
141
|
def __iter__(self) -> Generator[Response, None, None]:
|
|
201
142
|
if self._stream_generator is not None:
|
|
202
143
|
yield from self._stream_generator
|
|
203
|
-
|
|
204
144
|
self._stream_generator = None
|
|
205
145
|
|
|
206
146
|
def ask(
|
|
@@ -213,29 +153,11 @@ class Conversation:
|
|
|
213
153
|
) -> Conversation:
|
|
214
154
|
"""Ask a question. Returns self for method chaining or streaming iteration."""
|
|
215
155
|
|
|
216
|
-
logger.info(
|
|
217
|
-
"Conversation.ask called | "
|
|
218
|
-
f"query_length={len(query)} "
|
|
219
|
-
f"query_preview={query[:100]}{'...' if len(query) > 100 else ''} "
|
|
220
|
-
f"model={model} "
|
|
221
|
-
f"files_count={len(files) if files else 0} "
|
|
222
|
-
f"citation_mode={citation_mode} "
|
|
223
|
-
f"stream={stream}"
|
|
224
|
-
)
|
|
225
|
-
|
|
226
156
|
effective_model = model or self._config.model or Models.BEST
|
|
227
157
|
effective_citation = citation_mode if citation_mode is not None else self._config.citation_mode
|
|
228
158
|
self._citation_mode = effective_citation
|
|
229
159
|
|
|
230
|
-
logger.debug(
|
|
231
|
-
f"Effective parameters | effective_model={effective_model} effective_citation={effective_citation}"
|
|
232
|
-
)
|
|
233
|
-
|
|
234
|
-
log_query_sent(query, str(effective_model), bool(files))
|
|
235
160
|
self._execute(query, effective_model, files, stream=stream)
|
|
236
|
-
|
|
237
|
-
logger.debug("Query execution completed")
|
|
238
|
-
|
|
239
161
|
return self
|
|
240
162
|
|
|
241
163
|
def _execute(
|
|
@@ -247,49 +169,20 @@ class Conversation:
|
|
|
247
169
|
) -> None:
|
|
248
170
|
"""Execute a query."""
|
|
249
171
|
|
|
250
|
-
logger.debug(
|
|
251
|
-
f"Executing query | "
|
|
252
|
-
f"query_length={len(query)} "
|
|
253
|
-
f"model={model} "
|
|
254
|
-
f"files_count={len(files) if files else 0} "
|
|
255
|
-
f"stream={stream} "
|
|
256
|
-
f"is_followup={self._backend_uuid is not None}"
|
|
257
|
-
)
|
|
258
|
-
|
|
259
172
|
self._reset_response_state()
|
|
260
|
-
logger.debug("Response state reset")
|
|
261
173
|
|
|
262
|
-
# Upload files
|
|
263
174
|
file_urls: list[str] = []
|
|
264
|
-
|
|
265
175
|
if files:
|
|
266
|
-
logger.debug(f"Validating {len(files)} files")
|
|
267
176
|
validated = self._validate_files(files)
|
|
268
|
-
logger.debug(f"Validated {len(validated)} files, uploading...")
|
|
269
177
|
file_urls = [self._upload_file(f) for f in validated]
|
|
270
|
-
logger.debug(f"Uploaded {len(file_urls)} files successfully")
|
|
271
178
|
|
|
272
179
|
payload = self._build_payload(query, model, file_urls)
|
|
273
|
-
logger.debug(
|
|
274
|
-
f"Payload built | payload_keys={list(payload.keys())} params_keys={list(payload.get('params', {}).keys())}"
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
logger.debug("Initializing search session")
|
|
278
180
|
self._http.init_search(query)
|
|
279
181
|
|
|
280
182
|
if stream:
|
|
281
|
-
logger.debug("Starting streaming mode")
|
|
282
183
|
self._stream_generator = self._stream(payload)
|
|
283
184
|
else:
|
|
284
|
-
logger.debug("Starting complete mode (non-streaming)")
|
|
285
185
|
self._complete(payload)
|
|
286
|
-
logger.debug(
|
|
287
|
-
f"Query completed | "
|
|
288
|
-
f"title={self._title} "
|
|
289
|
-
f"answer_length={len(self._answer) if self._answer else 0} "
|
|
290
|
-
f"chunks_count={len(self._chunks)} "
|
|
291
|
-
f"search_results_count={len(self._search_results)}"
|
|
292
|
-
)
|
|
293
186
|
|
|
294
187
|
def _reset_response_state(self) -> None:
|
|
295
188
|
self._title = None
|
|
@@ -309,7 +202,6 @@ class Conversation:
|
|
|
309
202
|
for item in files:
|
|
310
203
|
if item and isinstance(item, (str, PathLike)):
|
|
311
204
|
path = Path(item).resolve()
|
|
312
|
-
|
|
313
205
|
if path.as_posix() not in seen:
|
|
314
206
|
seen.add(path.as_posix())
|
|
315
207
|
file_list.append(path)
|
|
@@ -338,7 +230,6 @@ class Conversation:
|
|
|
338
230
|
file_path,
|
|
339
231
|
f"File exceeds 50MB limit: {file_size / (1024 * 1024):.1f}MB",
|
|
340
232
|
)
|
|
341
|
-
|
|
342
233
|
if file_size == 0:
|
|
343
234
|
raise FileValidationError(file_path, "File is empty")
|
|
344
235
|
|
|
@@ -388,18 +279,13 @@ class Conversation:
|
|
|
388
279
|
|
|
389
280
|
if not s3_object_url:
|
|
390
281
|
raise FileUploadError(file_info.path, "No upload URL returned")
|
|
391
|
-
|
|
392
282
|
if not s3_bucket_url or not fields:
|
|
393
283
|
raise FileUploadError(file_info.path, "Missing S3 upload credentials")
|
|
394
284
|
|
|
395
|
-
# Upload the file to S3 using presigned POST
|
|
396
285
|
file_path = Path(file_info.path)
|
|
397
|
-
|
|
398
286
|
with file_path.open("rb") as f:
|
|
399
287
|
file_content = f.read()
|
|
400
288
|
|
|
401
|
-
# Build multipart form data using CurlMime
|
|
402
|
-
# For S3 presigned POST, form fields must come before the file
|
|
403
289
|
mime = CurlMime()
|
|
404
290
|
|
|
405
291
|
for field_name, field_value in fields.items():
|
|
@@ -412,7 +298,6 @@ class Conversation:
|
|
|
412
298
|
data=file_content,
|
|
413
299
|
)
|
|
414
300
|
|
|
415
|
-
# S3 requires a clean session
|
|
416
301
|
with Session() as s3_session:
|
|
417
302
|
upload_response = s3_session.post(s3_bucket_url, multipart=mime)
|
|
418
303
|
|
|
@@ -471,7 +356,6 @@ class Conversation:
|
|
|
471
356
|
if self._backend_uuid is not None:
|
|
472
357
|
params["last_backend_uuid"] = self._backend_uuid
|
|
473
358
|
params["query_source"] = "followup"
|
|
474
|
-
|
|
475
359
|
if self._read_write_token:
|
|
476
360
|
params["read_write_token"] = self._read_write_token
|
|
477
361
|
|
|
@@ -483,7 +367,6 @@ class Conversation:
|
|
|
483
367
|
|
|
484
368
|
def replacer(m: Match[str]) -> str:
|
|
485
369
|
num = m.group(1)
|
|
486
|
-
|
|
487
370
|
if not num.isdigit():
|
|
488
371
|
return m.group(0)
|
|
489
372
|
|
|
@@ -491,10 +374,8 @@ class Conversation:
|
|
|
491
374
|
return ""
|
|
492
375
|
|
|
493
376
|
idx = int(num) - 1
|
|
494
|
-
|
|
495
377
|
if 0 <= idx < len(self._search_results):
|
|
496
378
|
url = self._search_results[idx].url or ""
|
|
497
|
-
|
|
498
379
|
if self._citation_mode == CitationMode.MARKDOWN and url:
|
|
499
380
|
return f"[{num}]({url})"
|
|
500
381
|
|
|
@@ -505,7 +386,6 @@ class Conversation:
|
|
|
505
386
|
def _parse_line(self, line: str | bytes) -> dict[str, Any] | None:
|
|
506
387
|
if isinstance(line, bytes) and line.startswith(b"data: "):
|
|
507
388
|
return loads(line[6:])
|
|
508
|
-
|
|
509
389
|
if isinstance(line, str) and line.startswith("data: "):
|
|
510
390
|
return loads(line[6:])
|
|
511
391
|
|
|
@@ -514,24 +394,15 @@ class Conversation:
|
|
|
514
394
|
def _process_data(self, data: dict[str, Any]) -> None:
|
|
515
395
|
"""Process SSE data chunk and update conversation state."""
|
|
516
396
|
|
|
517
|
-
if
|
|
397
|
+
if "backend_uuid" in data:
|
|
518
398
|
self._backend_uuid = data["backend_uuid"]
|
|
519
399
|
|
|
520
|
-
if
|
|
400
|
+
if "read_write_token" in data:
|
|
521
401
|
self._read_write_token = data["read_write_token"]
|
|
522
402
|
|
|
523
|
-
if
|
|
403
|
+
if data.get("thread_title"):
|
|
524
404
|
self._title = data["thread_title"]
|
|
525
405
|
|
|
526
|
-
if "blocks" in data:
|
|
527
|
-
for block in data["blocks"]:
|
|
528
|
-
if block.get("intended_usage") == "web_results":
|
|
529
|
-
diff = block.get("diff_block", {})
|
|
530
|
-
|
|
531
|
-
for patch in diff.get("patches", []):
|
|
532
|
-
if patch.get("op") == "replace" and patch.get("path") == "/web_results":
|
|
533
|
-
pass
|
|
534
|
-
|
|
535
406
|
if "text" not in data and "blocks" not in data:
|
|
536
407
|
return None
|
|
537
408
|
|
|
@@ -548,10 +419,8 @@ class Conversation:
|
|
|
548
419
|
for item in json_data:
|
|
549
420
|
step_type = item.get("step_type")
|
|
550
421
|
|
|
551
|
-
# Handle Research mode clarifying questions
|
|
552
422
|
if step_type == "RESEARCH_CLARIFYING_QUESTIONS":
|
|
553
423
|
questions = self._extract_clarifying_questions(item)
|
|
554
|
-
|
|
555
424
|
raise ResearchClarifyingQuestionsError(questions)
|
|
556
425
|
|
|
557
426
|
if step_type == "FINAL":
|
|
@@ -563,11 +432,14 @@ class Conversation:
|
|
|
563
432
|
else:
|
|
564
433
|
answer_data = raw_content
|
|
565
434
|
|
|
566
|
-
|
|
567
|
-
|
|
435
|
+
title = data.get("thread_title") or answer_data.get("thread_title")
|
|
436
|
+
self._update_state(title, answer_data)
|
|
568
437
|
break
|
|
438
|
+
|
|
569
439
|
elif isinstance(json_data, dict):
|
|
570
|
-
|
|
440
|
+
title = data.get("thread_title") or json_data.get("thread_title")
|
|
441
|
+
self._update_state(title, json_data)
|
|
442
|
+
|
|
571
443
|
else:
|
|
572
444
|
raise ResponseParsingError(
|
|
573
445
|
"Unexpected JSON structure in 'text' field",
|
|
@@ -580,34 +452,33 @@ class Conversation:
|
|
|
580
452
|
questions: list[str] = []
|
|
581
453
|
content = item.get("content", {})
|
|
582
454
|
|
|
583
|
-
# Try different possible structures for questions
|
|
584
455
|
if isinstance(content, dict):
|
|
585
456
|
if "questions" in content:
|
|
586
457
|
raw_questions = content["questions"]
|
|
587
|
-
|
|
588
458
|
if isinstance(raw_questions, list):
|
|
589
459
|
questions = [str(q) for q in raw_questions if q]
|
|
590
460
|
elif "clarifying_questions" in content:
|
|
591
461
|
raw_questions = content["clarifying_questions"]
|
|
592
|
-
|
|
593
462
|
if isinstance(raw_questions, list):
|
|
594
463
|
questions = [str(q) for q in raw_questions if q]
|
|
595
464
|
elif not questions:
|
|
596
465
|
for value in content.values():
|
|
597
466
|
if isinstance(value, str) and "?" in value:
|
|
598
467
|
questions.append(value)
|
|
468
|
+
|
|
599
469
|
elif isinstance(content, list):
|
|
600
470
|
questions = [str(q) for q in content if q]
|
|
471
|
+
|
|
601
472
|
elif isinstance(content, str):
|
|
602
473
|
questions = [content]
|
|
603
474
|
|
|
604
475
|
return questions
|
|
605
476
|
|
|
606
477
|
def _update_state(self, title: str | None, answer_data: dict[str, Any]) -> None:
|
|
607
|
-
|
|
478
|
+
if title is not None:
|
|
479
|
+
self._title = title
|
|
608
480
|
|
|
609
481
|
web_results = answer_data.get("web_results", [])
|
|
610
|
-
|
|
611
482
|
if web_results:
|
|
612
483
|
self._search_results = [
|
|
613
484
|
SearchResultItem(
|
|
@@ -620,12 +491,10 @@ class Conversation:
|
|
|
620
491
|
]
|
|
621
492
|
|
|
622
493
|
answer_text = answer_data.get("answer")
|
|
623
|
-
|
|
624
494
|
if answer_text is not None:
|
|
625
495
|
self._answer = self._format_citations(answer_text)
|
|
626
496
|
|
|
627
497
|
chunks = answer_data.get("chunks", [])
|
|
628
|
-
|
|
629
498
|
if chunks:
|
|
630
499
|
formatted = [self._format_citations(chunk) for chunk in chunks if chunk is not None]
|
|
631
500
|
self._chunks = [c for c in formatted if c is not None]
|
|
@@ -646,21 +515,16 @@ class Conversation:
|
|
|
646
515
|
def _complete(self, payload: dict[str, Any]) -> None:
|
|
647
516
|
for line in self._http.stream_ask(payload):
|
|
648
517
|
data = self._parse_line(line)
|
|
649
|
-
|
|
650
518
|
if data:
|
|
651
519
|
self._process_data(data)
|
|
652
|
-
|
|
653
520
|
if data.get("final"):
|
|
654
521
|
break
|
|
655
522
|
|
|
656
523
|
def _stream(self, payload: dict[str, Any]) -> Generator[Response, None, None]:
|
|
657
524
|
for line in self._http.stream_ask(payload):
|
|
658
525
|
data = self._parse_line(line)
|
|
659
|
-
|
|
660
526
|
if data:
|
|
661
527
|
self._process_data(data)
|
|
662
|
-
|
|
663
528
|
yield self._build_response()
|
|
664
|
-
|
|
665
529
|
if data.get("final"):
|
|
666
530
|
break
|
|
perplexity_webui_scraper/enums.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Enums for Perplexity WebUI Scraper configuration options.
|
|
3
|
-
"""
|
|
1
|
+
"""Enums for configuration options."""
|
|
4
2
|
|
|
5
3
|
from __future__ import annotations
|
|
6
4
|
|
|
@@ -8,140 +6,80 @@ from enum import Enum
|
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
class CitationMode(str, Enum):
|
|
11
|
-
"""
|
|
12
|
-
Citation formatting modes for response text.
|
|
13
|
-
|
|
14
|
-
Controls how citation markers (e.g., [1], [2]) are formatted in the response.
|
|
15
|
-
"""
|
|
9
|
+
"""Citation formatting modes for response text."""
|
|
16
10
|
|
|
17
11
|
DEFAULT = "default"
|
|
18
|
-
"""
|
|
19
|
-
Keep original Perplexity citation format (e.g., 'This is a citation[1]').
|
|
20
|
-
"""
|
|
12
|
+
"""Keep original format (e.g., 'text[1]')."""
|
|
21
13
|
|
|
22
14
|
MARKDOWN = "markdown"
|
|
23
|
-
"""
|
|
24
|
-
Convert citations to markdown links (e.g., 'This is a citation[1](https://example.com)').
|
|
25
|
-
"""
|
|
15
|
+
"""Convert to markdown links (e.g., 'text[1](url)')."""
|
|
26
16
|
|
|
27
17
|
CLEAN = "clean"
|
|
28
|
-
"""
|
|
29
|
-
Remove all citation markers (e.g., 'This is a citation').
|
|
30
|
-
"""
|
|
18
|
+
"""Remove all citation markers."""
|
|
31
19
|
|
|
32
20
|
|
|
33
21
|
class SearchFocus(str, Enum):
|
|
34
|
-
"""
|
|
35
|
-
Search focus types that control the type of search performed.
|
|
36
|
-
|
|
37
|
-
Determines whether to search the web or focus on writing tasks.
|
|
38
|
-
"""
|
|
22
|
+
"""Search focus types."""
|
|
39
23
|
|
|
40
24
|
WEB = "internet"
|
|
41
|
-
"""
|
|
42
|
-
Search the web for information. Best for factual queries and research.
|
|
43
|
-
"""
|
|
25
|
+
"""Search the web for information."""
|
|
44
26
|
|
|
45
27
|
WRITING = "writing"
|
|
46
|
-
"""
|
|
47
|
-
Focus on writing tasks. Best for creative writing, editing, and text generation.
|
|
48
|
-
"""
|
|
28
|
+
"""Focus on writing tasks."""
|
|
49
29
|
|
|
50
30
|
|
|
51
31
|
class SourceFocus(str, Enum):
|
|
52
|
-
"""
|
|
53
|
-
Source focus types that control which sources to prioritize.
|
|
54
|
-
|
|
55
|
-
Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
|
|
56
|
-
"""
|
|
32
|
+
"""Source focus types for search prioritization."""
|
|
57
33
|
|
|
58
34
|
WEB = "web"
|
|
59
|
-
"""
|
|
60
|
-
Search across the entire internet. General web search.
|
|
61
|
-
"""
|
|
35
|
+
"""General web search."""
|
|
62
36
|
|
|
63
37
|
ACADEMIC = "scholar"
|
|
64
|
-
"""
|
|
65
|
-
Search academic papers and scholarly articles (Google Scholar, etc.).
|
|
66
|
-
"""
|
|
38
|
+
"""Academic papers and scholarly articles."""
|
|
67
39
|
|
|
68
40
|
SOCIAL = "social"
|
|
69
|
-
"""
|
|
70
|
-
Search social media for discussions and opinions (Reddit, Twitter, etc.).
|
|
71
|
-
"""
|
|
41
|
+
"""Social media (Reddit, Twitter, etc.)."""
|
|
72
42
|
|
|
73
43
|
FINANCE = "edgar"
|
|
74
|
-
"""
|
|
75
|
-
Search SEC EDGAR filings for financial and corporate documents.
|
|
76
|
-
"""
|
|
44
|
+
"""SEC EDGAR filings."""
|
|
77
45
|
|
|
78
46
|
|
|
79
47
|
class TimeRange(str, Enum):
|
|
80
|
-
"""
|
|
81
|
-
Time range filters for search results.
|
|
82
|
-
|
|
83
|
-
Controls how recent the sources should be.
|
|
84
|
-
"""
|
|
48
|
+
"""Time range filters for search results."""
|
|
85
49
|
|
|
86
50
|
ALL = ""
|
|
87
|
-
"""
|
|
88
|
-
Include sources from all time. No time restriction.
|
|
89
|
-
"""
|
|
51
|
+
"""No time restriction."""
|
|
90
52
|
|
|
91
53
|
TODAY = "DAY"
|
|
92
|
-
"""
|
|
93
|
-
Include only sources from today (last 24 hours).
|
|
94
|
-
"""
|
|
54
|
+
"""Last 24 hours."""
|
|
95
55
|
|
|
96
56
|
LAST_WEEK = "WEEK"
|
|
97
|
-
"""
|
|
98
|
-
Include sources from the last 7 days.
|
|
99
|
-
"""
|
|
57
|
+
"""Last 7 days."""
|
|
100
58
|
|
|
101
59
|
LAST_MONTH = "MONTH"
|
|
102
|
-
"""
|
|
103
|
-
Include sources from the last 30 days.
|
|
104
|
-
"""
|
|
60
|
+
"""Last 30 days."""
|
|
105
61
|
|
|
106
62
|
LAST_YEAR = "YEAR"
|
|
107
|
-
"""
|
|
108
|
-
Include sources from the last 365 days.
|
|
109
|
-
"""
|
|
63
|
+
"""Last 365 days."""
|
|
110
64
|
|
|
111
65
|
|
|
112
66
|
class LogLevel(str, Enum):
|
|
113
|
-
"""
|
|
114
|
-
Logging level configuration.
|
|
115
|
-
|
|
116
|
-
Controls the verbosity of logging output. DISABLED is the default.
|
|
117
|
-
"""
|
|
67
|
+
"""Logging level configuration."""
|
|
118
68
|
|
|
119
69
|
DISABLED = "DISABLED"
|
|
120
|
-
"""
|
|
121
|
-
Completely disable all logging output. This is the default.
|
|
122
|
-
"""
|
|
70
|
+
"""Disable all logging (default)."""
|
|
123
71
|
|
|
124
72
|
DEBUG = "DEBUG"
|
|
125
|
-
"""
|
|
126
|
-
Show all messages including internal debug information.
|
|
127
|
-
"""
|
|
73
|
+
"""Show all messages including debug info."""
|
|
128
74
|
|
|
129
75
|
INFO = "INFO"
|
|
130
|
-
"""
|
|
131
|
-
Show informational messages, warnings, and errors.
|
|
132
|
-
"""
|
|
76
|
+
"""Show info, warnings, and errors."""
|
|
133
77
|
|
|
134
78
|
WARNING = "WARNING"
|
|
135
|
-
"""
|
|
136
|
-
Show only warnings and errors.
|
|
137
|
-
"""
|
|
79
|
+
"""Show warnings and errors only."""
|
|
138
80
|
|
|
139
81
|
ERROR = "ERROR"
|
|
140
|
-
"""
|
|
141
|
-
Show only error messages.
|
|
142
|
-
"""
|
|
82
|
+
"""Show errors only."""
|
|
143
83
|
|
|
144
84
|
CRITICAL = "CRITICAL"
|
|
145
|
-
"""
|
|
146
|
-
Show only critical/fatal errors.
|
|
147
|
-
"""
|
|
85
|
+
"""Show critical/fatal errors only."""
|