perplexity-webui-scraper 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/PKG-INFO +5 -5
  2. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/pyproject.toml +6 -5
  3. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/__init__.py +3 -1
  4. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/cli/get_perplexity_session_token.py +24 -8
  5. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/config.py +6 -2
  6. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/constants.py +30 -10
  7. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/core.py +58 -13
  8. perplexity_webui_scraper-0.3.7/src/perplexity_webui_scraper/enums.py +147 -0
  9. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/exceptions.py +3 -1
  10. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/http.py +8 -3
  11. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/limits.py +12 -4
  12. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/logging.py +36 -14
  13. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/mcp/__init__.py +3 -1
  14. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/mcp/__main__.py +3 -1
  15. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/mcp/server.py +15 -30
  16. perplexity_webui_scraper-0.3.7/src/perplexity_webui_scraper/models.py +109 -0
  17. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/resilience.py +3 -1
  18. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/types.py +15 -5
  19. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/enums.py +0 -105
  20. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/models.py +0 -73
  21. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/README.md +0 -0
  22. {perplexity_webui_scraper-0.3.5 → perplexity_webui_scraper-0.3.7}/src/perplexity_webui_scraper/py.typed +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: perplexity-webui-scraper
- Version: 0.3.5
+ Version: 0.3.7
  Summary: Python scraper to extract AI responses from Perplexity's web interface.
  Keywords: perplexity,ai,scraper,webui,api,client
  Author: henrique-coder
@@ -24,13 +24,13 @@ Requires-Dist: loguru>=0.7.3
  Requires-Dist: orjson>=3.11.5
  Requires-Dist: pydantic>=2.12.5
  Requires-Dist: tenacity>=9.1.2
- Requires-Dist: fastmcp>=2.14.1 ; extra == 'mcp'
+ Requires-Dist: fastmcp>=2.14.4 ; extra == 'mcp'
  Requires-Python: >=3.10, <3.15
- Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
- Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
  Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
- Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
+ Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
  Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
+ Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
+ Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
  Provides-Extra: mcp
  Description-Content-Type: text/markdown
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "perplexity-webui-scraper"
- version = "0.3.5"
+ version = "0.3.7"
  description = "Python scraper to extract AI responses from Perplexity's web interface."
  authors = [{ name = "henrique-coder", email = "henriquemoreira10fk@gmail.com" }]
  license = "MIT"
@@ -35,11 +35,13 @@ dev = [
  "beautifulsoup4>=4.14.3",
  "jsbeautifier>=1.15.4",
  "lxml>=6.0.2",
+ "prek>=0.3.0",
  "python-dotenv>=1.2.1",
- "rich>=14.2.0",
+ "rich>=14.3.1",
  ]
  lint = [
- "ruff>=0.14.10",
+ "ruff>=0.14.14",
+ "ty>=0.0.13",
  ]
  tests = [
  "pytest>=9.0.2",
@@ -47,7 +49,7 @@ tests = [

  [project.optional-dependencies]
  mcp = [
- "fastmcp>=2.14.1",
+ "fastmcp>=2.14.4",
  ]

  [project.urls]
@@ -90,7 +92,6 @@ fixable = ["ALL"] # Allow auto-fix for all enabled rules
  dummy-variable-rgx = "^_$" # Only underscore is considered a dummy variable

  [tool.ruff.lint.isort]
- known-first-party = ["perplexity_webui_scraper"]
  lines-after-imports = 2 # PEP 8: two blank lines after imports
  force-sort-within-sections = true # Alphabetical order within each section
  split-on-trailing-comma = true # Trailing comma triggers multi-line format
src/perplexity_webui_scraper/__init__.py
@@ -1,4 +1,6 @@
- """Extract AI responses from Perplexity's web interface."""
+ """
+ Extract AI responses from Perplexity's web interface.
+ """

  from importlib import metadata

src/perplexity_webui_scraper/cli/get_perplexity_session_token.py
@@ -1,4 +1,6 @@
- """CLI utility for secure Perplexity authentication and session extraction."""
+ """
+ CLI utility for secure Perplexity authentication and session extraction.
+ """

  from __future__ import annotations

@@ -57,7 +59,9 @@ def update_env(token: str) -> bool:


  def _initialize_session() -> tuple[Session, str]:
- """Initialize session and obtain CSRF token."""
+ """
+ Initialize session and obtain CSRF token.
+ """

  session = Session(impersonate="chrome", headers={"Referer": BASE_URL, "Origin": BASE_URL})

@@ -73,7 +77,9 @@


  def _request_verification_code(session: Session, csrf: str, email: str) -> None:
- """Send verification code to user's email."""
+ """
+ Send verification code to user's email.
+ """

  with console.status("[bold green]Sending verification code...", spinner="dots"):
  r = session.post(
@@ -92,7 +98,9 @@ def _request_verification_code(session: Session, csrf: str, email: str) -> None:


  def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
- """Validate user input (OTP or magic link) and return redirect URL."""
+ """
+ Validate user input (OTP or magic link) and return redirect URL.
+ """

  with console.status("[bold green]Validating...", spinner="dots"):
  if user_input.startswith("http"):
@@ -120,7 +128,9 @@ def _validate_and_get_redirect_url(session: Session, email: str, user_input: str


  def _extract_session_token(session: Session, redirect_url: str) -> str:
- """Extract session token from cookies after authentication."""
+ """
+ Extract session token from cookies after authentication.
+ """

  session.get(redirect_url)
  token = session.cookies.get("__Secure-next-auth.session-token")
@@ -132,7 +142,9 @@ def _extract_session_token(session: Session, redirect_url: str) -> str:


  def _display_and_save_token(token: str) -> None:
- """Display token and optionally save to .env file."""
+ """
+ Display token and optionally save to .env file.
+ """

  console.print("\n[bold green]✅ Token generated successfully![/bold green]")
  console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")
@@ -147,7 +159,9 @@ def _display_and_save_token(token: str) -> None:


  def _show_header() -> None:
- """Display welcome header."""
+ """
+ Display welcome header.
+ """

  console.print(
  Panel(
@@ -161,7 +175,9 @@ def _show_header() -> None:


  def _show_exit_message() -> None:
- """Display security note and wait for user to exit."""
+ """
+ Display security note and wait for user to exit.
+ """

  console.print("\n[bold yellow]⚠️ Security Note:[/bold yellow]")
  console.print("Press [bold white]ENTER[/bold white] to clear screen and exit.")
src/perplexity_webui_scraper/config.py
@@ -1,4 +1,6 @@
- """Configuration classes."""
+ """
+ Configuration classes.
+ """

  from __future__ import annotations

@@ -17,7 +19,9 @@ if TYPE_CHECKING:

  @dataclass(slots=True)
  class ConversationConfig:
- """Default settings for a conversation. Can be overridden per message."""
+ """
+ Default settings for a conversation. Can be overridden per message.
+ """

  model: Model | None = None
  citation_mode: CitationMode = CitationMode.CLEAN
src/perplexity_webui_scraper/constants.py
@@ -1,4 +1,6 @@
- """Constants and values for the Perplexity internal API and HTTP interactions."""
+ """
+ Constants and values for the Perplexity internal API and HTTP interactions.
+ """

  from __future__ import annotations

@@ -8,20 +10,30 @@ from typing import Final

  # API Configuration
  API_VERSION: Final[str] = "2.18"
- """Current API version used by Perplexity WebUI."""
+ """
+ Current API version used by Perplexity WebUI.
+ """

  API_BASE_URL: Final[str] = "https://www.perplexity.ai"
- """Base URL for all API requests."""
+ """
+ Base URL for all API requests.
+ """

  # API Endpoints
  ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
- """SSE endpoint for sending prompts."""
+ """
+ SSE endpoint for sending prompts.
+ """

  ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
- """Endpoint to initialize a search session."""
+ """
+ Endpoint to initialize a search session.
+ """

  ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
- """Endpoint for file upload URL generation."""
+ """
+ Endpoint for file upload URL generation.
+ """

  # API Fixed Parameters
  SEND_BACK_TEXT: Final[bool] = True
@@ -33,10 +45,14 @@ False = API sends delta chunks only (accumulate mode).
  """

  USE_SCHEMATIZED_API: Final[bool] = False
- """Whether to use the schematized API format."""
+ """
+ Whether to use the schematized API format.
+ """

  PROMPT_SOURCE: Final[str] = "user"
- """Source identifier for prompts."""
+ """
+ Source identifier for prompts.
+ """

  # Regex Patterns (Pre-compiled for performance in streaming parsing)
  CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
@@ -47,7 +63,9 @@ Uses word boundary to avoid matching things like [123].
  """

  JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
- """Pattern to detect JSON object strings."""
+ """
+ Pattern to detect JSON object strings.
+ """

  # HTTP Headers
  DEFAULT_HEADERS: Final[dict[str, str]] = {
@@ -61,4 +79,6 @@ Referer and Origin are added dynamically based on BASE_URL.
  """

  SESSION_COOKIE_NAME: Final[str] = "__Secure-next-auth.session-token"
- """Name of the session cookie used for authentication."""
+ """
+ Name of the session cookie used for authentication.
+ """
src/perplexity_webui_scraper/core.py
@@ -1,4 +1,6 @@
- """Core client implementation."""
+ """
+ Core client implementation.
+ """

  from __future__ import annotations

@@ -8,6 +10,8 @@ from pathlib import Path
  from typing import TYPE_CHECKING, Any
  from uuid import uuid4

+ from curl_cffi import CurlMime
+ from curl_cffi.requests import Session
  from orjson import JSONDecodeError, loads


@@ -376,16 +380,55 @@ class Conversation:
  try:
  response = self._http.post(ENDPOINT_UPLOAD, json=json_data)
  response_data = response.json()
- upload_url = response_data.get("results", {}).get(file_uuid, {}).get("s3_object_url")
+ result = response_data.get("results", {}).get(file_uuid, {})

- if not upload_url:
+ s3_bucket_url = result.get("s3_bucket_url")
+ s3_object_url = result.get("s3_object_url")
+ fields = result.get("fields", {})
+
+ if not s3_object_url:
  raise FileUploadError(file_info.path, "No upload URL returned")

- return upload_url
+ if not s3_bucket_url or not fields:
+ raise FileUploadError(file_info.path, "Missing S3 upload credentials")
+
+ # Upload the file to S3 using presigned POST
+ file_path = Path(file_info.path)
+
+ with file_path.open("rb") as f:
+ file_content = f.read()
+
+ # Build multipart form data using CurlMime
+ # For S3 presigned POST, form fields must come before the file
+ mime = CurlMime()
+
+ for field_name, field_value in fields.items():
+ mime.addpart(name=field_name, data=field_value)
+
+ mime.addpart(
+ name="file",
+ content_type=file_info.mimetype,
+ filename=file_path.name,
+ data=file_content,
+ )
+
+ # S3 requires a clean session
+ with Session() as s3_session:
+ upload_response = s3_session.post(s3_bucket_url, multipart=mime)
+
+ mime.close()
+
+ if upload_response.status_code not in (200, 201, 204):
+ raise FileUploadError(
+ file_info.path,
+ f"S3 upload failed with status {upload_response.status_code}: {upload_response.text}",
+ )
+
+ return s3_object_url
  except FileUploadError as error:
  raise error
- except Exception as e:
- raise FileUploadError(file_info.path, str(e)) from e
+ except Exception as error:
+ raise FileUploadError(file_info.path, str(error)) from error

  def _build_payload(
  self,
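The new upload path above switches from returning a bare URL to performing the S3 presigned POST itself with curl_cffi's CurlMime. Distilled into a standalone sketch (the function name and the generic RuntimeError are illustrative; the package code reads the file from file_info.path and raises FileUploadError):

```python
from curl_cffi import CurlMime
from curl_cffi.requests import Session


def upload_via_presigned_post(bucket_url: str, fields: dict[str, str], filename: str, payload: bytes, mimetype: str) -> None:
    # Every field returned with the presigned POST must be forwarded,
    # and the file part has to come after the form fields.
    mime = CurlMime()
    for name, value in fields.items():
        mime.addpart(name=name, data=value)
    mime.addpart(name="file", content_type=mimetype, filename=filename, data=payload)

    # A fresh Session keeps Perplexity cookies/headers out of the S3 request.
    with Session() as s3_session:
        response = s3_session.post(bucket_url, multipart=mime)
    mime.close()

    if response.status_code not in (200, 201, 204):
        raise RuntimeError(f"S3 upload failed with status {response.status_code}")
```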
src/perplexity_webui_scraper/core.py (continued)
@@ -460,9 +503,10 @@ class Conversation:
  return CITATION_PATTERN.sub(replacer, text)

  def _parse_line(self, line: str | bytes) -> dict[str, Any] | None:
- prefix = b"data: " if isinstance(line, bytes) else "data: "
+ if isinstance(line, bytes) and line.startswith(b"data: "):
+ return loads(line[6:])

- if (isinstance(line, bytes) and line.startswith(prefix)) or (isinstance(line, str) and line.startswith(prefix)):
+ if isinstance(line, str) and line.startswith("data: "):
  return loads(line[6:])

  return None
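The rewritten _parse_line drops the shared prefix variable and branches on the input type before slicing off the six-character "data: " prefix. An equivalent standalone version (assuming only that orjson is available, as it already is for the package):

```python
from orjson import loads


def parse_sse_line(line: str | bytes) -> dict | None:
    # Mirrors the simplified logic: handle bytes and str separately,
    # then JSON-decode whatever follows the "data: " prefix.
    if isinstance(line, bytes) and line.startswith(b"data: "):
        return loads(line[6:])
    if isinstance(line, str) and line.startswith("data: "):
        return loads(line[6:])
    return None


assert parse_sse_line(b'data: {"step": 1}') == {"step": 1}
assert parse_sse_line("event: ping") is None
```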
src/perplexity_webui_scraper/core.py (continued)
@@ -493,10 +537,10 @@ class Conversation:

  try:
  json_data = loads(data["text"])
- except KeyError as e:
- raise ValueError("Missing 'text' field in data") from e
- except JSONDecodeError as e:
- raise ValueError("Invalid JSON in 'text' field") from e
+ except KeyError as error:
+ raise ValueError("Missing 'text' field in data") from error
+ except JSONDecodeError as error:
+ raise ValueError("Invalid JSON in 'text' field") from error

  answer_data: dict[str, Any] = {}

@@ -583,7 +627,8 @@ class Conversation:
  chunks = answer_data.get("chunks", [])

  if chunks:
- self._chunks = [self._format_citations(chunk) for chunk in chunks]
+ formatted = [self._format_citations(chunk) for chunk in chunks if chunk is not None]
+ self._chunks = [c for c in formatted if c is not None]

  self._raw_data = answer_data
perplexity_webui_scraper-0.3.7/src/perplexity_webui_scraper/enums.py (new file)
@@ -0,0 +1,147 @@
+ """
+ Enums for Perplexity WebUI Scraper configuration options.
+ """
+
+ from __future__ import annotations
+
+ from enum import Enum
+
+
+ class CitationMode(str, Enum):
+ """
+ Citation formatting modes for response text.
+
+ Controls how citation markers (e.g., [1], [2]) are formatted in the response.
+ """
+
+ DEFAULT = "default"
+ """
+ Keep original Perplexity citation format (e.g., 'This is a citation[1]').
+ """
+
+ MARKDOWN = "markdown"
+ """
+ Convert citations to markdown links (e.g., 'This is a citation[1](https://example.com)').
+ """
+
+ CLEAN = "clean"
+ """
+ Remove all citation markers (e.g., 'This is a citation').
+ """
+
+
+ class SearchFocus(str, Enum):
+ """
+ Search focus types that control the type of search performed.
+
+ Determines whether to search the web or focus on writing tasks.
+ """
+
+ WEB = "internet"
+ """
+ Search the web for information. Best for factual queries and research.
+ """
+
+ WRITING = "writing"
+ """
+ Focus on writing tasks. Best for creative writing, editing, and text generation.
+ """
+
+
+ class SourceFocus(str, Enum):
+ """
+ Source focus types that control which sources to prioritize.
+
+ Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
+ """
+
+ WEB = "web"
+ """
+ Search across the entire internet. General web search.
+ """
+
+ ACADEMIC = "scholar"
+ """
+ Search academic papers and scholarly articles (Google Scholar, etc.).
+ """
+
+ SOCIAL = "social"
+ """
+ Search social media for discussions and opinions (Reddit, Twitter, etc.).
+ """
+
+ FINANCE = "edgar"
+ """
+ Search SEC EDGAR filings for financial and corporate documents.
+ """
+
+
+ class TimeRange(str, Enum):
+ """
+ Time range filters for search results.
+
+ Controls how recent the sources should be.
+ """
+
+ ALL = ""
+ """
+ Include sources from all time. No time restriction.
+ """
+
+ TODAY = "DAY"
+ """
+ Include only sources from today (last 24 hours).
+ """
+
+ LAST_WEEK = "WEEK"
+ """
+ Include sources from the last 7 days.
+ """
+
+ LAST_MONTH = "MONTH"
+ """
+ Include sources from the last 30 days.
+ """
+
+ LAST_YEAR = "YEAR"
+ """
+ Include sources from the last 365 days.
+ """
+
+
+ class LogLevel(str, Enum):
+ """
+ Logging level configuration.
+
+ Controls the verbosity of logging output. DISABLED is the default.
+ """
+
+ DISABLED = "DISABLED"
+ """
+ Completely disable all logging output. This is the default.
+ """
+
+ DEBUG = "DEBUG"
+ """
+ Show all messages including internal debug information.
+ """
+
+ INFO = "INFO"
+ """
+ Show informational messages, warnings, and errors.
+ """
+
+ WARNING = "WARNING"
+ """
+ Show only warnings and errors.
+ """
+
+ ERROR = "ERROR"
+ """
+ Show only error messages.
+ """
+
+ CRITICAL = "CRITICAL"
+ """
+ Show only critical/fatal errors.
+ """
src/perplexity_webui_scraper/exceptions.py
@@ -1,4 +1,6 @@
- """Custom exceptions for Perplexity WebUI Scraper."""
+ """
+ Custom exceptions for Perplexity WebUI Scraper.
+ """

  from __future__ import annotations

src/perplexity_webui_scraper/http.py
@@ -1,4 +1,6 @@
- """HTTP client wrapper for Perplexity API requests."""
+ """
+ HTTP client wrapper for Perplexity API requests.
+ """

  from __future__ import annotations

@@ -177,7 +179,9 @@ class HTTPClient:
  logger.debug(f"Browser fingerprint rotated successfully | new_profile={new_profile}")

  def _on_retry(self, retry_state: RetryCallState) -> None:
- """Callback executed before each retry attempt."""
+ """
+ Callback executed before each retry attempt.
+ """

  attempt = retry_state.attempt_number
  exception = retry_state.outcome.exception() if retry_state.outcome else None
@@ -257,7 +261,7 @@ class HTTPClient:
  logger.debug(f"Error has response | status_code={status_code}")

  # Check for Cloudflare before handling as regular 403
- if is_cloudflare_status(status_code):
+ if status_code is not None and is_cloudflare_status(status_code):
  logger.debug(f"Checking if error is Cloudflare challenge | status_code={status_code}")

  try:
@@ -431,6 +435,7 @@ class HTTPClient:
  response.raise_for_status()

  logger.debug(f"POST request successful | endpoint={endpoint}")
+
  return response
  except Exception as error:
  elapsed_ms = (monotonic() - request_start) * 1000
src/perplexity_webui_scraper/limits.py
@@ -1,4 +1,6 @@
- """Upload and request limits for Perplexity WebUI Scraper."""
+ """
+ Upload and request limits for Perplexity WebUI Scraper.
+ """

  from __future__ import annotations

@@ -7,11 +9,17 @@ from typing import Final

  # File Upload Limits
  MAX_FILES: Final[int] = 30
- """Maximum number of files that can be attached to a single prompt."""
+ """
+ Maximum number of files that can be attached to a single prompt.
+ """

  MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024 # 50 MB in bytes
- """Maximum file size in bytes."""
+ """
+ Maximum file size in bytes.
+ """

  # Request Limits
  DEFAULT_TIMEOUT: Final[int] = 30 * 60 # 30 minutes in seconds
- """Default request timeout in seconds"""
+ """
+ Default request timeout in seconds.
+ """
src/perplexity_webui_scraper/logging.py
@@ -120,7 +120,9 @@ def log_request(
  headers: dict[str, str] | None = None,
  body_size: int | None = None,
  ) -> None:
- """Log an outgoing HTTP request with full details."""
+ """
+ Log an outgoing HTTP request with full details.
+ """

  logger.debug(
  "HTTP request initiated | method={method} url={url} params={params} "
@@ -142,7 +144,9 @@ def log_response(
  content_length: int | None = None,
  headers: dict[str, str] | None = None,
  ) -> None:
- """Log an HTTP response with full details."""
+ """
+ Log an HTTP response with full details.
+ """

  level = "DEBUG" if status_code < 400 else "WARNING"
  logger.log(
@@ -160,24 +164,28 @@ def log_response(
  def log_retry(
  attempt: int,
  max_attempts: int,
- exception: Exception,
+ exception: BaseException | None,
  wait_seconds: float,
  ) -> None:
- """Log a retry attempt."""
+ """
+ Log a retry attempt.
+ """

  logger.warning(
  "Retry attempt | attempt={attempt}/{max_attempts} exception={exception_type}: {exception_msg} "
  "wait_seconds={wait_seconds:.2f}",
  attempt=attempt,
  max_attempts=max_attempts,
- exception_type=type(exception).__name__,
- exception_msg=str(exception),
+ exception_type=type(exception).__name__ if exception else "None",
+ exception_msg=str(exception) if exception else "None",
  wait_seconds=wait_seconds,
  )


  def log_cloudflare_detected(status_code: int, markers_found: list[str]) -> None:
- """Log Cloudflare challenge detection."""
+ """
+ Log Cloudflare challenge detection.
+ """

  logger.warning(
  "Cloudflare challenge detected | status_code={status_code} markers={markers}",
@@ -187,7 +195,9 @@ def log_cloudflare_detected(status_code: int, markers_found: list[str]) -> None:


  def log_fingerprint_rotation(old_profile: str, new_profile: str) -> None:
- """Log browser fingerprint rotation."""
+ """
+ Log browser fingerprint rotation.
+ """

  logger.info(
  "Browser fingerprint rotated | old_profile={old} new_profile={new}",
@@ -197,7 +207,9 @@ def log_fingerprint_rotation(old_profile: str, new_profile: str) -> None:


  def log_rate_limit(wait_seconds: float) -> None:
- """Log rate limiting wait."""
+ """
+ Log rate limiting wait.
+ """

  logger.debug(
  "Rate limiter throttling | wait_seconds={wait_seconds:.3f}",
@@ -206,7 +218,9 @@ def log_rate_limit(wait_seconds: float) -> None:


  def log_session_created(impersonate: str, timeout: int) -> None:
- """Log HTTP session creation."""
+ """
+ Log HTTP session creation.
+ """

  logger.info(
  "HTTP session created | browser_profile={profile} timeout={timeout}s",
@@ -216,7 +230,9 @@ def log_session_created(impersonate: str, timeout: int) -> None:


  def log_conversation_created(config_summary: str) -> None:
- """Log conversation creation."""
+ """
+ Log conversation creation.
+ """

  logger.info(
  "Conversation created | config={config}",
@@ -225,7 +241,9 @@ def log_conversation_created(config_summary: str) -> None:


  def log_query_sent(query: str, model: str, has_files: bool) -> None:
- """Log a query being sent."""
+ """
+ Log a query being sent.
+ """

  logger.info(
  "Query sent | model={model} has_files={has_files} query_preview={query_preview}",
@@ -236,7 +254,9 @@ def log_query_sent(query: str, model: str, has_files: bool) -> None:


  def log_stream_chunk(chunk_size: int, is_final: bool) -> None:
- """Log a streaming chunk received."""
+ """
+ Log a streaming chunk received.
+ """

  logger.debug(
  "Stream chunk received | size={size} is_final={is_final}",
@@ -246,7 +266,9 @@ def log_stream_chunk(chunk_size: int, is_final: bool) -> None:


  def log_error(error: Exception, context: str = "") -> None:
- """Log an error with full traceback."""
+ """
+ Log an error with full traceback.
+ """

  logger.exception(
  "Error occurred | context={context} error_type={error_type} message={message}",
src/perplexity_webui_scraper/mcp/__init__.py
@@ -11,7 +11,9 @@ __all__: list[str] = ["run_server"]


  def run_server() -> None:
- """Run the MCP server."""
+ """
+ Run the MCP server.
+ """

  from .server import main # noqa: PLC0415

src/perplexity_webui_scraper/mcp/__main__.py
@@ -1,4 +1,6 @@
- """CLI entry point for MCP server."""
+ """
+ CLI entry point for MCP server.
+ """

  from __future__ import annotations

src/perplexity_webui_scraper/mcp/server.py
@@ -1,4 +1,6 @@
- """MCP server implementation using FastMCP."""
+ """
+ MCP server implementation using FastMCP.
+ """

  from __future__ import annotations

@@ -45,7 +47,6 @@ MODEL_MAP = {
  "kimi_thinking": Models.KIMI_K2_THINKING,
  }

- # Available model names for type hints
  ModelName = Literal[
  "best",
  "research",
@@ -81,7 +82,9 @@ _client: Perplexity | None = None


  def _get_client() -> Perplexity:
- """Get or create Perplexity client."""
+ """
+ Get or create Perplexity client.
+ """

  global _client # noqa: PLW0603
  if _client is None:
@@ -108,35 +111,15 @@ def perplexity_ask(

  Returns up-to-date information from web sources. Use for factual queries, research,
  current events, news, library versions, documentation, or any question requiring
+ the latest information.

  Args:
- query: The search query or question to ask Perplexity AI.
- model: AI model to use. Options:
- - "best": Automatically selects optimal model (default)
- - "research": Fast and thorough for routine research
- - "labs": Multi-step tasks with advanced troubleshooting
- - "sonar": Perplexity's fast built-in model
- - "gpt52": OpenAI's GPT-5.2
- - "gpt52_thinking": GPT-5.2 with reasoning
- - "claude_opus": Anthropic's Claude Opus 4.5
- - "claude_opus_thinking": Claude Opus with reasoning
- - "claude_sonnet": Anthropic's Claude Sonnet 4.5
- - "claude_sonnet_thinking": Claude Sonnet with reasoning
- - "gemini_pro": Google's Gemini 3 Pro
- - "gemini_flash": Google's Gemini 3 Flash
- - "gemini_flash_thinking": Gemini Flash with reasoning
- - "grok": xAI's Grok 4.1
- - "grok_thinking": Grok with reasoning
- - "kimi_thinking": Moonshot's Kimi K2 with reasoning
- source_focus: Type of sources to prioritize:
- - "web": General web search (default)
- - "academic": Scholarly articles and papers
- - "social": Social media (Reddit, Twitter)
- - "finance": SEC EDGAR financial filings
- - "all": Combine web, academic, and social sources
+ query: The question to ask.
+ model: AI model to use.
+ source_focus: Type of sources to prioritize (web, academic, social, finance, all).

  Returns:
- AI-generated answer with inline citations [1][2] and a Citations section.
+ AI-generated answer with inline citations and a Citations section.
  """

  client = _get_client()
@@ -168,11 +151,13 @@ def perplexity_ask(

  return "".join(response_parts)
  except Exception as error:
- return f"Error searching Perplexity: {error!s}"
+ return f"Error: {error!s}"


  def main() -> None:
- """Run the MCP server."""
+ """
+ Run the MCP server.
+ """

  mcp.run()

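The trimmed perplexity_ask docstring no longer enumerates every model alias, but the mapping from short names to Model objects still lives in MODEL_MAP. A hypothetical, cut-down mirror of that lookup (only the "kimi_thinking" entry is visible in this diff; the other keys and the fallback to Models.BEST are assumptions for illustration):

```python
from perplexity_webui_scraper.models import Model, Models

# Illustrative subset of the server's MODEL_MAP (assumed keys except "kimi_thinking")
MODEL_MAP: dict[str, Model] = {
    "best": Models.BEST,
    "sonar": Models.SONAR,
    "kimi_thinking": Models.KIMI_K2_THINKING,
}


def resolve_model(name: str) -> Model:
    # Assumed fallback: unknown names resolve to the automatic "best" model
    return MODEL_MAP.get(name, Models.BEST)


assert resolve_model("sonar").identifier == "experimental"
assert resolve_model("unknown").identifier == "pplx_pro_upgraded"
```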
perplexity_webui_scraper-0.3.7/src/perplexity_webui_scraper/models.py (new file)
@@ -0,0 +1,109 @@
+ """
+ AI model definitions for Perplexity WebUI Scraper.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True, slots=True)
+ class Model:
+ """
+ AI model configuration.
+
+ Attributes:
+ identifier: Model identifier used by the API.
+ mode: Model execution mode. Default: "copilot".
+ """
+
+ identifier: str
+ mode: str = "copilot"
+
+
+ class Models:
+ """
+ Available AI models with their configurations.
+
+ All models use the "copilot" mode which enables web search.
+ """
+
+ RESEARCH = Model(identifier="pplx_alpha")
+ """
+ Research - Fast and thorough for routine research.
+ """
+
+ LABS = Model(identifier="pplx_beta")
+ """
+ Labs - Multi-step tasks with advanced troubleshooting.
+ """
+
+ BEST = Model(identifier="pplx_pro_upgraded")
+ """
+ Best - Automatically selects the most responsive model based on the query.
+ """
+
+ SONAR = Model(identifier="experimental")
+ """
+ Sonar - Perplexity's fast model.
+ """
+
+ GEMINI_3_FLASH = Model(identifier="gemini30flash")
+ """
+ Gemini 3 Flash - Google's fast reasoning model.
+ """
+
+ GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
+ """
+ Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking.
+ """
+
+ GEMINI_3_PRO = Model(identifier="gemini30pro")
+ """
+ Gemini 3 Pro - Google's newest reasoning model.
+ """
+
+ GPT_52 = Model(identifier="gpt52")
+ """
+ GPT-5.2 - OpenAI's latest model.
+ """
+
+ GPT_52_THINKING = Model(identifier="gpt52_thinking")
+ """
+ GPT-5.2 Thinking - OpenAI's latest model with thinking.
+ """
+
+ CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
+ """
+ Claude Sonnet 4.5 - Anthropic's newest advanced model.
+ """
+
+ CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
+ """
+ Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model.
+ """
+
+ CLAUDE_45_OPUS = Model(identifier="claude45opus")
+ """
+ Claude Opus 4.5 - Anthropic's Opus reasoning model.
+ """
+
+ CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
+ """
+ Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking.
+ """
+
+ GROK_41 = Model(identifier="grok41nonreasoning")
+ """
+ Grok 4.1 - xAI's latest advanced model.
+ """
+
+ GROK_41_THINKING = Model(identifier="grok41reasoning")
+ """
+ Grok 4.1 Thinking - xAI's latest reasoning model.
+ """
+
+ KIMI_K2_THINKING = Model(identifier="kimik2thinking")
+ """
+ Kimi K2 Thinking - Moonshot AI's latest reasoning model.
+ """
src/perplexity_webui_scraper/resilience.py
@@ -84,7 +84,9 @@ class RateLimiter:
  _lock: Lock = field(default_factory=Lock, init=False)

  def acquire(self) -> None:
- """Wait until a request can be made within rate limits."""
+ """
+ Wait until a request can be made within rate limits.
+ """

  with self._lock:
  now = time.monotonic()
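Only the opening of RateLimiter.acquire() is visible here (a Lock plus time.monotonic()). As a rough sketch of what a limiter of this shape typically does, under assumed configuration fields max_requests and window_seconds that are not taken from the package:

```python
import time
from dataclasses import dataclass, field
from threading import Lock


@dataclass
class SlidingWindowLimiter:
    max_requests: int = 5            # assumed field, for illustration only
    window_seconds: float = 1.0      # assumed field, for illustration only
    _timestamps: list[float] = field(default_factory=list, init=False)
    _lock: Lock = field(default_factory=Lock, init=False)

    def acquire(self) -> None:
        # Block until another request fits inside the rolling window.
        while True:
            with self._lock:
                now = time.monotonic()
                self._timestamps = [t for t in self._timestamps if now - t < self.window_seconds]
                if len(self._timestamps) < self.max_requests:
                    self._timestamps.append(now)
                    return
                wait = self.window_seconds - (now - self._timestamps[0])
            time.sleep(max(wait, 0.0))
```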
src/perplexity_webui_scraper/types.py
@@ -1,4 +1,6 @@
- """Response types and data models."""
+ """
+ Response types and data models.
+ """

  from __future__ import annotations

@@ -8,7 +10,9 @@ from typing import Any

  @dataclass(frozen=True, slots=True)
  class Coordinates:
- """Geographic coordinates (lat/lng)."""
+ """
+ Geographic coordinates (lat/lng).
+ """

  latitude: float
  longitude: float
@@ -16,7 +20,9 @@ class Coordinates:

  @dataclass(frozen=True, slots=True)
  class SearchResultItem:
- """A single search result."""
+ """
+ A single search result.
+ """

  title: str | None = None
  snippet: str | None = None
@@ -25,7 +31,9 @@ class SearchResultItem:

  @dataclass(slots=True)
  class Response:
- """Response from Perplexity AI."""
+ """
+ Response from Perplexity AI.
+ """

  title: str | None = None
  answer: str | None = None
@@ -38,7 +46,9 @@ class Response:

  @dataclass(frozen=True, slots=True)
  class _FileInfo:
- """Internal file info for uploads."""
+ """
+ Internal file info for uploads.
+ """

  path: str
  size: int
perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/enums.py (deleted)
@@ -1,105 +0,0 @@
- """Enums for Perplexity WebUI Scraper configuration options."""
-
- from __future__ import annotations
-
- from enum import Enum
-
-
- class CitationMode(str, Enum):
- """
- Citation formatting modes for response text.
-
- Controls how citation markers (e.g., [1], [2]) are formatted in the response.
- """
-
- DEFAULT = "default"
- """Keep original Perplexity citation format (e.g., 'This is a citation[1]')."""
-
- MARKDOWN = "markdown"
- """Convert citations to markdown links (e.g., 'This is a citation[1](https://example.com)')."""
-
- CLEAN = "clean"
- """Remove all citation markers (e.g., 'This is a citation')."""
-
-
- class SearchFocus(str, Enum):
- """
- Search focus types that control the type of search performed.
-
- Determines whether to search the web or focus on writing tasks.
- """
-
- WEB = "internet"
- """Search the web for information. Best for factual queries and research."""
-
- WRITING = "writing"
- """Focus on writing tasks. Best for creative writing, editing, and text generation."""
-
-
- class SourceFocus(str, Enum):
- """
- Source focus types that control which sources to prioritize.
-
- Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
- """
-
- WEB = "web"
- """Search across the entire internet. General web search."""
-
- ACADEMIC = "scholar"
- """Search academic papers and scholarly articles (Google Scholar, etc.)."""
-
- SOCIAL = "social"
- """Search social media for discussions and opinions (Reddit, Twitter, etc.)."""
-
- FINANCE = "edgar"
- """Search SEC EDGAR filings for financial and corporate documents."""
-
-
- class TimeRange(str, Enum):
- """
- Time range filters for search results.
-
- Controls how recent the sources should be.
- """
-
- ALL = ""
- """Include sources from all time. No time restriction."""
-
- TODAY = "DAY"
- """Include only sources from today (last 24 hours)."""
-
- LAST_WEEK = "WEEK"
- """Include sources from the last 7 days."""
-
- LAST_MONTH = "MONTH"
- """Include sources from the last 30 days."""
-
- LAST_YEAR = "YEAR"
- """Include sources from the last 365 days."""
-
-
- class LogLevel(str, Enum):
- """
- Logging level configuration.
-
- Controls the verbosity of logging output. DISABLED is the default.
- """
-
- DISABLED = "DISABLED"
- """Completely disable all logging output. This is the default."""
-
- DEBUG = "DEBUG"
- """Show all messages including internal debug information."""
-
- INFO = "INFO"
- """Show informational messages, warnings, and errors."""
-
- WARNING = "WARNING"
- """Show only warnings and errors."""
-
- ERROR = "ERROR"
- """Show only error messages."""
-
- CRITICAL = "CRITICAL"
- """Show only critical/fatal errors."""
perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/models.py (deleted)
@@ -1,73 +0,0 @@
- """AI model definitions for Perplexity WebUI Scraper."""
-
- from __future__ import annotations
-
- from dataclasses import dataclass
-
-
- @dataclass(frozen=True, slots=True)
- class Model:
- """AI model configuration.
-
- Attributes:
- identifier: Model identifier used by the API.
- mode: Model execution mode. Default: "copilot".
- """
-
- identifier: str
- mode: str = "copilot"
-
-
- class Models:
- """Available AI models with their configurations.
-
- All models use the "copilot" mode which enables web search.
- """
-
- RESEARCH = Model(identifier="pplx_alpha")
- """Research - Fast and thorough for routine research"""
-
- LABS = Model(identifier="pplx_beta")
- """Labs - Multi-step tasks with advanced troubleshooting"""
-
- BEST = Model(identifier="pplx_pro_upgraded")
- """Best - Automatically selects the most responsive model based on the query"""
-
- SONAR = Model(identifier="experimental")
- """Sonar - Perplexity's fast model"""
-
- GPT_52 = Model(identifier="gpt52")
- """GPT-5.2 - OpenAI's latest model"""
-
- GPT_52_THINKING = Model(identifier="gpt52_thinking")
- """GPT-5.2 Thinking - OpenAI's latest model with thinking"""
-
- CLAUDE_45_OPUS = Model(identifier="claude45opus")
- """Claude Opus 4.5 - Anthropic's Opus reasoning model"""
-
- CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
- """Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking"""
-
- GEMINI_3_PRO = Model(identifier="gemini30pro")
- """Gemini 3 Pro - Google's newest reasoning model"""
-
- GEMINI_3_FLASH = Model(identifier="gemini30flash")
- """Gemini 3 Flash - Google's fast reasoning model"""
-
- GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
- """Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking"""
-
- GROK_41 = Model(identifier="grok41nonreasoning")
- """Grok 4.1 - xAI's latest advanced model"""
-
- GROK_41_THINKING = Model(identifier="grok41reasoning")
- """Grok 4.1 Thinking - xAI's latest reasoning model"""
-
- KIMI_K2_THINKING = Model(identifier="kimik2thinking")
- """Kimi K2 Thinking - Moonshot AI's latest reasoning model"""
-
- CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
- """Claude Sonnet 4.5 - Anthropic's newest advanced model"""
-
- CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
- """Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model"""