perplexity-webui-scraper 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,181 @@
1
+ """
2
+ Resilience utilities for HTTP requests.
3
+
4
+ Provides retry mechanisms, rate limiting, and Cloudflare bypass utilities
5
+ using the tenacity library for robust retry handling.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from dataclasses import dataclass, field
12
+ import random
13
+ from threading import Lock
14
+ import time
15
+ from typing import TYPE_CHECKING, Any, TypeVar
16
+
17
+ from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
18
+
19
+
20
+ if TYPE_CHECKING:
21
+ from collections.abc import Callable
22
+
23
+ T = TypeVar("T")
24
+
25
+
26
# curl_cffi browser profile identifiers available for fingerprint rotation;
# get_random_browser_profile() draws from this pool.
BROWSER_PROFILES: tuple[str, ...] = (
    # Chrome
    "chrome",
    "chrome110",
    "chrome116",
    "chrome119",
    "chrome120",
    "chrome123",
    "chrome124",
    "chrome131",
    # Edge
    "edge99",
    "edge101",
    # Safari
    "safari15_3",
    "safari15_5",
    "safari17_0",
    "safari17_2_ios",
)
43
+
44
# Substrings whose presence in a response body is taken as a sign of a
# Cloudflare challenge page. is_cloudflare_challenge() lower-cases both the
# marker and the body before comparing, so the casing used here is cosmetic.
CLOUDFLARE_MARKERS: tuple[str, ...] = (
    # Identifier-like tokens
    "cf-ray",
    "cf-mitigated",
    "__cf_chl_",
    # Human-readable interstitial text
    "Checking your browser",
    "Just a moment...",
    "cloudflare",
    "Enable JavaScript and cookies to continue",
    "challenge-platform",
)
55
+
56
+
57
+ @dataclass(slots=True)
58
+ class RetryConfig:
59
+ """Configuration for retry behavior.
60
+
61
+ Attributes:
62
+ max_retries: Maximum number of retry attempts.
63
+ base_delay: Initial delay in seconds before first retry.
64
+ max_delay: Maximum delay between retries.
65
+ jitter: Random jitter factor to add to delays (0-1).
66
+ """
67
+
68
+ max_retries: int = 3
69
+ base_delay: float = 1.0
70
+ max_delay: float = 60.0
71
+ jitter: float = 0.5
72
+
73
+
74
@dataclass
class RateLimiter:
    """Minimum-interval rate limiter for throttling requests.

    Successive ``acquire()`` calls are spaced at least
    ``1 / requests_per_second`` seconds apart. No burst capacity builds up
    while the limiter is idle, so this is a fixed-interval throttle rather
    than a token bucket.

    Attributes:
        requests_per_second: Maximum requests allowed per second. Must be
            greater than zero when ``acquire()`` is called.
    """

    requests_per_second: float = 0.5
    # Monotonic timestamp of the latest acquire(); 0.0 means no request has been made yet
    _last_request: float = field(default=0.0, init=False)
    # Serialises acquire() so concurrent callers are released one interval at a time
    _lock: Lock = field(default_factory=Lock, init=False)

    def acquire(self) -> None:
        """
        Wait until a request can be made within rate limits.

        Raises:
            ValueError: If ``requests_per_second`` is zero or negative.
        """

        rate = self.requests_per_second

        # Fail with a clear message instead of a ZeroDivisionError (rate == 0)
        # or a silently disabled limiter (rate < 0)
        if rate <= 0:
            raise ValueError(f"requests_per_second must be greater than 0, got {rate!r}")

        min_interval = 1.0 / rate

        with self._lock:
            if self._last_request > 0:
                elapsed = time.monotonic() - self._last_request
                wait_time = min_interval - elapsed

                if wait_time > 0:
                    # Sleep while holding the lock so other callers queue behind this wait
                    time.sleep(wait_time)

            # Re-read the clock: the sleep above may have advanced it
            self._last_request = time.monotonic()
103
+
104
+
105
def get_random_browser_profile() -> str:
    """Pick one browser profile at random for fingerprint rotation.

    Returns:
        One entry of ``BROWSER_PROFILES``, chosen with ``random.choice``.
    """

    profile = random.choice(BROWSER_PROFILES)

    return profile
113
+
114
+
115
def is_cloudflare_challenge(response_text: str, headers: dict[str, Any] | None = None) -> bool:
    """Report whether a response looks like a Cloudflare challenge page.

    The body is checked first, case-insensitively, against every entry of
    ``CLOUDFLARE_MARKERS``. If none match and headers were supplied, the
    header names (not their values) are checked next.

    Args:
        response_text: The response body text.
        headers: Optional response headers; only the keys are inspected.

    Returns:
        True if any body marker or a Cloudflare-looking header name is found.
    """

    body = response_text.lower()

    if any(marker.lower() in body for marker in CLOUDFLARE_MARKERS):
        return True

    if not headers:
        return False

    # NOTE(review): this matches ANY header name containing "cf-" or
    # "cloudflare", not only challenge-specific ones — confirm callers only
    # apply it to responses that are already suspect.
    header_names = (key.lower() for key in headers)

    return any("cf-" in name or "cloudflare" in name for name in header_names)
140
+
141
+
142
def is_cloudflare_status(status_code: int) -> bool:
    """Tell whether an HTTP status is one treated as a potential Cloudflare block.

    Args:
        status_code: HTTP status code.

    Returns:
        True for 403, 503 and every code from 520 through 526; False otherwise.
    """

    suspect_codes = (403, 503, *range(520, 527))

    return status_code in suspect_codes
153
+
154
+
155
def create_retry_decorator(
    config: RetryConfig,
    retryable_exceptions: tuple[type[Exception], ...],
    on_retry: Callable[[RetryCallState], None] | None = None,
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a tenacity ``retry`` decorator from a ``RetryConfig``.

    Args:
        config: Retry configuration supplying attempt count and delays.
        retryable_exceptions: Exception types that trigger another attempt.
        on_retry: Optional callback handed to tenacity as ``before_sleep``.

    Returns:
        The decorator produced by ``tenacity.retry`` (called with ``reraise=True``).
    """

    # max_retries counts retries only; the initial call is one extra attempt
    stop_policy = stop_after_attempt(config.max_retries + 1)

    # NOTE(review): the jitter ceiling scales with max_delay rather than
    # base_delay, so with the default config it can exceed the initial delay
    # many times over — confirm this is intended.
    jitter_ceiling = config.max_delay * config.jitter

    wait_policy = wait_exponential_jitter(
        initial=config.base_delay,
        max=config.max_delay,
        jitter=jitter_ceiling,
    )

    retry_policy = retry_if_exception_type(retryable_exceptions)

    return retry(
        stop=stop_policy,
        wait=wait_policy,
        retry=retry_policy,
        before_sleep=on_retry,
        reraise=True,
    )
@@ -1,4 +1,6 @@
1
- """Response types and data models."""
1
+ """
2
+ Response types and data models.
3
+ """
2
4
 
3
5
  from __future__ import annotations
4
6
 
@@ -8,7 +10,9 @@ from typing import Any
8
10
 
9
11
  @dataclass(frozen=True, slots=True)
10
12
  class Coordinates:
11
- """Geographic coordinates (lat/lng)."""
13
+ """
14
+ Geographic coordinates (lat/lng).
15
+ """
12
16
 
13
17
  latitude: float
14
18
  longitude: float
@@ -16,7 +20,9 @@ class Coordinates:
16
20
 
17
21
  @dataclass(frozen=True, slots=True)
18
22
  class SearchResultItem:
19
- """A single search result."""
23
+ """
24
+ A single search result.
25
+ """
20
26
 
21
27
  title: str | None = None
22
28
  snippet: str | None = None
@@ -25,7 +31,9 @@ class SearchResultItem:
25
31
 
26
32
  @dataclass(slots=True)
27
33
  class Response:
28
- """Response from Perplexity AI."""
34
+ """
35
+ Response from Perplexity AI.
36
+ """
29
37
 
30
38
  title: str | None = None
31
39
  answer: str | None = None
@@ -38,7 +46,9 @@ class Response:
38
46
 
39
47
  @dataclass(frozen=True, slots=True)
40
48
  class _FileInfo:
41
- """Internal file info for uploads."""
49
+ """
50
+ Internal file info for uploads.
51
+ """
42
52
 
43
53
  path: str
44
54
  size: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: perplexity-webui-scraper
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: Python scraper to extract AI responses from Perplexity's web interface.
5
5
  Keywords: perplexity,ai,scraper,webui,api,client
6
6
  Author: henrique-coder
@@ -20,14 +20,18 @@ Classifier: Topic :: Internet :: WWW/HTTP
20
20
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
21
  Classifier: Typing :: Typed
22
22
  Requires-Dist: curl-cffi>=0.14.0
23
+ Requires-Dist: loguru>=0.7.3
23
24
  Requires-Dist: orjson>=3.11.5
24
25
  Requires-Dist: pydantic>=2.12.5
26
+ Requires-Dist: tenacity>=9.1.2
27
+ Requires-Dist: fastmcp>=2.14.2 ; extra == 'mcp'
25
28
  Requires-Python: >=3.10
26
29
  Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
27
30
  Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
28
31
  Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
29
32
  Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
30
33
  Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
34
+ Provides-Extra: mcp
31
35
  Description-Content-Type: text/markdown
32
36
 
33
37
  <div align="center">
@@ -47,7 +51,8 @@ Python scraper to extract AI responses from [Perplexity's](https://www.perplexit
47
51
  ## Installation
48
52
 
49
53
  ```bash
50
- uv pip install perplexity-webui-scraper
54
+ uv pip install perplexity-webui-scraper # from PyPI (stable)
55
+ uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
51
56
  ```
52
57
 
53
58
  ## Requirements
@@ -197,18 +202,103 @@ conversation.ask("Latest AI research", files=["paper.pdf"])
197
202
  | `timezone` | `None` | Timezone |
198
203
  | `coordinates` | `None` | Location (lat/lng) |
199
204
 
200
- ## CLI Tools
205
+ ## Exceptions
201
206
 
202
- ### Session Token Generator
207
+ The library provides specific exception types for better error handling:
208
+
209
+ | Exception | Description |
210
+ | ---------------------------------- | ------------------------------------------------------------ |
211
+ | `PerplexityError` | Base exception for all library errors |
212
+ | `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
213
+ | `RateLimitError` | Rate limit exceeded (HTTP 429) |
214
+ | `FileUploadError` | File upload failed |
215
+ | `FileValidationError` | File validation failed (size, type, etc.) |
216
+ | `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
217
+ | `ResponseParsingError` | API response could not be parsed |
218
+ | `StreamingError` | Error during streaming response |
219
+
220
+ ### Handling Research Mode Clarifying Questions
221
+
222
+ When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
223
+
224
+ ```python
225
+ from perplexity_webui_scraper import (
226
+ Perplexity,
227
+ ResearchClarifyingQuestionsError,
228
+ )
229
+
230
+ try:
231
+ conversation.ask("Research this topic", model=Models.RESEARCH)
232
+ except ResearchClarifyingQuestionsError as error:
233
+ print("The AI needs clarification:")
234
+ for question in error.questions:
235
+ print(f" - {question}")
236
+ # Consider rephrasing your query to be more specific
237
+ ```
238
+
239
+ ## MCP Server (Model Context Protocol)
240
+
241
+ The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
242
+
243
+ ### Installation
203
244
 
204
245
  ```bash
205
- get-perplexity-session-token
246
+ uv pip install perplexity-webui-scraper[mcp]
247
+ ```
248
+
249
+ ### Running the Server
250
+
251
+ ```bash
252
+ # Set your session token
253
+ export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
254
+ set "PERPLEXITY_SESSION_TOKEN=your_token_here" & REM For Windows (cmd.exe)
255
+
256
+ # Run with FastMCP
257
+ uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
258
+
259
+ # Or test with the dev inspector
260
+ uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
261
+ ```
262
+
263
+ ### Claude Desktop Configuration
264
+
265
+ Add to `~/.config/claude/claude_desktop_config.json`:
266
+
267
+ ```json
268
+ {
269
+ "mcpServers": {
270
+ "perplexity": {
271
+ "command": "uv",
272
+ "args": [
273
+ "run",
274
+ "fastmcp",
275
+ "run",
276
+ "path/to/perplexity_webui_scraper/mcp/server.py"
277
+ ],
278
+ "env": {
279
+ "PERPLEXITY_SESSION_TOKEN": "your_token_here"
280
+ }
281
+ }
282
+ }
283
+ }
206
284
  ```
207
285
 
208
- Interactive tool to automatically obtain your Perplexity session token via email authentication. The token can be automatically saved to your `.env` file for immediate use.
286
+ ### Available Tool
287
+
288
+ | Tool | Description |
289
+ | ---------------- | --------------------------------------------------------------------------- |
290
+ | `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
291
+
292
+ **Parameters:**
293
+
294
+ | Parameter | Type | Default | Description |
295
+ | -------------- | ----- | -------- | ------------------------------------------------------------- |
296
+ | `query` | `str` | - | Question to ask (required) |
297
+ | `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
298
+ | `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
209
299
 
210
300
  ## Disclaimer
211
301
 
212
- This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
302
+ This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
213
303
 
214
304
  By using this library, you agree to Perplexity AI's Terms of Service.
@@ -0,0 +1,21 @@
1
+ perplexity_webui_scraper/__init__.py,sha256=DQdyF667plbSLdULrEll1ovrOPB7HwVNrD7fttLNkuQ,715
2
+ perplexity_webui_scraper/cli/get_perplexity_session_token.py,sha256=PxHZbrTbekUQX2MIMDZC0iXBTmnTSBFZMb85mLgezR8,7009
3
+ perplexity_webui_scraper/config.py,sha256=nQYNsyCvMQW1V51R-6UV5hA0DI02qBLJ-ufX6r3NZyU,2067
4
+ perplexity_webui_scraper/constants.py,sha256=j32_i67mZGJ4B3KPV1ICE-6NR5g3KxDtDY5OxQXIthw,1887
5
+ perplexity_webui_scraper/core.py,sha256=X8aQgIBCFzz000Cy58ofWCByT94HYuqyuQQmlgEQBVk,22460
6
+ perplexity_webui_scraper/enums.py,sha256=QHrDfdawedGuuThcAR0XC0xPnc7VwSaK4BFQ9jqzQag,2953
7
+ perplexity_webui_scraper/exceptions.py,sha256=vdr5fm6OSgruII8zLwcYT3NsrU58GAXFX-WyS6nuW0M,3987
8
+ perplexity_webui_scraper/http.py,sha256=blRITnFrJsqcsZXUDub8_ZFjSNtLz7mK2ZBRV6tRc1I,19757
9
+ perplexity_webui_scraper/limits.py,sha256=4-eO9xbfKtObN4kMDIVglXmfC4dZvYtvjyUKUP_LKSU,474
10
+ perplexity_webui_scraper/logging.py,sha256=jxgho9jjrZvr3tZ5m0z3caQQsqdtFT9sM-HDVFWL67M,7300
11
+ perplexity_webui_scraper/mcp/__init__.py,sha256=Ur7fmsPDrxewBzq9UdvMoFHp85a8pGsW32WBHG5pfrc,376
12
+ perplexity_webui_scraper/mcp/__main__.py,sha256=cmGev_HXYQK2hoNSQAziEo7bVqHdc0lXaTquzMItSiU,148
13
+ perplexity_webui_scraper/mcp/server.py,sha256=3RJeOHOjv048ZleUURG3sFdMaDHsSPmdn7GY47yAm2s,4758
14
+ perplexity_webui_scraper/models.py,sha256=I5PhVD7B5XeeWfjWvXBtnl3CHMv4P59DO_hnYk9O0b0,2636
15
+ perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ perplexity_webui_scraper/resilience.py,sha256=sVsLW1kU6dMit4L6-dyPhVoT00EXGfokbEUnzvBNb0k,4680
17
+ perplexity_webui_scraper/types.py,sha256=qWsIABBxn1vcQQ2KUbC_hyiQHeaLsVb63epmRcpcmLk,1070
18
+ perplexity_webui_scraper-0.3.6.dist-info/WHEEL,sha256=RRVLqVugUmFOqBedBFAmA4bsgFcROUBiSUKlERi0Hcg,79
19
+ perplexity_webui_scraper-0.3.6.dist-info/entry_points.txt,sha256=ODpXpDTkmoQ_o3Y3lsy22PLs-8ndapvMKYwxcz6A9gs,189
20
+ perplexity_webui_scraper-0.3.6.dist-info/METADATA,sha256=OkwWS6PQwj1-hTkMPXvqA8peuBL-ei5AUp_kgX9KpAo,12168
21
+ perplexity_webui_scraper-0.3.6.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.9.18
2
+ Generator: uv 0.9.21
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,3 +1,4 @@
1
1
  [console_scripts]
2
2
  get-perplexity-session-token = perplexity_webui_scraper.cli.get_perplexity_session_token:get_token
3
+ perplexity-webui-scraper-mcp = perplexity_webui_scraper.mcp:run_server
3
4
 
@@ -1,16 +0,0 @@
1
- perplexity_webui_scraper/__init__.py,sha256=aUs8lsx11WeqDi-ZT4Y2RiEhQVd6K66EZZ6x0BDuU90,949
2
- perplexity_webui_scraper/cli/get_perplexity_session_token.py,sha256=67Ck4S2MJ0701LnRHq73qY5oRLCsFOyu_8SMgsbTNFc,6937
3
- perplexity_webui_scraper/config.py,sha256=tjTwTFO39ONI4yOodqAov6sAXti18DiOOwFndpct68o,944
4
- perplexity_webui_scraper/constants.py,sha256=Kq-4i6yyTZ5VhUvbiZmbUmHrjMQm-p7H82Emm7b10-c,1867
5
- perplexity_webui_scraper/core.py,sha256=QGDjdd_h8fV7yE4ukV6iL8xuidxlxDzu5HvUx0W-0tA,14824
6
- perplexity_webui_scraper/enums.py,sha256=Xo7RmtWFxhSQU2Zma5sFmMyitOqlqjAb4XwRC0KJON0,2124
7
- perplexity_webui_scraper/exceptions.py,sha256=0oOWe_A0B0wBsFeogt323BGJY3oBzaFK9PItXs77J70,1629
8
- perplexity_webui_scraper/http.py,sha256=4x0LSCpKFtIR_izFYaGWXvNcewnXGnICFoPJNP892W8,5615
9
- perplexity_webui_scraper/limits.py,sha256=GwcwC8CnSNhlcLWGLpuDYA37gn8OXSfsXLIOc-QbxNs,465
10
- perplexity_webui_scraper/models.py,sha256=QVeZI-WQzpyi9JnE15QIMJ7nsG0YjIjOsZEA6YfX0tw,2448
11
- perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- perplexity_webui_scraper/types.py,sha256=VlnzvNilIHrDXM2YOGjJa1y2VY0tfR-F0zaPjQHoPKs,1028
13
- perplexity_webui_scraper-0.3.4.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
14
- perplexity_webui_scraper-0.3.4.dist-info/entry_points.txt,sha256=x98Wqg3iD6aXxtm27KmB-BXPFo1ccDDGRaCp6fWn9m4,118
15
- perplexity_webui_scraper-0.3.4.dist-info/METADATA,sha256=Mm1LlknQT4ipS2bT-mtrcnNvCSjgjrW79tyQU0mxu9s,8557
16
- perplexity_webui_scraper-0.3.4.dist-info/RECORD,,