perplexity-webui-scraper 0.3.4__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/PKG-INFO +98 -8
  2. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/README.md +92 -6
  3. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/pyproject.toml +10 -2
  4. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/__init__.py +2 -13
  5. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/config.py +61 -0
  6. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/core.py +166 -9
  7. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/enums.py +34 -4
  8. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/exceptions.py +124 -0
  9. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/http.py +528 -0
  10. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/logging.py +256 -0
  11. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/__init__.py +18 -0
  12. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/__main__.py +9 -0
  13. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/server.py +181 -0
  14. perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/resilience.py +179 -0
  15. perplexity_webui_scraper-0.3.4/src/perplexity_webui_scraper/config.py +0 -36
  16. perplexity_webui_scraper-0.3.4/src/perplexity_webui_scraper/exceptions.py +0 -50
  17. perplexity_webui_scraper-0.3.4/src/perplexity_webui_scraper/http.py +0 -197
  18. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/cli/get_perplexity_session_token.py +0 -0
  19. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/constants.py +0 -0
  20. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/limits.py +0 -0
  21. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/models.py +0 -0
  22. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/py.typed +0 -0
  23. {perplexity_webui_scraper-0.3.4 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: perplexity-webui-scraper
3
- Version: 0.3.4
3
+ Version: 0.3.5
4
4
  Summary: Python scraper to extract AI responses from Perplexity's web interface.
5
5
  Keywords: perplexity,ai,scraper,webui,api,client
6
6
  Author: henrique-coder
@@ -20,14 +20,18 @@ Classifier: Topic :: Internet :: WWW/HTTP
20
20
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
21
  Classifier: Typing :: Typed
22
22
  Requires-Dist: curl-cffi>=0.14.0
23
+ Requires-Dist: loguru>=0.7.3
23
24
  Requires-Dist: orjson>=3.11.5
24
25
  Requires-Dist: pydantic>=2.12.5
25
- Requires-Python: >=3.10
26
+ Requires-Dist: tenacity>=9.1.2
27
+ Requires-Dist: fastmcp>=2.14.1 ; extra == 'mcp'
28
+ Requires-Python: >=3.10, <3.15
26
29
  Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
27
30
  Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
28
31
  Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
29
32
  Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
30
33
  Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
34
+ Provides-Extra: mcp
31
35
  Description-Content-Type: text/markdown
32
36
 
33
37
  <div align="center">
@@ -47,7 +51,8 @@ Python scraper to extract AI responses from [Perplexity's](https://www.perplexit
47
51
  ## Installation
48
52
 
49
53
  ```bash
50
- uv pip install perplexity-webui-scraper
54
+ uv pip install perplexity-webui-scraper # from PyPI (stable)
55
+ uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
51
56
  ```
52
57
 
53
58
  ## Requirements
@@ -197,18 +202,103 @@ conversation.ask("Latest AI research", files=["paper.pdf"])
197
202
  | `timezone` | `None` | Timezone |
198
203
  | `coordinates` | `None` | Location (lat/lng) |
199
204
 
200
- ## CLI Tools
205
+ ## Exceptions
201
206
 
202
- ### Session Token Generator
207
+ The library provides specific exception types for better error handling:
208
+
209
+ | Exception | Description |
210
+ | ---------------------------------- | ------------------------------------------------------------ |
211
+ | `PerplexityError` | Base exception for all library errors |
212
+ | `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
213
+ | `RateLimitError` | Rate limit exceeded (HTTP 429) |
214
+ | `FileUploadError` | File upload failed |
215
+ | `FileValidationError` | File validation failed (size, type, etc.) |
216
+ | `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
217
+ | `ResponseParsingError` | API response could not be parsed |
218
+ | `StreamingError` | Error during streaming response |
219
+
220
+ ### Handling Research Mode Clarifying Questions
221
+
222
+ When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
223
+
224
+ ```python
225
+ from perplexity_webui_scraper import Models, Perplexity
226
+ from perplexity_webui_scraper.exceptions import (
227
+     ResearchClarifyingQuestionsError,
228
+ )
229
+
230
+ try:
231
+     conversation.ask("Research this topic", model=Models.RESEARCH)
232
+ except ResearchClarifyingQuestionsError as error:
233
+     print("The AI needs clarification:")
234
+     for question in error.questions:
235
+         print(f" - {question}")
236
+     # Consider rephrasing your query to be more specific
237
+ ```
238
+
239
+ ## MCP Server (Model Context Protocol)
240
+
241
+ The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
242
+
243
+ ### Installation
203
244
 
204
245
  ```bash
205
- get-perplexity-session-token
246
+ uv pip install perplexity-webui-scraper[mcp]
247
+ ```
248
+
249
+ ### Running the Server
250
+
251
+ ```bash
252
+ # Set your session token
253
+ export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
254
+ $env:PERPLEXITY_SESSION_TOKEN = "your_token_here" # For Windows (PowerShell)
255
+
256
+ # Run with FastMCP
257
+ uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
258
+
259
+ # Or test with the dev inspector
260
+ uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
261
+ ```
262
+
263
+ ### Claude Desktop Configuration
264
+
265
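+ Add to your Claude Desktop configuration file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows, or `~/.config/claude/claude_desktop_config.json` on Linux setups that use that location):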
266
+
267
+ ```json
268
+ {
269
+ "mcpServers": {
270
+ "perplexity": {
271
+ "command": "uv",
272
+ "args": [
273
+ "run",
274
+ "fastmcp",
275
+ "run",
276
+ "path/to/perplexity_webui_scraper/mcp/server.py"
277
+ ],
278
+ "env": {
279
+ "PERPLEXITY_SESSION_TOKEN": "your_token_here"
280
+ }
281
+ }
282
+ }
283
+ }
206
284
  ```
207
285
 
208
- Interactive tool to automatically obtain your Perplexity session token via email authentication. The token can be automatically saved to your `.env` file for immediate use.
286
+ ### Available Tool
287
+
288
+ | Tool | Description |
289
+ | ---------------- | --------------------------------------------------------------------------- |
290
+ | `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
291
+
292
+ **Parameters:**
293
+
294
+ | Parameter | Type | Default | Description |
295
+ | -------------- | ----- | -------- | ------------------------------------------------------------- |
296
+ | `query` | `str` | - | Question to ask (required) |
297
+ | `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
298
+ | `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
209
299
 
210
300
  ## Disclaimer
211
301
 
212
- This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
302
+ This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
213
303
 
214
304
  By using this library, you agree to Perplexity AI's Terms of Service.
@@ -15,7 +15,8 @@ Python scraper to extract AI responses from [Perplexity's](https://www.perplexit
15
15
  ## Installation
16
16
 
17
17
  ```bash
18
- uv pip install perplexity-webui-scraper
18
+ uv pip install perplexity-webui-scraper # from PyPI (stable)
19
+ uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
19
20
  ```
20
21
 
21
22
  ## Requirements
@@ -165,18 +166,103 @@ conversation.ask("Latest AI research", files=["paper.pdf"])
165
166
  | `timezone` | `None` | Timezone |
166
167
  | `coordinates` | `None` | Location (lat/lng) |
167
168
 
168
- ## CLI Tools
169
+ ## Exceptions
169
170
 
170
- ### Session Token Generator
171
+ The library provides specific exception types for better error handling:
172
+
173
+ | Exception | Description |
174
+ | ---------------------------------- | ------------------------------------------------------------ |
175
+ | `PerplexityError` | Base exception for all library errors |
176
+ | `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
177
+ | `RateLimitError` | Rate limit exceeded (HTTP 429) |
178
+ | `FileUploadError` | File upload failed |
179
+ | `FileValidationError` | File validation failed (size, type, etc.) |
180
+ | `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
181
+ | `ResponseParsingError` | API response could not be parsed |
182
+ | `StreamingError` | Error during streaming response |
183
+
184
+ ### Handling Research Mode Clarifying Questions
185
+
186
+ When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
187
+
188
+ ```python
189
+ from perplexity_webui_scraper import Models, Perplexity
190
+ from perplexity_webui_scraper.exceptions import (
191
+     ResearchClarifyingQuestionsError,
192
+ )
193
+
194
+ try:
195
+     conversation.ask("Research this topic", model=Models.RESEARCH)
196
+ except ResearchClarifyingQuestionsError as error:
197
+     print("The AI needs clarification:")
198
+     for question in error.questions:
199
+         print(f" - {question}")
200
+     # Consider rephrasing your query to be more specific
201
+ ```
202
+
203
+ ## MCP Server (Model Context Protocol)
204
+
205
+ The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
206
+
207
+ ### Installation
171
208
 
172
209
  ```bash
173
- get-perplexity-session-token
210
+ uv pip install perplexity-webui-scraper[mcp]
211
+ ```
212
+
213
+ ### Running the Server
214
+
215
+ ```bash
216
+ # Set your session token
217
+ export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
218
+ $env:PERPLEXITY_SESSION_TOKEN = "your_token_here" # For Windows (PowerShell)
219
+
220
+ # Run with FastMCP
221
+ uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
222
+
223
+ # Or test with the dev inspector
224
+ uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
225
+ ```
226
+
227
+ ### Claude Desktop Configuration
228
+
229
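+ Add to your Claude Desktop configuration file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows, or `~/.config/claude/claude_desktop_config.json` on Linux setups that use that location):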
230
+
231
+ ```json
232
+ {
233
+ "mcpServers": {
234
+ "perplexity": {
235
+ "command": "uv",
236
+ "args": [
237
+ "run",
238
+ "fastmcp",
239
+ "run",
240
+ "path/to/perplexity_webui_scraper/mcp/server.py"
241
+ ],
242
+ "env": {
243
+ "PERPLEXITY_SESSION_TOKEN": "your_token_here"
244
+ }
245
+ }
246
+ }
247
+ }
174
248
  ```
175
249
 
176
- Interactive tool to automatically obtain your Perplexity session token via email authentication. The token can be automatically saved to your `.env` file for immediate use.
250
+ ### Available Tool
251
+
252
+ | Tool | Description |
253
+ | ---------------- | --------------------------------------------------------------------------- |
254
+ | `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
255
+
256
+ **Parameters:**
257
+
258
+ | Parameter | Type | Default | Description |
259
+ | -------------- | ----- | -------- | ------------------------------------------------------------- |
260
+ | `query` | `str` | - | Question to ask (required) |
261
+ | `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
262
+ | `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
177
263
 
178
264
  ## Disclaimer
179
265
 
180
- This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
266
+ This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
181
267
 
182
268
  By using this library, you agree to Perplexity AI's Terms of Service.
@@ -1,11 +1,11 @@
1
1
  [project]
2
2
  name = "perplexity-webui-scraper"
3
- version = "0.3.4"
3
+ version = "0.3.5"
4
4
  description = "Python scraper to extract AI responses from Perplexity's web interface."
5
5
  authors = [{ name = "henrique-coder", email = "henriquemoreira10fk@gmail.com" }]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
- requires-python = ">=3.10"
8
+ requires-python = ">=3.10,<3.15"
9
9
  keywords = ["perplexity", "ai", "scraper", "webui", "api", "client"]
10
10
  classifiers = [
11
11
  "Development Status :: 4 - Beta",
@@ -24,8 +24,10 @@ classifiers = [
24
24
  ]
25
25
  dependencies = [
26
26
  "curl-cffi>=0.14.0",
27
+ "loguru>=0.7.3",
27
28
  "orjson>=3.11.5",
28
29
  "pydantic>=2.12.5",
30
+ "tenacity>=9.1.2",
29
31
  ]
30
32
 
31
33
  [dependency-groups]
@@ -43,6 +45,11 @@ tests = [
43
45
  "pytest>=9.0.2",
44
46
  ]
45
47
 
48
+ [project.optional-dependencies]
49
+ mcp = [
50
+ "fastmcp>=2.14.1",
51
+ ]
52
+
46
53
  [project.urls]
47
54
  Homepage = "https://github.com/henrique-coder/perplexity-webui-scraper"
48
55
  Documentation = "https://github.com/henrique-coder/perplexity-webui-scraper#readme"
@@ -103,6 +110,7 @@ skip-magic-trailing-comma = false # Preserve trailing commas as formatting hin
103
110
 
104
111
  [project.scripts]
105
112
  get-perplexity-session-token = "perplexity_webui_scraper.cli.get_perplexity_session_token:get_token"
113
+ perplexity-webui-scraper-mcp = "perplexity_webui_scraper.mcp:run_server"
106
114
 
107
115
  [build-system]
108
116
  requires = ["uv_build"]
@@ -4,33 +4,22 @@ from importlib import metadata
4
4
 
5
5
  from .config import ClientConfig, ConversationConfig
6
6
  from .core import Conversation, Perplexity
7
- from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
8
- from .exceptions import (
9
- AuthenticationError,
10
- FileUploadError,
11
- FileValidationError,
12
- PerplexityError,
13
- RateLimitError,
14
- )
7
+ from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
15
8
  from .models import Model, Models
16
9
  from .types import Coordinates, Response, SearchResultItem
17
10
 
18
11
 
19
12
  __version__: str = metadata.version("perplexity-webui-scraper")
20
13
  __all__: list[str] = [
21
- "AuthenticationError",
22
14
  "CitationMode",
23
15
  "ClientConfig",
24
16
  "Conversation",
25
17
  "ConversationConfig",
26
18
  "Coordinates",
27
- "FileUploadError",
28
- "FileValidationError",
19
+ "LogLevel",
29
20
  "Model",
30
21
  "Models",
31
22
  "Perplexity",
32
- "PerplexityError",
33
- "RateLimitError",
34
23
  "Response",
35
24
  "SearchFocus",
36
25
  "SearchResultItem",
@@ -0,0 +1,61 @@
1
+ """Configuration classes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING
7
+
8
+ from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
9
+
10
+
11
+ if TYPE_CHECKING:
12
+ from pathlib import Path
13
+
14
+ from .models import Model
15
+ from .types import Coordinates
16
+
17
+
18
+ @dataclass(slots=True)
19
+ class ConversationConfig:
20
+ """Default settings for a conversation. Can be overridden per message."""
21
+
22
+ model: Model | None = None
23
+ citation_mode: CitationMode = CitationMode.CLEAN
24
+ save_to_library: bool = False
25
+ search_focus: SearchFocus = SearchFocus.WEB
26
+ source_focus: SourceFocus | list[SourceFocus] = SourceFocus.WEB
27
+ time_range: TimeRange = TimeRange.ALL
28
+ language: str = "en-US"
29
+ timezone: str | None = None
30
+ coordinates: Coordinates | None = None
31
+
32
+
33
+ @dataclass(frozen=True, slots=True)
34
+ class ClientConfig:
35
+ """
36
+ HTTP client settings.
37
+
38
+ Attributes:
39
+ timeout: Request timeout in seconds.
40
+ impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
41
+ max_retries: Maximum retry attempts for failed requests.
42
+ retry_base_delay: Initial delay in seconds before first retry.
43
+ retry_max_delay: Maximum delay between retries.
44
+ retry_jitter: Random jitter factor (0-1) to add to delays.
45
+ requests_per_second: Rate limit for requests (0 to disable).
46
+ rotate_fingerprint: Whether to rotate browser fingerprint on retries.
47
+ logging_level: Logging verbosity level. Default is DISABLED.
48
+ log_file: Optional file path for persistent logging. If set, logs go to file only.
49
+ If None, logs go to console. All logs are appended.
50
+ """
51
+
52
+ timeout: int = 3600
53
+ impersonate: str = "chrome"
54
+ max_retries: int = 3
55
+ retry_base_delay: float = 1.0
56
+ retry_max_delay: float = 60.0
57
+ retry_jitter: float = 0.5
58
+ requests_per_second: float = 0.5
59
+ rotate_fingerprint: bool = True
60
+ logging_level: LogLevel = LogLevel.DISABLED
61
+ log_file: str | Path | None = None