perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +4 -15
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +216 -0
- perplexity_webui_scraper/config.py +29 -4
- perplexity_webui_scraper/constants.py +9 -35
- perplexity_webui_scraper/core.py +225 -21
- perplexity_webui_scraper/enums.py +34 -4
- perplexity_webui_scraper/exceptions.py +74 -0
- perplexity_webui_scraper/http.py +370 -36
- perplexity_webui_scraper/limits.py +2 -5
- perplexity_webui_scraper/logging.py +256 -0
- perplexity_webui_scraper/mcp/__init__.py +18 -0
- perplexity_webui_scraper/mcp/__main__.py +9 -0
- perplexity_webui_scraper/mcp/server.py +181 -0
- perplexity_webui_scraper/models.py +34 -19
- perplexity_webui_scraper/resilience.py +179 -0
- perplexity_webui_scraper-0.3.5.dist-info/METADATA +304 -0
- perplexity_webui_scraper-0.3.5.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.3.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/WHEEL +1 -1
- perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt +4 -0
- perplexity_webui_scraper-0.3.3.dist-info/METADATA +0 -166
- perplexity_webui_scraper-0.3.3.dist-info/RECORD +0 -14
perplexity_webui_scraper/resilience.py (new file)
@@ -0,0 +1,179 @@
+"""
+Resilience utilities for HTTP requests.
+
+Provides retry mechanisms, rate limiting, and Cloudflare bypass utilities
+using the tenacity library for robust retry handling.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass, field
+import random
+from threading import Lock
+import time
+from typing import TYPE_CHECKING, Any, TypeVar
+
+from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
+
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+T = TypeVar("T")
+
+
+# Browser profiles supported by curl_cffi for fingerprint rotation
+BROWSER_PROFILES: tuple[str, ...] = (
+    "chrome",
+    "chrome110",
+    "chrome116",
+    "chrome119",
+    "chrome120",
+    "chrome123",
+    "chrome124",
+    "chrome131",
+    "edge99",
+    "edge101",
+    "safari15_3",
+    "safari15_5",
+    "safari17_0",
+    "safari17_2_ios",
+)
+
+# Cloudflare challenge detection markers
+CLOUDFLARE_MARKERS: tuple[str, ...] = (
+    "cf-ray",
+    "cf-mitigated",
+    "__cf_chl_",
+    "Checking your browser",
+    "Just a moment...",
+    "cloudflare",
+    "Enable JavaScript and cookies to continue",
+    "challenge-platform",
+)
+
+
+@dataclass(slots=True)
+class RetryConfig:
+    """Configuration for retry behavior.
+
+    Attributes:
+        max_retries: Maximum number of retry attempts.
+        base_delay: Initial delay in seconds before first retry.
+        max_delay: Maximum delay between retries.
+        jitter: Random jitter factor to add to delays (0-1).
+    """
+
+    max_retries: int = 3
+    base_delay: float = 1.0
+    max_delay: float = 60.0
+    jitter: float = 0.5
+
+
+@dataclass
+class RateLimiter:
+    """Token bucket rate limiter for throttling requests.
+
+    Attributes:
+        requests_per_second: Maximum requests allowed per second.
+    """
+
+    requests_per_second: float = 0.5
+    _last_request: float = field(default=0.0, init=False)
+    _lock: Lock = field(default_factory=Lock, init=False)
+
+    def acquire(self) -> None:
+        """Wait until a request can be made within rate limits."""
+
+        with self._lock:
+            now = time.monotonic()
+            min_interval = 1.0 / self.requests_per_second
+
+            if self._last_request > 0:
+                elapsed = now - self._last_request
+                wait_time = min_interval - elapsed
+
+                if wait_time > 0:
+                    time.sleep(wait_time)
+
+            self._last_request = time.monotonic()
+
+
+def get_random_browser_profile() -> str:
+    """Get a random browser profile for fingerprint rotation.
+
+    Returns:
+        A browser profile identifier compatible with curl_cffi.
+    """
+
+    return random.choice(BROWSER_PROFILES)
+
+
+def is_cloudflare_challenge(response_text: str, headers: dict[str, Any] | None = None) -> bool:
+    """Detect if a response is a Cloudflare challenge page.
+
+    Args:
+        response_text: The response body text.
+        headers: Optional response headers.
+
+    Returns:
+        True if Cloudflare challenge markers are detected.
+    """
+
+    text_lower = response_text.lower()
+
+    for marker in CLOUDFLARE_MARKERS:
+        if marker.lower() in text_lower:
+            return True
+
+    if headers:
+        for key in headers:
+            key_lower = key.lower()
+
+            if "cf-" in key_lower or "cloudflare" in key_lower:
+                return True
+
+    return False
+
+
+def is_cloudflare_status(status_code: int) -> bool:
+    """Check if status code indicates a potential Cloudflare block.
+
+    Args:
+        status_code: HTTP status code.
+
+    Returns:
+        True if status code is commonly used by Cloudflare challenges.
+    """
+
+    return status_code in (403, 503, 520, 521, 522, 523, 524, 525, 526)
+
+
+def create_retry_decorator(
+    config: RetryConfig,
+    retryable_exceptions: tuple[type[Exception], ...],
+    on_retry: Callable[[RetryCallState], None] | None = None,
+) -> Callable[[Callable[..., T]], Callable[..., T]]:
+    """Create a tenacity retry decorator with the given configuration.
+
+    Args:
+        config: Retry configuration.
+        retryable_exceptions: Tuple of exception types to retry on.
+        on_retry: Optional callback to execute on each retry.
+
+    Returns:
+        A retry decorator configured with the given settings.
+    """
+
+    return retry(
+        stop=stop_after_attempt(config.max_retries + 1),
+        wait=wait_exponential_jitter(
+            initial=config.base_delay,
+            max=config.max_delay,
+            jitter=config.max_delay * config.jitter,
+        ),
+        retry=retry_if_exception_type(retryable_exceptions),
+        before_sleep=on_retry,
+        reraise=True,
+    )
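As a rough illustration of how these new helpers compose, here is a minimal sketch. It is not the wiring `core.py` or `http.py` actually uses; the `fetch` function and the retried exception type are placeholders, and only the names defined in `resilience.py` above are real.

```python
from perplexity_webui_scraper.resilience import (
    RateLimiter,
    RetryConfig,
    create_retry_decorator,
    get_random_browser_profile,
)

limiter = RateLimiter(requests_per_second=0.5)
retrying = create_retry_decorator(
    RetryConfig(max_retries=3, base_delay=1.0, max_delay=60.0),
    retryable_exceptions=(ConnectionError,),  # placeholder; the library wires in its own exception types
)


@retrying  # exponential backoff with jitter, re-raises after the final attempt
def fetch(url: str) -> str:
    limiter.acquire()  # block until the configured request rate allows another call
    profile = get_random_browser_profile()  # rotate the curl_cffi browser fingerprint
    # ... perform the actual HTTP request with `profile` here ...
    return f"fetched {url} as {profile}"
```

Note that `stop_after_attempt(config.max_retries + 1)` counts the initial call, so `max_retries=3` allows one attempt plus up to three retries.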
perplexity_webui_scraper-0.3.5.dist-info/METADATA (new file)
@@ -0,0 +1,304 @@
+Metadata-Version: 2.4
+Name: perplexity-webui-scraper
+Version: 0.3.5
+Summary: Python scraper to extract AI responses from Perplexity's web interface.
+Keywords: perplexity,ai,scraper,webui,api,client
+Author: henrique-coder
+Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
+License-Expression: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Typing :: Typed
+Requires-Dist: curl-cffi>=0.14.0
+Requires-Dist: loguru>=0.7.3
+Requires-Dist: orjson>=3.11.5
+Requires-Dist: pydantic>=2.12.5
+Requires-Dist: tenacity>=9.1.2
+Requires-Dist: fastmcp>=2.14.1 ; extra == 'mcp'
+Requires-Python: >=3.10, <3.15
+Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
+Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
+Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
+Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
+Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
+Provides-Extra: mcp
+Description-Content-Type: text/markdown
+
+<div align="center">
+
+# Perplexity WebUI Scraper
+
+Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
+
+[](https://pypi.org/project/perplexity-webui-scraper)
+[](https://pypi.org/project/perplexity-webui-scraper)
+[](./LICENSE)
+
+</div>
+
+---
+
+## Installation
+
+```bash
+uv pip install perplexity-webui-scraper # from PyPI (stable)
+uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
+```
+
+## Requirements
+
+- **Perplexity Pro/Max account**
+- **Session token** (`__Secure-next-auth.session-token` cookie from your browser)
+
+### Getting Your Session Token
+
+You can obtain your session token in two ways:
+
+#### Option 1: Automatic (CLI Tool)
+
+The package includes a CLI tool to automatically generate and save your session token:
+
+```bash
+get-perplexity-session-token
+```
+
+This interactive tool will:
+
+1. Ask for your Perplexity email
+2. Send a verification code to your email
+3. Accept either a 6-digit code or magic link
+4. Extract and display your session token
+5. Optionally save it to your `.env` file
+
+**Features:**
+
+- Secure ephemeral session (cleared on exit)
+- Automatic `.env` file management
+- Support for both OTP codes and magic links
+- Clean terminal interface with status updates
+
+#### Option 2: Manual (Browser)
+
+If you prefer to extract the token manually:
+
+1. Log in at [perplexity.ai](https://www.perplexity.ai)
+2. Open DevTools (`F12`) → Application/Storage → Cookies
+3. Copy the value of `__Secure-next-auth.session-token`
+4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN="your_token"`
+
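Once the token is stored, it can be read back before constructing the client. A minimal sketch, assuming `PERPLEXITY_SESSION_TOKEN` is present in the process environment (exported by your shell or loaded from `.env` by a tool such as python-dotenv):

```python
import os

from perplexity_webui_scraper import Perplexity

# Assumes PERPLEXITY_SESSION_TOKEN was exported or loaded from .env beforehand.
client = Perplexity(session_token=os.environ["PERPLEXITY_SESSION_TOKEN"])
```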
+## Quick Start
+
+```python
+from perplexity_webui_scraper import Perplexity
+
+client = Perplexity(session_token="YOUR_TOKEN")
+conversation = client.create_conversation()
+
+conversation.ask("What is quantum computing?")
+print(conversation.answer)
+
+# Follow-up
+conversation.ask("Explain it simpler")
+print(conversation.answer)
+```
+
+### Streaming
+
+```python
+for chunk in conversation.ask("Explain AI", stream=True):
+    print(chunk.answer)
+```
+
+### With Options
+
+```python
+from perplexity_webui_scraper import (
+    ConversationConfig,
+    Coordinates,
+    Models,
+    SourceFocus,
+)
+
+config = ConversationConfig(
+    model=Models.RESEARCH,
+    source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
+    language="en-US",
+    coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
+)
+
+conversation = client.create_conversation(config)
+conversation.ask("Latest AI research", files=["paper.pdf"])
+```
+
+## API
+
+### `Perplexity(session_token, config?)`
+
+| Parameter | Type | Description |
+| --------------- | -------------- | ------------------ |
+| `session_token` | `str` | Browser cookie |
+| `config` | `ClientConfig` | Timeout, TLS, etc. |
+
+### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
+
+| Parameter | Type | Default | Description |
+| --------------- | ----------------------- | ------------- | ------------------- |
+| `query` | `str` | - | Question (required) |
+| `model` | `Model` | `Models.BEST` | AI model |
+| `files` | `list[str \| PathLike]` | `None` | File paths |
+| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
+| `stream` | `bool` | `False` | Enable streaming |
+
+### Models
+
+| Model | Description |
+| ---------------------------------- | ------------------------------------------------------------------------- |
+| `Models.RESEARCH` | Research - Fast and thorough for routine research |
+| `Models.LABS` | Labs - Multi-step tasks with advanced troubleshooting |
+| `Models.BEST` | Best - Automatically selects the most responsive model based on the query |
+| `Models.SONAR` | Sonar - Perplexity's fast model |
+| `Models.GPT_52` | GPT-5.2 - OpenAI's latest model |
+| `Models.GPT_52_THINKING` | GPT-5.2 Thinking - OpenAI's latest model with thinking |
+| `Models.CLAUDE_45_OPUS` | Claude Opus 4.5 - Anthropic's Opus reasoning model |
+| `Models.CLAUDE_45_OPUS_THINKING` | Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking |
+| `Models.GEMINI_3_PRO` | Gemini 3 Pro - Google's newest reasoning model |
+| `Models.GEMINI_3_FLASH` | Gemini 3 Flash - Google's fast reasoning model |
+| `Models.GEMINI_3_FLASH_THINKING` | Gemini 3 Flash Thinking - Google's fast reasoning model with thinking |
+| `Models.GROK_41` | Grok 4.1 - xAI's latest advanced model |
+| `Models.GROK_41_THINKING` | Grok 4.1 Thinking - xAI's latest reasoning model |
+| `Models.KIMI_K2_THINKING` | Kimi K2 Thinking - Moonshot AI's latest reasoning model |
+| `Models.CLAUDE_45_SONNET` | Claude Sonnet 4.5 - Anthropic's newest advanced model |
+| `Models.CLAUDE_45_SONNET_THINKING` | Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model |
+
+### CitationMode
+
+| Mode | Output |
+| ---------- | --------------------- |
+| `DEFAULT` | `text[1]` |
+| `MARKDOWN` | `text[1](url)` |
+| `CLEAN` | `text` (no citations) |
+
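Putting the two tables above together, both the model and the citation format can be overridden per call. A minimal sketch, assuming `CitationMode` is importable from the package root alongside `Models`:

```python
from perplexity_webui_scraper import CitationMode, Models, Perplexity  # CitationMode export assumed

client = Perplexity(session_token="YOUR_TOKEN")
conversation = client.create_conversation()

conversation.ask(
    "Summarize this week's AI news",
    model=Models.GEMINI_3_PRO,            # any entry from the Models table
    citation_mode=CitationMode.MARKDOWN,  # emit text[1](url)-style citations
)
print(conversation.answer)
```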
+### ConversationConfig
+
+| Parameter | Default | Description |
+| ----------------- | ------------- | ------------------ |
+| `model` | `Models.BEST` | Default model |
+| `citation_mode` | `CLEAN` | Citation format |
+| `save_to_library` | `False` | Save to library |
+| `search_focus` | `WEB` | Search type |
+| `source_focus` | `WEB` | Source types |
+| `time_range` | `ALL` | Time filter |
+| `language` | `"en-US"` | Response language |
+| `timezone` | `None` | Timezone |
+| `coordinates` | `None` | Location (lat/lng) |
+
+## Exceptions
+
+The library provides specific exception types for better error handling:
+
+| Exception | Description |
+| ---------------------------------- | ------------------------------------------------------------ |
+| `PerplexityError` | Base exception for all library errors |
+| `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
+| `RateLimitError` | Rate limit exceeded (HTTP 429) |
+| `FileUploadError` | File upload failed |
+| `FileValidationError` | File validation failed (size, type, etc.) |
+| `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
+| `ResponseParsingError` | API response could not be parsed |
+| `StreamingError` | Error during streaming response |
+
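For example, the authentication and rate-limit cases can be handled separately. A minimal sketch, assuming these exceptions are importable from the package root in the same way `ResearchClarifyingQuestionsError` is in the snippet below:

```python
from perplexity_webui_scraper import AuthenticationError, Perplexity, RateLimitError  # exception exports assumed

client = Perplexity(session_token="YOUR_TOKEN")
conversation = client.create_conversation()

try:
    conversation.ask("What changed in the latest release?")
    print(conversation.answer)
except AuthenticationError:
    print("Session token rejected - generate a fresh one with get-perplexity-session-token.")
except RateLimitError:
    print("Rate limited - back off and try again later.")
```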
+### Handling Research Mode Clarifying Questions
+
+When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
+
+```python
+from perplexity_webui_scraper import (
+    Perplexity,
+    ResearchClarifyingQuestionsError,
+)
+
+try:
+    conversation.ask("Research this topic", model=Models.RESEARCH)
+except ResearchClarifyingQuestionsError as error:
+    print("The AI needs clarification:")
+    for question in error.questions:
+        print(f" - {question}")
+    # Consider rephrasing your query to be more specific
+```
+
+## MCP Server (Model Context Protocol)
+
+The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
+
+### Installation
+
+```bash
+uv pip install perplexity-webui-scraper[mcp]
+```
+
+### Running the Server
+
+```bash
+# Set your session token
+export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
+set PERPLEXITY_SESSION_TOKEN="your_token_here" # For Windows
+
+# Run with FastMCP
+uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
+
+# Or test with the dev inspector
+uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
+```
+
+### Claude Desktop Configuration
+
+Add to `~/.config/claude/claude_desktop_config.json`:
+
+```json
+{
+  "mcpServers": {
+    "perplexity": {
+      "command": "uv",
+      "args": [
+        "run",
+        "fastmcp",
+        "run",
+        "path/to/perplexity_webui_scraper/mcp/server.py"
+      ],
+      "env": {
+        "PERPLEXITY_SESSION_TOKEN": "your_token_here"
+      }
+    }
+  }
+}
+```
+
+### Available Tool
+
+| Tool | Description |
+| ---------------- | --------------------------------------------------------------------------- |
+| `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
+
+**Parameters:**
+
+| Parameter | Type | Default | Description |
+| -------------- | ----- | -------- | ------------------------------------------------------------- |
+| `query` | `str` | - | Question to ask (required) |
+| `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
+| `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
+
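For reference, a typical argument payload for the `perplexity_ask` tool, using only the parameter names and values listed in the table above (the exact shape of the request is up to the MCP client):

```python
# Hypothetical argument payload an MCP client might send to perplexity_ask.
arguments = {
    "query": "What are the most cited papers on battery chemistry this year?",
    "model": "research",        # one of the model identifiers from the table
    "source_focus": "academic",  # web, academic, social, finance, or all
}
```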
+## Disclaimer
+
+This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
+
+By using this library, you agree to Perplexity AI's Terms of Service.
perplexity_webui_scraper-0.3.5.dist-info/RECORD (new file)
@@ -0,0 +1,21 @@
+perplexity_webui_scraper/__init__.py,sha256=7hLTMTHMC-aCvbDRMXCUBfJ3vztsW9WtEZ08LdIYs9I,713
+perplexity_webui_scraper/cli/get_perplexity_session_token.py,sha256=67Ck4S2MJ0701LnRHq73qY5oRLCsFOyu_8SMgsbTNFc,6937
+perplexity_webui_scraper/config.py,sha256=05lkW9PlMmbj-oh-4xc3-iFXXGvKfKCy8yK-O1GWJdw,2055
+perplexity_webui_scraper/constants.py,sha256=Kq-4i6yyTZ5VhUvbiZmbUmHrjMQm-p7H82Emm7b10-c,1867
+perplexity_webui_scraper/core.py,sha256=dpcx8tTxESWz_iz7nSKN1XWRvBOV_cD9gK-E2gJYJ8w,20894
+perplexity_webui_scraper/enums.py,sha256=I-jMiQMzBW72PGJFBNZIc874wijBMynDF-pYQmS1OZc,2751
+perplexity_webui_scraper/exceptions.py,sha256=Q2dx7j1OrM9CB7ty8fRhheAt4-QhN7szUDXzoT6rx1E,3985
+perplexity_webui_scraper/http.py,sha256=1qxZ3cvkL-TXST2H1V7AUoA5_poZ9sDBm8OV3FEtWZU,19708
+perplexity_webui_scraper/limits.py,sha256=GwcwC8CnSNhlcLWGLpuDYA37gn8OXSfsXLIOc-QbxNs,465
+perplexity_webui_scraper/logging.py,sha256=5IyUiKjN88WCItKl6Yrbfn6rF6jz68rWjFTWQGdvTRo,7129
+perplexity_webui_scraper/mcp/__init__.py,sha256=Ke166qPFVZORf39lc6cHjKoBbbuJztAfU29vYpCwOrA,366
+perplexity_webui_scraper/mcp/__main__.py,sha256=N_cSeNjAzSJ861jspq60W0ZVjxWNXhM5O-FQ0aD1oPs,146
+perplexity_webui_scraper/mcp/server.py,sha256=dadzelHJO3Fkw85hdUiTO10jFaZB26D5e5jWuL3yVoA,5982
+perplexity_webui_scraper/models.py,sha256=QVeZI-WQzpyi9JnE15QIMJ7nsG0YjIjOsZEA6YfX0tw,2448
+perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+perplexity_webui_scraper/resilience.py,sha256=-_XYYCazsx5jxrc2HbuJBP16TLh02EEhy8WOukLmbFE,4662
+perplexity_webui_scraper/types.py,sha256=VlnzvNilIHrDXM2YOGjJa1y2VY0tfR-F0zaPjQHoPKs,1028
+perplexity_webui_scraper-0.3.5.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
+perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt,sha256=ODpXpDTkmoQ_o3Y3lsy22PLs-8ndapvMKYwxcz6A9gs,189
+perplexity_webui_scraper-0.3.5.dist-info/METADATA,sha256=NEkzx5B0HIj9gA9p72e6v0GOTG6cHoEWeSatgaoghhw,12175
+perplexity_webui_scraper-0.3.5.dist-info/RECORD,,
perplexity_webui_scraper-0.3.3.dist-info/METADATA (removed)
@@ -1,166 +0,0 @@
-Metadata-Version: 2.4
-Name: perplexity-webui-scraper
-Version: 0.3.3
-Summary: Python scraper to extract AI responses from Perplexity's web interface.
-Keywords: perplexity,ai,scraper,webui,api,client
-Author: henrique-coder
-Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
-License-Expression: MIT
-Classifier: Development Status :: 4 - Beta
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: 3.14
-Classifier: Topic :: Internet :: WWW/HTTP
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Typing :: Typed
-Requires-Dist: curl-cffi>=0.13.0
-Requires-Dist: orjson>=3.11.5
-Requires-Dist: pydantic>=2.12.5
-Requires-Python: >=3.10
-Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
-Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
-Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
-Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
-Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
-Description-Content-Type: text/markdown
-
-<div align="center">
-
-# Perplexity WebUI Scraper
-
-Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
-
-[](https://pypi.org/project/perplexity-webui-scraper)
-[](https://pypi.org/project/perplexity-webui-scraper)
-[](./LICENSE)
-
-</div>
-
----
-
-## Installation
-
-```bash
-uv pip install perplexity-webui-scraper
-```
-
-## Requirements
-
-- **Perplexity Pro subscription**
-- **Session token** (`__Secure-next-auth.session-token` cookie from browser)
-
-### Getting Your Session Token
-
-1. Log in at [perplexity.ai](https://www.perplexity.ai)
-2. Open DevTools (`F12`) → Application → Cookies
-3. Copy `__Secure-next-auth.session-token` value
-4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN=your_token`
-
-## Quick Start
-
-```python
-from perplexity_webui_scraper import Perplexity
-
-client = Perplexity(session_token="YOUR_TOKEN")
-conversation = client.create_conversation()
-
-conversation.ask("What is quantum computing?")
-print(conversation.answer)
-
-# Follow-up
-conversation.ask("Explain it simpler")
-print(conversation.answer)
-```
-
-### Streaming
-
-```python
-for chunk in conversation.ask("Explain AI", stream=True):
-    print(chunk.answer)
-```
-
-### With Options
-
-```python
-from perplexity_webui_scraper import (
-    ConversationConfig,
-    Coordinates,
-    Models,
-    SourceFocus,
-)
-
-config = ConversationConfig(
-    model=Models.RESEARCH,
-    source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
-    language="en-US",
-    coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
-)
-
-conversation = client.create_conversation(config)
-conversation.ask("Latest AI research", files=["paper.pdf"])
-```
-
-## API
-
-### `Perplexity(session_token, config?)`
-
-| Parameter | Type | Description |
-| --------------- | -------------- | ------------------ |
-| `session_token` | `str` | Browser cookie |
-| `config` | `ClientConfig` | Timeout, TLS, etc. |
-
-### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
-
-| Parameter | Type | Default | Description |
-| --------------- | -------------- | ------------- | ------------------- |
-| `query` | `str` | — | Question (required) |
-| `model` | `Model` | `Models.BEST` | AI model |
-| `files` | `list[str]` | `None` | File paths |
-| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
-| `stream` | `bool` | `False` | Enable streaming |
-
-### Models
-
-| Model | Description |
-| ------------------------------ | ----------------- |
-| `Models.BEST` | Auto-select best |
-| `Models.RESEARCH` | Deep research |
-| `Models.SONAR` | Fast queries |
-| `Models.GPT_51` | OpenAI GPT-5.1 |
-| `Models.CLAUDE_45_SONNET` | Claude 4.5 Sonnet |
-| `Models.GEMINI_3_PRO_THINKING` | Gemini 3.0 Pro |
-| `Models.GROK_41` | xAI Grok 4.1 |
-
-### CitationMode
-
-| Mode | Output |
-| ---------- | --------------------- |
-| `DEFAULT` | `text[1]` |
-| `MARKDOWN` | `text[1](url)` |
-| `CLEAN` | `text` (no citations) |
-
-### ConversationConfig
-
-| Parameter | Default | Description |
-| ----------------- | ------------- | ------------------ |
-| `model` | `Models.BEST` | Default model |
-| `citation_mode` | `CLEAN` | Citation format |
-| `save_to_library` | `False` | Save to library |
-| `search_focus` | `WEB` | Search type |
-| `source_focus` | `WEB` | Source types |
-| `time_range` | `ALL` | Time filter |
-| `language` | `"en-US"` | Response language |
-| `timezone` | `None` | Timezone |
-| `coordinates` | `None` | Location (lat/lng) |
-
-## Disclaimer
-
-This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
-
-By using this library, you agree to Perplexity AI's Terms of Service.