perplexity-webui-scraper 0.3.3__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper-0.3.5/PKG-INFO +304 -0
- perplexity_webui_scraper-0.3.5/README.md +268 -0
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/pyproject.toml +18 -4
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/__init__.py +4 -15
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/cli/get_perplexity_session_token.py +216 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/config.py +61 -0
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/constants.py +9 -35
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/core.py +225 -21
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/enums.py +34 -4
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/exceptions.py +124 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/http.py +528 -0
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/limits.py +2 -5
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/logging.py +256 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/__init__.py +18 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/__main__.py +9 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/mcp/server.py +181 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/models.py +73 -0
- perplexity_webui_scraper-0.3.5/src/perplexity_webui_scraper/resilience.py +179 -0
- perplexity_webui_scraper-0.3.3/PKG-INFO +0 -166
- perplexity_webui_scraper-0.3.3/README.md +0 -134
- perplexity_webui_scraper-0.3.3/src/perplexity_webui_scraper/config.py +0 -36
- perplexity_webui_scraper-0.3.3/src/perplexity_webui_scraper/exceptions.py +0 -50
- perplexity_webui_scraper-0.3.3/src/perplexity_webui_scraper/http.py +0 -194
- perplexity_webui_scraper-0.3.3/src/perplexity_webui_scraper/models.py +0 -58
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/py.typed +0 -0
- {perplexity_webui_scraper-0.3.3 → perplexity_webui_scraper-0.3.5}/src/perplexity_webui_scraper/types.py +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: perplexity-webui-scraper
|
|
3
|
+
Version: 0.3.5
|
|
4
|
+
Summary: Python scraper to extract AI responses from Perplexity's web interface.
|
|
5
|
+
Keywords: perplexity,ai,scraper,webui,api,client
|
|
6
|
+
Author: henrique-coder
|
|
7
|
+
Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Dist: curl-cffi>=0.14.0
|
|
23
|
+
Requires-Dist: loguru>=0.7.3
|
|
24
|
+
Requires-Dist: orjson>=3.11.5
|
|
25
|
+
Requires-Dist: pydantic>=2.12.5
|
|
26
|
+
Requires-Dist: tenacity>=9.1.2
|
|
27
|
+
Requires-Dist: fastmcp>=2.14.1 ; extra == 'mcp'
|
|
28
|
+
Requires-Python: >=3.10, <3.15
|
|
29
|
+
Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
|
|
30
|
+
Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
|
|
31
|
+
Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
|
|
32
|
+
Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
|
|
33
|
+
Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
|
|
34
|
+
Provides-Extra: mcp
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
<div align="center">
|
|
38
|
+
|
|
39
|
+
# Perplexity WebUI Scraper
|
|
40
|
+
|
|
41
|
+
Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
|
|
42
|
+
|
|
43
|
+
[PyPI version](https://pypi.org/project/perplexity-webui-scraper)
|
|
44
|
+
[Supported Python versions](https://pypi.org/project/perplexity-webui-scraper)
|
|
45
|
+
[License](./LICENSE)
|
|
46
|
+
|
|
47
|
+
</div>
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
uv pip install perplexity-webui-scraper # from PyPI (stable)
|
|
55
|
+
uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Requirements
|
|
59
|
+
|
|
60
|
+
- **Perplexity Pro/Max account**
|
|
61
|
+
- **Session token** (`__Secure-next-auth.session-token` cookie from your browser)
|
|
62
|
+
|
|
63
|
+
### Getting Your Session Token
|
|
64
|
+
|
|
65
|
+
You can obtain your session token in two ways:
|
|
66
|
+
|
|
67
|
+
#### Option 1: Automatic (CLI Tool)
|
|
68
|
+
|
|
69
|
+
The package includes a CLI tool to automatically generate and save your session token:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
get-perplexity-session-token
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
This interactive tool will:
|
|
76
|
+
|
|
77
|
+
1. Ask for your Perplexity email
|
|
78
|
+
2. Send a verification code to your email
|
|
79
|
+
3. Accept either a 6-digit code or a magic link
|
|
80
|
+
4. Extract and display your session token
|
|
81
|
+
5. Optionally save it to your `.env` file
|
|
82
|
+
|
|
83
|
+
**Features:**
|
|
84
|
+
|
|
85
|
+
- Secure ephemeral session (cleared on exit)
|
|
86
|
+
- Automatic `.env` file management
|
|
87
|
+
- Support for both OTP codes and magic links
|
|
88
|
+
- Clean terminal interface with status updates
|
|
89
|
+
|
|
90
|
+
#### Option 2: Manual (Browser)
|
|
91
|
+
|
|
92
|
+
If you prefer to extract the token manually:
|
|
93
|
+
|
|
94
|
+
1. Log in at [perplexity.ai](https://www.perplexity.ai)
|
|
95
|
+
2. Open DevTools (`F12`) → Application/Storage → Cookies
|
|
96
|
+
3. Copy the value of `__Secure-next-auth.session-token`
|
|
97
|
+
4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN="your_token"`
|
|
98
|
+
|
|
99
|
+
## Quick Start
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from perplexity_webui_scraper import Perplexity
|
|
103
|
+
|
|
104
|
+
client = Perplexity(session_token="YOUR_TOKEN")
|
|
105
|
+
conversation = client.create_conversation()
|
|
106
|
+
|
|
107
|
+
conversation.ask("What is quantum computing?")
|
|
108
|
+
print(conversation.answer)
|
|
109
|
+
|
|
110
|
+
# Follow-up
|
|
111
|
+
conversation.ask("Explain it simpler")
|
|
112
|
+
print(conversation.answer)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Streaming
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
for chunk in conversation.ask("Explain AI", stream=True):
|
|
119
|
+
print(chunk.answer)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### With Options
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from perplexity_webui_scraper import (
|
|
126
|
+
ConversationConfig,
|
|
127
|
+
Coordinates,
|
|
128
|
+
Models,
|
|
129
|
+
SourceFocus,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
config = ConversationConfig(
|
|
133
|
+
model=Models.RESEARCH,
|
|
134
|
+
source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
|
|
135
|
+
language="en-US",
|
|
136
|
+
coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
conversation = client.create_conversation(config)
|
|
140
|
+
conversation.ask("Latest AI research", files=["paper.pdf"])
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## API
|
|
144
|
+
|
|
145
|
+
### `Perplexity(session_token, config?)`
|
|
146
|
+
|
|
147
|
+
| Parameter | Type | Description |
|
|
148
|
+
| --------------- | -------------- | ------------------ |
|
|
149
|
+
| `session_token` | `str` | Browser cookie |
|
|
150
|
+
| `config` | `ClientConfig` | Timeout, TLS, etc. |
|
|
151
|
+
|
|
152
|
+
### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
|
|
153
|
+
|
|
154
|
+
| Parameter | Type | Default | Description |
|
|
155
|
+
| --------------- | ----------------------- | ------------- | ------------------- |
|
|
156
|
+
| `query` | `str` | - | Question (required) |
|
|
157
|
+
| `model` | `Model` | `Models.BEST` | AI model |
|
|
158
|
+
| `files` | `list[str \| PathLike]` | `None` | File paths |
|
|
159
|
+
| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
|
|
160
|
+
| `stream` | `bool` | `False` | Enable streaming |
|
|
161
|
+
|
|
162
|
+
### Models
|
|
163
|
+
|
|
164
|
+
| Model | Description |
|
|
165
|
+
| ---------------------------------- | ------------------------------------------------------------------------- |
|
|
166
|
+
| `Models.RESEARCH` | Research - Fast and thorough for routine research |
|
|
167
|
+
| `Models.LABS` | Labs - Multi-step tasks with advanced troubleshooting |
|
|
168
|
+
| `Models.BEST` | Best - Automatically selects the most responsive model based on the query |
|
|
169
|
+
| `Models.SONAR` | Sonar - Perplexity's fast model |
|
|
170
|
+
| `Models.GPT_52` | GPT-5.2 - OpenAI's latest model |
|
|
171
|
+
| `Models.GPT_52_THINKING` | GPT-5.2 Thinking - OpenAI's latest model with thinking |
|
|
172
|
+
| `Models.CLAUDE_45_OPUS` | Claude Opus 4.5 - Anthropic's Opus reasoning model |
|
|
173
|
+
| `Models.CLAUDE_45_OPUS_THINKING` | Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking |
|
|
174
|
+
| `Models.GEMINI_3_PRO` | Gemini 3 Pro - Google's newest reasoning model |
|
|
175
|
+
| `Models.GEMINI_3_FLASH` | Gemini 3 Flash - Google's fast reasoning model |
|
|
176
|
+
| `Models.GEMINI_3_FLASH_THINKING` | Gemini 3 Flash Thinking - Google's fast reasoning model with thinking |
|
|
177
|
+
| `Models.GROK_41` | Grok 4.1 - xAI's latest advanced model |
|
|
178
|
+
| `Models.GROK_41_THINKING` | Grok 4.1 Thinking - xAI's latest reasoning model |
|
|
179
|
+
| `Models.KIMI_K2_THINKING` | Kimi K2 Thinking - Moonshot AI's latest reasoning model |
|
|
180
|
+
| `Models.CLAUDE_45_SONNET` | Claude Sonnet 4.5 - Anthropic's newest advanced model |
|
|
181
|
+
| `Models.CLAUDE_45_SONNET_THINKING` | Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model |
|
|
182
|
+
|
|
183
|
+
### CitationMode
|
|
184
|
+
|
|
185
|
+
| Mode | Output |
|
|
186
|
+
| ---------- | --------------------- |
|
|
187
|
+
| `DEFAULT` | `text[1]` |
|
|
188
|
+
| `MARKDOWN` | `text[1](url)` |
|
|
189
|
+
| `CLEAN` | `text` (no citations) |
|
|
190
|
+
|
|
191
|
+
### ConversationConfig
|
|
192
|
+
|
|
193
|
+
| Parameter | Default | Description |
|
|
194
|
+
| ----------------- | ------------- | ------------------ |
|
|
195
|
+
| `model` | `Models.BEST` | Default model |
|
|
196
|
+
| `citation_mode` | `CLEAN` | Citation format |
|
|
197
|
+
| `save_to_library` | `False` | Save to library |
|
|
198
|
+
| `search_focus` | `WEB` | Search type |
|
|
199
|
+
| `source_focus` | `WEB` | Source types |
|
|
200
|
+
| `time_range` | `ALL` | Time filter |
|
|
201
|
+
| `language` | `"en-US"` | Response language |
|
|
202
|
+
| `timezone` | `None` | Timezone |
|
|
203
|
+
| `coordinates` | `None` | Location (lat/lng) |
|
|
204
|
+
|
|
205
|
+
## Exceptions
|
|
206
|
+
|
|
207
|
+
The library provides specific exception types for better error handling:
|
|
208
|
+
|
|
209
|
+
| Exception | Description |
|
|
210
|
+
| ---------------------------------- | ------------------------------------------------------------ |
|
|
211
|
+
| `PerplexityError` | Base exception for all library errors |
|
|
212
|
+
| `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
|
|
213
|
+
| `RateLimitError` | Rate limit exceeded (HTTP 429) |
|
|
214
|
+
| `FileUploadError` | File upload failed |
|
|
215
|
+
| `FileValidationError` | File validation failed (size, type, etc.) |
|
|
216
|
+
| `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
|
|
217
|
+
| `ResponseParsingError` | API response could not be parsed |
|
|
218
|
+
| `StreamingError` | Error during streaming response |
|
|
219
|
+
|
|
220
|
+
### Handling Research Mode Clarifying Questions
|
|
221
|
+
|
|
222
|
+
When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
from perplexity_webui_scraper import Models, Perplexity
|
|
226
|
+
from perplexity_webui_scraper.exceptions import (
|
|
227
|
+
ResearchClarifyingQuestionsError,
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
try:
|
|
231
|
+
conversation.ask("Research this topic", model=Models.RESEARCH)
|
|
232
|
+
except ResearchClarifyingQuestionsError as error:
|
|
233
|
+
print("The AI needs clarification:")
|
|
234
|
+
for question in error.questions:
|
|
235
|
+
print(f" - {question}")
|
|
236
|
+
# Consider rephrasing your query to be more specific
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## MCP Server (Model Context Protocol)
|
|
240
|
+
|
|
241
|
+
The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
|
|
242
|
+
|
|
243
|
+
### Installation
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
uv pip install "perplexity-webui-scraper[mcp]"
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Running the Server
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
# Set your session token
|
|
253
|
+
export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
|
|
254
|
+
$env:PERPLEXITY_SESSION_TOKEN = "your_token_here" # For Windows (PowerShell)
|
|
255
|
+
|
|
256
|
+
# Run with FastMCP
|
|
257
|
+
uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
|
|
258
|
+
|
|
259
|
+
# Or test with the dev inspector
|
|
260
|
+
uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Claude Desktop Configuration
|
|
264
|
+
|
|
265
|
+
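Add to your Claude Desktop config file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows, or `~/.config/claude/claude_desktop_config.json` on Linux):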
|
|
266
|
+
|
|
267
|
+
```json
|
|
268
|
+
{
|
|
269
|
+
"mcpServers": {
|
|
270
|
+
"perplexity": {
|
|
271
|
+
"command": "uv",
|
|
272
|
+
"args": [
|
|
273
|
+
"run",
|
|
274
|
+
"fastmcp",
|
|
275
|
+
"run",
|
|
276
|
+
"path/to/perplexity_webui_scraper/mcp/server.py"
|
|
277
|
+
],
|
|
278
|
+
"env": {
|
|
279
|
+
"PERPLEXITY_SESSION_TOKEN": "your_token_here"
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Available Tool
|
|
287
|
+
|
|
288
|
+
| Tool | Description |
|
|
289
|
+
| ---------------- | --------------------------------------------------------------------------- |
|
|
290
|
+
| `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
|
|
291
|
+
|
|
292
|
+
**Parameters:**
|
|
293
|
+
|
|
294
|
+
| Parameter | Type | Default | Description |
|
|
295
|
+
| -------------- | ----- | -------- | ------------------------------------------------------------- |
|
|
296
|
+
| `query` | `str` | - | Question to ask (required) |
|
|
297
|
+
| `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
|
|
298
|
+
| `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
|
|
299
|
+
|
|
300
|
+
## Disclaimer
|
|
301
|
+
|
|
302
|
+
This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
|
|
303
|
+
|
|
304
|
+
By using this library, you agree to Perplexity AI's Terms of Service.
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# Perplexity WebUI Scraper
|
|
4
|
+
|
|
5
|
+
Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
|
|
6
|
+
|
|
7
|
+
[PyPI version](https://pypi.org/project/perplexity-webui-scraper)
|
|
8
|
+
[Supported Python versions](https://pypi.org/project/perplexity-webui-scraper)
|
|
9
|
+
[License](./LICENSE)
|
|
10
|
+
|
|
11
|
+
</div>
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
uv pip install perplexity-webui-scraper # from PyPI (stable)
|
|
19
|
+
uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev # from GitHub (development)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Requirements
|
|
23
|
+
|
|
24
|
+
- **Perplexity Pro/Max account**
|
|
25
|
+
- **Session token** (`__Secure-next-auth.session-token` cookie from your browser)
|
|
26
|
+
|
|
27
|
+
### Getting Your Session Token
|
|
28
|
+
|
|
29
|
+
You can obtain your session token in two ways:
|
|
30
|
+
|
|
31
|
+
#### Option 1: Automatic (CLI Tool)
|
|
32
|
+
|
|
33
|
+
The package includes a CLI tool to automatically generate and save your session token:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
get-perplexity-session-token
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This interactive tool will:
|
|
40
|
+
|
|
41
|
+
1. Ask for your Perplexity email
|
|
42
|
+
2. Send a verification code to your email
|
|
43
|
+
3. Accept either a 6-digit code or a magic link
|
|
44
|
+
4. Extract and display your session token
|
|
45
|
+
5. Optionally save it to your `.env` file
|
|
46
|
+
|
|
47
|
+
**Features:**
|
|
48
|
+
|
|
49
|
+
- Secure ephemeral session (cleared on exit)
|
|
50
|
+
- Automatic `.env` file management
|
|
51
|
+
- Support for both OTP codes and magic links
|
|
52
|
+
- Clean terminal interface with status updates
|
|
53
|
+
|
|
54
|
+
#### Option 2: Manual (Browser)
|
|
55
|
+
|
|
56
|
+
If you prefer to extract the token manually:
|
|
57
|
+
|
|
58
|
+
1. Log in at [perplexity.ai](https://www.perplexity.ai)
|
|
59
|
+
2. Open DevTools (`F12`) → Application/Storage → Cookies
|
|
60
|
+
3. Copy the value of `__Secure-next-auth.session-token`
|
|
61
|
+
4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN="your_token"`
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from perplexity_webui_scraper import Perplexity
|
|
67
|
+
|
|
68
|
+
client = Perplexity(session_token="YOUR_TOKEN")
|
|
69
|
+
conversation = client.create_conversation()
|
|
70
|
+
|
|
71
|
+
conversation.ask("What is quantum computing?")
|
|
72
|
+
print(conversation.answer)
|
|
73
|
+
|
|
74
|
+
# Follow-up
|
|
75
|
+
conversation.ask("Explain it simpler")
|
|
76
|
+
print(conversation.answer)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Streaming
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
for chunk in conversation.ask("Explain AI", stream=True):
|
|
83
|
+
print(chunk.answer)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### With Options
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from perplexity_webui_scraper import (
|
|
90
|
+
ConversationConfig,
|
|
91
|
+
Coordinates,
|
|
92
|
+
Models,
|
|
93
|
+
SourceFocus,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
config = ConversationConfig(
|
|
97
|
+
model=Models.RESEARCH,
|
|
98
|
+
source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
|
|
99
|
+
language="en-US",
|
|
100
|
+
coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
conversation = client.create_conversation(config)
|
|
104
|
+
conversation.ask("Latest AI research", files=["paper.pdf"])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## API
|
|
108
|
+
|
|
109
|
+
### `Perplexity(session_token, config?)`
|
|
110
|
+
|
|
111
|
+
| Parameter | Type | Description |
|
|
112
|
+
| --------------- | -------------- | ------------------ |
|
|
113
|
+
| `session_token` | `str` | Browser cookie |
|
|
114
|
+
| `config` | `ClientConfig` | Timeout, TLS, etc. |
|
|
115
|
+
|
|
116
|
+
### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
|
|
117
|
+
|
|
118
|
+
| Parameter | Type | Default | Description |
|
|
119
|
+
| --------------- | ----------------------- | ------------- | ------------------- |
|
|
120
|
+
| `query` | `str` | - | Question (required) |
|
|
121
|
+
| `model` | `Model` | `Models.BEST` | AI model |
|
|
122
|
+
| `files` | `list[str \| PathLike]` | `None` | File paths |
|
|
123
|
+
| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
|
|
124
|
+
| `stream` | `bool` | `False` | Enable streaming |
|
|
125
|
+
|
|
126
|
+
### Models
|
|
127
|
+
|
|
128
|
+
| Model | Description |
|
|
129
|
+
| ---------------------------------- | ------------------------------------------------------------------------- |
|
|
130
|
+
| `Models.RESEARCH` | Research - Fast and thorough for routine research |
|
|
131
|
+
| `Models.LABS` | Labs - Multi-step tasks with advanced troubleshooting |
|
|
132
|
+
| `Models.BEST` | Best - Automatically selects the most responsive model based on the query |
|
|
133
|
+
| `Models.SONAR` | Sonar - Perplexity's fast model |
|
|
134
|
+
| `Models.GPT_52` | GPT-5.2 - OpenAI's latest model |
|
|
135
|
+
| `Models.GPT_52_THINKING` | GPT-5.2 Thinking - OpenAI's latest model with thinking |
|
|
136
|
+
| `Models.CLAUDE_45_OPUS` | Claude Opus 4.5 - Anthropic's Opus reasoning model |
|
|
137
|
+
| `Models.CLAUDE_45_OPUS_THINKING` | Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking |
|
|
138
|
+
| `Models.GEMINI_3_PRO` | Gemini 3 Pro - Google's newest reasoning model |
|
|
139
|
+
| `Models.GEMINI_3_FLASH` | Gemini 3 Flash - Google's fast reasoning model |
|
|
140
|
+
| `Models.GEMINI_3_FLASH_THINKING` | Gemini 3 Flash Thinking - Google's fast reasoning model with thinking |
|
|
141
|
+
| `Models.GROK_41` | Grok 4.1 - xAI's latest advanced model |
|
|
142
|
+
| `Models.GROK_41_THINKING` | Grok 4.1 Thinking - xAI's latest reasoning model |
|
|
143
|
+
| `Models.KIMI_K2_THINKING` | Kimi K2 Thinking - Moonshot AI's latest reasoning model |
|
|
144
|
+
| `Models.CLAUDE_45_SONNET` | Claude Sonnet 4.5 - Anthropic's newest advanced model |
|
|
145
|
+
| `Models.CLAUDE_45_SONNET_THINKING` | Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model |
|
|
146
|
+
|
|
147
|
+
### CitationMode
|
|
148
|
+
|
|
149
|
+
| Mode | Output |
|
|
150
|
+
| ---------- | --------------------- |
|
|
151
|
+
| `DEFAULT` | `text[1]` |
|
|
152
|
+
| `MARKDOWN` | `text[1](url)` |
|
|
153
|
+
| `CLEAN` | `text` (no citations) |
|
|
154
|
+
|
|
155
|
+
### ConversationConfig
|
|
156
|
+
|
|
157
|
+
| Parameter | Default | Description |
|
|
158
|
+
| ----------------- | ------------- | ------------------ |
|
|
159
|
+
| `model` | `Models.BEST` | Default model |
|
|
160
|
+
| `citation_mode` | `CLEAN` | Citation format |
|
|
161
|
+
| `save_to_library` | `False` | Save to library |
|
|
162
|
+
| `search_focus` | `WEB` | Search type |
|
|
163
|
+
| `source_focus` | `WEB` | Source types |
|
|
164
|
+
| `time_range` | `ALL` | Time filter |
|
|
165
|
+
| `language` | `"en-US"` | Response language |
|
|
166
|
+
| `timezone` | `None` | Timezone |
|
|
167
|
+
| `coordinates` | `None` | Location (lat/lng) |
|
|
168
|
+
|
|
169
|
+
## Exceptions
|
|
170
|
+
|
|
171
|
+
The library provides specific exception types for better error handling:
|
|
172
|
+
|
|
173
|
+
| Exception | Description |
|
|
174
|
+
| ---------------------------------- | ------------------------------------------------------------ |
|
|
175
|
+
| `PerplexityError` | Base exception for all library errors |
|
|
176
|
+
| `AuthenticationError` | Session token is invalid or expired (HTTP 403) |
|
|
177
|
+
| `RateLimitError` | Rate limit exceeded (HTTP 429) |
|
|
178
|
+
| `FileUploadError` | File upload failed |
|
|
179
|
+
| `FileValidationError` | File validation failed (size, type, etc.) |
|
|
180
|
+
| `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported) |
|
|
181
|
+
| `ResponseParsingError` | API response could not be parsed |
|
|
182
|
+
| `StreamingError` | Error during streaming response |
|
|
183
|
+
|
|
184
|
+
### Handling Research Mode Clarifying Questions
|
|
185
|
+
|
|
186
|
+
When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from perplexity_webui_scraper import Models, Perplexity
|
|
190
|
+
from perplexity_webui_scraper.exceptions import (
|
|
191
|
+
ResearchClarifyingQuestionsError,
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
try:
|
|
195
|
+
conversation.ask("Research this topic", model=Models.RESEARCH)
|
|
196
|
+
except ResearchClarifyingQuestionsError as error:
|
|
197
|
+
print("The AI needs clarification:")
|
|
198
|
+
for question in error.questions:
|
|
199
|
+
print(f" - {question}")
|
|
200
|
+
# Consider rephrasing your query to be more specific
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## MCP Server (Model Context Protocol)
|
|
204
|
+
|
|
205
|
+
The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
|
|
206
|
+
|
|
207
|
+
### Installation
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
uv pip install "perplexity-webui-scraper[mcp]"
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Running the Server
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# Set your session token
|
|
217
|
+
export PERPLEXITY_SESSION_TOKEN="your_token_here" # For Linux/Mac
|
|
218
|
+
$env:PERPLEXITY_SESSION_TOKEN = "your_token_here" # For Windows (PowerShell)
|
|
219
|
+
|
|
220
|
+
# Run with FastMCP
|
|
221
|
+
uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
|
|
222
|
+
|
|
223
|
+
# Or test with the dev inspector
|
|
224
|
+
uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Claude Desktop Configuration
|
|
228
|
+
|
|
229
|
+
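Add to your Claude Desktop config file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows, or `~/.config/claude/claude_desktop_config.json` on Linux):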
|
|
230
|
+
|
|
231
|
+
```json
|
|
232
|
+
{
|
|
233
|
+
"mcpServers": {
|
|
234
|
+
"perplexity": {
|
|
235
|
+
"command": "uv",
|
|
236
|
+
"args": [
|
|
237
|
+
"run",
|
|
238
|
+
"fastmcp",
|
|
239
|
+
"run",
|
|
240
|
+
"path/to/perplexity_webui_scraper/mcp/server.py"
|
|
241
|
+
],
|
|
242
|
+
"env": {
|
|
243
|
+
"PERPLEXITY_SESSION_TOKEN": "your_token_here"
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Available Tool
|
|
251
|
+
|
|
252
|
+
| Tool | Description |
|
|
253
|
+
| ---------------- | --------------------------------------------------------------------------- |
|
|
254
|
+
| `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web |
|
|
255
|
+
|
|
256
|
+
**Parameters:**
|
|
257
|
+
|
|
258
|
+
| Parameter | Type | Default | Description |
|
|
259
|
+
| -------------- | ----- | -------- | ------------------------------------------------------------- |
|
|
260
|
+
| `query` | `str` | - | Question to ask (required) |
|
|
261
|
+
| `model` | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.) |
|
|
262
|
+
| `source_focus` | `str` | `"web"` | Source type (`web`, `academic`, `social`, `finance`, `all`) |
|
|
263
|
+
|
|
264
|
+
## Disclaimer
|
|
265
|
+
|
|
266
|
+
This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
|
|
267
|
+
|
|
268
|
+
By using this library, you agree to Perplexity AI's Terms of Service.
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "perplexity-webui-scraper"
|
|
3
|
-
version = "0.3.3"
|
|
3
|
+
version = "0.3.5"
|
|
4
4
|
description = "Python scraper to extract AI responses from Perplexity's web interface."
|
|
5
5
|
authors = [{ name = "henrique-coder", email = "henriquemoreira10fk@gmail.com" }]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
|
8
|
-
requires-python = ">=3.10"
|
|
8
|
+
requires-python = ">=3.10,<3.15"
|
|
9
9
|
keywords = ["perplexity", "ai", "scraper", "webui", "api", "client"]
|
|
10
10
|
classifiers = [
|
|
11
11
|
"Development Status :: 4 - Beta",
|
|
@@ -23,23 +23,33 @@ classifiers = [
|
|
|
23
23
|
"Typing :: Typed",
|
|
24
24
|
]
|
|
25
25
|
dependencies = [
|
|
26
|
-
"curl-cffi>=0.
|
|
26
|
+
"curl-cffi>=0.14.0",
|
|
27
|
+
"loguru>=0.7.3",
|
|
27
28
|
"orjson>=3.11.5",
|
|
28
29
|
"pydantic>=2.12.5",
|
|
30
|
+
"tenacity>=9.1.2",
|
|
29
31
|
]
|
|
30
32
|
|
|
31
33
|
[dependency-groups]
|
|
32
34
|
dev = [
|
|
35
|
+
"beautifulsoup4>=4.14.3",
|
|
36
|
+
"jsbeautifier>=1.15.4",
|
|
37
|
+
"lxml>=6.0.2",
|
|
33
38
|
"python-dotenv>=1.2.1",
|
|
34
39
|
"rich>=14.2.0",
|
|
35
40
|
]
|
|
36
41
|
lint = [
|
|
37
|
-
"ruff>=0.14.
|
|
42
|
+
"ruff>=0.14.10",
|
|
38
43
|
]
|
|
39
44
|
tests = [
|
|
40
45
|
"pytest>=9.0.2",
|
|
41
46
|
]
|
|
42
47
|
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
mcp = [
|
|
50
|
+
"fastmcp>=2.14.1",
|
|
51
|
+
]
|
|
52
|
+
|
|
43
53
|
[project.urls]
|
|
44
54
|
Homepage = "https://github.com/henrique-coder/perplexity-webui-scraper"
|
|
45
55
|
Documentation = "https://github.com/henrique-coder/perplexity-webui-scraper#readme"
|
|
@@ -98,6 +108,10 @@ line-ending = "lf" # Unix-style line endings
|
|
|
98
108
|
docstring-code-format = true # Format code examples inside docstrings
|
|
99
109
|
skip-magic-trailing-comma = false # Preserve trailing commas as formatting hints
|
|
100
110
|
|
|
111
|
+
[project.scripts]
|
|
112
|
+
get-perplexity-session-token = "perplexity_webui_scraper.cli.get_perplexity_session_token:get_token"
|
|
113
|
+
perplexity-webui-scraper-mcp = "perplexity_webui_scraper.mcp:run_server"
|
|
114
|
+
|
|
101
115
|
[build-system]
|
|
102
116
|
requires = ["uv_build"]
|
|
103
117
|
build-backend = "uv_build"
|
|
@@ -1,36 +1,25 @@
|
|
|
1
1
|
"""Extract AI responses from Perplexity's web interface."""
|
|
2
2
|
|
|
3
|
-
from importlib.metadata import version
|
|
3
|
+
from importlib import metadata
|
|
4
4
|
|
|
5
5
|
from .config import ClientConfig, ConversationConfig
|
|
6
6
|
from .core import Conversation, Perplexity
|
|
7
|
-
from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
|
|
8
|
-
from .exceptions import (
|
|
9
|
-
AuthenticationError,
|
|
10
|
-
FileUploadError,
|
|
11
|
-
FileValidationError,
|
|
12
|
-
PerplexityError,
|
|
13
|
-
RateLimitError,
|
|
14
|
-
)
|
|
7
|
+
from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
|
|
15
8
|
from .models import Model, Models
|
|
16
9
|
from .types import Coordinates, Response, SearchResultItem
|
|
17
10
|
|
|
18
11
|
|
|
19
|
-
__version__: str = version("perplexity-webui-scraper")
|
|
12
|
+
__version__: str = metadata.version("perplexity-webui-scraper")
|
|
20
13
|
__all__: list[str] = [
|
|
21
|
-
"AuthenticationError",
|
|
22
14
|
"CitationMode",
|
|
23
15
|
"ClientConfig",
|
|
24
16
|
"Conversation",
|
|
25
17
|
"ConversationConfig",
|
|
26
18
|
"Coordinates",
|
|
27
|
-
"FileUploadError",
|
|
28
|
-
"FileValidationError",
|
|
19
|
+
"LogLevel",
|
|
29
20
|
"Model",
|
|
30
21
|
"Models",
|
|
31
22
|
"Perplexity",
|
|
32
|
-
"PerplexityError",
|
|
33
|
-
"RateLimitError",
|
|
34
23
|
"Response",
|
|
35
24
|
"SearchFocus",
|
|
36
25
|
"SearchResultItem",
|