perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
perplexity_webui_scraper/resilience.py (added)
@@ -0,0 +1,179 @@
+ """
+ Resilience utilities for HTTP requests.
+
+ Provides retry mechanisms, rate limiting, and Cloudflare bypass utilities
+ using the tenacity library for robust retry handling.
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Callable
+ from dataclasses import dataclass, field
+ import random
+ from threading import Lock
+ import time
+ from typing import TYPE_CHECKING, Any, TypeVar
+
+ from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
+
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+ T = TypeVar("T")
+
+
+ # Browser profiles supported by curl_cffi for fingerprint rotation
+ BROWSER_PROFILES: tuple[str, ...] = (
+     "chrome",
+     "chrome110",
+     "chrome116",
+     "chrome119",
+     "chrome120",
+     "chrome123",
+     "chrome124",
+     "chrome131",
+     "edge99",
+     "edge101",
+     "safari15_3",
+     "safari15_5",
+     "safari17_0",
+     "safari17_2_ios",
+ )
+
+ # Cloudflare challenge detection markers
+ CLOUDFLARE_MARKERS: tuple[str, ...] = (
+     "cf-ray",
+     "cf-mitigated",
+     "__cf_chl_",
+     "Checking your browser",
+     "Just a moment...",
+     "cloudflare",
+     "Enable JavaScript and cookies to continue",
+     "challenge-platform",
+ )
+
+
+ @dataclass(slots=True)
+ class RetryConfig:
+     """Configuration for retry behavior.
+
+     Attributes:
+         max_retries: Maximum number of retry attempts.
+         base_delay: Initial delay in seconds before first retry.
+         max_delay: Maximum delay between retries.
+         jitter: Random jitter factor to add to delays (0-1).
+     """
+
+     max_retries: int = 3
+     base_delay: float = 1.0
+     max_delay: float = 60.0
+     jitter: float = 0.5
+
+
+ @dataclass
+ class RateLimiter:
+     """Token bucket rate limiter for throttling requests.
+
+     Attributes:
+         requests_per_second: Maximum requests allowed per second.
+     """
+
+     requests_per_second: float = 0.5
+     _last_request: float = field(default=0.0, init=False)
+     _lock: Lock = field(default_factory=Lock, init=False)
+
+     def acquire(self) -> None:
+         """Wait until a request can be made within rate limits."""
+
+         with self._lock:
+             now = time.monotonic()
+             min_interval = 1.0 / self.requests_per_second
+
+             if self._last_request > 0:
+                 elapsed = now - self._last_request
+                 wait_time = min_interval - elapsed
+
+                 if wait_time > 0:
+                     time.sleep(wait_time)
+
+             self._last_request = time.monotonic()
+
+
+ def get_random_browser_profile() -> str:
+     """Get a random browser profile for fingerprint rotation.
+
+     Returns:
+         A browser profile identifier compatible with curl_cffi.
+     """
+
+     return random.choice(BROWSER_PROFILES)
+
+
+ def is_cloudflare_challenge(response_text: str, headers: dict[str, Any] | None = None) -> bool:
+     """Detect if a response is a Cloudflare challenge page.
+
+     Args:
+         response_text: The response body text.
+         headers: Optional response headers.
+
+     Returns:
+         True if Cloudflare challenge markers are detected.
+     """
+
+     text_lower = response_text.lower()
+
+     for marker in CLOUDFLARE_MARKERS:
+         if marker.lower() in text_lower:
+             return True
+
+     if headers:
+         for key in headers:
+             key_lower = key.lower()
+
+             if "cf-" in key_lower or "cloudflare" in key_lower:
+                 return True
+
+     return False
+
+
+ def is_cloudflare_status(status_code: int) -> bool:
+     """Check if status code indicates a potential Cloudflare block.
+
+     Args:
+         status_code: HTTP status code.
+
+     Returns:
+         True if status code is commonly used by Cloudflare challenges.
+     """
+
+     return status_code in (403, 503, 520, 521, 522, 523, 524, 525, 526)
+
+
+ def create_retry_decorator(
+     config: RetryConfig,
+     retryable_exceptions: tuple[type[Exception], ...],
+     on_retry: Callable[[RetryCallState], None] | None = None,
+ ) -> Callable[[Callable[..., T]], Callable[..., T]]:
+     """Create a tenacity retry decorator with the given configuration.
+
+     Args:
+         config: Retry configuration.
+         retryable_exceptions: Tuple of exception types to retry on.
+         on_retry: Optional callback to execute on each retry.
+
+     Returns:
+         A retry decorator configured with the given settings.
+     """
+
+     return retry(
+         stop=stop_after_attempt(config.max_retries + 1),
+         wait=wait_exponential_jitter(
+             initial=config.base_delay,
+             max=config.max_delay,
+             jitter=config.max_delay * config.jitter,
+         ),
+         retry=retry_if_exception_type(retryable_exceptions),
+         before_sleep=on_retry,
+         reraise=True,
+     )
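
The module above covers throttling, retry policy, browser-fingerprint rotation, and Cloudflare detection for the scraper's HTTP layer. A minimal usage sketch, not part of the package, where `http_get` is a hypothetical placeholder for whatever HTTP call the client actually makes:

```python
from perplexity_webui_scraper.resilience import (
    RateLimiter,
    RetryConfig,
    create_retry_decorator,
    get_random_browser_profile,
    is_cloudflare_challenge,
    is_cloudflare_status,
)

limiter = RateLimiter(requests_per_second=0.5)  # at most one request every 2 seconds
retry_policy = create_retry_decorator(
    config=RetryConfig(max_retries=3, base_delay=1.0, max_delay=30.0),
    retryable_exceptions=(ConnectionError, TimeoutError),
    on_retry=lambda state: print(f"retrying (attempt {state.attempt_number})"),
)


@retry_policy
def fetch_page(url: str) -> str:
    limiter.acquire()  # block until the rate limit allows another request
    profile = get_random_browser_profile()  # e.g. "chrome131", used for fingerprint rotation
    response = http_get(url, impersonate=profile)  # hypothetical curl_cffi-style call
    if is_cloudflare_status(response.status_code) or is_cloudflare_challenge(
        response.text, dict(response.headers)
    ):
        raise ConnectionError("Cloudflare challenge detected")  # triggers a retry
    return response.text
```
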
perplexity_webui_scraper-0.3.5.dist-info/METADATA (added)
@@ -0,0 +1,304 @@
+ Metadata-Version: 2.4
+ Name: perplexity-webui-scraper
+ Version: 0.3.5
+ Summary: Python scraper to extract AI responses from Perplexity's web interface.
+ Keywords: perplexity,ai,scraper,webui,api,client
+ Author: henrique-coder
+ Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
+ License-Expression: MIT
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Typing :: Typed
+ Requires-Dist: curl-cffi>=0.14.0
+ Requires-Dist: loguru>=0.7.3
+ Requires-Dist: orjson>=3.11.5
+ Requires-Dist: pydantic>=2.12.5
+ Requires-Dist: tenacity>=9.1.2
+ Requires-Dist: fastmcp>=2.14.1 ; extra == 'mcp'
+ Requires-Python: >=3.10, <3.15
+ Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
+ Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
+ Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
+ Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
+ Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
+ Provides-Extra: mcp
+ Description-Content-Type: text/markdown
+
+ <div align="center">
+
+ # Perplexity WebUI Scraper
+
+ Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
+
+ [![PyPI](https://img.shields.io/pypi/v/perplexity-webui-scraper?color=blue)](https://pypi.org/project/perplexity-webui-scraper)
+ [![Python](https://img.shields.io/pypi/pyversions/perplexity-webui-scraper)](https://pypi.org/project/perplexity-webui-scraper)
+ [![License](https://img.shields.io/github/license/henrique-coder/perplexity-webui-scraper?color=green)](./LICENSE)
+
+ </div>
+
+ ---
+
+ ## Installation
+
+ ```bash
+ uv pip install perplexity-webui-scraper  # from PyPI (stable)
+ uv pip install git+https://github.com/henrique-coder/perplexity-webui-scraper.git@dev  # from GitHub (development)
+ ```
+
+ ## Requirements
+
+ - **Perplexity Pro/Max account**
+ - **Session token** (`__Secure-next-auth.session-token` cookie from your browser)
+
+ ### Getting Your Session Token
+
+ You can obtain your session token in two ways:
+
+ #### Option 1: Automatic (CLI Tool)
+
+ The package includes a CLI tool to automatically generate and save your session token:
+
+ ```bash
+ get-perplexity-session-token
+ ```
+
+ This interactive tool will:
+
+ 1. Ask for your Perplexity email
+ 2. Send a verification code to your email
+ 3. Accept either a 6-digit code or magic link
+ 4. Extract and display your session token
+ 5. Optionally save it to your `.env` file
+
+ **Features:**
+
+ - Secure ephemeral session (cleared on exit)
+ - Automatic `.env` file management
+ - Support for both OTP codes and magic links
+ - Clean terminal interface with status updates
+
+ #### Option 2: Manual (Browser)
+
+ If you prefer to extract the token manually:
+
+ 1. Log in at [perplexity.ai](https://www.perplexity.ai)
+ 2. Open DevTools (`F12`) → Application/Storage → Cookies
+ 3. Copy the value of `__Secure-next-auth.session-token`
+ 4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN="your_token"`
+
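
Once the token is stored, it can be read back when constructing the client. A minimal sketch, assuming the variable name shown above and that `python-dotenv` is installed (it is not a declared dependency of this package):

```python
import os

from dotenv import load_dotenv  # python-dotenv, installed separately
from perplexity_webui_scraper import Perplexity

load_dotenv()  # reads .env from the current working directory
client = Perplexity(session_token=os.environ["PERPLEXITY_SESSION_TOKEN"])
```
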
+ ## Quick Start
+
+ ```python
+ from perplexity_webui_scraper import Perplexity
+
+ client = Perplexity(session_token="YOUR_TOKEN")
+ conversation = client.create_conversation()
+
+ conversation.ask("What is quantum computing?")
+ print(conversation.answer)
+
+ # Follow-up
+ conversation.ask("Explain it simpler")
+ print(conversation.answer)
+ ```
+
+ ### Streaming
+
+ ```python
+ for chunk in conversation.ask("Explain AI", stream=True):
+     print(chunk.answer)
+ ```
+
+ ### With Options
+
+ ```python
+ from perplexity_webui_scraper import (
+     ConversationConfig,
+     Coordinates,
+     Models,
+     SourceFocus,
+ )
+
+ config = ConversationConfig(
+     model=Models.RESEARCH,
+     source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
+     language="en-US",
+     coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
+ )
+
+ conversation = client.create_conversation(config)
+ conversation.ask("Latest AI research", files=["paper.pdf"])
+ ```
+
+ ## API
+
+ ### `Perplexity(session_token, config?)`
+
+ | Parameter       | Type           | Description        |
+ | --------------- | -------------- | ------------------ |
+ | `session_token` | `str`          | Browser cookie     |
+ | `config`        | `ClientConfig` | Timeout, TLS, etc. |
+
+ ### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
+
+ | Parameter       | Type                    | Default       | Description         |
+ | --------------- | ----------------------- | ------------- | ------------------- |
+ | `query`         | `str`                   | -             | Question (required) |
+ | `model`         | `Model`                 | `Models.BEST` | AI model            |
+ | `files`         | `list[str \| PathLike]` | `None`        | File paths          |
+ | `citation_mode` | `CitationMode`          | `CLEAN`       | Citation format     |
+ | `stream`        | `bool`                  | `False`       | Enable streaming    |
+
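
The optional parameters can be combined in a single call. A short sketch reusing the `conversation` object from the Quick Start, with a model name taken from the table below:

```python
from perplexity_webui_scraper import Models

# Pick an explicit model, attach a file, and stream the answer as it arrives
for chunk in conversation.ask(
    "Summarize the attached paper",
    model=Models.CLAUDE_45_SONNET,
    files=["paper.pdf"],
    stream=True,
):
    print(chunk.answer)
```
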
+ ### Models
+
+ | Model                              | Description                                                                |
+ | ---------------------------------- | -------------------------------------------------------------------------- |
+ | `Models.RESEARCH`                  | Research - Fast and thorough for routine research                          |
+ | `Models.LABS`                      | Labs - Multi-step tasks with advanced troubleshooting                      |
+ | `Models.BEST`                      | Best - Automatically selects the most responsive model based on the query  |
+ | `Models.SONAR`                     | Sonar - Perplexity's fast model                                            |
+ | `Models.GPT_52`                    | GPT-5.2 - OpenAI's latest model                                            |
+ | `Models.GPT_52_THINKING`           | GPT-5.2 Thinking - OpenAI's latest model with thinking                     |
+ | `Models.CLAUDE_45_OPUS`            | Claude Opus 4.5 - Anthropic's Opus reasoning model                         |
+ | `Models.CLAUDE_45_OPUS_THINKING`   | Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking  |
+ | `Models.GEMINI_3_PRO`              | Gemini 3 Pro - Google's newest reasoning model                             |
+ | `Models.GEMINI_3_FLASH`            | Gemini 3 Flash - Google's fast reasoning model                             |
+ | `Models.GEMINI_3_FLASH_THINKING`   | Gemini 3 Flash Thinking - Google's fast reasoning model with thinking      |
+ | `Models.GROK_41`                   | Grok 4.1 - xAI's latest advanced model                                     |
+ | `Models.GROK_41_THINKING`          | Grok 4.1 Thinking - xAI's latest reasoning model                           |
+ | `Models.KIMI_K2_THINKING`          | Kimi K2 Thinking - Moonshot AI's latest reasoning model                    |
+ | `Models.CLAUDE_45_SONNET`          | Claude Sonnet 4.5 - Anthropic's newest advanced model                      |
+ | `Models.CLAUDE_45_SONNET_THINKING` | Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model            |
+
+ ### CitationMode
+
+ | Mode       | Output                |
+ | ---------- | --------------------- |
+ | `DEFAULT`  | `text[1]`             |
+ | `MARKDOWN` | `text[1](url)`        |
+ | `CLEAN`    | `text` (no citations) |
+
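
A sketch of switching the citation format per request, assuming `CitationMode` is exported from the package root like the other enums used above:

```python
from perplexity_webui_scraper import CitationMode  # assumed top-level export

conversation.ask("What changed in the latest Python release?", citation_mode=CitationMode.MARKDOWN)
print(conversation.answer)  # citations rendered as [1](url) instead of being stripped
```
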
+ ### ConversationConfig
+
+ | Parameter         | Default       | Description        |
+ | ----------------- | ------------- | ------------------ |
+ | `model`           | `Models.BEST` | Default model      |
+ | `citation_mode`   | `CLEAN`       | Citation format    |
+ | `save_to_library` | `False`       | Save to library    |
+ | `search_focus`    | `WEB`         | Search type        |
+ | `source_focus`    | `WEB`         | Source types       |
+ | `time_range`      | `ALL`         | Time filter        |
+ | `language`        | `"en-US"`     | Response language  |
+ | `timezone`        | `None`        | Timezone           |
+ | `coordinates`     | `None`        | Location (lat/lng) |
+
+ ## Exceptions
+
+ The library provides specific exception types for better error handling:
+
+ | Exception                          | Description                                                   |
+ | ---------------------------------- | ------------------------------------------------------------- |
+ | `PerplexityError`                  | Base exception for all library errors                         |
+ | `AuthenticationError`              | Session token is invalid or expired (HTTP 403)                |
+ | `RateLimitError`                   | Rate limit exceeded (HTTP 429)                                 |
+ | `FileUploadError`                  | File upload failed                                             |
+ | `FileValidationError`              | File validation failed (size, type, etc.)                      |
+ | `ResearchClarifyingQuestionsError` | Research mode is asking clarifying questions (not supported)   |
+ | `ResponseParsingError`             | API response could not be parsed                               |
+ | `StreamingError`                   | Error during streaming response                                |
+
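
A minimal error-handling sketch, assuming these exception classes are exported from the package root:

```python
from perplexity_webui_scraper import (  # assumed top-level exports
    AuthenticationError,
    PerplexityError,
    RateLimitError,
)

try:
    conversation.ask("What is quantum computing?")
except AuthenticationError:
    print("Session token is invalid or expired; generate a new one")
except RateLimitError:
    print("Rate limit exceeded; wait before retrying")
except PerplexityError as error:
    print(f"Request failed: {error}")
```
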
+ ### Handling Research Mode Clarifying Questions
+
+ When using Research mode (`Models.RESEARCH`), the API may ask clarifying questions before providing an answer. Since programmatic interaction is not supported, the library raises a `ResearchClarifyingQuestionsError` with the questions:
+
+ ```python
+ from perplexity_webui_scraper import (
+     Models,
+     Perplexity,
+     ResearchClarifyingQuestionsError,
+ )
+
+ try:
+     conversation.ask("Research this topic", model=Models.RESEARCH)
+ except ResearchClarifyingQuestionsError as error:
+     print("The AI needs clarification:")
+     for question in error.questions:
+         print(f" - {question}")
+     # Consider rephrasing your query to be more specific
+ ```
+
+ ## MCP Server (Model Context Protocol)
+
+ The library includes an MCP server that allows AI assistants (like Claude) to search using Perplexity AI directly.
+
+ ### Installation
+
+ ```bash
+ uv pip install perplexity-webui-scraper[mcp]
+ ```
+
+ ### Running the Server
+
+ ```bash
+ # Set your session token
+ export PERPLEXITY_SESSION_TOKEN="your_token_here"  # For Linux/Mac
+ set PERPLEXITY_SESSION_TOKEN="your_token_here"  # For Windows
+
+ # Run with FastMCP
+ uv run fastmcp run src/perplexity_webui_scraper/mcp/server.py
+
+ # Or test with the dev inspector
+ uv run fastmcp dev src/perplexity_webui_scraper/mcp/server.py
+ ```
+
+ ### Claude Desktop Configuration
+
+ Add to `~/.config/claude/claude_desktop_config.json`:
+
+ ```json
+ {
+   "mcpServers": {
+     "perplexity": {
+       "command": "uv",
+       "args": [
+         "run",
+         "fastmcp",
+         "run",
+         "path/to/perplexity_webui_scraper/mcp/server.py"
+       ],
+       "env": {
+         "PERPLEXITY_SESSION_TOKEN": "your_token_here"
+       }
+     }
+   }
+ }
+ ```
+
+ ### Available Tool
+
+ | Tool             | Description                                                                  |
+ | ---------------- | ---------------------------------------------------------------------------- |
+ | `perplexity_ask` | Ask questions and get AI-generated answers with real-time data from the web  |
+
+ **Parameters:**
+
+ | Parameter      | Type  | Default  | Description                                                    |
+ | -------------- | ----- | -------- | -------------------------------------------------------------- |
+ | `query`        | `str` | -        | Question to ask (required)                                      |
+ | `model`        | `str` | `"best"` | AI model (`best`, `research`, `gpt52`, `claude_sonnet`, etc.)   |
+ | `source_focus` | `str` | `"web"`  | Source type (`web`, `academic`, `social`, `finance`, `all`)     |
+
+ ## Disclaimer
+
+ This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk.
+
+ By using this library, you agree to Perplexity AI's Terms of Service.
perplexity_webui_scraper-0.3.5.dist-info/RECORD (added)
@@ -0,0 +1,21 @@
+ perplexity_webui_scraper/__init__.py,sha256=7hLTMTHMC-aCvbDRMXCUBfJ3vztsW9WtEZ08LdIYs9I,713
+ perplexity_webui_scraper/cli/get_perplexity_session_token.py,sha256=67Ck4S2MJ0701LnRHq73qY5oRLCsFOyu_8SMgsbTNFc,6937
+ perplexity_webui_scraper/config.py,sha256=05lkW9PlMmbj-oh-4xc3-iFXXGvKfKCy8yK-O1GWJdw,2055
+ perplexity_webui_scraper/constants.py,sha256=Kq-4i6yyTZ5VhUvbiZmbUmHrjMQm-p7H82Emm7b10-c,1867
+ perplexity_webui_scraper/core.py,sha256=dpcx8tTxESWz_iz7nSKN1XWRvBOV_cD9gK-E2gJYJ8w,20894
+ perplexity_webui_scraper/enums.py,sha256=I-jMiQMzBW72PGJFBNZIc874wijBMynDF-pYQmS1OZc,2751
+ perplexity_webui_scraper/exceptions.py,sha256=Q2dx7j1OrM9CB7ty8fRhheAt4-QhN7szUDXzoT6rx1E,3985
+ perplexity_webui_scraper/http.py,sha256=1qxZ3cvkL-TXST2H1V7AUoA5_poZ9sDBm8OV3FEtWZU,19708
+ perplexity_webui_scraper/limits.py,sha256=GwcwC8CnSNhlcLWGLpuDYA37gn8OXSfsXLIOc-QbxNs,465
+ perplexity_webui_scraper/logging.py,sha256=5IyUiKjN88WCItKl6Yrbfn6rF6jz68rWjFTWQGdvTRo,7129
+ perplexity_webui_scraper/mcp/__init__.py,sha256=Ke166qPFVZORf39lc6cHjKoBbbuJztAfU29vYpCwOrA,366
+ perplexity_webui_scraper/mcp/__main__.py,sha256=N_cSeNjAzSJ861jspq60W0ZVjxWNXhM5O-FQ0aD1oPs,146
+ perplexity_webui_scraper/mcp/server.py,sha256=dadzelHJO3Fkw85hdUiTO10jFaZB26D5e5jWuL3yVoA,5982
+ perplexity_webui_scraper/models.py,sha256=QVeZI-WQzpyi9JnE15QIMJ7nsG0YjIjOsZEA6YfX0tw,2448
+ perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ perplexity_webui_scraper/resilience.py,sha256=-_XYYCazsx5jxrc2HbuJBP16TLh02EEhy8WOukLmbFE,4662
+ perplexity_webui_scraper/types.py,sha256=VlnzvNilIHrDXM2YOGjJa1y2VY0tfR-F0zaPjQHoPKs,1028
+ perplexity_webui_scraper-0.3.5.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
+ perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt,sha256=ODpXpDTkmoQ_o3Y3lsy22PLs-8ndapvMKYwxcz6A9gs,189
+ perplexity_webui_scraper-0.3.5.dist-info/METADATA,sha256=NEkzx5B0HIj9gA9p72e6v0GOTG6cHoEWeSatgaoghhw,12175
+ perplexity_webui_scraper-0.3.5.dist-info/RECORD,,
perplexity_webui_scraper-0.3.5.dist-info/WHEEL (modified)
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: uv 0.9.17
+ Generator: uv 0.9.18
  Root-Is-Purelib: true
  Tag: py3-none-any
perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt (added)
@@ -0,0 +1,4 @@
+ [console_scripts]
+ get-perplexity-session-token = perplexity_webui_scraper.cli.get_perplexity_session_token:get_token
+ perplexity-webui-scraper-mcp = perplexity_webui_scraper.mcp:run_server
+
perplexity_webui_scraper-0.3.3.dist-info/METADATA (removed)
@@ -1,166 +0,0 @@
- Metadata-Version: 2.4
- Name: perplexity-webui-scraper
- Version: 0.3.3
- Summary: Python scraper to extract AI responses from Perplexity's web interface.
- Keywords: perplexity,ai,scraper,webui,api,client
- Author: henrique-coder
- Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
- License-Expression: MIT
- Classifier: Development Status :: 4 - Beta
- Classifier: Intended Audience :: Developers
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Classifier: Programming Language :: Python :: 3.14
- Classifier: Topic :: Internet :: WWW/HTTP
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Classifier: Typing :: Typed
- Requires-Dist: curl-cffi>=0.13.0
- Requires-Dist: orjson>=3.11.5
- Requires-Dist: pydantic>=2.12.5
- Requires-Python: >=3.10
- Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
- Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
- Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
- Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
- Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
- Description-Content-Type: text/markdown
-
- <div align="center">
-
- # Perplexity WebUI Scraper
-
- Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
-
- [![PyPI](https://img.shields.io/pypi/v/perplexity-webui-scraper?color=blue)](https://pypi.org/project/perplexity-webui-scraper)
- [![Python](https://img.shields.io/pypi/pyversions/perplexity-webui-scraper)](https://pypi.org/project/perplexity-webui-scraper)
- [![License](https://img.shields.io/github/license/henrique-coder/perplexity-webui-scraper?color=green)](./LICENSE)
-
- </div>
-
- ---
-
- ## Installation
-
- ```bash
- uv pip install perplexity-webui-scraper
- ```
-
- ## Requirements
-
- - **Perplexity Pro subscription**
- - **Session token** (`__Secure-next-auth.session-token` cookie from browser)
-
- ### Getting Your Session Token
-
- 1. Log in at [perplexity.ai](https://www.perplexity.ai)
- 2. Open DevTools (`F12`) → Application → Cookies
- 3. Copy `__Secure-next-auth.session-token` value
- 4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN=your_token`
-
- ## Quick Start
-
- ```python
- from perplexity_webui_scraper import Perplexity
-
- client = Perplexity(session_token="YOUR_TOKEN")
- conversation = client.create_conversation()
-
- conversation.ask("What is quantum computing?")
- print(conversation.answer)
-
- # Follow-up
- conversation.ask("Explain it simpler")
- print(conversation.answer)
- ```
-
- ### Streaming
-
- ```python
- for chunk in conversation.ask("Explain AI", stream=True):
-     print(chunk.answer)
- ```
-
- ### With Options
-
- ```python
- from perplexity_webui_scraper import (
-     ConversationConfig,
-     Coordinates,
-     Models,
-     SourceFocus,
- )
-
- config = ConversationConfig(
-     model=Models.RESEARCH,
-     source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
-     language="en-US",
-     coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
- )
-
- conversation = client.create_conversation(config)
- conversation.ask("Latest AI research", files=["paper.pdf"])
- ```
-
- ## API
-
- ### `Perplexity(session_token, config?)`
-
- | Parameter       | Type           | Description        |
- | --------------- | -------------- | ------------------ |
- | `session_token` | `str`          | Browser cookie     |
- | `config`        | `ClientConfig` | Timeout, TLS, etc. |
-
- ### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
-
- | Parameter       | Type           | Default       | Description         |
- | --------------- | -------------- | ------------- | ------------------- |
- | `query`         | `str`          | —             | Question (required) |
- | `model`         | `Model`        | `Models.BEST` | AI model            |
- | `files`         | `list[str]`    | `None`        | File paths          |
- | `citation_mode` | `CitationMode` | `CLEAN`       | Citation format     |
- | `stream`        | `bool`         | `False`       | Enable streaming    |
-
- ### Models
-
- | Model                          | Description       |
- | ------------------------------ | ----------------- |
- | `Models.BEST`                  | Auto-select best  |
- | `Models.RESEARCH`              | Deep research     |
- | `Models.SONAR`                 | Fast queries      |
- | `Models.GPT_51`                | OpenAI GPT-5.1    |
- | `Models.CLAUDE_45_SONNET`      | Claude 4.5 Sonnet |
- | `Models.GEMINI_3_PRO_THINKING` | Gemini 3.0 Pro    |
- | `Models.GROK_41`               | xAI Grok 4.1      |
-
- ### CitationMode
-
- | Mode       | Output                |
- | ---------- | --------------------- |
- | `DEFAULT`  | `text[1]`             |
- | `MARKDOWN` | `text[1](url)`        |
- | `CLEAN`    | `text` (no citations) |
-
- ### ConversationConfig
-
- | Parameter         | Default       | Description        |
- | ----------------- | ------------- | ------------------ |
- | `model`           | `Models.BEST` | Default model      |
- | `citation_mode`   | `CLEAN`       | Citation format    |
- | `save_to_library` | `False`       | Save to library    |
- | `search_focus`    | `WEB`         | Search type        |
- | `source_focus`    | `WEB`         | Source types       |
- | `time_range`      | `ALL`         | Time filter        |
- | `language`        | `"en-US"`     | Response language  |
- | `timezone`        | `None`        | Timezone           |
- | `coordinates`     | `None`        | Location (lat/lng) |
-
- ## Disclaimer
-
- This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
-
- By using this library, you agree to Perplexity AI's Terms of Service.