perplexity-webui-scraper 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, exactly as they were published to their public registry. It is provided for informational purposes only.
- perplexity_webui_scraper/__init__.py +14 -21
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +216 -0
- perplexity_webui_scraper/config.py +2 -2
- perplexity_webui_scraper/constants.py +9 -35
- perplexity_webui_scraper/core.py +89 -27
- perplexity_webui_scraper/http.py +11 -4
- perplexity_webui_scraper/limits.py +2 -5
- perplexity_webui_scraper/models.py +34 -19
- perplexity_webui_scraper-0.3.4.dist-info/METADATA +214 -0
- perplexity_webui_scraper-0.3.4.dist-info/RECORD +16 -0
- {perplexity_webui_scraper-0.3.2.dist-info → perplexity_webui_scraper-0.3.4.dist-info}/WHEEL +1 -1
- perplexity_webui_scraper-0.3.4.dist-info/entry_points.txt +3 -0
- perplexity_webui_scraper-0.3.2.dist-info/METADATA +0 -146
- perplexity_webui_scraper-0.3.2.dist-info/RECORD +0 -14
perplexity_webui_scraper/__init__.py CHANGED

@@ -1,6 +1,6 @@
-"""
+"""Extract AI responses from Perplexity's web interface."""
 
-from importlib.metadata import version
+from importlib import metadata
 
 from .config import ClientConfig, ConversationConfig
 from .core import Conversation, Perplexity
@@ -16,31 +16,24 @@ from .models import Model, Models
 from .types import Coordinates, Response, SearchResultItem
 
 
-__version__: str = version("perplexity-webui-scraper")
-
+__version__: str = metadata.version("perplexity-webui-scraper")
 __all__: list[str] = [
-
-    "Perplexity",
+    "AuthenticationError",
+    "CitationMode",
+    "ClientConfig",
     "Conversation",
-    # Configuration
     "ConversationConfig",
-    "ClientConfig",
     "Coordinates",
-    # Enums
-    "CitationMode",
-    "SearchFocus",
-    "SourceFocus",
-    "TimeRange",
-    # Models
-    "Model",
-    "Models",
-    # Response types
-    "Response",
-    "SearchResultItem",
-    # Exceptions
-    "AuthenticationError",
     "FileUploadError",
     "FileValidationError",
+    "Model",
+    "Models",
+    "Perplexity",
     "PerplexityError",
     "RateLimitError",
+    "Response",
+    "SearchFocus",
+    "SearchResultItem",
+    "SourceFocus",
+    "TimeRange",
 ]
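The `__init__.py` change above resolves the package version through the `importlib.metadata` module instead of a bare `version()` import. A minimal sketch of what that exposes to callers, assuming the package is installed (both names come from the diff above):

```python
# Minimal sketch: the version attribute the package now resolves via importlib.metadata.
from importlib import metadata

import perplexity_webui_scraper

print(perplexity_webui_scraper.__version__)           # e.g. "0.3.4"
print(metadata.version("perplexity-webui-scraper"))   # same value, resolved directly
```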
perplexity_webui_scraper/cli/get_perplexity_session_token.py ADDED

@@ -0,0 +1,216 @@
+"""CLI utility for secure Perplexity authentication and session extraction."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from sys import exit
+from typing import NoReturn
+
+from curl_cffi.requests import Session
+from rich.console import Console
+from rich.panel import Panel
+from rich.prompt import Confirm, Prompt
+
+
+# Constants
+BASE_URL: str = "https://www.perplexity.ai"
+ENV_KEY: str = "PERPLEXITY_SESSION_TOKEN"
+
+
+# Initialize console on stderr to ensure secure alternate screen usage
+console = Console(stderr=True, soft_wrap=True)
+
+
+def update_env(token: str) -> bool:
+    """
+    Securely updates the .env file with the session token.
+
+    Preserves existing content and comments.
+    """
+
+    path = Path(".env")
+    line_entry = f'{ENV_KEY}="{token}"'
+
+    try:
+        lines = path.read_text(encoding="utf-8").splitlines() if path.exists() else []
+        updated = False
+        new_lines = []
+
+        for line in lines:
+            if line.strip().startswith(ENV_KEY):
+                new_lines.append(line_entry)
+                updated = True
+            else:
+                new_lines.append(line)
+
+        if not updated:
+            if new_lines and new_lines[-1] != "":
+                new_lines.append("")
+
+            new_lines.append(line_entry)
+
+        path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
+
+        return True
+    except Exception:
+        return False
+
+
+def _initialize_session() -> tuple[Session, str]:
+    """Initialize session and obtain CSRF token."""
+
+    session = Session(impersonate="chrome", headers={"Referer": BASE_URL, "Origin": BASE_URL})
+
+    with console.status("[bold green]Initializing secure connection...", spinner="dots"):
+        session.get(BASE_URL)
+        csrf_data = session.get(f"{BASE_URL}/api/auth/csrf").json()
+        csrf = csrf_data.get("csrfToken")
+
+    if not csrf:
+        raise ValueError("Failed to obtain CSRF token.")
+
+    return session, csrf
+
+
+def _request_verification_code(session: Session, csrf: str, email: str) -> None:
+    """Send verification code to user's email."""
+
+    with console.status("[bold green]Sending verification code...", spinner="dots"):
+        r = session.post(
+            f"{BASE_URL}/api/auth/signin/email?version=2.18&source=default",
+            json={
+                "email": email,
+                "csrfToken": csrf,
+                "useNumericOtp": "true",
+                "json": "true",
+                "callbackUrl": f"{BASE_URL}/?login-source=floatingSignup",
+            },
+        )
+
+    if r.status_code != 200:
+        raise ValueError(f"Authentication request failed: {r.text}")
+
+
+def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
+    """Validate user input (OTP or magic link) and return redirect URL."""
+
+    with console.status("[bold green]Validating...", spinner="dots"):
+        if user_input.startswith("http"):
+            return user_input
+
+        r_otp = session.post(
+            f"{BASE_URL}/api/auth/otp-redirect-link",
+            json={
+                "email": email,
+                "otp": user_input,
+                "redirectUrl": f"{BASE_URL}/?login-source=floatingSignup",
+                "emailLoginMethod": "web-otp",
+            },
+        )
+
+    if r_otp.status_code != 200:
+        raise ValueError("Invalid verification code.")
+
+    redirect_path = r_otp.json().get("redirect")
+
+    if not redirect_path:
+        raise ValueError("No redirect URL received.")
+
+    return f"{BASE_URL}{redirect_path}" if redirect_path.startswith("/") else redirect_path
+
+
+def _extract_session_token(session: Session, redirect_url: str) -> str:
+    """Extract session token from cookies after authentication."""
+
+    session.get(redirect_url)
+    token = session.cookies.get("__Secure-next-auth.session-token")
+
+    if not token:
+        raise ValueError("Authentication successful, but token not found.")
+
+    return token
+
+
+def _display_and_save_token(token: str) -> None:
+    """Display token and optionally save to .env file."""
+
+    console.print("\n[bold green]✅ Token generated successfully![/bold green]")
+    console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")
+
+    prompt_text = f"Save token to [bold yellow].env[/bold yellow] file ({ENV_KEY})?"
+
+    if Confirm.ask(prompt_text, default=True, console=console):
+        if update_env(token):
+            console.print("[dim]Token saved to .env successfully.[/dim]")
+        else:
+            console.print("[red]Failed to save to .env file.[/red]")
+
+
+def _show_header() -> None:
+    """Display welcome header."""
+
+    console.print(
+        Panel(
+            "[bold white]Perplexity WebUI Scraper[/bold white]\n\n"
+            "Automatic session token generator via email authentication.\n"
+            "[dim]All session data will be cleared on exit.[/dim]",
+            title="🔐 Token Generator",
+            border_style="cyan",
+        )
+    )
+
+
+def _show_exit_message() -> None:
+    """Display security note and wait for user to exit."""
+
+    console.print("\n[bold yellow]⚠️ Security Note:[/bold yellow]")
+    console.print("Press [bold white]ENTER[/bold white] to clear screen and exit.")
+    console.input()
+
+
+def get_token() -> NoReturn:
+    """
+    Executes the authentication flow within an ephemeral terminal screen.
+
+    Handles CSRF, Email OTP/Link validation, and secure token display.
+    """
+
+    with console.screen():
+        try:
+            _show_header()
+
+            # Step 1: Initialize session and get CSRF token
+            session, csrf = _initialize_session()
+
+            # Step 2: Get email and request verification code
+            console.print("\n[bold cyan]Step 1: Email Verification[/bold cyan]")
+            email = Prompt.ask(" Enter your Perplexity email", console=console)
+            _request_verification_code(session, csrf, email)
+
+            # Step 3: Get and validate user input (OTP or magic link)
+            console.print("\n[bold cyan]Step 2: Verification[/bold cyan]")
+            console.print(" Check your email for a [bold]6-digit code[/bold] or [bold]magic link[/bold].")
+            user_input = Prompt.ask(" Enter code or paste link", console=console).strip()
+            redirect_url = _validate_and_get_redirect_url(session, email, user_input)
+
+            # Step 4: Extract session token
+            token = _extract_session_token(session, redirect_url)
+
+            # Step 5: Display and optionally save token
+            _display_and_save_token(token)
+
+            # Step 6: Exit
+            _show_exit_message()
+
+            exit(0)
+        except KeyboardInterrupt:
+            exit(0)
+        except Exception as error:
+            console.print(f"\n[bold red]⛔ Error:[/bold red] {error}")
+            console.input("[dim]Press ENTER to exit...[/dim]")
+
+            exit(1)
+
+
+if __name__ == "__main__":
+    get_token()
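The CLI above writes the token to `.env` under `PERPLEXITY_SESSION_TOKEN`. A minimal sketch of consuming that value afterwards; the `.env` parsing here is a hand-rolled stand-in and not part of the package, while `Perplexity` and `create_conversation` are the names documented in this package's README further down:

```python
# Minimal sketch: load the token saved by get-perplexity-session-token and build a client.
from pathlib import Path

from perplexity_webui_scraper import Perplexity


def read_env_token(env_path: str = ".env", key: str = "PERPLEXITY_SESSION_TOKEN") -> str:
    # Naive .env reader: find the KEY=... line and strip surrounding quotes.
    for line in Path(env_path).read_text(encoding="utf-8").splitlines():
        if line.startswith(key + "="):
            return line.split("=", 1)[1].strip().strip('"')
    raise KeyError(f"{key} not found in {env_path}")


client = Perplexity(session_token=read_env_token())
conversation = client.create_conversation()
```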
perplexity_webui_scraper/config.py CHANGED

@@ -6,11 +6,11 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING
 
 from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
-from .types import Coordinates
 
 
 if TYPE_CHECKING:
     from .models import Model
+    from .types import Coordinates
 
 
 @dataclass(slots=True)
@@ -32,5 +32,5 @@ class ConversationConfig:
 class ClientConfig:
     """HTTP client settings."""
 
-    timeout: int =
+    timeout: int = 3600
     impersonate: str = "chrome"
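The `ClientConfig` change above sets the request timeout to 3600 seconds. A minimal sketch of passing a client config explicitly, assuming `ClientConfig` accepts its two declared fields as keyword arguments in the usual dataclass style (only `timeout` and `impersonate` appear in the diff; no other options are assumed):

```python
# Minimal sketch: explicit client configuration using the fields shown in config.py.
from perplexity_webui_scraper import ClientConfig, Perplexity

config = ClientConfig(timeout=3600, impersonate="chrome")  # 3600 s matches the new default
client = Perplexity(session_token="YOUR_TOKEN", config=config)
```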
perplexity_webui_scraper/constants.py CHANGED

@@ -1,8 +1,4 @@
-"""
-
-These are internal API values that should not be modified by users.
-They represent fixed parameters required by the Perplexity WebUI API.
-"""
+"""Constants and values for the Perplexity internal API and HTTP interactions."""
 
 from __future__ import annotations
 
@@ -10,21 +6,14 @@ from re import Pattern, compile
 from typing import Final
 
 
-# =============================================================================
 # API Configuration
-# =============================================================================
-
 API_VERSION: Final[str] = "2.18"
 """Current API version used by Perplexity WebUI."""
 
 API_BASE_URL: Final[str] = "https://www.perplexity.ai"
 """Base URL for all API requests."""
 
-
-# =============================================================================
 # API Endpoints
-# =============================================================================
-
 ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
 """SSE endpoint for sending prompts."""
 
@@ -34,54 +23,39 @@ ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
 ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
 """Endpoint for file upload URL generation."""
 
-
-# =============================================================================
 # API Fixed Parameters
-# =============================================================================
-
 SEND_BACK_TEXT: Final[bool] = True
-"""
+"""
+Whether to receive full text in each streaming chunk.
 
 True = API sends complete text each chunk (replace mode).
 False = API sends delta chunks only (accumulate mode).
-
-Currently must be True for the parser to work correctly.
 """
 
 USE_SCHEMATIZED_API: Final[bool] = False
-"""Whether to use the schematized API format.
-
-Currently must be False - schematized format is not supported.
-"""
+"""Whether to use the schematized API format."""
 
 PROMPT_SOURCE: Final[str] = "user"
 """Source identifier for prompts."""
 
-
-# =============================================================================
-# Regex Patterns (Pre-compiled for performance)
-# =============================================================================
-
+# Regex Patterns (Pre-compiled for performance in streaming parsing)
 CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
-"""
+"""
+Regex pattern for matching citation markers like [1], [2], etc.
 
 Uses word boundary to avoid matching things like [123].
-Pre-compiled for performance in streaming scenarios.
 """
 
 JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
 """Pattern to detect JSON object strings."""
 
-
-# =============================================================================
 # HTTP Headers
-# =============================================================================
-
 DEFAULT_HEADERS: Final[dict[str, str]] = {
     "Accept": "text/event-stream, application/json",
     "Content-Type": "application/json",
 }
-"""
+"""
+Default HTTP headers for API requests.
 
 Referer and Origin are added dynamically based on BASE_URL.
 """
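The citation pattern above is the core of the streaming parser's citation handling. A minimal sketch exercising that exact regex on a sample string (stdlib only, nothing beyond the pattern shown in constants.py is assumed):

```python
# Minimal sketch: the pre-compiled citation regex from constants.py on a sample answer.
from re import compile

CITATION_PATTERN = compile(r"\[(\d{1,2})\]")

text = "Qubits enable superposition [1] and entanglement [12], but [123] is not a citation."
print(CITATION_PATTERN.findall(text))  # ['1', '12'] -- a three-digit marker never matches
```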
perplexity_webui_scraper/core.py CHANGED

@@ -2,15 +2,18 @@
 
 from __future__ import annotations
 
-from collections.abc import Generator
 from mimetypes import guess_type
 from os import PathLike
 from pathlib import Path
-from re import Match
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from uuid import uuid4
 
-from orjson import loads
+from orjson import JSONDecodeError, loads
+
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+    from re import Match
 
 from .config import ClientConfig, ConversationConfig
 from .constants import (
@@ -31,11 +34,21 @@ from .types import Response, SearchResultItem, _FileInfo
 
 
 class Perplexity:
-    """Perplexity AI
+    """Web scraper for Perplexity AI conversations."""
 
     __slots__ = ("_http",)
 
     def __init__(self, session_token: str, config: ClientConfig | None = None) -> None:
+        """Initialize web scraper with session token.
+
+        Args:
+            session_token: Perplexity session cookie (__Secure-next-auth.session-token).
+            config: Optional HTTP client configuration.
+
+        Raises:
+            ValueError: If session_token is empty or whitespace.
+        """
+
         if not session_token or not session_token.strip():
             raise ValueError("session_token cannot be empty")
 
@@ -44,10 +57,12 @@ class Perplexity:
 
     def create_conversation(self, config: ConversationConfig | None = None) -> Conversation:
         """Create a new conversation."""
+
         return Conversation(self._http, config or ConversationConfig())
 
     def close(self) -> None:
         """Close the client."""
+
         self._http.close()
 
     def __enter__(self) -> Perplexity:
@@ -58,20 +73,20 @@ class Perplexity:
 
 
 class Conversation:
-    """
+    """Manage a Perplexity conversation with query and follow-up support."""
 
     __slots__ = (
-        "_http",
-        "_config",
-        "_citation_mode",
-        "_backend_uuid",
-        "_read_write_token",
-        "_title",
         "_answer",
+        "_backend_uuid",
         "_chunks",
-        "_search_results",
+        "_citation_mode",
+        "_config",
+        "_http",
         "_raw_data",
+        "_read_write_token",
+        "_search_results",
         "_stream_generator",
+        "_title",
     )
 
     def __init__(self, http: HTTPClient, config: ConversationConfig) -> None:
@@ -90,55 +105,64 @@ class Conversation:
     @property
     def answer(self) -> str | None:
         """Last response text."""
+
         return self._answer
 
     @property
     def title(self) -> str | None:
         """Conversation title."""
+
         return self._title
 
     @property
     def search_results(self) -> list[SearchResultItem]:
         """Search results from last response."""
+
         return self._search_results
 
     @property
     def uuid(self) -> str | None:
         """Conversation UUID."""
+
         return self._backend_uuid
 
     def __iter__(self) -> Generator[Response, None, None]:
         if self._stream_generator is not None:
             yield from self._stream_generator
+
         self._stream_generator = None
 
     def ask(
         self,
         query: str,
         model: Model | None = None,
-        files: list[str | PathLike
+        files: list[str | PathLike] | None = None,
        citation_mode: CitationMode | None = None,
        stream: bool = False,
     ) -> Conversation:
-        """
+        """Ask a question. Returns self for method chaining or streaming iteration."""
+
         effective_model = model or self._config.model or Models.BEST
         effective_citation = citation_mode if citation_mode is not None else self._config.citation_mode
         self._citation_mode = effective_citation
         self._execute(query, effective_model, files, stream=stream)
+
         return self
 
     def _execute(
         self,
         query: str,
         model: Model,
-        files: list[str | PathLike
+        files: list[str | PathLike] | None,
        stream: bool = False,
     ) -> None:
         """Execute a query."""
+
         self._reset_response_state()
 
         # Upload files
         file_urls: list[str] = []
+
         if files:
             validated = self._validate_files(files)
             file_urls = [self._upload_file(f) for f in validated]
@@ -159,15 +183,17 @@
         self._raw_data = {}
         self._stream_generator = None
 
-    def _validate_files(self, files: list[str | PathLike
+    def _validate_files(self, files: list[str | PathLike] | None) -> list[_FileInfo]:
         if not files:
             return []
 
         seen: set[str] = set()
         file_list: list[Path] = []
+
         for item in files:
             if item and isinstance(item, (str, PathLike)):
                 path = Path(item).resolve()
+
                 if path.as_posix() not in seen:
                     seen.add(path.as_posix())
                     file_list.append(path)
@@ -190,11 +216,13 @@
                     raise FileValidationError(file_path, "Path is not a file")
 
                 file_size = path.stat().st_size
+
                 if file_size > MAX_FILE_SIZE:
                     raise FileValidationError(
                         file_path,
                         f"File exceeds 50MB limit: {file_size / (1024 * 1024):.1f}MB",
                     )
+
                 if file_size == 0:
                     raise FileValidationError(file_path, "File is empty")
 
@@ -211,10 +239,10 @@
                 )
             except FileValidationError:
                 raise
-            except (FileNotFoundError, PermissionError) as
-                raise FileValidationError(file_path, f"Cannot access file: {
-            except OSError as
-                raise FileValidationError(file_path, f"File system error: {
+            except (FileNotFoundError, PermissionError) as error:
+                raise FileValidationError(file_path, f"Cannot access file: {error}") from error
+            except OSError as error:
+                raise FileValidationError(file_path, f"File system error: {error}") from error
 
         return result
 
@@ -242,8 +270,8 @@
                 raise FileUploadError(file_info.path, "No upload URL returned")
 
             return upload_url
-        except FileUploadError:
-            raise
+        except FileUploadError as error:
+            raise error
         except Exception as e:
             raise FileUploadError(file_info.path, str(e)) from e
 
@@ -255,7 +283,9 @@
     ) -> dict[str, Any]:
         cfg = self._config
 
-        sources =
+        sources = (
+            [s.value for s in cfg.source_focus] if isinstance(cfg.source_focus, list) else [cfg.source_focus.value]
+        )
 
         client_coordinates = None
         if cfg.coordinates is not None:
@@ -286,6 +316,7 @@
         if self._backend_uuid is not None:
             params["last_backend_uuid"] = self._backend_uuid
             params["query_source"] = "followup"
+
        if self._read_write_token:
            params["read_write_token"] = self._read_write_token
 
@@ -297,6 +328,7 @@
 
         def replacer(m: Match[str]) -> str:
             num = m.group(1)
+
             if not num.isdigit():
                 return m.group(0)
 
@@ -304,8 +336,10 @@
                 return ""
 
             idx = int(num) - 1
+
             if 0 <= idx < len(self._search_results):
                 url = self._search_results[idx].url or ""
+
                 if self._citation_mode == CitationMode.MARKDOWN and url:
                     return f"[{num}]({url})"
 
@@ -315,8 +349,10 @@
 
     def _parse_line(self, line: str | bytes) -> dict[str, Any] | None:
         prefix = b"data: " if isinstance(line, bytes) else "data: "
+
         if (isinstance(line, bytes) and line.startswith(prefix)) or (isinstance(line, str) and line.startswith(prefix)):
             return loads(line[6:])
+
         return None
 
     def _process_data(self, data: dict[str, Any]) -> None:
@@ -326,10 +362,25 @@
         if self._read_write_token is None and "read_write_token" in data:
             self._read_write_token = data["read_write_token"]
 
-        if "
-
+        if "blocks" in data:
+            for block in data["blocks"]:
+                if block.get("intended_usage") == "web_results":
+                    diff = block.get("diff_block", {})
+
+                    for patch in diff.get("patches", []):
+                        if patch.get("op") == "replace" and patch.get("path") == "/web_results":
+                            pass
+
+        if "text" not in data and "blocks" not in data:
+            return None
+
+        try:
+            json_data = loads(data["text"])
+        except KeyError as e:
+            raise ValueError("Missing 'text' field in data") from e
+        except JSONDecodeError as e:
+            raise ValueError("Invalid JSON in 'text' field") from e
 
-        json_data = loads(data["text"])
         answer_data: dict[str, Any] = {}
 
         if isinstance(json_data, list):
@@ -344,14 +395,18 @@
                 answer_data = raw_content
 
                 self._update_state(data.get("thread_title"), answer_data)
+
                 break
         elif isinstance(json_data, dict):
             self._update_state(data.get("thread_title"), json_data)
+        else:
+            raise ValueError("Unexpected JSON structure in 'text' field")
 
     def _update_state(self, title: str | None, answer_data: dict[str, Any]) -> None:
         self._title = title
 
         web_results = answer_data.get("web_results", [])
+
         if web_results:
             self._search_results = [
                 SearchResultItem(
@@ -364,10 +419,12 @@
             ]
 
         answer_text = answer_data.get("answer")
+
         if answer_text is not None:
             self._answer = self._format_citations(answer_text)
 
         chunks = answer_data.get("chunks", [])
+
         if chunks:
             self._chunks = chunks
 
@@ -387,16 +444,21 @@
     def _complete(self, payload: dict[str, Any]) -> None:
         for line in self._http.stream_ask(payload):
             data = self._parse_line(line)
+
             if data:
                 self._process_data(data)
+
                 if data.get("final"):
                     break
 
     def _stream(self, payload: dict[str, Any]) -> Generator[Response, None, None]:
         for line in self._http.stream_ask(payload):
             data = self._parse_line(line)
+
             if data:
                 self._process_data(data)
+
                 yield self._build_response()
+
                 if data.get("final"):
                     break
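In core.py, `ask(..., stream=True)` returns the conversation itself and iterating it yields `Response` chunks, while follow-ups on the same object reuse the stored backend UUID and read/write token. A minimal usage sketch, using only the calls documented in this diff and in the README below (`YOUR_TOKEN` is a placeholder):

```python
# Minimal sketch: streaming a first answer, then asking a follow-up on the same thread.
from perplexity_webui_scraper import Perplexity

client = Perplexity(session_token="YOUR_TOKEN")
conversation = client.create_conversation()

# stream=True: iterating the returned Conversation yields Response chunks as they arrive
for chunk in conversation.ask("Explain AI", stream=True):
    print(chunk.answer)

# A follow-up reuses last_backend_uuid / read_write_token internally (see the payload hunk)
conversation.ask("Now in one sentence")
print(conversation.answer)

client.close()
```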
perplexity_webui_scraper/http.py CHANGED

@@ -2,10 +2,14 @@
 
 from __future__ import annotations
 
-from collections.abc import Generator
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from curl_cffi.requests import Response as CurlResponse
+
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
 from curl_cffi.requests import Session
 
 from .constants import (
@@ -42,6 +46,7 @@ class HTTPClient:
             "Origin": API_BASE_URL,
         }
         cookies: dict[str, str] = {SESSION_COOKIE_NAME: session_token}
+
         self._session: Session = Session(
             headers=headers,
             cookies=cookies,
@@ -72,9 +77,9 @@ class HTTPClient:
         elif status_code == 429:
             raise RateLimitError() from error
         elif status_code is not None:
-            raise PerplexityError(f"{context}HTTP {status_code}: {
+            raise PerplexityError(f"{context}HTTP {status_code}: {error!s}", status_code=status_code) from error
         else:
-            raise PerplexityError(f"{context}{
+            raise PerplexityError(f"{context}{error!s}") from error
 
     def get(self, endpoint: str, params: dict[str, Any] | None = None) -> CurlResponse:
         """Make a GET request.
@@ -97,6 +102,7 @@ class HTTPClient:
         try:
             response = self._session.get(url, params=params)
             response.raise_for_status()
+
             return response
         except Exception as e:
             self._handle_error(e, f"GET {endpoint}: ")
@@ -128,6 +134,7 @@ class HTTPClient:
         try:
             response = self._session.post(url, json=json, stream=stream)
             response.raise_for_status()
+
             return response
         except Exception as e:
             self._handle_error(e, f"POST {endpoint}: ")
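The error handler above maps HTTP failures onto the package's exception types; the hunk only shows the 429 → `RateLimitError` branch and a generic `PerplexityError` fallback, so treating `AuthenticationError` as the rejected-token case is an assumption based on its name. A minimal sketch of catching the exported exceptions:

```python
# Minimal sketch: handling the exception types exported in __init__.py.
# Only 429 -> RateLimitError is visible in the diff; the AuthenticationError
# branch here is an assumption, not a documented mapping.
from perplexity_webui_scraper import (
    AuthenticationError,
    Perplexity,
    PerplexityError,
    RateLimitError,
)

client = Perplexity(session_token="YOUR_TOKEN")
conversation = client.create_conversation()

try:
    conversation.ask("What is quantum computing?")
except RateLimitError:
    print("Rate limited (HTTP 429), retry later")
except AuthenticationError:
    print("Session token rejected, regenerate it")
except PerplexityError as error:
    print(f"Request failed: {error}")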
perplexity_webui_scraper/limits.py CHANGED

@@ -10,11 +10,8 @@ MAX_FILES: Final[int] = 30
 """Maximum number of files that can be attached to a single prompt."""
 
 MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024  # 50 MB in bytes
-"""Maximum file size in bytes
+"""Maximum file size in bytes."""
 
 # Request Limits
 DEFAULT_TIMEOUT: Final[int] = 30 * 60  # 30 minutes in seconds
-"""Default request timeout in seconds
-
-Set high to accommodate complex models that may take longer to respond.
-"""
+"""Default request timeout in seconds"""
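limits.py caps attachments at 30 files of at most 50 MB each, and core.py rejects oversized or empty files with `FileValidationError`. A minimal sketch of pre-checking attachments locally before calling `ask()`; the limit values are taken from limits.py, while the helper itself is not part of the package:

```python
# Minimal sketch: mirror the attachment limits from limits.py before uploading.
from pathlib import Path

MAX_FILES = 30
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB in bytes


def precheck(paths: list[str]) -> None:
    if len(paths) > MAX_FILES:
        raise ValueError(f"Too many attachments: {len(paths)} > {MAX_FILES}")
    for raw in paths:
        size = Path(raw).stat().st_size
        if size == 0 or size > MAX_FILE_SIZE:
            raise ValueError(f"{raw}: {size / (1024 * 1024):.1f}MB is outside the allowed range")


precheck(["paper.pdf"])
```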
perplexity_webui_scraper/models.py CHANGED

@@ -24,35 +24,50 @@ class Models:
     All models use the "copilot" mode which enables web search.
     """
 
-    LABS = Model(identifier="pplx_beta")
-    """Create projects from scratch (turn your ideas into completed docs, slides, dashboards, and more)."""
-
     RESEARCH = Model(identifier="pplx_alpha")
-    """
+    """Research - Fast and thorough for routine research"""
+
+    LABS = Model(identifier="pplx_beta")
+    """Labs - Multi-step tasks with advanced troubleshooting"""
 
-    BEST = Model(identifier="
-    """Automatically selects the
+    BEST = Model(identifier="pplx_pro_upgraded")
+    """Best - Automatically selects the most responsive model based on the query"""
 
     SONAR = Model(identifier="experimental")
-    """Perplexity's fast model
+    """Sonar - Perplexity's fast model"""
 
-
-    """OpenAI's latest model
+    GPT_52 = Model(identifier="gpt52")
+    """GPT-5.2 - OpenAI's latest model"""
 
-
-    """OpenAI's latest model with
+    GPT_52_THINKING = Model(identifier="gpt52_thinking")
+    """GPT-5.2 Thinking - OpenAI's latest model with thinking"""
 
-
-    """
+    CLAUDE_45_OPUS = Model(identifier="claude45opus")
+    """Claude Opus 4.5 - Anthropic's Opus reasoning model"""
 
-
-    """
+    CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
+    """Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking"""
+
+    GEMINI_3_PRO = Model(identifier="gemini30pro")
+    """Gemini 3 Pro - Google's newest reasoning model"""
 
-
-    """
+    GEMINI_3_FLASH = Model(identifier="gemini30flash")
+    """Gemini 3 Flash - Google's fast reasoning model"""
+
+    GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
+    """Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking"""
 
     GROK_41 = Model(identifier="grok41nonreasoning")
-    """
+    """Grok 4.1 - xAI's latest advanced model"""
+
+    GROK_41_THINKING = Model(identifier="grok41reasoning")
+    """Grok 4.1 Thinking - xAI's latest reasoning model"""
 
     KIMI_K2_THINKING = Model(identifier="kimik2thinking")
-    """Moonshot AI's
+    """Kimi K2 Thinking - Moonshot AI's latest reasoning model"""
+
+    CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
+    """Claude Sonnet 4.5 - Anthropic's newest advanced model"""
+
+    CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
+    """Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model"""
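The expanded `Models` catalogue above can be selected per request through the `model` parameter of `ask()`, as documented in the README below. A minimal sketch using two of the constants defined in the diff:

```python
# Minimal sketch: per-request model selection with constants from models.py.
from perplexity_webui_scraper import Models, Perplexity

client = Perplexity(session_token="YOUR_TOKEN")
conversation = client.create_conversation()

conversation.ask("Summarize today's AI research news", model=Models.GEMINI_3_PRO)
print(conversation.answer)

conversation.ask("Now reason through the trade-offs", model=Models.CLAUDE_45_OPUS_THINKING)
print(conversation.answer)
```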
perplexity_webui_scraper-0.3.4.dist-info/METADATA ADDED

@@ -0,0 +1,214 @@
+Metadata-Version: 2.4
+Name: perplexity-webui-scraper
+Version: 0.3.4
+Summary: Python scraper to extract AI responses from Perplexity's web interface.
+Keywords: perplexity,ai,scraper,webui,api,client
+Author: henrique-coder
+Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
+License-Expression: MIT
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Internet :: WWW/HTTP
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Typing :: Typed
+Requires-Dist: curl-cffi>=0.14.0
+Requires-Dist: orjson>=3.11.5
+Requires-Dist: pydantic>=2.12.5
+Requires-Python: >=3.10
+Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
+Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
+Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
+Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
+Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
+Description-Content-Type: text/markdown
+
+<div align="center">
+
+# Perplexity WebUI Scraper
+
+Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
+
+[](https://pypi.org/project/perplexity-webui-scraper)
+[](https://pypi.org/project/perplexity-webui-scraper)
+[](./LICENSE)
+
+</div>
+
+---
+
+## Installation
+
+```bash
+uv pip install perplexity-webui-scraper
+```
+
+## Requirements
+
+- **Perplexity Pro/Max account**
+- **Session token** (`__Secure-next-auth.session-token` cookie from your browser)
+
+### Getting Your Session Token
+
+You can obtain your session token in two ways:
+
+#### Option 1: Automatic (CLI Tool)
+
+The package includes a CLI tool to automatically generate and save your session token:
+
+```bash
+get-perplexity-session-token
+```
+
+This interactive tool will:
+
+1. Ask for your Perplexity email
+2. Send a verification code to your email
+3. Accept either a 6-digit code or magic link
+4. Extract and display your session token
+5. Optionally save it to your `.env` file
+
+**Features:**
+
+- Secure ephemeral session (cleared on exit)
+- Automatic `.env` file management
+- Support for both OTP codes and magic links
+- Clean terminal interface with status updates
+
+#### Option 2: Manual (Browser)
+
+If you prefer to extract the token manually:
+
+1. Log in at [perplexity.ai](https://www.perplexity.ai)
+2. Open DevTools (`F12`) → Application/Storage → Cookies
+3. Copy the value of `__Secure-next-auth.session-token`
+4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN="your_token"`
+
+## Quick Start
+
+```python
+from perplexity_webui_scraper import Perplexity
+
+client = Perplexity(session_token="YOUR_TOKEN")
+conversation = client.create_conversation()
+
+conversation.ask("What is quantum computing?")
+print(conversation.answer)
+
+# Follow-up
+conversation.ask("Explain it simpler")
+print(conversation.answer)
+```
+
+### Streaming
+
+```python
+for chunk in conversation.ask("Explain AI", stream=True):
+    print(chunk.answer)
+```
+
+### With Options
+
+```python
+from perplexity_webui_scraper import (
+    ConversationConfig,
+    Coordinates,
+    Models,
+    SourceFocus,
+)
+
+config = ConversationConfig(
+    model=Models.RESEARCH,
+    source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
+    language="en-US",
+    coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
+)
+
+conversation = client.create_conversation(config)
+conversation.ask("Latest AI research", files=["paper.pdf"])
+```
+
+## API
+
+### `Perplexity(session_token, config?)`
+
+| Parameter | Type | Description |
+| --------------- | -------------- | ------------------ |
+| `session_token` | `str` | Browser cookie |
+| `config` | `ClientConfig` | Timeout, TLS, etc. |
+
+### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
+
+| Parameter | Type | Default | Description |
+| --------------- | ----------------------- | ------------- | ------------------- |
+| `query` | `str` | - | Question (required) |
+| `model` | `Model` | `Models.BEST` | AI model |
+| `files` | `list[str \| PathLike]` | `None` | File paths |
+| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
+| `stream` | `bool` | `False` | Enable streaming |
+
+### Models
+
+| Model | Description |
+| ---------------------------------- | ------------------------------------------------------------------------- |
+| `Models.RESEARCH` | Research - Fast and thorough for routine research |
+| `Models.LABS` | Labs - Multi-step tasks with advanced troubleshooting |
+| `Models.BEST` | Best - Automatically selects the most responsive model based on the query |
+| `Models.SONAR` | Sonar - Perplexity's fast model |
+| `Models.GPT_52` | GPT-5.2 - OpenAI's latest model |
+| `Models.GPT_52_THINKING` | GPT-5.2 Thinking - OpenAI's latest model with thinking |
+| `Models.CLAUDE_45_OPUS` | Claude Opus 4.5 - Anthropic's Opus reasoning model |
+| `Models.CLAUDE_45_OPUS_THINKING` | Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking |
+| `Models.GEMINI_3_PRO` | Gemini 3 Pro - Google's newest reasoning model |
+| `Models.GEMINI_3_FLASH` | Gemini 3 Flash - Google's fast reasoning model |
+| `Models.GEMINI_3_FLASH_THINKING` | Gemini 3 Flash Thinking - Google's fast reasoning model with thinking |
+| `Models.GROK_41` | Grok 4.1 - xAI's latest advanced model |
+| `Models.GROK_41_THINKING` | Grok 4.1 Thinking - xAI's latest reasoning model |
+| `Models.KIMI_K2_THINKING` | Kimi K2 Thinking - Moonshot AI's latest reasoning model |
+| `Models.CLAUDE_45_SONNET` | Claude Sonnet 4.5 - Anthropic's newest advanced model |
+| `Models.CLAUDE_45_SONNET_THINKING` | Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model |
+
+### CitationMode
+
+| Mode | Output |
+| ---------- | --------------------- |
+| `DEFAULT` | `text[1]` |
+| `MARKDOWN` | `text[1](url)` |
+| `CLEAN` | `text` (no citations) |
+
+### ConversationConfig
+
+| Parameter | Default | Description |
+| ----------------- | ------------- | ------------------ |
+| `model` | `Models.BEST` | Default model |
+| `citation_mode` | `CLEAN` | Citation format |
+| `save_to_library` | `False` | Save to library |
+| `search_focus` | `WEB` | Search type |
+| `source_focus` | `WEB` | Source types |
+| `time_range` | `ALL` | Time filter |
+| `language` | `"en-US"` | Response language |
+| `timezone` | `None` | Timezone |
+| `coordinates` | `None` | Location (lat/lng) |
+
+## CLI Tools
+
+### Session Token Generator
+
+```bash
+get-perplexity-session-token
+```
+
+Interactive tool to automatically obtain your Perplexity session token via email authentication. The token can be automatically saved to your `.env` file for immediate use.
+
+## Disclaimer
+
+This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
+
+By using this library, you agree to Perplexity AI's Terms of Service.
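The README added above documents three citation modes. A minimal sketch of enabling the MARKDOWN mode per conversation, using only names from the README's tables; with it, answers keep inline references in the `[1](url)` form shown in the CitationMode table:

```python
# Minimal sketch: conversation-level citation formatting from the README's CitationMode table.
from perplexity_webui_scraper import CitationMode, ConversationConfig, Perplexity

client = Perplexity(session_token="YOUR_TOKEN")
config = ConversationConfig(citation_mode=CitationMode.MARKDOWN)
conversation = client.create_conversation(config)

conversation.ask("What is quantum computing?")
print(conversation.answer)  # citations rendered as [1](https://...) links
```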
perplexity_webui_scraper-0.3.4.dist-info/RECORD ADDED

@@ -0,0 +1,16 @@
+perplexity_webui_scraper/__init__.py,sha256=aUs8lsx11WeqDi-ZT4Y2RiEhQVd6K66EZZ6x0BDuU90,949
+perplexity_webui_scraper/cli/get_perplexity_session_token.py,sha256=67Ck4S2MJ0701LnRHq73qY5oRLCsFOyu_8SMgsbTNFc,6937
+perplexity_webui_scraper/config.py,sha256=tjTwTFO39ONI4yOodqAov6sAXti18DiOOwFndpct68o,944
+perplexity_webui_scraper/constants.py,sha256=Kq-4i6yyTZ5VhUvbiZmbUmHrjMQm-p7H82Emm7b10-c,1867
+perplexity_webui_scraper/core.py,sha256=QGDjdd_h8fV7yE4ukV6iL8xuidxlxDzu5HvUx0W-0tA,14824
+perplexity_webui_scraper/enums.py,sha256=Xo7RmtWFxhSQU2Zma5sFmMyitOqlqjAb4XwRC0KJON0,2124
+perplexity_webui_scraper/exceptions.py,sha256=0oOWe_A0B0wBsFeogt323BGJY3oBzaFK9PItXs77J70,1629
+perplexity_webui_scraper/http.py,sha256=4x0LSCpKFtIR_izFYaGWXvNcewnXGnICFoPJNP892W8,5615
+perplexity_webui_scraper/limits.py,sha256=GwcwC8CnSNhlcLWGLpuDYA37gn8OXSfsXLIOc-QbxNs,465
+perplexity_webui_scraper/models.py,sha256=QVeZI-WQzpyi9JnE15QIMJ7nsG0YjIjOsZEA6YfX0tw,2448
+perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+perplexity_webui_scraper/types.py,sha256=VlnzvNilIHrDXM2YOGjJa1y2VY0tfR-F0zaPjQHoPKs,1028
+perplexity_webui_scraper-0.3.4.dist-info/WHEEL,sha256=ZyFSCYkV2BrxH6-HRVRg3R9Fo7MALzer9KiPYqNxSbo,79
+perplexity_webui_scraper-0.3.4.dist-info/entry_points.txt,sha256=x98Wqg3iD6aXxtm27KmB-BXPFo1ccDDGRaCp6fWn9m4,118
+perplexity_webui_scraper-0.3.4.dist-info/METADATA,sha256=Mm1LlknQT4ipS2bT-mtrcnNvCSjgjrW79tyQU0mxu9s,8557
+perplexity_webui_scraper-0.3.4.dist-info/RECORD,,
perplexity_webui_scraper-0.3.2.dist-info/METADATA REMOVED

@@ -1,146 +0,0 @@
-Metadata-Version: 2.3
-Name: perplexity-webui-scraper
-Version: 0.3.2
-Summary: An unofficial Python client library for interacting with Perplexity AI through its web interface.
-Author: henrique-coder
-Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
-Requires-Dist: curl-cffi>=0.13.0
-Requires-Dist: orjson>=3.11.5
-Requires-Dist: pydantic>=2.12.5
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-
-<div align="center">
-
-# Perplexity WebUI Scraper
-
-Unofficial Python client for [Perplexity AI](https://www.perplexity.ai).
-
-[](https://pypi.org/project/perplexity-webui-scraper)
-[](https://pypi.org/project/perplexity-webui-scraper)
-[](./LICENSE)
-
-</div>
-
----
-
-## Installation
-
-```bash
-uv pip install perplexity-webui-scraper
-```
-
-## Requirements
-
-- **Perplexity Pro subscription**
-- **Session token** (`__Secure-next-auth.session-token` cookie from browser)
-
-### Getting Your Session Token
-
-1. Log in at [perplexity.ai](https://www.perplexity.ai)
-2. Open DevTools (`F12`) → Application → Cookies
-3. Copy `__Secure-next-auth.session-token` value
-4. Store in `.env`: `PERPLEXITY_SESSION_TOKEN=your_token`
-
-## Quick Start
-
-```python
-from perplexity_webui_scraper import Perplexity
-
-client = Perplexity(session_token="YOUR_TOKEN")
-conversation = client.create_conversation()
-
-conversation.ask("What is quantum computing?")
-print(conversation.answer)
-
-# Follow-up
-conversation.ask("Explain it simpler")
-print(conversation.answer)
-```
-
-### Streaming
-
-```python
-for chunk in conversation.ask("Explain AI", stream=True):
-    print(chunk.answer)
-```
-
-### With Options
-
-```python
-from perplexity_webui_scraper import (
-    ConversationConfig,
-    Coordinates,
-    Models,
-    SourceFocus,
-)
-
-config = ConversationConfig(
-    model=Models.RESEARCH,
-    source_focus=[SourceFocus.WEB, SourceFocus.ACADEMIC],
-    language="en-US",
-    coordinates=Coordinates(latitude=40.7128, longitude=-74.0060),
-)
-
-conversation = client.create_conversation(config)
-conversation.ask("Latest AI research", files=["paper.pdf"])
-```
-
-## API
-
-### `Perplexity(session_token, config?)`
-
-| Parameter | Type | Description |
-| --------------- | -------------- | ------------------ |
-| `session_token` | `str` | Browser cookie |
-| `config` | `ClientConfig` | Timeout, TLS, etc. |
-
-### `Conversation.ask(query, model?, files?, citation_mode?, stream?)`
-
-| Parameter | Type | Default | Description |
-| --------------- | -------------- | ------------- | ------------------- |
-| `query` | `str` | — | Question (required) |
-| `model` | `Model` | `Models.BEST` | AI model |
-| `files` | `list[str]` | `None` | File paths |
-| `citation_mode` | `CitationMode` | `CLEAN` | Citation format |
-| `stream` | `bool` | `False` | Enable streaming |
-
-### Models
-
-| Model | Description |
-| ------------------------------ | ----------------- |
-| `Models.BEST` | Auto-select best |
-| `Models.RESEARCH` | Deep research |
-| `Models.SONAR` | Fast queries |
-| `Models.GPT_51` | OpenAI GPT-5.1 |
-| `Models.CLAUDE_45_SONNET` | Claude 4.5 Sonnet |
-| `Models.GEMINI_3_PRO_THINKING` | Gemini 3.0 Pro |
-| `Models.GROK_41` | xAI Grok 4.1 |
-
-### CitationMode
-
-| Mode | Output |
-| ---------- | --------------------- |
-| `DEFAULT` | `text[1]` |
-| `MARKDOWN` | `text[1](url)` |
-| `CLEAN` | `text` (no citations) |
-
-### ConversationConfig
-
-| Parameter | Default | Description |
-| ----------------- | ------------- | ------------------ |
-| `model` | `Models.BEST` | Default model |
-| `citation_mode` | `CLEAN` | Citation format |
-| `save_to_library` | `False` | Save to library |
-| `search_focus` | `WEB` | Search type |
-| `source_focus` | `WEB` | Source types |
-| `time_range` | `ALL` | Time filter |
-| `language` | `"en-US"` | Response language |
-| `timezone` | `None` | Timezone |
-| `coordinates` | `None` | Location (lat/lng) |
-
-## Disclaimer
-
-This is an **unofficial** library. It uses internal APIs that may change without notice. Use at your own risk. Not for production use.
-
-By using this library, you agree to Perplexity AI's Terms of Service.
perplexity_webui_scraper-0.3.2.dist-info/RECORD REMOVED

@@ -1,14 +0,0 @@
-perplexity_webui_scraper/__init__.py,sha256=pOj45Fk5ZspW7SniWrakjNpBEUNh3Zw0510h-R01p_U,1040
-perplexity_webui_scraper/config.py,sha256=Tslz5p1Ig4oooRiRIZkgnxBNH4k7VmEswkogmEcDd1U,940
-perplexity_webui_scraper/constants.py,sha256=ycXV8SCcP_vUCkmznVhE9oykZccaxuDNWmgrdJLGLyM,2943
-perplexity_webui_scraper/core.py,sha256=wrlVhRWMyhlIzXeJD6TMzuTAcMDw6jpIimIajLgDV-k,13586
-perplexity_webui_scraper/enums.py,sha256=Xo7RmtWFxhSQU2Zma5sFmMyitOqlqjAb4XwRC0KJON0,2124
-perplexity_webui_scraper/exceptions.py,sha256=0oOWe_A0B0wBsFeogt323BGJY3oBzaFK9PItXs77J70,1629
-perplexity_webui_scraper/http.py,sha256=cKxqW1_dJwP9TZbqM_BrJcft3sBmRZxO1owewZKYE7A,5578
-perplexity_webui_scraper/limits.py,sha256=qFKTO-qCy3f_XE2RWFGIgZXeAEaTTvEqLjuIX8cBUN8,561
-perplexity_webui_scraper/models.py,sha256=HufYUW0aM8MHvU0L4i8muGi6_Rmi7sNZC9JpY_BINY0,1892
-perplexity_webui_scraper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-perplexity_webui_scraper/types.py,sha256=VlnzvNilIHrDXM2YOGjJa1y2VY0tfR-F0zaPjQHoPKs,1028
-perplexity_webui_scraper-0.3.2.dist-info/WHEEL,sha256=93kfTGt3a0Dykt_T-gsjtyS5_p8F_d6CE1NwmBOirzo,79
-perplexity_webui_scraper-0.3.2.dist-info/METADATA,sha256=EmEo2CBTL6rckxyYYc3pq3s07cNopGK8axeEfoA44_w,4775
-perplexity_webui_scraper-0.3.2.dist-info/RECORD,,