perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,30 +1,51 @@
1
- """
2
- Extract AI responses from Perplexity's web interface.
3
- """
1
+ """Extract AI responses from Perplexity's web interface."""
4
2
 
5
3
  from importlib import metadata
6
4
 
7
5
  from .config import ClientConfig, ConversationConfig
8
6
  from .core import Conversation, Perplexity
9
7
  from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
8
+ from .exceptions import (
9
+ AuthenticationError,
10
+ FileUploadError,
11
+ FileValidationError,
12
+ HTTPError,
13
+ PerplexityError,
14
+ RateLimitError,
15
+ ResearchClarifyingQuestionsError,
16
+ ResponseParsingError,
17
+ StreamingError,
18
+ )
10
19
  from .models import Model, Models
11
20
  from .types import Coordinates, Response, SearchResultItem
12
21
 
13
22
 
23
+ ConversationConfig.model_rebuild()
24
+
25
+
14
26
  __version__: str = metadata.version("perplexity-webui-scraper")
15
27
  __all__: list[str] = [
28
+ "AuthenticationError",
16
29
  "CitationMode",
17
30
  "ClientConfig",
18
31
  "Conversation",
19
32
  "ConversationConfig",
20
33
  "Coordinates",
34
+ "FileUploadError",
35
+ "FileValidationError",
36
+ "HTTPError",
21
37
  "LogLevel",
22
38
  "Model",
23
39
  "Models",
24
40
  "Perplexity",
41
+ "PerplexityError",
42
+ "RateLimitError",
43
+ "ResearchClarifyingQuestionsError",
25
44
  "Response",
45
+ "ResponseParsingError",
26
46
  "SearchFocus",
27
47
  "SearchResultItem",
28
48
  "SourceFocus",
49
+ "StreamingError",
29
50
  "TimeRange",
30
51
  ]
@@ -1,6 +1,4 @@
1
- """
2
- CLI utility for secure Perplexity authentication and session extraction.
3
- """
1
+ """CLI utility for secure Perplexity authentication and session extraction."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
@@ -9,26 +7,20 @@ from sys import exit
9
7
  from typing import NoReturn
10
8
 
11
9
  from curl_cffi.requests import Session
10
+ from orjson import loads
12
11
  from rich.console import Console
13
12
  from rich.panel import Panel
14
13
  from rich.prompt import Confirm, Prompt
15
14
 
16
15
 
17
- # Constants
18
16
  BASE_URL: str = "https://www.perplexity.ai"
19
17
  ENV_KEY: str = "PERPLEXITY_SESSION_TOKEN"
20
18
 
21
-
22
- # Initialize console on stderr to ensure secure alternate screen usage
23
19
  console = Console(stderr=True, soft_wrap=True)
24
20
 
25
21
 
26
22
  def update_env(token: str) -> bool:
27
- """
28
- Securely updates the .env file with the session token.
29
-
30
- Preserves existing content and comments.
31
- """
23
+ """Securely updates the .env file with the session token."""
32
24
 
33
25
  path = Path(".env")
34
26
  line_entry = f'{ENV_KEY}="{token}"'
@@ -48,26 +40,23 @@ def update_env(token: str) -> bool:
48
40
  if not updated:
49
41
  if new_lines and new_lines[-1] != "":
50
42
  new_lines.append("")
51
-
52
43
  new_lines.append(line_entry)
53
44
 
54
45
  path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
55
-
56
46
  return True
47
+
57
48
  except Exception:
58
49
  return False
59
50
 
60
51
 
61
52
  def _initialize_session() -> tuple[Session, str]:
62
- """
63
- Initialize session and obtain CSRF token.
64
- """
53
+ """Initialize session and obtain CSRF token."""
65
54
 
66
55
  session = Session(impersonate="chrome", headers={"Referer": BASE_URL, "Origin": BASE_URL})
67
56
 
68
57
  with console.status("[bold green]Initializing secure connection...", spinner="dots"):
69
58
  session.get(BASE_URL)
70
- csrf_data = session.get(f"{BASE_URL}/api/auth/csrf").json()
59
+ csrf_data = loads(session.get(f"{BASE_URL}/api/auth/csrf").content)
71
60
  csrf = csrf_data.get("csrfToken")
72
61
 
73
62
  if not csrf:
@@ -77,12 +66,10 @@ def _initialize_session() -> tuple[Session, str]:
77
66
 
78
67
 
79
68
  def _request_verification_code(session: Session, csrf: str, email: str) -> None:
80
- """
81
- Send verification code to user's email.
82
- """
69
+ """Send verification code to user's email."""
83
70
 
84
71
  with console.status("[bold green]Sending verification code...", spinner="dots"):
85
- r = session.post(
72
+ response = session.post(
86
73
  f"{BASE_URL}/api/auth/signin/email?version=2.18&source=default",
87
74
  json={
88
75
  "email": email,
@@ -93,20 +80,18 @@ def _request_verification_code(session: Session, csrf: str, email: str) -> None:
93
80
  },
94
81
  )
95
82
 
96
- if r.status_code != 200:
97
- raise ValueError(f"Authentication request failed: {r.text}")
83
+ if response.status_code != 200:
84
+ raise ValueError(f"Authentication request failed: {response.text}")
98
85
 
99
86
 
100
87
  def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
101
- """
102
- Validate user input (OTP or magic link) and return redirect URL.
103
- """
88
+ """Validate user input (OTP or magic link) and return redirect URL."""
104
89
 
105
90
  with console.status("[bold green]Validating...", spinner="dots"):
106
91
  if user_input.startswith("http"):
107
92
  return user_input
108
93
 
109
- r_otp = session.post(
94
+ response_otp = session.post(
110
95
  f"{BASE_URL}/api/auth/otp-redirect-link",
111
96
  json={
112
97
  "email": email,
@@ -116,10 +101,10 @@ def _validate_and_get_redirect_url(session: Session, email: str, user_input: str
116
101
  },
117
102
  )
118
103
 
119
- if r_otp.status_code != 200:
104
+ if response_otp.status_code != 200:
120
105
  raise ValueError("Invalid verification code.")
121
106
 
122
- redirect_path = r_otp.json().get("redirect")
107
+ redirect_path = loads(response_otp.content).get("redirect")
123
108
 
124
109
  if not redirect_path:
125
110
  raise ValueError("No redirect URL received.")
@@ -128,9 +113,7 @@ def _validate_and_get_redirect_url(session: Session, email: str, user_input: str
128
113
 
129
114
 
130
115
  def _extract_session_token(session: Session, redirect_url: str) -> str:
131
- """
132
- Extract session token from cookies after authentication.
133
- """
116
+ """Extract session token from cookies after authentication."""
134
117
 
135
118
  session.get(redirect_url)
136
119
  token = session.cookies.get("__Secure-next-auth.session-token")
@@ -142,9 +125,7 @@ def _extract_session_token(session: Session, redirect_url: str) -> str:
142
125
 
143
126
 
144
127
  def _display_and_save_token(token: str) -> None:
145
- """
146
- Display token and optionally save to .env file.
147
- """
128
+ """Display token and optionally save to .env file."""
148
129
 
149
130
  console.print("\n[bold green]✅ Token generated successfully![/bold green]")
150
131
  console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")
@@ -159,9 +140,7 @@ def _display_and_save_token(token: str) -> None:
159
140
 
160
141
 
161
142
  def _show_header() -> None:
162
- """
163
- Display welcome header.
164
- """
143
+ """Display welcome header."""
165
144
 
166
145
  console.print(
167
146
  Panel(
@@ -175,9 +154,7 @@ def _show_header() -> None:
175
154
 
176
155
 
177
156
  def _show_exit_message() -> None:
178
- """
179
- Display security note and wait for user to exit.
180
- """
157
+ """Display security note and wait for user to exit."""
181
158
 
182
159
  console.print("\n[bold yellow]⚠️ Security Note:[/bold yellow]")
183
160
  console.print("Press [bold white]ENTER[/bold white] to clear screen and exit.")
@@ -185,46 +162,37 @@ def _show_exit_message() -> None:
185
162
 
186
163
 
187
164
  def get_token() -> NoReturn:
188
- """
189
- Executes the authentication flow within an ephemeral terminal screen.
190
-
191
- Handles CSRF, Email OTP/Link validation, and secure token display.
192
- """
165
+ """Executes the authentication flow within an ephemeral terminal screen."""
193
166
 
194
167
  with console.screen():
195
168
  try:
196
169
  _show_header()
197
170
 
198
- # Step 1: Initialize session and get CSRF token
199
171
  session, csrf = _initialize_session()
200
172
 
201
- # Step 2: Get email and request verification code
202
173
  console.print("\n[bold cyan]Step 1: Email Verification[/bold cyan]")
203
174
  email = Prompt.ask(" Enter your Perplexity email", console=console)
204
175
  _request_verification_code(session, csrf, email)
205
176
 
206
- # Step 3: Get and validate user input (OTP or magic link)
207
177
  console.print("\n[bold cyan]Step 2: Verification[/bold cyan]")
208
178
  console.print(" Check your email for a [bold]6-digit code[/bold] or [bold]magic link[/bold].")
209
179
  user_input = Prompt.ask(" Enter code or paste link", console=console).strip()
210
180
  redirect_url = _validate_and_get_redirect_url(session, email, user_input)
211
181
 
212
- # Step 4: Extract session token
213
182
  token = _extract_session_token(session, redirect_url)
214
183
 
215
- # Step 5: Display and optionally save token
216
184
  _display_and_save_token(token)
217
185
 
218
- # Step 6: Exit
219
186
  _show_exit_message()
220
187
 
221
188
  exit(0)
189
+
222
190
  except KeyboardInterrupt:
223
191
  exit(0)
192
+
224
193
  except Exception as error:
225
194
  console.print(f"\n[bold red]⛔ Error:[/bold red] {error}")
226
195
  console.input("[dim]Press ENTER to exit...[/dim]")
227
-
228
196
  exit(1)
229
197
 
230
198
 
@@ -1,27 +1,24 @@
1
- """
2
- Configuration classes.
3
- """
1
+ """Configuration classes."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
7
- from dataclasses import dataclass
5
+ from os import PathLike # noqa: TC003
8
6
  from typing import TYPE_CHECKING
9
7
 
8
+ from pydantic import BaseModel, ConfigDict
9
+
10
10
  from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
11
11
 
12
12
 
13
13
  if TYPE_CHECKING:
14
- from pathlib import Path
15
-
16
14
  from .models import Model
17
15
  from .types import Coordinates
18
16
 
19
17
 
20
- @dataclass(slots=True)
21
- class ConversationConfig:
22
- """
23
- Default settings for a conversation. Can be overridden per message.
24
- """
18
+ class ConversationConfig(BaseModel):
19
+ """Default settings for a conversation."""
20
+
21
+ model_config = ConfigDict(arbitrary_types_allowed=True)
25
22
 
26
23
  model: Model | None = None
27
24
  citation_mode: CitationMode = CitationMode.CLEAN
@@ -34,24 +31,10 @@ class ConversationConfig:
34
31
  coordinates: Coordinates | None = None
35
32
 
36
33
 
37
- @dataclass(frozen=True, slots=True)
38
- class ClientConfig:
39
- """
40
- HTTP client settings.
34
+ class ClientConfig(BaseModel):
35
+ """HTTP client settings."""
41
36
 
42
- Attributes:
43
- timeout: Request timeout in seconds.
44
- impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
45
- max_retries: Maximum retry attempts for failed requests.
46
- retry_base_delay: Initial delay in seconds before first retry.
47
- retry_max_delay: Maximum delay between retries.
48
- retry_jitter: Random jitter factor (0-1) to add to delays.
49
- requests_per_second: Rate limit for requests (0 to disable).
50
- rotate_fingerprint: Whether to rotate browser fingerprint on retries.
51
- logging_level: Logging verbosity level. Default is DISABLED.
52
- log_file: Optional file path for persistent logging. If set, logs go to file only.
53
- If None, logs go to console. All logs are appended.
54
- """
37
+ model_config = ConfigDict(frozen=True)
55
38
 
56
39
  timeout: int = 3600
57
40
  impersonate: str = "chrome"
@@ -62,4 +45,4 @@ class ClientConfig:
62
45
  requests_per_second: float = 0.5
63
46
  rotate_fingerprint: bool = True
64
47
  logging_level: LogLevel = LogLevel.DISABLED
65
- log_file: str | Path | None = None
48
+ log_file: str | PathLike[str] | None = None
@@ -1,6 +1,4 @@
1
- """
2
- Constants and values for the Perplexity internal API and HTTP interactions.
3
- """
1
+ """Constants and values for the Perplexity internal API."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
@@ -8,77 +6,41 @@ from re import Pattern, compile
8
6
  from typing import Final
9
7
 
10
8
 
11
- # API Configuration
12
9
  API_VERSION: Final[str] = "2.18"
13
- """
14
- Current API version used by Perplexity WebUI.
15
- """
10
+ """Current API version used by Perplexity WebUI."""
16
11
 
17
12
  API_BASE_URL: Final[str] = "https://www.perplexity.ai"
18
- """
19
- Base URL for all API requests.
20
- """
13
+ """Base URL for all API requests."""
21
14
 
22
- # API Endpoints
23
15
  ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
24
- """
25
- SSE endpoint for sending prompts.
26
- """
16
+ """SSE endpoint for sending prompts."""
27
17
 
28
18
  ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
29
- """
30
- Endpoint to initialize a search session.
31
- """
19
+ """Endpoint to initialize a search session."""
32
20
 
33
21
  ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
34
- """
35
- Endpoint for file upload URL generation.
36
- """
22
+ """Endpoint for file upload URL generation."""
37
23
 
38
- # API Fixed Parameters
39
24
  SEND_BACK_TEXT: Final[bool] = True
40
- """
41
- Whether to receive full text in each streaming chunk.
42
-
43
- True = API sends complete text each chunk (replace mode).
44
- False = API sends delta chunks only (accumulate mode).
45
- """
25
+ """Whether to receive full text in each streaming chunk (replace mode)."""
46
26
 
47
27
  USE_SCHEMATIZED_API: Final[bool] = False
48
- """
49
- Whether to use the schematized API format.
50
- """
28
+ """Whether to use the schematized API format."""
51
29
 
52
30
  PROMPT_SOURCE: Final[str] = "user"
53
- """
54
- Source identifier for prompts.
55
- """
31
+ """Source identifier for prompts."""
56
32
 
57
- # Regex Patterns (Pre-compiled for performance in streaming parsing)
58
33
  CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
59
- """
60
- Regex pattern for matching citation markers like [1], [2], etc.
61
-
62
- Uses word boundary to avoid matching things like [123].
63
- """
34
+ """Regex pattern for matching citation markers like [1], [2]."""
64
35
 
65
36
  JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
66
- """
67
- Pattern to detect JSON object strings.
68
- """
37
+ """Pattern to detect JSON object strings."""
69
38
 
70
- # HTTP Headers
71
39
  DEFAULT_HEADERS: Final[dict[str, str]] = {
72
40
  "Accept": "text/event-stream, application/json",
73
41
  "Content-Type": "application/json",
74
42
  }
75
- """
76
- Default HTTP headers for API requests.
77
-
78
- Referer and Origin are added dynamically based on BASE_URL.
79
- """
43
+ """Default HTTP headers for API requests."""
80
44
 
81
45
  SESSION_COOKIE_NAME: Final[str] = "__Secure-next-auth.session-token"
82
- """
83
- Name of the session cookie used for authentication.
84
- """
46
+ """Name of the session cookie used for authentication."""