perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,36 +1,25 @@
1
1
  """Extract AI responses from Perplexity's web interface."""
2
2
 
3
- from importlib.metadata import version
3
+ from importlib import metadata
4
4
 
5
5
  from .config import ClientConfig, ConversationConfig
6
6
  from .core import Conversation, Perplexity
7
- from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
8
- from .exceptions import (
9
- AuthenticationError,
10
- FileUploadError,
11
- FileValidationError,
12
- PerplexityError,
13
- RateLimitError,
14
- )
7
+ from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
15
8
  from .models import Model, Models
16
9
  from .types import Coordinates, Response, SearchResultItem
17
10
 
18
11
 
19
- __version__: str = version("perplexity-webui-scraper")
12
+ __version__: str = metadata.version("perplexity-webui-scraper")
20
13
  __all__: list[str] = [
21
- "AuthenticationError",
22
14
  "CitationMode",
23
15
  "ClientConfig",
24
16
  "Conversation",
25
17
  "ConversationConfig",
26
18
  "Coordinates",
27
- "FileUploadError",
28
- "FileValidationError",
19
+ "LogLevel",
29
20
  "Model",
30
21
  "Models",
31
22
  "Perplexity",
32
- "PerplexityError",
33
- "RateLimitError",
34
23
  "Response",
35
24
  "SearchFocus",
36
25
  "SearchResultItem",
@@ -0,0 +1,216 @@
1
+ """CLI utility for secure Perplexity authentication and session extraction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from sys import exit
7
+ from typing import NoReturn
8
+
9
+ from curl_cffi.requests import Session
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.prompt import Confirm, Prompt
13
+
14
+
15
+ # Constants
16
+ BASE_URL: str = "https://www.perplexity.ai"
17
+ ENV_KEY: str = "PERPLEXITY_SESSION_TOKEN"
18
+
19
+
20
+ # Initialize console on stderr to ensure secure alternate screen usage
21
+ console = Console(stderr=True, soft_wrap=True)
22
+
23
+
24
def update_env(token: str) -> bool:
    """
    Write the session token to the ``.env`` file in the current directory.

    Every line whose key is exactly ``ENV_KEY`` is replaced with the new
    entry; all other lines (comments, blank lines, unrelated variables) are
    kept unchanged. If no such line exists, the entry is appended, separated
    from any existing content by one blank line. A missing file is created
    with owner-only permissions because it is about to hold a credential.

    Args:
        token: Session token to store.

    Returns:
        True if the file was written, False if reading or writing failed.
    """

    path = Path(".env")
    line_entry = f'{ENV_KEY}="{token}"'

    try:
        if path.exists():
            lines = path.read_text(encoding="utf-8").splitlines()
        else:
            # Create the file up front so it starts out readable by the owner
            # only; permissions of an already existing file are left alone.
            path.touch(mode=0o600)
            lines = []

        updated = False
        new_lines = []

        for line in lines:
            # Compare the full key rather than a prefix, so a different
            # variable whose name merely starts with ENV_KEY is not clobbered.
            key = line.partition("=")[0].strip()

            if key == ENV_KEY:
                new_lines.append(line_entry)
                updated = True
            else:
                new_lines.append(line)

        if not updated:
            # Keep one blank line between existing content and the new entry.
            if new_lines and new_lines[-1] != "":
                new_lines.append("")

            new_lines.append(line_entry)

        path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
    except (OSError, UnicodeError):
        # Best effort: the caller reports the failure to the user.
        return False

    return True
57
+
58
+
59
def _initialize_session() -> tuple[Session, str]:
    """
    Open a Chrome-impersonating HTTP session and fetch a CSRF token.

    Returns:
        The session and the CSRF token read from ``/api/auth/csrf``.

    Raises:
        ValueError: If the CSRF response contains no ``csrfToken`` value.
    """

    browser_headers = {"Referer": BASE_URL, "Origin": BASE_URL}
    session = Session(impersonate="chrome", headers=browser_headers)

    with console.status("[bold green]Initializing secure connection...", spinner="dots"):
        # The landing page is requested before the CSRF endpoint
        # (presumably to collect initial cookies — not verifiable from here).
        session.get(BASE_URL)
        csrf_response = session.get(f"{BASE_URL}/api/auth/csrf")
        csrf_token = csrf_response.json().get("csrfToken")

    if csrf_token:
        return session, csrf_token

    raise ValueError("Failed to obtain CSRF token.")
73
+
74
+
75
def _request_verification_code(session: Session, csrf: str, email: str) -> None:
    """
    Ask the sign-in endpoint to e-mail a verification code.

    Args:
        session: Session returned by ``_initialize_session``.
        csrf: CSRF token sent along with the request.
        email: Address the verification code is requested for.

    Raises:
        ValueError: If the endpoint answers with a status other than 200.
    """

    endpoint = f"{BASE_URL}/api/auth/signin/email?version=2.18&source=default"
    payload = {
        "email": email,
        "csrfToken": csrf,
        "useNumericOtp": "true",
        "json": "true",
        "callbackUrl": f"{BASE_URL}/?login-source=floatingSignup",
    }

    with console.status("[bold green]Sending verification code...", spinner="dots"):
        response = session.post(endpoint, json=payload)

    if response.status_code == 200:
        return

    raise ValueError(f"Authentication request failed: {response.text}")
92
+
93
+
94
def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
    """
    Turn the user's input into the URL that completes the sign-in.

    Input starting with ``http`` is treated as a magic link and returned
    unchanged. Anything else is sent to the OTP endpoint as a verification
    code, and the redirect target from the response is returned.

    Args:
        session: Session used for the OTP request.
        email: Address the code was sent to.
        user_input: Verification code or pasted link.

    Returns:
        Absolute URL to request next.

    Raises:
        ValueError: If the OTP request does not return status 200, or the
            response has no ``redirect`` value.
    """

    with console.status("[bold green]Validating...", spinner="dots"):
        if user_input.startswith("http"):
            return user_input

        otp_payload = {
            "email": email,
            "otp": user_input,
            "redirectUrl": f"{BASE_URL}/?login-source=floatingSignup",
            "emailLoginMethod": "web-otp",
        }
        response = session.post(f"{BASE_URL}/api/auth/otp-redirect-link", json=otp_payload)

    if response.status_code != 200:
        raise ValueError("Invalid verification code.")

    target = response.json().get("redirect")

    if not target:
        raise ValueError("No redirect URL received.")

    # A site-relative path is resolved against the base URL.
    if target.startswith("/"):
        return f"{BASE_URL}{target}"

    return target
120
+
121
+
122
def _extract_session_token(session: Session, redirect_url: str) -> str:
    """
    Request the redirect URL and read the session token cookie.

    Args:
        session: Session that performs the request and holds the cookies.
        redirect_url: URL obtained from the magic link or the OTP exchange.

    Returns:
        Value of the ``__Secure-next-auth.session-token`` cookie.

    Raises:
        ValueError: If that cookie is absent or empty after the request.
    """

    session.get(redirect_url)
    cookie_value = session.cookies.get("__Secure-next-auth.session-token")

    if cookie_value:
        return cookie_value

    raise ValueError("Authentication successful, but token not found.")
132
+
133
+
134
def _display_and_save_token(token: str) -> None:
    """
    Print the token and, if the user agrees, store it in the .env file.

    Args:
        token: Session token to show and optionally save.
    """

    console.print("\n[bold green]✅ Token generated successfully![/bold green]")
    console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")

    question = f"Save token to [bold yellow].env[/bold yellow] file ({ENV_KEY})?"

    # Saving is the default answer; nothing is written if the user declines.
    if not Confirm.ask(question, default=True, console=console):
        return

    if update_env(token):
        outcome = "[dim]Token saved to .env successfully.[/dim]"
    else:
        outcome = "[red]Failed to save to .env file.[/red]"

    console.print(outcome)
147
+
148
+
149
def _show_header() -> None:
    """Print the welcome panel that introduces the token generator."""

    banner_text = (
        "[bold white]Perplexity WebUI Scraper[/bold white]\n\n"
        "Automatic session token generator via email authentication.\n"
        "[dim]All session data will be cleared on exit.[/dim]"
    )
    banner = Panel(banner_text, title="🔐 Token Generator", border_style="cyan")

    console.print(banner)
161
+
162
+
163
def _show_exit_message() -> None:
    """Print the closing security note and block until the user presses ENTER."""

    closing_lines = (
        "\n[bold yellow]⚠️ Security Note:[/bold yellow]",
        "Press [bold white]ENTER[/bold white] to clear screen and exit.",
    )

    for text in closing_lines:
        console.print(text)

    # Wait here so the token stays visible until the user is done with it.
    console.input()
169
+
170
+
171
def get_token() -> NoReturn:
    """
    Run the interactive authentication flow and exit the process.

    The whole flow runs inside ``console.screen()``: fetch a CSRF token, ask
    for the e-mail address and request a verification code, accept the code
    or magic link, read the session token from the cookies, then display it
    and offer to save it to ``.env``.

    This function never returns. It exits with status 0 on success or when
    interrupted with Ctrl+C, and with status 1 after any other error.
    """

    # Function-scope import: escape() is only needed on the error path, and
    # rich is already a dependency of this module.
    from rich.markup import escape

    with console.screen():
        try:
            _show_header()

            # Open the HTTP session and obtain a CSRF token.
            session, csrf = _initialize_session()

            # User-facing step 1: ask for the e-mail and request a code.
            console.print("\n[bold cyan]Step 1: Email Verification[/bold cyan]")
            email = Prompt.ask(" Enter your Perplexity email", console=console)
            _request_verification_code(session, csrf, email)

            # User-facing step 2: accept the code or the magic link.
            console.print("\n[bold cyan]Step 2: Verification[/bold cyan]")
            console.print(" Check your email for a [bold]6-digit code[/bold] or [bold]magic link[/bold].")
            user_input = Prompt.ask(" Enter code or paste link", console=console).strip()
            redirect_url = _validate_and_get_redirect_url(session, email, user_input)

            # Complete the sign-in and read the session token cookie.
            token = _extract_session_token(session, redirect_url)

            # Show the token and optionally save it to .env.
            _display_and_save_token(token)

            # Keep the screen up until the user confirms.
            _show_exit_message()

            exit(0)
        except KeyboardInterrupt:
            exit(0)
        except Exception as error:
            # The message may contain raw server text; escape it so that
            # square brackets are not interpreted as Rich markup.
            console.print(f"\n[bold red]⛔ Error:[/bold red] {escape(str(error))}")

            try:
                console.input("[dim]Press ENTER to exit...[/dim]")
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or Ctrl+C at this prompt still ends with status 1.
                pass

            exit(1)
213
+
214
+
215
+ if __name__ == "__main__":
216
+ get_token()
@@ -5,10 +5,12 @@ from __future__ import annotations
5
5
  from dataclasses import dataclass
6
6
  from typing import TYPE_CHECKING
7
7
 
8
- from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
8
+ from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
9
9
 
10
10
 
11
11
  if TYPE_CHECKING:
12
+ from pathlib import Path
13
+
12
14
  from .models import Model
13
15
  from .types import Coordinates
14
16
 
@@ -30,7 +32,30 @@ class ConversationConfig:
30
32
 
31
33
  @dataclass(frozen=True, slots=True)
32
34
  class ClientConfig:
33
- """HTTP client settings."""
34
-
35
- timeout: int = 1800
35
+ """
36
+ HTTP client settings.
37
+
38
+ Attributes:
39
+ timeout: Request timeout in seconds.
40
+ impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
41
+ max_retries: Maximum retry attempts for failed requests.
42
+ retry_base_delay: Initial delay in seconds before first retry.
43
+ retry_max_delay: Maximum delay between retries.
44
+ retry_jitter: Random jitter factor (0-1) to add to delays.
45
+ requests_per_second: Rate limit for requests (0 to disable).
46
+ rotate_fingerprint: Whether to rotate browser fingerprint on retries.
47
+ logging_level: Logging verbosity level. Default is DISABLED.
48
+ log_file: Optional file path for persistent logging. If set, logs go to file only.
49
+ If None, logs go to console. All logs are appended.
50
+ """
51
+
52
+ timeout: int = 3600
36
53
  impersonate: str = "chrome"
54
+ max_retries: int = 3
55
+ retry_base_delay: float = 1.0
56
+ retry_max_delay: float = 60.0
57
+ retry_jitter: float = 0.5
58
+ requests_per_second: float = 0.5
59
+ rotate_fingerprint: bool = True
60
+ logging_level: LogLevel = LogLevel.DISABLED
61
+ log_file: str | Path | None = None
@@ -1,8 +1,4 @@
1
- """Fixed constants and values for the Perplexity API.
2
-
3
- These are internal API values that should not be modified by users.
4
- They represent fixed parameters required by the Perplexity WebUI API.
5
- """
1
+ """Constants and values for the Perplexity internal API and HTTP interactions."""
6
2
 
7
3
  from __future__ import annotations
8
4
 
@@ -10,21 +6,14 @@ from re import Pattern, compile
10
6
  from typing import Final
11
7
 
12
8
 
13
- # =============================================================================
14
9
  # API Configuration
15
- # =============================================================================
16
-
17
10
  API_VERSION: Final[str] = "2.18"
18
11
  """Current API version used by Perplexity WebUI."""
19
12
 
20
13
  API_BASE_URL: Final[str] = "https://www.perplexity.ai"
21
14
  """Base URL for all API requests."""
22
15
 
23
-
24
- # =============================================================================
25
16
  # API Endpoints
26
- # =============================================================================
27
-
28
17
  ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
29
18
  """SSE endpoint for sending prompts."""
30
19
 
@@ -34,54 +23,39 @@ ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
34
23
  ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
35
24
  """Endpoint for file upload URL generation."""
36
25
 
37
-
38
- # =============================================================================
39
26
  # API Fixed Parameters
40
- # =============================================================================
41
-
42
27
  SEND_BACK_TEXT: Final[bool] = True
43
- """Whether to receive full text in each streaming chunk.
28
+ """
29
+ Whether to receive full text in each streaming chunk.
44
30
 
45
31
  True = API sends complete text each chunk (replace mode).
46
32
  False = API sends delta chunks only (accumulate mode).
47
-
48
- Currently must be True for the parser to work correctly.
49
33
  """
50
34
 
51
35
  USE_SCHEMATIZED_API: Final[bool] = False
52
- """Whether to use the schematized API format.
53
-
54
- Currently must be False - schematized format is not supported.
55
- """
36
+ """Whether to use the schematized API format."""
56
37
 
57
38
  PROMPT_SOURCE: Final[str] = "user"
58
39
  """Source identifier for prompts."""
59
40
 
60
-
61
- # =============================================================================
62
- # Regex Patterns (Pre-compiled for performance)
63
- # =============================================================================
64
-
41
+ # Regex Patterns (Pre-compiled for performance in streaming parsing)
65
42
  CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
66
- """Regex pattern for matching citation markers like [1], [2], etc.
43
+ """
44
+ Regex pattern for matching citation markers like [1], [2], etc.
67
45
 
68
46
  Matches only one- or two-digit markers, so longer ones like [123] are not matched.
69
- Pre-compiled for performance in streaming scenarios.
70
47
  """
71
48
 
72
49
  JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
73
50
  """Pattern to detect JSON object strings."""
74
51
 
75
-
76
- # =============================================================================
77
52
  # HTTP Headers
78
- # =============================================================================
79
-
80
53
  DEFAULT_HEADERS: Final[dict[str, str]] = {
81
54
  "Accept": "text/event-stream, application/json",
82
55
  "Content-Type": "application/json",
83
56
  }
84
- """Default HTTP headers for API requests.
57
+ """
58
+ Default HTTP headers for API requests.
85
59
 
86
60
  Referer and Origin are added dynamically based on API_BASE_URL.
87
61
  """