PyPI - perplexity-webui-scraper - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

perplexity_webui_scraper/__init__.py +4 -15
perplexity_webui_scraper/cli/get_perplexity_session_token.py +216 -0
perplexity_webui_scraper/config.py +29 -4
perplexity_webui_scraper/constants.py +9 -35
perplexity_webui_scraper/core.py +225 -21
perplexity_webui_scraper/enums.py +34 -4
perplexity_webui_scraper/exceptions.py +74 -0
perplexity_webui_scraper/http.py +370 -36
perplexity_webui_scraper/limits.py +2 -5
perplexity_webui_scraper/logging.py +256 -0
perplexity_webui_scraper/mcp/__init__.py +18 -0
perplexity_webui_scraper/mcp/__main__.py +9 -0
perplexity_webui_scraper/mcp/server.py +181 -0
perplexity_webui_scraper/models.py +34 -19
perplexity_webui_scraper/resilience.py +179 -0
perplexity_webui_scraper-0.3.5.dist-info/METADATA +304 -0
perplexity_webui_scraper-0.3.5.dist-info/RECORD +21 -0
{perplexity_webui_scraper-0.3.3.dist-info → perplexity_webui_scraper-0.3.5.dist-info}/WHEEL +1 -1
perplexity_webui_scraper-0.3.5.dist-info/entry_points.txt +4 -0
perplexity_webui_scraper-0.3.3.dist-info/METADATA +0 -166
perplexity_webui_scraper-0.3.3.dist-info/RECORD +0 -14

perplexity_webui_scraper/__init__.py CHANGED Viewed

@@ -1,36 +1,25 @@
 """Extract AI responses from Perplexity's web interface."""
-from importlib.metadata import version
+from importlib import metadata
 from .config import ClientConfig, ConversationConfig
 from .core import Conversation, Perplexity
-from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
-from .exceptions import (
-    AuthenticationError,
-    FileUploadError,
-    FileValidationError,
-    PerplexityError,
-    RateLimitError,
-)
+from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
 from .models import Model, Models
 from .types import Coordinates, Response, SearchResultItem
-__version__: str = version("perplexity-webui-scraper")
+__version__: str = metadata.version("perplexity-webui-scraper")
 __all__: list[str] = [
-    "AuthenticationError",
     "CitationMode",
     "ClientConfig",
     "Conversation",
     "ConversationConfig",
     "Coordinates",
-    "FileUploadError",
-    "FileValidationError",
+    "LogLevel",
     "Model",
     "Models",
     "Perplexity",
-    "PerplexityError",
-    "RateLimitError",
     "Response",
     "SearchFocus",
     "SearchResultItem",

perplexity_webui_scraper/cli/get_perplexity_session_token.py ADDED Viewed

@@ -0,0 +1,216 @@
+"""CLI utility for secure Perplexity authentication and session extraction."""
+from __future__ import annotations
+from pathlib import Path
+from sys import exit
+from typing import NoReturn
+from curl_cffi.requests import Session
+from rich.console import Console
+from rich.panel import Panel
+from rich.prompt import Confirm, Prompt
+# Constants
+BASE_URL: str = "https://www.perplexity.ai"
+ENV_KEY: str = "PERPLEXITY_SESSION_TOKEN"
+# Initialize console on stderr to ensure secure alternate screen usage
+console = Console(stderr=True, soft_wrap=True)
+def update_env(token: str) -> bool:
+    """
+    Securely updates the .env file with the session token.
+    Preserves existing content and comments.
+    """
+    path = Path(".env")
+    line_entry = f'{ENV_KEY}="{token}"'
+    try:
+        lines = path.read_text(encoding="utf-8").splitlines() if path.exists() else []
+        updated = False
+        new_lines = []
+        for line in lines:
+            if line.strip().startswith(ENV_KEY):
+                new_lines.append(line_entry)
+                updated = True
+            else:
+                new_lines.append(line)
+        if not updated:
+            if new_lines and new_lines[-1] != "":
+                new_lines.append("")
+            new_lines.append(line_entry)
+        path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
+        return True
+    except Exception:
+        return False
+def _initialize_session() -> tuple[Session, str]:
+    """Initialize session and obtain CSRF token."""
+    session = Session(impersonate="chrome", headers={"Referer": BASE_URL, "Origin": BASE_URL})
+    with console.status("[bold green]Initializing secure connection...", spinner="dots"):
+        session.get(BASE_URL)
+        csrf_data = session.get(f"{BASE_URL}/api/auth/csrf").json()
+        csrf = csrf_data.get("csrfToken")
+        if not csrf:
+            raise ValueError("Failed to obtain CSRF token.")
+    return session, csrf
+def _request_verification_code(session: Session, csrf: str, email: str) -> None:
+    """Send verification code to user's email."""
+    with console.status("[bold green]Sending verification code...", spinner="dots"):
+        r = session.post(
+            f"{BASE_URL}/api/auth/signin/email?version=2.18&source=default",
+            json={
+                "email": email,
+                "csrfToken": csrf,
+                "useNumericOtp": "true",
+                "json": "true",
+                "callbackUrl": f"{BASE_URL}/?login-source=floatingSignup",
+            },
+        )
+        if r.status_code != 200:
+            raise ValueError(f"Authentication request failed: {r.text}")
+def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
+    """Validate user input (OTP or magic link) and return redirect URL."""
+    with console.status("[bold green]Validating...", spinner="dots"):
+        if user_input.startswith("http"):
+            return user_input
+        r_otp = session.post(
+            f"{BASE_URL}/api/auth/otp-redirect-link",
+            json={
+                "email": email,
+                "otp": user_input,
+                "redirectUrl": f"{BASE_URL}/?login-source=floatingSignup",
+                "emailLoginMethod": "web-otp",
+            },
+        )
+        if r_otp.status_code != 200:
+            raise ValueError("Invalid verification code.")
+        redirect_path = r_otp.json().get("redirect")
+        if not redirect_path:
+            raise ValueError("No redirect URL received.")
+        return f"{BASE_URL}{redirect_path}" if redirect_path.startswith("/") else redirect_path
+def _extract_session_token(session: Session, redirect_url: str) -> str:
+    """Extract session token from cookies after authentication."""
+    session.get(redirect_url)
+    token = session.cookies.get("__Secure-next-auth.session-token")
+    if not token:
+        raise ValueError("Authentication successful, but token not found.")
+    return token
+def _display_and_save_token(token: str) -> None:
+    """Display token and optionally save to .env file."""
+    console.print("\n[bold green]✅ Token generated successfully![/bold green]")
+    console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")
+    prompt_text = f"Save token to [bold yellow].env[/bold yellow] file ({ENV_KEY})?"
+    if Confirm.ask(prompt_text, default=True, console=console):
+        if update_env(token):
+            console.print("[dim]Token saved to .env successfully.[/dim]")
+        else:
+            console.print("[red]Failed to save to .env file.[/red]")
+def _show_header() -> None:
+    """Display welcome header."""
+    console.print(
+        Panel(
+            "[bold white]Perplexity WebUI Scraper[/bold white]\n\n"
+            "Automatic session token generator via email authentication.\n"
+            "[dim]All session data will be cleared on exit.[/dim]",
+            title="🔐 Token Generator",
+            border_style="cyan",
+        )
+    )
+def _show_exit_message() -> None:
+    """Display security note and wait for user to exit."""
+    console.print("\n[bold yellow]⚠️ Security Note:[/bold yellow]")
+    console.print("Press [bold white]ENTER[/bold white] to clear screen and exit.")
+    console.input()
+def get_token() -> NoReturn:
+    """
+    Executes the authentication flow within an ephemeral terminal screen.
+    Handles CSRF, Email OTP/Link validation, and secure token display.
+    """
+    with console.screen():
+        try:
+            _show_header()
+            # Step 1: Initialize session and get CSRF token
+            session, csrf = _initialize_session()
+            # Step 2: Get email and request verification code
+            console.print("\n[bold cyan]Step 1: Email Verification[/bold cyan]")
+            email = Prompt.ask("  Enter your Perplexity email", console=console)
+            _request_verification_code(session, csrf, email)
+            # Step 3: Get and validate user input (OTP or magic link)
+            console.print("\n[bold cyan]Step 2: Verification[/bold cyan]")
+            console.print("  Check your email for a [bold]6-digit code[/bold] or [bold]magic link[/bold].")
+            user_input = Prompt.ask("  Enter code or paste link", console=console).strip()
+            redirect_url = _validate_and_get_redirect_url(session, email, user_input)
+            # Step 4: Extract session token
+            token = _extract_session_token(session, redirect_url)
+            # Step 5: Display and optionally save token
+            _display_and_save_token(token)
+            # Step 6: Exit
+            _show_exit_message()
+            exit(0)
+        except KeyboardInterrupt:
+            exit(0)
+        except Exception as error:
+            console.print(f"\n[bold red]⛔ Error:[/bold red] {error}")
+            console.input("[dim]Press ENTER to exit...[/dim]")
+            exit(1)
+if __name__ == "__main__":
+    get_token()

perplexity_webui_scraper/config.py CHANGED Viewed

@@ -5,10 +5,12 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import TYPE_CHECKING
-from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
+from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
 if TYPE_CHECKING:
+    from pathlib import Path
     from .models import Model
     from .types import Coordinates
@@ -30,7 +32,30 @@ class ConversationConfig:
 @dataclass(frozen=True, slots=True)
 class ClientConfig:
-    """HTTP client settings."""
-    timeout: int = 1800
+    """
+    HTTP client settings.
+    Attributes:
+        timeout: Request timeout in seconds.
+        impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
+        max_retries: Maximum retry attempts for failed requests.
+        retry_base_delay: Initial delay in seconds before first retry.
+        retry_max_delay: Maximum delay between retries.
+        retry_jitter: Random jitter factor (0-1) to add to delays.
+        requests_per_second: Rate limit for requests (0 to disable).
+        rotate_fingerprint: Whether to rotate browser fingerprint on retries.
+        logging_level: Logging verbosity level. Default is DISABLED.
+        log_file: Optional file path for persistent logging. If set, logs go to file only.
+                  If None, logs go to console. All logs are appended.
+    """
+    timeout: int = 3600
     impersonate: str = "chrome"
+    max_retries: int = 3
+    retry_base_delay: float = 1.0
+    retry_max_delay: float = 60.0
+    retry_jitter: float = 0.5
+    requests_per_second: float = 0.5
+    rotate_fingerprint: bool = True
+    logging_level: LogLevel = LogLevel.DISABLED
+    log_file: str | Path | None = None

perplexity_webui_scraper/constants.py CHANGED Viewed

@@ -1,8 +1,4 @@
-"""Fixed constants and values for the Perplexity API.
-These are internal API values that should not be modified by users.
-They represent fixed parameters required by the Perplexity WebUI API.
-"""
+"""Constants and values for the Perplexity internal API and HTTP interactions."""
 from __future__ import annotations
@@ -10,21 +6,14 @@ from re import Pattern, compile
 from typing import Final
-# =============================================================================
 # API Configuration
-# =============================================================================
 API_VERSION: Final[str] = "2.18"
 """Current API version used by Perplexity WebUI."""
 API_BASE_URL: Final[str] = "https://www.perplexity.ai"
 """Base URL for all API requests."""
-# =============================================================================
 # API Endpoints
-# =============================================================================
 ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
 """SSE endpoint for sending prompts."""
@@ -34,54 +23,39 @@ ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
 ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
 """Endpoint for file upload URL generation."""
-# =============================================================================
 # API Fixed Parameters
-# =============================================================================
 SEND_BACK_TEXT: Final[bool] = True
-"""Whether to receive full text in each streaming chunk.
+"""
+Whether to receive full text in each streaming chunk.
 True = API sends complete text each chunk (replace mode).
 False = API sends delta chunks only (accumulate mode).
-Currently must be True for the parser to work correctly.
 """
 USE_SCHEMATIZED_API: Final[bool] = False
-"""Whether to use the schematized API format.
-Currently must be False - schematized format is not supported.
-"""
+"""Whether to use the schematized API format."""
 PROMPT_SOURCE: Final[str] = "user"
 """Source identifier for prompts."""
-# =============================================================================
-# Regex Patterns (Pre-compiled for performance)
-# =============================================================================
+# Regex Patterns (Pre-compiled for performance in streaming parsing)
 CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
-"""Regex pattern for matching citation markers like [1], [2], etc.
+"""
+Regex pattern for matching citation markers like [1], [2], etc.
 Uses word boundary to avoid matching things like [123].
-Pre-compiled for performance in streaming scenarios.
 """
 JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
 """Pattern to detect JSON object strings."""
-# =============================================================================
 # HTTP Headers
-# =============================================================================
 DEFAULT_HEADERS: Final[dict[str, str]] = {
     "Accept": "text/event-stream, application/json",
     "Content-Type": "application/json",
 }
-"""Default HTTP headers for API requests.
+"""
+Default HTTP headers for API requests.
 Referer and Origin are added dynamically based on BASE_URL.
 """

perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

perplexity-webui-scraper 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl