PyPI - perplexity-webui-scraper - Versions diffs - 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

perplexity_webui_scraper/__init__.py +24 -3
perplexity_webui_scraper/cli/get_perplexity_session_token.py +21 -53
perplexity_webui_scraper/config.py +12 -29
perplexity_webui_scraper/constants.py +13 -51
perplexity_webui_scraper/core.py +18 -154
perplexity_webui_scraper/enums.py +26 -88
perplexity_webui_scraper/exceptions.py +29 -50
perplexity_webui_scraper/http.py +39 -332
perplexity_webui_scraper/limits.py +6 -16
perplexity_webui_scraper/logging.py +23 -180
perplexity_webui_scraper/mcp/__init__.py +2 -8
perplexity_webui_scraper/mcp/__main__.py +1 -3
perplexity_webui_scraper/mcp/server.py +105 -82
perplexity_webui_scraper/models.py +27 -71
perplexity_webui_scraper/resilience.py +17 -100
perplexity_webui_scraper/types.py +18 -25
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/METADATA +120 -101
perplexity_webui_scraper-0.4.0.dist-info/RECORD +21 -0
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/WHEEL +1 -1
perplexity_webui_scraper-0.3.7.dist-info/RECORD +0 -21
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/entry_points.txt +0 -0

perplexity_webui_scraper/__init__.py CHANGED Viewed

@@ -1,30 +1,51 @@
-"""
-Extract AI responses from Perplexity's web interface.
-"""
+"""Extract AI responses from Perplexity's web interface."""
 from importlib import metadata
 from .config import ClientConfig, ConversationConfig
 from .core import Conversation, Perplexity
 from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
+from .exceptions import (
+    AuthenticationError,
+    FileUploadError,
+    FileValidationError,
+    HTTPError,
+    PerplexityError,
+    RateLimitError,
+    ResearchClarifyingQuestionsError,
+    ResponseParsingError,
+    StreamingError,
+)
 from .models import Model, Models
 from .types import Coordinates, Response, SearchResultItem
+ConversationConfig.model_rebuild()
 __version__: str = metadata.version("perplexity-webui-scraper")
 __all__: list[str] = [
+    "AuthenticationError",
     "CitationMode",
     "ClientConfig",
     "Conversation",
     "ConversationConfig",
     "Coordinates",
+    "FileUploadError",
+    "FileValidationError",
+    "HTTPError",
     "LogLevel",
     "Model",
     "Models",
     "Perplexity",
+    "PerplexityError",
+    "RateLimitError",
+    "ResearchClarifyingQuestionsError",
     "Response",
+    "ResponseParsingError",
     "SearchFocus",
     "SearchResultItem",
     "SourceFocus",
+    "StreamingError",
     "TimeRange",
 ]

perplexity_webui_scraper/cli/get_perplexity_session_token.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-CLI utility for secure Perplexity authentication and session extraction.
-"""
+"""CLI utility for secure Perplexity authentication and session extraction."""
 from __future__ import annotations
@@ -9,26 +7,20 @@ from sys import exit
 from typing import NoReturn
 from curl_cffi.requests import Session
+from orjson import loads
 from rich.console import Console
 from rich.panel import Panel
 from rich.prompt import Confirm, Prompt
-# Constants
 BASE_URL: str = "https://www.perplexity.ai"
 ENV_KEY: str = "PERPLEXITY_SESSION_TOKEN"
-# Initialize console on stderr to ensure secure alternate screen usage
 console = Console(stderr=True, soft_wrap=True)
 def update_env(token: str) -> bool:
-    """
-    Securely updates the .env file with the session token.
-    Preserves existing content and comments.
-    """
+    """Securely updates the .env file with the session token."""
     path = Path(".env")
     line_entry = f'{ENV_KEY}="{token}"'
@@ -48,26 +40,23 @@ def update_env(token: str) -> bool:
         if not updated:
             if new_lines and new_lines[-1] != "":
                 new_lines.append("")
             new_lines.append(line_entry)
         path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
         return True
     except Exception:
         return False
 def _initialize_session() -> tuple[Session, str]:
-    """
-    Initialize session and obtain CSRF token.
-    """
+    """Initialize session and obtain CSRF token."""
     session = Session(impersonate="chrome", headers={"Referer": BASE_URL, "Origin": BASE_URL})
     with console.status("[bold green]Initializing secure connection...", spinner="dots"):
         session.get(BASE_URL)
-        csrf_data = session.get(f"{BASE_URL}/api/auth/csrf").json()
+        csrf_data = loads(session.get(f"{BASE_URL}/api/auth/csrf").content)
         csrf = csrf_data.get("csrfToken")
         if not csrf:
@@ -77,12 +66,10 @@ def _initialize_session() -> tuple[Session, str]:
 def _request_verification_code(session: Session, csrf: str, email: str) -> None:
-    """
-    Send verification code to user's email.
-    """
+    """Send verification code to user's email."""
     with console.status("[bold green]Sending verification code...", spinner="dots"):
-        r = session.post(
+        response = session.post(
             f"{BASE_URL}/api/auth/signin/email?version=2.18&source=default",
             json={
                 "email": email,
@@ -93,20 +80,18 @@ def _request_verification_code(session: Session, csrf: str, email: str) -> None:
             },
         )
-        if r.status_code != 200:
-            raise ValueError(f"Authentication request failed: {r.text}")
+        if response.status_code != 200:
+            raise ValueError(f"Authentication request failed: {response.text}")
 def _validate_and_get_redirect_url(session: Session, email: str, user_input: str) -> str:
-    """
-    Validate user input (OTP or magic link) and return redirect URL.
-    """
+    """Validate user input (OTP or magic link) and return redirect URL."""
     with console.status("[bold green]Validating...", spinner="dots"):
         if user_input.startswith("http"):
             return user_input
-        r_otp = session.post(
+        response_otp = session.post(
             f"{BASE_URL}/api/auth/otp-redirect-link",
             json={
                 "email": email,
@@ -116,10 +101,10 @@ def _validate_and_get_redirect_url(session: Session, email: str, user_input: str
             },
         )
-        if r_otp.status_code != 200:
+        if response_otp.status_code != 200:
             raise ValueError("Invalid verification code.")
-        redirect_path = r_otp.json().get("redirect")
+        redirect_path = loads(response_otp.content).get("redirect")
         if not redirect_path:
             raise ValueError("No redirect URL received.")
@@ -128,9 +113,7 @@ def _validate_and_get_redirect_url(session: Session, email: str, user_input: str
 def _extract_session_token(session: Session, redirect_url: str) -> str:
-    """
-    Extract session token from cookies after authentication.
-    """
+    """Extract session token from cookies after authentication."""
     session.get(redirect_url)
     token = session.cookies.get("__Secure-next-auth.session-token")
@@ -142,9 +125,7 @@ def _extract_session_token(session: Session, redirect_url: str) -> str:
 def _display_and_save_token(token: str) -> None:
-    """
-    Display token and optionally save to .env file.
-    """
+    """Display token and optionally save to .env file."""
     console.print("\n[bold green]✅ Token generated successfully![/bold green]")
     console.print(f"\n[bold white]Your session token:[/bold white]\n[green]{token}[/green]\n")
@@ -159,9 +140,7 @@ def _display_and_save_token(token: str) -> None:
 def _show_header() -> None:
-    """
-    Display welcome header.
-    """
+    """Display welcome header."""
     console.print(
         Panel(
@@ -175,9 +154,7 @@ def _show_header() -> None:
 def _show_exit_message() -> None:
-    """
-    Display security note and wait for user to exit.
-    """
+    """Display security note and wait for user to exit."""
     console.print("\n[bold yellow]⚠️ Security Note:[/bold yellow]")
     console.print("Press [bold white]ENTER[/bold white] to clear screen and exit.")
@@ -185,46 +162,37 @@ def _show_exit_message() -> None:
 def get_token() -> NoReturn:
-    """
-    Executes the authentication flow within an ephemeral terminal screen.
-    Handles CSRF, Email OTP/Link validation, and secure token display.
-    """
+    """Executes the authentication flow within an ephemeral terminal screen."""
     with console.screen():
         try:
             _show_header()
-            # Step 1: Initialize session and get CSRF token
             session, csrf = _initialize_session()
-            # Step 2: Get email and request verification code
             console.print("\n[bold cyan]Step 1: Email Verification[/bold cyan]")
             email = Prompt.ask("  Enter your Perplexity email", console=console)
             _request_verification_code(session, csrf, email)
-            # Step 3: Get and validate user input (OTP or magic link)
             console.print("\n[bold cyan]Step 2: Verification[/bold cyan]")
             console.print("  Check your email for a [bold]6-digit code[/bold] or [bold]magic link[/bold].")
             user_input = Prompt.ask("  Enter code or paste link", console=console).strip()
             redirect_url = _validate_and_get_redirect_url(session, email, user_input)
-            # Step 4: Extract session token
             token = _extract_session_token(session, redirect_url)
-            # Step 5: Display and optionally save token
             _display_and_save_token(token)
-            # Step 6: Exit
             _show_exit_message()
             exit(0)
         except KeyboardInterrupt:
             exit(0)
         except Exception as error:
             console.print(f"\n[bold red]⛔ Error:[/bold red] {error}")
             console.input("[dim]Press ENTER to exit...[/dim]")
             exit(1)

perplexity_webui_scraper/config.py CHANGED Viewed

@@ -1,27 +1,24 @@
-"""
-Configuration classes.
-"""
+"""Configuration classes."""
 from __future__ import annotations
-from dataclasses import dataclass
+from os import PathLike  # noqa: TC003
 from typing import TYPE_CHECKING
+from pydantic import BaseModel, ConfigDict
 from .enums import CitationMode, LogLevel, SearchFocus, SourceFocus, TimeRange
 if TYPE_CHECKING:
-    from pathlib import Path
     from .models import Model
     from .types import Coordinates
-@dataclass(slots=True)
-class ConversationConfig:
-    """
-    Default settings for a conversation. Can be overridden per message.
-    """
+class ConversationConfig(BaseModel):
+    """Default settings for a conversation."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     model: Model | None = None
     citation_mode: CitationMode = CitationMode.CLEAN
@@ -34,24 +31,10 @@ class ConversationConfig:
     coordinates: Coordinates | None = None
-@dataclass(frozen=True, slots=True)
-class ClientConfig:
-    """
-    HTTP client settings.
+class ClientConfig(BaseModel):
+    """HTTP client settings."""
-    Attributes:
-        timeout: Request timeout in seconds.
-        impersonate: Browser to impersonate (e.g., "chrome", "edge", "safari").
-        max_retries: Maximum retry attempts for failed requests.
-        retry_base_delay: Initial delay in seconds before first retry.
-        retry_max_delay: Maximum delay between retries.
-        retry_jitter: Random jitter factor (0-1) to add to delays.
-        requests_per_second: Rate limit for requests (0 to disable).
-        rotate_fingerprint: Whether to rotate browser fingerprint on retries.
-        logging_level: Logging verbosity level. Default is DISABLED.
-        log_file: Optional file path for persistent logging. If set, logs go to file only.
-                  If None, logs go to console. All logs are appended.
-    """
+    model_config = ConfigDict(frozen=True)
     timeout: int = 3600
     impersonate: str = "chrome"
@@ -62,4 +45,4 @@ class ClientConfig:
     requests_per_second: float = 0.5
     rotate_fingerprint: bool = True
     logging_level: LogLevel = LogLevel.DISABLED
-    log_file: str | Path | None = None
+    log_file: str | PathLike[str] | None = None

perplexity_webui_scraper/constants.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-Constants and values for the Perplexity internal API and HTTP interactions.
-"""
+"""Constants and values for the Perplexity internal API."""
 from __future__ import annotations
@@ -8,77 +6,41 @@ from re import Pattern, compile
 from typing import Final
-# API Configuration
 API_VERSION: Final[str] = "2.18"
-"""
-Current API version used by Perplexity WebUI.
-"""
+"""Current API version used by Perplexity WebUI."""
 API_BASE_URL: Final[str] = "https://www.perplexity.ai"
-"""
-Base URL for all API requests.
-"""
+"""Base URL for all API requests."""
-# API Endpoints
 ENDPOINT_ASK: Final[str] = "/rest/sse/perplexity_ask"
-"""
-SSE endpoint for sending prompts.
-"""
+"""SSE endpoint for sending prompts."""
 ENDPOINT_SEARCH_INIT: Final[str] = "/search/new"
-"""
-Endpoint to initialize a search session.
-"""
+"""Endpoint to initialize a search session."""
 ENDPOINT_UPLOAD: Final[str] = "/rest/uploads/batch_create_upload_urls"
-"""
-Endpoint for file upload URL generation.
-"""
+"""Endpoint for file upload URL generation."""
-# API Fixed Parameters
 SEND_BACK_TEXT: Final[bool] = True
-"""
-Whether to receive full text in each streaming chunk.
-True = API sends complete text each chunk (replace mode).
-False = API sends delta chunks only (accumulate mode).
-"""
+"""Whether to receive full text in each streaming chunk (replace mode)."""
 USE_SCHEMATIZED_API: Final[bool] = False
-"""
-Whether to use the schematized API format.
-"""
+"""Whether to use the schematized API format."""
 PROMPT_SOURCE: Final[str] = "user"
-"""
-Source identifier for prompts.
-"""
+"""Source identifier for prompts."""
-# Regex Patterns (Pre-compiled for performance in streaming parsing)
 CITATION_PATTERN: Final[Pattern[str]] = compile(r"\[(\d{1,2})\]")
-"""
-Regex pattern for matching citation markers like [1], [2], etc.
-Uses word boundary to avoid matching things like [123].
-"""
+"""Regex pattern for matching citation markers like [1], [2]."""
 JSON_OBJECT_PATTERN: Final[Pattern[str]] = compile(r"^\{.*\}$")
-"""
-Pattern to detect JSON object strings.
-"""
+"""Pattern to detect JSON object strings."""
-# HTTP Headers
 DEFAULT_HEADERS: Final[dict[str, str]] = {
     "Accept": "text/event-stream, application/json",
     "Content-Type": "application/json",
 }
-"""
-Default HTTP headers for API requests.
-Referer and Origin are added dynamically based on BASE_URL.
-"""
+"""Default HTTP headers for API requests."""
 SESSION_COOKIE_NAME: Final[str] = "__Secure-next-auth.session-token"
-"""
-Name of the session cookie used for authentication.
-"""
+"""Name of the session cookie used for authentication."""

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl