PyPI - perplexity-webui-scraper - Versions diffs - 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

perplexity_webui_scraper/__init__.py +24 -3
perplexity_webui_scraper/cli/get_perplexity_session_token.py +21 -53
perplexity_webui_scraper/config.py +12 -29
perplexity_webui_scraper/constants.py +13 -51
perplexity_webui_scraper/core.py +18 -154
perplexity_webui_scraper/enums.py +26 -88
perplexity_webui_scraper/exceptions.py +29 -50
perplexity_webui_scraper/http.py +39 -332
perplexity_webui_scraper/limits.py +6 -16
perplexity_webui_scraper/logging.py +23 -180
perplexity_webui_scraper/mcp/__init__.py +2 -8
perplexity_webui_scraper/mcp/__main__.py +1 -3
perplexity_webui_scraper/mcp/server.py +105 -82
perplexity_webui_scraper/models.py +27 -71
perplexity_webui_scraper/resilience.py +17 -100
perplexity_webui_scraper/types.py +18 -25
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/METADATA +120 -101
perplexity_webui_scraper-0.4.0.dist-info/RECORD +21 -0
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/WHEEL +1 -1
perplexity_webui_scraper-0.3.7.dist-info/RECORD +0 -21
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.0.dist-info}/entry_points.txt +0 -0

perplexity_webui_scraper/models.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-AI model definitions for Perplexity WebUI Scraper.
-"""
+"""AI model definitions."""
 from __future__ import annotations
@@ -9,101 +7,59 @@ from dataclasses import dataclass
 @dataclass(frozen=True, slots=True)
 class Model:
-    """
-    AI model configuration.
-    Attributes:
-        identifier: Model identifier used by the API.
-        mode: Model execution mode. Default: "copilot".
-    """
+    """AI model configuration."""
     identifier: str
     mode: str = "copilot"
 class Models:
-    """
-    Available AI models with their configurations.
-    All models use the "copilot" mode which enables web search.
-    """
+    """Available AI models (all use copilot mode with web search)."""
-    RESEARCH = Model(identifier="pplx_alpha")
-    """
-    Research - Fast and thorough for routine research.
-    """
+    DEEP_RESEARCH = Model(identifier="pplx_alpha")
+    """Deep Research - Create in-depth reports with more sources, charts, and advanced reasoning."""
-    LABS = Model(identifier="pplx_beta")
-    """
-    Labs - Multi-step tasks with advanced troubleshooting.
-    """
+    CREATE_FILES_AND_APPS = Model(identifier="pplx_beta")
+    """Create files and apps (previously known as Labs) - Turn your ideas into docs, slides, dashboards, and more."""
-    BEST = Model(identifier="pplx_pro_upgraded")
-    """
-    Best - Automatically selects the most responsive model based on the query.
-    """
+    BEST = Model(identifier="pplx_pro")
+    """Best - Automatically selects the best model based on the query."""
     SONAR = Model(identifier="experimental")
-    """
-    Sonar - Perplexity's fast model.
-    """
+    """Sonar - Perplexity's latest model."""
     GEMINI_3_FLASH = Model(identifier="gemini30flash")
-    """
-    Gemini 3 Flash - Google's fast reasoning model.
-    """
+    """Gemini 3 Flash - Google's fast model."""
     GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
-    """
-    Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking.
-    """
+    """Gemini 3 Flash Thinking - Google's fast model (thinking)."""
-    GEMINI_3_PRO = Model(identifier="gemini30pro")
-    """
-    Gemini 3 Pro - Google's newest reasoning model.
-    """
+    GEMINI_3_PRO_THINKING = Model(identifier="gemini30pro")
+    """Gemini 3 Pro Thinking - Google's most advanced model (thinking)."""
     GPT_52 = Model(identifier="gpt52")
-    """
-    GPT-5.2 - OpenAI's latest model.
-    """
+    """GPT-5.2 - OpenAI's latest model."""
     GPT_52_THINKING = Model(identifier="gpt52_thinking")
-    """
-    GPT-5.2 Thinking - OpenAI's latest model with thinking.
-    """
+    """GPT-5.2 Thinking - OpenAI's latest model (thinking)."""
     CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
-    """
-    Claude Sonnet 4.5 - Anthropic's newest advanced model.
-    """
+    """Claude Sonnet 4.5 - Anthropic's fast model."""
     CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
-    """
-    Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model.
-    """
+    """Claude Sonnet 4.5 Thinking - Anthropic's fast model (thinking)."""
-    CLAUDE_45_OPUS = Model(identifier="claude45opus")
-    """
-    Claude Opus 4.5 - Anthropic's Opus reasoning model.
-    """
+    CLAUDE_45_OPUS = Model(identifier="claude45opus")  # TODO: check correct identifier
+    """Claude Opus 4.5 - Anthropic's Opus reasoning model."""
-    CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
-    """
-    Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking.
-    """
+    CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")  # TODO: check correct identifier
+    """Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model (thinking)."""
     GROK_41 = Model(identifier="grok41nonreasoning")
-    """
-    Grok 4.1 - xAI's latest advanced model.
-    """
+    """Grok 4.1 - xAI's latest model."""
     GROK_41_THINKING = Model(identifier="grok41reasoning")
-    """
-    Grok 4.1 Thinking - xAI's latest reasoning model.
-    """
-    KIMI_K2_THINKING = Model(identifier="kimik2thinking")
-    """
-    Kimi K2 Thinking - Moonshot AI's latest reasoning model.
-    """
+    """Grok 4.1 Thinking - xAI's latest model (thinking)."""
+    KIMI_K25_THINKING = Model(identifier="kimik25thinking")
+    """Kimi K2.5 Thinking - Moonshot AI's latest model."""

perplexity_webui_scraper/resilience.py CHANGED Viewed

@@ -1,29 +1,24 @@
-"""
-Resilience utilities for HTTP requests.
-Provides retry mechanisms, rate limiting, and Cloudflare bypass utilities
-using the tenacity library for robust retry handling.
-"""
+"""Resilience utilities for HTTP requests."""
 from __future__ import annotations
-from collections.abc import Callable
 from dataclasses import dataclass, field
-import random
+from random import choice
 from threading import Lock
-import time
-from typing import TYPE_CHECKING, Any, TypeVar
+from time import monotonic, sleep
+from typing import TYPE_CHECKING, TypeVar
-from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
 if TYPE_CHECKING:
     from collections.abc import Callable
-T = TypeVar("T")
+    from tenacity import RetryCallState
-# Browser profiles supported by curl_cffi for fingerprint rotation
+T = TypeVar("T")
 BROWSER_PROFILES: tuple[str, ...] = (
     "chrome",
     "chrome110",
@@ -41,29 +36,10 @@ BROWSER_PROFILES: tuple[str, ...] = (
     "safari17_2_ios",
 )
-# Cloudflare challenge detection markers
-CLOUDFLARE_MARKERS: tuple[str, ...] = (
-    "cf-ray",
-    "cf-mitigated",
-    "__cf_chl_",
-    "Checking your browser",
-    "Just a moment...",
-    "cloudflare",
-    "Enable JavaScript and cookies to continue",
-    "challenge-platform",
-)
 @dataclass(slots=True)
 class RetryConfig:
-    """Configuration for retry behavior.
-    Attributes:
-        max_retries: Maximum number of retry attempts.
-        base_delay: Initial delay in seconds before first retry.
-        max_delay: Maximum delay between retries.
-        jitter: Random jitter factor to add to delays (0-1).
-    """
+    """Configuration for retry behavior."""
     max_retries: int = 3
     base_delay: float = 1.0
@@ -73,23 +49,17 @@ class RetryConfig:
 @dataclass
 class RateLimiter:
-    """Token bucket rate limiter for throttling requests.
-    Attributes:
-        requests_per_second: Maximum requests allowed per second.
-    """
+    """Token bucket rate limiter."""
     requests_per_second: float = 0.5
     _last_request: float = field(default=0.0, init=False)
     _lock: Lock = field(default_factory=Lock, init=False)
     def acquire(self) -> None:
-        """
-        Wait until a request can be made within rate limits.
-        """
+        """Wait until a request can be made within rate limits."""
         with self._lock:
-            now = time.monotonic()
+            now = monotonic()
             min_interval = 1.0 / self.requests_per_second
             if self._last_request > 0:
@@ -97,59 +67,15 @@ class RateLimiter:
                 wait_time = min_interval - elapsed
                 if wait_time > 0:
-                    time.sleep(wait_time)
+                    sleep(wait_time)
-            self._last_request = time.monotonic()
+            self._last_request = monotonic()
 def get_random_browser_profile() -> str:
-    """Get a random browser profile for fingerprint rotation.
-    Returns:
-        A browser profile identifier compatible with curl_cffi.
-    """
-    return random.choice(BROWSER_PROFILES)
-def is_cloudflare_challenge(response_text: str, headers: dict[str, Any] | None = None) -> bool:
-    """Detect if a response is a Cloudflare challenge page.
-    Args:
-        response_text: The response body text.
-        headers: Optional response headers.
+    """Get a random browser profile for fingerprint rotation."""
-    Returns:
-        True if Cloudflare challenge markers are detected.
-    """
-    text_lower = response_text.lower()
-    for marker in CLOUDFLARE_MARKERS:
-        if marker.lower() in text_lower:
-            return True
-    if headers:
-        for key in headers:
-            key_lower = key.lower()
-            if "cf-" in key_lower or "cloudflare" in key_lower:
-                return True
-    return False
-def is_cloudflare_status(status_code: int) -> bool:
-    """Check if status code indicates a potential Cloudflare block.
-    Args:
-        status_code: HTTP status code.
-    Returns:
-        True if status code is commonly used by Cloudflare challenges.
-    """
-    return status_code in (403, 503, 520, 521, 522, 523, 524, 525, 526)
+    return choice(BROWSER_PROFILES)
 def create_retry_decorator(
@@ -157,16 +83,7 @@ def create_retry_decorator(
     retryable_exceptions: tuple[type[Exception], ...],
     on_retry: Callable[[RetryCallState], None] | None = None,
 ) -> Callable[[Callable[..., T]], Callable[..., T]]:
-    """Create a tenacity retry decorator with the given configuration.
-    Args:
-        config: Retry configuration.
-        retryable_exceptions: Tuple of exception types to retry on.
-        on_retry: Optional callback to execute on each retry.
-    Returns:
-        A retry decorator configured with the given settings.
-    """
+    """Create a tenacity retry decorator with the given configuration."""
     return retry(
         stop=stop_after_attempt(config.max_retries + 1),

perplexity_webui_scraper/types.py CHANGED Viewed

@@ -1,54 +1,47 @@
-"""
-Response types and data models.
-"""
+"""Response types and data models."""
 from __future__ import annotations
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Any
+from pydantic import BaseModel, ConfigDict
-@dataclass(frozen=True, slots=True)
-class Coordinates:
-    """
-    Geographic coordinates (lat/lng).
-    """
+class Coordinates(BaseModel):
+    """Geographic coordinates (lat/lng)."""
+    model_config = ConfigDict(frozen=True)
     latitude: float
     longitude: float
-@dataclass(frozen=True, slots=True)
-class SearchResultItem:
-    """
-    A single search result.
-    """
+class SearchResultItem(BaseModel):
+    """A single search result."""
+    model_config = ConfigDict(frozen=True)
     title: str | None = None
     snippet: str | None = None
     url: str | None = None
-@dataclass(slots=True)
-class Response:
-    """
-    Response from Perplexity AI.
-    """
+class Response(BaseModel):
+    """Response from Perplexity AI."""
     title: str | None = None
     answer: str | None = None
-    chunks: list[str] = field(default_factory=list)
+    chunks: list[str] = []
     last_chunk: str | None = None
-    search_results: list[SearchResultItem] = field(default_factory=list)
+    search_results: list[SearchResultItem] = []
     conversation_uuid: str | None = None
-    raw_data: dict[str, Any] = field(default_factory=dict)
+    raw_data: dict[str, Any] = {}
 @dataclass(frozen=True, slots=True)
 class _FileInfo:
-    """
-    Internal file info for uploads.
-    """
+    """Internal file info for uploads."""
     path: str
     size: int

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.0__py3-none-any.whl