PyPI - perplexity-webui-scraper - Versions diffs - 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

perplexity_webui_scraper/__init__.py +24 -3
perplexity_webui_scraper/cli/get_perplexity_session_token.py +21 -53
perplexity_webui_scraper/config.py +12 -29
perplexity_webui_scraper/constants.py +13 -51
perplexity_webui_scraper/core.py +19 -155
perplexity_webui_scraper/enums.py +26 -88
perplexity_webui_scraper/exceptions.py +29 -50
perplexity_webui_scraper/http.py +39 -332
perplexity_webui_scraper/limits.py +6 -16
perplexity_webui_scraper/logging.py +23 -180
perplexity_webui_scraper/mcp/__init__.py +2 -8
perplexity_webui_scraper/mcp/__main__.py +1 -3
perplexity_webui_scraper/mcp/server.py +105 -82
perplexity_webui_scraper/models.py +27 -71
perplexity_webui_scraper/resilience.py +17 -100
perplexity_webui_scraper/types.py +18 -25
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/METADATA +121 -102
perplexity_webui_scraper-0.4.1.dist-info/RECORD +21 -0
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/WHEEL +1 -1
perplexity_webui_scraper-0.3.7.dist-info/RECORD +0 -21
{perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/entry_points.txt +0 -0

perplexity_webui_scraper/limits.py CHANGED Viewed

@@ -1,25 +1,15 @@
-"""
-Upload and request limits for Perplexity WebUI Scraper.
-"""
+"""Upload and request limits."""
 from __future__ import annotations
 from typing import Final
-# File Upload Limits
 MAX_FILES: Final[int] = 30
-"""
-Maximum number of files that can be attached to a single prompt.
-"""
+"""Maximum number of files per prompt."""
-MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024  # 50 MB in bytes
-"""
-Maximum file size in bytes.
-"""
+MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024
+"""Maximum file size in bytes (50 MB)."""
-# Request Limits
-DEFAULT_TIMEOUT: Final[int] = 30 * 60  # 30 minutes in seconds
-"""
-Default request timeout in seconds.
-"""
+DEFAULT_TIMEOUT: Final[int] = 30 * 60
+"""Default request timeout in seconds (30 minutes)."""

perplexity_webui_scraper/logging.py CHANGED Viewed

@@ -1,27 +1,18 @@
-"""Logging configuration using loguru.
-Provides detailed, structured logging for all library operations.
-Logging is disabled by default and can be enabled via ClientConfig.
-"""
+"""Logging configuration using loguru."""
 from __future__ import annotations
+from os import PathLike  # noqa: TC003
 from pathlib import Path
-import sys
-from typing import TYPE_CHECKING, Any
+from sys import stderr
+from typing import Any
 from loguru import logger
 from .enums import LogLevel
-if TYPE_CHECKING:
-    from os import PathLike
-# Remove default handler to start with a clean slate
 logger.remove()
-# Flag to track if logging is configured
 _logging_configured: bool = False
@@ -29,37 +20,20 @@ def configure_logging(
     level: LogLevel | str = LogLevel.DISABLED,
     log_file: str | PathLike[str] | None = None,
 ) -> None:
-    """Configure logging for the library.
-    Args:
-        level: Logging level (LogLevel enum or string). Default is DISABLED.
-        log_file: Optional file path to write logs. If set, logs go to file only.
-                  If None, logs go to console. Logs are appended, never deleted.
-    Note:
-        - If log_file is set: logs go to file only (no console output)
-        - If log_file is None: logs go to console only
-        - Log format includes timestamp, level, module, function, and message
-    """
+    """Configure logging for the library."""
     global _logging_configured  # noqa: PLW0603
-    # Remove any existing handlers
     logger.remove()
-    # Normalize level to string
     level_str = level.value if isinstance(level, LogLevel) else str(level).upper()
     if level_str == "DISABLED":
-        # Logging disabled, add a null handler to suppress all output
         logger.disable("perplexity_webui_scraper")
         _logging_configured = False
-        return
+        return None
-    # Enable the logger
     logger.enable("perplexity_webui_scraper")
-    # Console format - concise but informative
     console_format = (
         "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
         "<level>{level: <8}</level> | "
@@ -67,28 +41,25 @@ def configure_logging(
         "<level>{message}</level>"
     )
-    # File format - detailed with extra context
     file_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} | {message} | {extra}"
     if log_file is not None:
-        # Log to file only (no console output)
         log_path = Path(log_file)
         logger.add(
             log_path,
             format=file_format,
             level=level_str,
-            rotation=None,  # Never rotate
-            retention=None,  # Never delete
-            compression=None,  # No compression
-            mode="a",  # Append mode
+            rotation=None,
+            retention=None,
+            compression=None,
+            mode="a",
             encoding="utf-8",
             filter="perplexity_webui_scraper",
-            enqueue=True,  # Thread-safe
+            enqueue=True,
         )
     else:
-        # Log to console only (no file)
         logger.add(
-            sys.stderr,
+            stderr,
             format=console_format,
             level=level_str,
             colorize=True,
@@ -99,40 +70,21 @@ def configure_logging(
 def get_logger(name: str) -> Any:
-    """Get a logger instance bound to the given module name.
-    Args:
-        name: Module name (typically __name__).
-    Returns:
-        A loguru logger instance bound to the module.
-    """
+    """Get a logger instance bound to the given module name."""
     return logger.bind(module=name)
-# Convenience shortcuts for common log operations
 def log_request(
     method: str,
     url: str,
     *,
     params: dict[str, Any] | None = None,
-    headers: dict[str, str] | None = None,
     body_size: int | None = None,
 ) -> None:
-    """
-    Log an outgoing HTTP request with full details.
-    """
+    """Log an outgoing HTTP request."""
-    logger.debug(
-        "HTTP request initiated | method={method} url={url} params={params} "
-        "headers_count={headers_count} body_size={body_size}",
-        method=method,
-        url=url,
-        params=params,
-        headers_count=len(headers) if headers else 0,
-        body_size=body_size,
-    )
+    logger.debug(f"HTTP {method} {url} | params={params} body_size={body_size}")
 def log_response(
@@ -141,24 +93,11 @@ def log_response(
     status_code: int,
     *,
     elapsed_ms: float | None = None,
-    content_length: int | None = None,
-    headers: dict[str, str] | None = None,
 ) -> None:
-    """
-    Log an HTTP response with full details.
-    """
+    """Log an HTTP response."""
     level = "DEBUG" if status_code < 400 else "WARNING"
-    logger.log(
-        level,
-        "HTTP response received | method={method} url={url} status={status_code} "
-        "elapsed_ms={elapsed_ms:.2f} content_length={content_length}",
-        method=method,
-        url=url,
-        status_code=status_code,
-        elapsed_ms=elapsed_ms or 0,
-        content_length=content_length,
-    )
+    logger.log(level, f"HTTP {method} {url} | status={status_code} elapsed_ms={elapsed_ms:.2f}")
 def log_retry(
@@ -167,112 +106,16 @@ def log_retry(
     exception: BaseException | None,
     wait_seconds: float,
 ) -> None:
-    """
-    Log a retry attempt.
-    """
+    """Log a retry attempt."""
     logger.warning(
-        "Retry attempt | attempt={attempt}/{max_attempts} exception={exception_type}: {exception_msg} "
-        "wait_seconds={wait_seconds:.2f}",
-        attempt=attempt,
-        max_attempts=max_attempts,
-        exception_type=type(exception).__name__ if exception else "None",
-        exception_msg=str(exception) if exception else "None",
-        wait_seconds=wait_seconds,
-    )
-def log_cloudflare_detected(status_code: int, markers_found: list[str]) -> None:
-    """
-    Log Cloudflare challenge detection.
-    """
-    logger.warning(
-        "Cloudflare challenge detected | status_code={status_code} markers={markers}",
-        status_code=status_code,
-        markers=markers_found,
-    )
-def log_fingerprint_rotation(old_profile: str, new_profile: str) -> None:
-    """
-    Log browser fingerprint rotation.
-    """
-    logger.info(
-        "Browser fingerprint rotated | old_profile={old} new_profile={new}",
-        old=old_profile,
-        new=new_profile,
-    )
-def log_rate_limit(wait_seconds: float) -> None:
-    """
-    Log rate limiting wait.
-    """
-    logger.debug(
-        "Rate limiter throttling | wait_seconds={wait_seconds:.3f}",
-        wait_seconds=wait_seconds,
-    )
-def log_session_created(impersonate: str, timeout: int) -> None:
-    """
-    Log HTTP session creation.
-    """
-    logger.info(
-        "HTTP session created | browser_profile={profile} timeout={timeout}s",
-        profile=impersonate,
-        timeout=timeout,
-    )
-def log_conversation_created(config_summary: str) -> None:
-    """
-    Log conversation creation.
-    """
-    logger.info(
-        "Conversation created | config={config}",
-        config=config_summary,
-    )
-def log_query_sent(query: str, model: str, has_files: bool) -> None:
-    """
-    Log a query being sent.
-    """
-    logger.info(
-        "Query sent | model={model} has_files={has_files} query_preview={query_preview}",
-        model=model,
-        has_files=has_files,
-        query_preview=query[:100] + "..." if len(query) > 100 else query,
-    )
-def log_stream_chunk(chunk_size: int, is_final: bool) -> None:
-    """
-    Log a streaming chunk received.
-    """
-    logger.debug(
-        "Stream chunk received | size={size} is_final={is_final}",
-        size=chunk_size,
-        is_final=is_final,
+        f"Retry {attempt}/{max_attempts} | "
+        f"exception={type(exception).__name__ if exception else 'None'} "
+        f"wait={wait_seconds:.2f}s"
     )
 def log_error(error: Exception, context: str = "") -> None:
-    """
-    Log an error with full traceback.
-    """
+    """Log an error with traceback."""
-    logger.exception(
-        "Error occurred | context={context} error_type={error_type} message={message}",
-        context=context,
-        error_type=type(error).__name__,
-        message=str(error),
-    )
+    logger.exception(f"Error | context={context} type={type(error).__name__} message={error}")

perplexity_webui_scraper/mcp/__init__.py CHANGED Viewed

@@ -1,8 +1,4 @@
-"""
-MCP (Model Context Protocol) server for Perplexity WebUI Scraper.
-This module provides an MCP server that exposes Perplexity AI search capabilities to AI assistants.
-"""
+"""MCP server for Perplexity WebUI Scraper."""
 from __future__ import annotations
@@ -11,9 +7,7 @@ __all__: list[str] = ["run_server"]
 def run_server() -> None:
-    """
-    Run the MCP server.
-    """
+    """Run the MCP server."""
     from .server import main  # noqa: PLC0415

perplexity_webui_scraper/mcp/__main__.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-CLI entry point for MCP server.
-"""
+"""CLI entry point for MCP server."""
 from __future__ import annotations

perplexity_webui_scraper/mcp/server.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-MCP server implementation using FastMCP.
-"""
+"""MCP server implementation using FastMCP."""
 from __future__ import annotations
@@ -12,61 +10,18 @@ from fastmcp import FastMCP
 from perplexity_webui_scraper.config import ClientConfig, ConversationConfig
 from perplexity_webui_scraper.core import Perplexity
 from perplexity_webui_scraper.enums import CitationMode, SearchFocus, SourceFocus
-from perplexity_webui_scraper.models import Models
+from perplexity_webui_scraper.models import Model, Models
-# Create FastMCP server
 mcp = FastMCP(
-    "perplexity-webui-scraper-mcp",
+    "perplexity-webui-scraper",
     instructions=(
-        "Search the web with Perplexity AI using the full range of premium models. "
-        "Unlike the official Perplexity API, this tool provides access to GPT-5.2, Claude 4.5, "
-        "Gemini 3, Grok 4.1, and other cutting-edge models with reasoning capabilities. "
-        "Use for real-time web research, academic searches, financial data, and current events. "
-        "Supports multiple source types: web, academic papers, social media, and SEC filings."
+        "Search the web with Perplexity AI using premium models. "
+        "Each tool uses a specific AI model - enable only the ones you need. "
+        "All tools support source_focus: web, academic, social, finance, all."
     ),
 )
-# Model name mapping to Model objects
-MODEL_MAP = {
-    "best": Models.BEST,
-    "research": Models.RESEARCH,
-    "labs": Models.LABS,
-    "sonar": Models.SONAR,
-    "gpt52": Models.GPT_52,
-    "gpt52_thinking": Models.GPT_52_THINKING,
-    "claude_opus": Models.CLAUDE_45_OPUS,
-    "claude_opus_thinking": Models.CLAUDE_45_OPUS_THINKING,
-    "claude_sonnet": Models.CLAUDE_45_SONNET,
-    "claude_sonnet_thinking": Models.CLAUDE_45_SONNET_THINKING,
-    "gemini_pro": Models.GEMINI_3_PRO,
-    "gemini_flash": Models.GEMINI_3_FLASH,
-    "gemini_flash_thinking": Models.GEMINI_3_FLASH_THINKING,
-    "grok": Models.GROK_41,
-    "grok_thinking": Models.GROK_41_THINKING,
-    "kimi_thinking": Models.KIMI_K2_THINKING,
-}
-ModelName = Literal[
-    "best",
-    "research",
-    "labs",
-    "sonar",
-    "gpt52",
-    "gpt52_thinking",
-    "claude_opus",
-    "claude_opus_thinking",
-    "claude_sonnet",
-    "claude_sonnet_thinking",
-    "gemini_pro",
-    "gemini_flash",
-    "gemini_flash_thinking",
-    "grok",
-    "grok_thinking",
-    "kimi_thinking",
-]
-# Source focus mapping
 SOURCE_FOCUS_MAP = {
     "web": [SourceFocus.WEB],
     "academic": [SourceFocus.ACADEMIC],
@@ -77,16 +32,14 @@ SOURCE_FOCUS_MAP = {
 SourceFocusName = Literal["web", "academic", "social", "finance", "all"]
-# Client singleton
 _client: Perplexity | None = None
 def _get_client() -> Perplexity:
-    """
-    Get or create Perplexity client.
-    """
+    """Get or create Perplexity client."""
     global _client  # noqa: PLW0603
     if _client is None:
         token = environ.get("PERPLEXITY_SESSION_TOKEN", "")
@@ -95,41 +48,22 @@ def _get_client() -> Perplexity:
                 "PERPLEXITY_SESSION_TOKEN environment variable is required. "
                 "Set it with: export PERPLEXITY_SESSION_TOKEN='your_token_here'"
             )
         _client = Perplexity(token, config=ClientConfig())
     return _client
-@mcp.tool
-def perplexity_ask(
-    query: str,
-    model: ModelName = "best",
-    source_focus: SourceFocusName = "web",
-) -> str:
-    """
-    Ask a question and get AI-generated answers with real-time data from the internet.
-    Returns up-to-date information from web sources. Use for factual queries, research,
-    current events, news, library versions, documentation, or any question requiring
-    the latest information.
-    Args:
-        query: The question to ask.
-        model: AI model to use.
-        source_focus: Type of sources to prioritize (web, academic, social, finance, all).
-    Returns:
-        AI-generated answer with inline citations and a Citations section.
-    """
+def _ask(query: str, model: Model, source_focus: SourceFocusName = "web") -> str:
+    """Execute a query with a specific model."""
     client = _get_client()
-    selected_model = MODEL_MAP.get(model, Models.BEST)
     sources = SOURCE_FOCUS_MAP.get(source_focus, [SourceFocus.WEB])
     try:
         conversation = client.create_conversation(
             ConversationConfig(
-                model=selected_model,
+                model=model,
                 citation_mode=CitationMode.DEFAULT,
                 search_focus=SearchFocus.WEB,
                 source_focus=sources,
@@ -139,7 +73,6 @@ def perplexity_ask(
         conversation.ask(query)
         answer = conversation.answer or "No answer received"
-        # Build response with Perplexity-style citations
         response_parts = [answer]
         if conversation.search_results:
@@ -150,14 +83,104 @@ def perplexity_ask(
                 response_parts.append(f"\n[{i}]: {url}")
         return "".join(response_parts)
     except Exception as error:
         return f"Error: {error!s}"
+@mcp.tool
+def pplx_ask(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Ask a question with real-time data from the internet (auto-selects best model)."""
+    return _ask(query, Models.BEST, source_focus)
+@mcp.tool
+def pplx_deep_research(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Deep Research - In-depth reports with more sources, charts, and advanced reasoning."""
+    return _ask(query, Models.DEEP_RESEARCH, source_focus)
+@mcp.tool
+def pplx_sonar(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Sonar - Perplexity's latest model."""
+    return _ask(query, Models.SONAR, source_focus)
+@mcp.tool
+def pplx_gpt52(query: str, source_focus: SourceFocusName = "web") -> str:
+    """GPT-5.2 - OpenAI's latest model."""
+    return _ask(query, Models.GPT_52, source_focus)
+@mcp.tool
+def pplx_gpt52_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
+    """GPT-5.2 Thinking - OpenAI's latest model with extended thinking."""
+    return _ask(query, Models.GPT_52_THINKING, source_focus)
+@mcp.tool
+def pplx_claude_sonnet(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Claude Sonnet 4.5 - Anthropic's fast model."""
+    return _ask(query, Models.CLAUDE_45_SONNET, source_focus)
+@mcp.tool
+def pplx_claude_sonnet_think(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Claude Sonnet 4.5 Thinking - Anthropic's fast model with extended thinking."""
+    return _ask(query, Models.CLAUDE_45_SONNET_THINKING, source_focus)
+@mcp.tool
+def pplx_gemini_flash(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Gemini 3 Flash - Google's fast model."""
+    return _ask(query, Models.GEMINI_3_FLASH, source_focus)
+@mcp.tool
+def pplx_gemini_flash_think(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Gemini 3 Flash Thinking - Google's fast model with extended thinking."""
+    return _ask(query, Models.GEMINI_3_FLASH_THINKING, source_focus)
+@mcp.tool
+def pplx_gemini_pro_think(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Gemini 3 Pro Thinking - Google's most advanced model with extended thinking."""
+    return _ask(query, Models.GEMINI_3_PRO_THINKING, source_focus)
+@mcp.tool
+def pplx_grok(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Grok 4.1 - xAI's latest model."""
+    return _ask(query, Models.GROK_41, source_focus)
+@mcp.tool
+def pplx_grok_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Grok 4.1 Thinking - xAI's latest model with extended thinking."""
+    return _ask(query, Models.GROK_41_THINKING, source_focus)
+@mcp.tool
+def pplx_kimi_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
+    """Kimi K2.5 Thinking - Moonshot AI's latest model."""
+    return _ask(query, Models.KIMI_K25_THINKING, source_focus)
 def main() -> None:
-    """
-    Run the MCP server.
-    """
+    """Run the MCP server."""
     mcp.run()

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl