perplexity-webui-scraper 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
1
+ """Logging configuration using loguru.
2
+
3
+ Provides detailed, structured logging for all library operations.
4
+ Logging is disabled by default and can be enabled via ClientConfig.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ import sys
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from loguru import logger
14
+
15
+ from .enums import LogLevel
16
+
17
+
18
+ if TYPE_CHECKING:
19
+ from os import PathLike
20
+
21
+ # Remove default handler to start with a clean slate
22
+ logger.remove()
23
+
24
+ # Flag to track if logging is configured
25
+ _logging_configured: bool = False
26
+
27
+
def configure_logging(
    level: LogLevel | str = LogLevel.DISABLED,
    log_file: str | PathLike[str] | None = None,
) -> None:
    """Configure logging for the library.

    Args:
        level: Logging level (LogLevel enum or string). Default is DISABLED.
        log_file: Optional file path to write logs. If set, logs go to file only.
            If None, logs go to console. Logs are appended, never deleted.

    Note:
        - If log_file is set: logs go to file only (no console output)
        - If log_file is None: logs go to console only
        - Log format includes timestamp, level, module, function, and message
    """

    global _logging_configured  # noqa: PLW0603

    # Drop whatever handlers a previous call may have installed.
    logger.remove()

    # Accept either the enum or a plain (case-insensitive) string.
    normalized = level.value if isinstance(level, LogLevel) else str(level).upper()

    if normalized == "DISABLED":
        # Suppress every record emitted under this package's namespace.
        # No handler is added, so nothing is written anywhere.
        logger.disable("perplexity_webui_scraper")
        _logging_configured = False
        return

    logger.enable("perplexity_webui_scraper")

    if log_file is not None:
        # File sink only (no console): plain-text format with the bound
        # `extra` context appended for detailed post-mortem analysis.
        logger.add(
            Path(log_file),
            format=(
                "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | "
                "{name}:{function}:{line} | {message} | {extra}"
            ),
            level=normalized,
            rotation=None,  # Never rotate
            retention=None,  # Never delete
            compression=None,  # No compression
            mode="a",  # Append mode
            encoding="utf-8",
            filter="perplexity_webui_scraper",
            enqueue=True,  # Thread-safe
        )
    else:
        # Console sink only: colorized, concise but informative.
        logger.add(
            sys.stderr,
            format=(
                "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
                "<level>{level: <8}</level> | "
                "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
                "<level>{message}</level>"
            ),
            level=normalized,
            colorize=True,
            filter="perplexity_webui_scraper",
        )

    _logging_configured = True
99
+
100
+
def get_logger(name: str) -> Any:
    """Return a loguru logger carrying the given module name as context.

    Args:
        name: Module name (typically __name__).

    Returns:
        A loguru logger instance bound to the module.
    """

    return logger.bind(module=name)
112
+
113
+
# Convenience shortcuts for common log operations
def log_request(
    method: str,
    url: str,
    *,
    params: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
    body_size: int | None = None,
) -> None:
    """
    Log an outgoing HTTP request with full details.
    """

    # Only the header count is logged, never header values (may hold secrets).
    header_count = len(headers) if headers else 0
    logger.debug(
        "HTTP request initiated | method={method} url={url} params={params} "
        "headers_count={headers_count} body_size={body_size}",
        method=method,
        url=url,
        params=params,
        headers_count=header_count,
        body_size=body_size,
    )
136
+
137
+
138
+ def log_response(
139
+ method: str,
140
+ url: str,
141
+ status_code: int,
142
+ *,
143
+ elapsed_ms: float | None = None,
144
+ content_length: int | None = None,
145
+ headers: dict[str, str] | None = None,
146
+ ) -> None:
147
+ """
148
+ Log an HTTP response with full details.
149
+ """
150
+
151
+ level = "DEBUG" if status_code < 400 else "WARNING"
152
+ logger.log(
153
+ level,
154
+ "HTTP response received | method={method} url={url} status={status_code} "
155
+ "elapsed_ms={elapsed_ms:.2f} content_length={content_length}",
156
+ method=method,
157
+ url=url,
158
+ status_code=status_code,
159
+ elapsed_ms=elapsed_ms or 0,
160
+ content_length=content_length,
161
+ )
162
+
163
+
164
+ def log_retry(
165
+ attempt: int,
166
+ max_attempts: int,
167
+ exception: BaseException | None,
168
+ wait_seconds: float,
169
+ ) -> None:
170
+ """
171
+ Log a retry attempt.
172
+ """
173
+
174
+ logger.warning(
175
+ "Retry attempt | attempt={attempt}/{max_attempts} exception={exception_type}: {exception_msg} "
176
+ "wait_seconds={wait_seconds:.2f}",
177
+ attempt=attempt,
178
+ max_attempts=max_attempts,
179
+ exception_type=type(exception).__name__ if exception else "None",
180
+ exception_msg=str(exception) if exception else "None",
181
+ wait_seconds=wait_seconds,
182
+ )
183
+
184
+
185
+ def log_cloudflare_detected(status_code: int, markers_found: list[str]) -> None:
186
+ """
187
+ Log Cloudflare challenge detection.
188
+ """
189
+
190
+ logger.warning(
191
+ "Cloudflare challenge detected | status_code={status_code} markers={markers}",
192
+ status_code=status_code,
193
+ markers=markers_found,
194
+ )
195
+
196
+
197
+ def log_fingerprint_rotation(old_profile: str, new_profile: str) -> None:
198
+ """
199
+ Log browser fingerprint rotation.
200
+ """
201
+
202
+ logger.info(
203
+ "Browser fingerprint rotated | old_profile={old} new_profile={new}",
204
+ old=old_profile,
205
+ new=new_profile,
206
+ )
207
+
208
+
209
+ def log_rate_limit(wait_seconds: float) -> None:
210
+ """
211
+ Log rate limiting wait.
212
+ """
213
+
214
+ logger.debug(
215
+ "Rate limiter throttling | wait_seconds={wait_seconds:.3f}",
216
+ wait_seconds=wait_seconds,
217
+ )
218
+
219
+
220
+ def log_session_created(impersonate: str, timeout: int) -> None:
221
+ """
222
+ Log HTTP session creation.
223
+ """
224
+
225
+ logger.info(
226
+ "HTTP session created | browser_profile={profile} timeout={timeout}s",
227
+ profile=impersonate,
228
+ timeout=timeout,
229
+ )
230
+
231
+
232
+ def log_conversation_created(config_summary: str) -> None:
233
+ """
234
+ Log conversation creation.
235
+ """
236
+
237
+ logger.info(
238
+ "Conversation created | config={config}",
239
+ config=config_summary,
240
+ )
241
+
242
+
243
+ def log_query_sent(query: str, model: str, has_files: bool) -> None:
244
+ """
245
+ Log a query being sent.
246
+ """
247
+
248
+ logger.info(
249
+ "Query sent | model={model} has_files={has_files} query_preview={query_preview}",
250
+ model=model,
251
+ has_files=has_files,
252
+ query_preview=query[:100] + "..." if len(query) > 100 else query,
253
+ )
254
+
255
+
256
+ def log_stream_chunk(chunk_size: int, is_final: bool) -> None:
257
+ """
258
+ Log a streaming chunk received.
259
+ """
260
+
261
+ logger.debug(
262
+ "Stream chunk received | size={size} is_final={is_final}",
263
+ size=chunk_size,
264
+ is_final=is_final,
265
+ )
266
+
267
+
268
+ def log_error(error: Exception, context: str = "") -> None:
269
+ """
270
+ Log an error with full traceback.
271
+ """
272
+
273
+ logger.exception(
274
+ "Error occurred | context={context} error_type={error_type} message={message}",
275
+ context=context,
276
+ error_type=type(error).__name__,
277
+ message=str(error),
278
+ )
@@ -0,0 +1,20 @@
1
+ """
2
+ MCP (Model Context Protocol) server for Perplexity WebUI Scraper.
3
+
4
+ This module provides an MCP server that exposes Perplexity AI search capabilities to AI assistants.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+
10
+ __all__: list[str] = ["run_server"]
11
+
12
+
13
+ def run_server() -> None:
14
+ """
15
+ Run the MCP server.
16
+ """
17
+
18
+ from .server import main # noqa: PLC0415
19
+
20
+ main()
@@ -0,0 +1,11 @@
1
+ """
2
+ CLI entry point for MCP server.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from . import run_server
8
+
9
+
# Entry point for `python -m` invocation of the MCP server package.
if __name__ == "__main__":
    run_server()
@@ -0,0 +1,166 @@
1
+ """
2
+ MCP server implementation using FastMCP.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from os import environ
8
+ from typing import Literal
9
+
10
+ from fastmcp import FastMCP
11
+
12
+ from perplexity_webui_scraper.config import ClientConfig, ConversationConfig
13
+ from perplexity_webui_scraper.core import Perplexity
14
+ from perplexity_webui_scraper.enums import CitationMode, SearchFocus, SourceFocus
15
+ from perplexity_webui_scraper.models import Models
16
+
17
+
# Create FastMCP server
mcp = FastMCP(
    "perplexity-webui-scraper-mcp",
    instructions=(
        "Search the web with Perplexity AI using the full range of premium models. "
        "Unlike the official Perplexity API, this tool provides access to GPT-5.2, Claude 4.5, "
        "Gemini 3, Grok 4.1, and other cutting-edge models with reasoning capabilities. "
        "Use for real-time web research, academic searches, financial data, and current events. "
        "Supports multiple source types: web, academic papers, social media, and SEC filings."
    ),
)

# Model name mapping to Model objects.
# Keys must stay in sync with the ModelName Literal below.
MODEL_MAP = {
    "best": Models.BEST,
    "research": Models.RESEARCH,
    "labs": Models.LABS,
    "sonar": Models.SONAR,
    "gpt52": Models.GPT_52,
    "gpt52_thinking": Models.GPT_52_THINKING,
    "claude_opus": Models.CLAUDE_45_OPUS,
    "claude_opus_thinking": Models.CLAUDE_45_OPUS_THINKING,
    "claude_sonnet": Models.CLAUDE_45_SONNET,
    "claude_sonnet_thinking": Models.CLAUDE_45_SONNET_THINKING,
    "gemini_pro": Models.GEMINI_3_PRO,
    "gemini_flash": Models.GEMINI_3_FLASH,
    "gemini_flash_thinking": Models.GEMINI_3_FLASH_THINKING,
    "grok": Models.GROK_41,
    "grok_thinking": Models.GROK_41_THINKING,
    "kimi_thinking": Models.KIMI_K2_THINKING,
}

# Tool-facing model names accepted by perplexity_ask (mirrors MODEL_MAP keys).
ModelName = Literal[
    "best",
    "research",
    "labs",
    "sonar",
    "gpt52",
    "gpt52_thinking",
    "claude_opus",
    "claude_opus_thinking",
    "claude_sonnet",
    "claude_sonnet_thinking",
    "gemini_pro",
    "gemini_flash",
    "gemini_flash_thinking",
    "grok",
    "grok_thinking",
    "kimi_thinking",
]

# Source focus mapping: tool-facing name -> SourceFocus values for a conversation.
# NOTE(review): "all" covers web/academic/social but omits FINANCE — confirm
# whether that exclusion is intentional.
SOURCE_FOCUS_MAP = {
    "web": [SourceFocus.WEB],
    "academic": [SourceFocus.ACADEMIC],
    "social": [SourceFocus.SOCIAL],
    "finance": [SourceFocus.FINANCE],
    "all": [SourceFocus.WEB, SourceFocus.ACADEMIC, SourceFocus.SOCIAL],
}

# Tool-facing source-focus names (mirrors SOURCE_FOCUS_MAP keys).
SourceFocusName = Literal["web", "academic", "social", "finance", "all"]

# Client singleton: lazily created by _get_client() on the first tool call.
_client: Perplexity | None = None
82
+
83
+
def _get_client() -> Perplexity:
    """
    Return the shared Perplexity client, creating it on first use.

    Raises:
        ValueError: If the PERPLEXITY_SESSION_TOKEN environment variable
            is unset or empty.
    """

    global _client  # noqa: PLW0603

    if _client is not None:
        return _client

    token = environ.get("PERPLEXITY_SESSION_TOKEN", "")
    if not token:
        raise ValueError(
            "PERPLEXITY_SESSION_TOKEN environment variable is required. "
            "Set it with: export PERPLEXITY_SESSION_TOKEN='your_token_here'"
        )

    _client = Perplexity(token, config=ClientConfig())
    return _client
101
+
102
+
@mcp.tool
def perplexity_ask(
    query: str,
    model: ModelName = "best",
    source_focus: SourceFocusName = "web",
) -> str:
    """
    Ask a question and get AI-generated answers with real-time data from the internet.

    Returns up-to-date information from web sources. Use for factual queries, research,
    current events, news, library versions, documentation, or any question requiring
    the latest information.

    Args:
        query: The question to ask.
        model: AI model to use.
        source_focus: Type of sources to prioritize (web, academic, social, finance, all).

    Returns:
        AI-generated answer with inline citations and a Citations section,
        or an "Error: ..." string if anything goes wrong.
    """

    client = _get_client()
    # Unknown names fall back to safe defaults rather than erroring.
    chosen_model = MODEL_MAP.get(model, Models.BEST)
    chosen_sources = SOURCE_FOCUS_MAP.get(source_focus, [SourceFocus.WEB])

    try:
        conversation = client.create_conversation(
            ConversationConfig(
                model=chosen_model,
                citation_mode=CitationMode.DEFAULT,
                search_focus=SearchFocus.WEB,
                source_focus=chosen_sources,
            )
        )

        conversation.ask(query)

        parts = [conversation.answer or "No answer received"]

        # Append a Perplexity-style numbered citation list when sources exist.
        if conversation.search_results:
            parts.append("\n\nCitations:")
            parts.extend(
                f"\n[{index}]: {result.url or ''}"
                for index, result in enumerate(conversation.search_results, 1)
            )

        return "".join(parts)
    except Exception as error:
        # MCP tools report failures as strings instead of raising.
        return f"Error: {error!s}"
155
+
156
+
def main() -> None:
    """
    Run the MCP server (blocking until shutdown).
    """

    mcp.run()


# Allow direct execution of this module as a script.
if __name__ == "__main__":
    main()
@@ -1,4 +1,6 @@
1
- """AI model definitions for Perplexity WebUI Scraper."""
1
+ """
2
+ AI model definitions for Perplexity WebUI Scraper.
3
+ """
2
4
 
3
5
  from __future__ import annotations
4
6
 
@@ -7,7 +9,8 @@ from dataclasses import dataclass
7
9
 
8
10
  @dataclass(frozen=True, slots=True)
9
11
  class Model:
10
- """AI model configuration.
12
+ """
13
+ AI model configuration.
11
14
 
12
15
  Attributes:
13
16
  identifier: Model identifier used by the API.
@@ -19,55 +22,88 @@ class Model:
19
22
 
20
23
 
21
24
  class Models:
22
- """Available AI models with their configurations.
25
+ """
26
+ Available AI models with their configurations.
23
27
 
24
28
  All models use the "copilot" mode which enables web search.
25
29
  """
26
30
 
27
31
  RESEARCH = Model(identifier="pplx_alpha")
28
- """Research - Fast and thorough for routine research"""
32
+ """
33
+ Research - Fast and thorough for routine research.
34
+ """
29
35
 
30
36
  LABS = Model(identifier="pplx_beta")
31
- """Labs - Multi-step tasks with advanced troubleshooting"""
37
+ """
38
+ Labs - Multi-step tasks with advanced troubleshooting.
39
+ """
32
40
 
33
41
  BEST = Model(identifier="pplx_pro_upgraded")
34
- """Best - Automatically selects the most responsive model based on the query"""
42
+ """
43
+ Best - Automatically selects the most responsive model based on the query.
44
+ """
35
45
 
36
46
  SONAR = Model(identifier="experimental")
37
- """Sonar - Perplexity's fast model"""
47
+ """
48
+ Sonar - Perplexity's fast model.
49
+ """
38
50
 
39
51
  GPT_52 = Model(identifier="gpt52")
40
- """GPT-5.2 - OpenAI's latest model"""
52
+ """
53
+ GPT-5.2 - OpenAI's latest model.
54
+ """
41
55
 
42
56
  GPT_52_THINKING = Model(identifier="gpt52_thinking")
43
- """GPT-5.2 Thinking - OpenAI's latest model with thinking"""
57
+ """
58
+ GPT-5.2 Thinking - OpenAI's latest model with thinking.
59
+ """
44
60
 
45
61
  CLAUDE_45_OPUS = Model(identifier="claude45opus")
46
- """Claude Opus 4.5 - Anthropic's Opus reasoning model"""
62
+ """
63
+ Claude Opus 4.5 - Anthropic's Opus reasoning model.
64
+ """
47
65
 
48
66
  CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
49
- """Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking"""
67
+ """
68
+ Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking.
69
+ """
50
70
 
51
71
  GEMINI_3_PRO = Model(identifier="gemini30pro")
52
- """Gemini 3 Pro - Google's newest reasoning model"""
72
+ """
73
+ Gemini 3 Pro - Google's newest reasoning model.
74
+ """
53
75
 
54
76
  GEMINI_3_FLASH = Model(identifier="gemini30flash")
55
- """Gemini 3 Flash - Google's fast reasoning model"""
77
+ """
78
+ Gemini 3 Flash - Google's fast reasoning model.
79
+ """
56
80
 
57
81
  GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
58
- """Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking"""
82
+ """
83
+ Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking.
84
+ """
59
85
 
60
86
  GROK_41 = Model(identifier="grok41nonreasoning")
61
- """Grok 4.1 - xAI's latest advanced model"""
87
+ """
88
+ Grok 4.1 - xAI's latest advanced model.
89
+ """
62
90
 
63
91
  GROK_41_THINKING = Model(identifier="grok41reasoning")
64
- """Grok 4.1 Thinking - xAI's latest reasoning model"""
92
+ """
93
+ Grok 4.1 Thinking - xAI's latest reasoning model.
94
+ """
65
95
 
66
96
  KIMI_K2_THINKING = Model(identifier="kimik2thinking")
67
- """Kimi K2 Thinking - Moonshot AI's latest reasoning model"""
97
+ """
98
+ Kimi K2 Thinking - Moonshot AI's latest reasoning model.
99
+ """
68
100
 
69
101
  CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
70
- """Claude Sonnet 4.5 - Anthropic's newest advanced model"""
102
+ """
103
+ Claude Sonnet 4.5 - Anthropic's newest advanced model.
104
+ """
71
105
 
72
106
  CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
73
- """Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model"""
107
+ """
108
+ Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model.
109
+ """