perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,15 @@
1
- """
2
- Upload and request limits for Perplexity WebUI Scraper.
3
- """
1
+ """Upload and request limits."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
7
5
  from typing import Final
8
6
 
9
7
 
10
- # File Upload Limits
11
8
  MAX_FILES: Final[int] = 30
12
- """
13
- Maximum number of files that can be attached to a single prompt.
14
- """
9
+ """Maximum number of files per prompt."""
15
10
 
16
- MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024 # 50 MB in bytes
17
- """
18
- Maximum file size in bytes.
19
- """
11
+ MAX_FILE_SIZE: Final[int] = 50 * 1024 * 1024
12
+ """Maximum file size in bytes (50 MB)."""
20
13
 
21
- # Request Limits
22
- DEFAULT_TIMEOUT: Final[int] = 30 * 60 # 30 minutes in seconds
23
- """
24
- Default request timeout in seconds.
25
- """
14
+ DEFAULT_TIMEOUT: Final[int] = 30 * 60
15
+ """Default request timeout in seconds (30 minutes)."""
@@ -1,27 +1,18 @@
1
- """Logging configuration using loguru.
2
-
3
- Provides detailed, structured logging for all library operations.
4
- Logging is disabled by default and can be enabled via ClientConfig.
5
- """
1
+ """Logging configuration using loguru."""
6
2
 
7
3
  from __future__ import annotations
8
4
 
5
+ from os import PathLike # noqa: TC003
9
6
  from pathlib import Path
10
- import sys
11
- from typing import TYPE_CHECKING, Any
7
+ from sys import stderr
8
+ from typing import Any
12
9
 
13
10
  from loguru import logger
14
11
 
15
12
  from .enums import LogLevel
16
13
 
17
14
 
18
- if TYPE_CHECKING:
19
- from os import PathLike
20
-
21
- # Remove default handler to start with a clean slate
22
15
  logger.remove()
23
-
24
- # Flag to track if logging is configured
25
16
  _logging_configured: bool = False
26
17
 
27
18
 
@@ -29,37 +20,20 @@ def configure_logging(
29
20
  level: LogLevel | str = LogLevel.DISABLED,
30
21
  log_file: str | PathLike[str] | None = None,
31
22
  ) -> None:
32
- """Configure logging for the library.
33
-
34
- Args:
35
- level: Logging level (LogLevel enum or string). Default is DISABLED.
36
- log_file: Optional file path to write logs. If set, logs go to file only.
37
- If None, logs go to console. Logs are appended, never deleted.
38
-
39
- Note:
40
- - If log_file is set: logs go to file only (no console output)
41
- - If log_file is None: logs go to console only
42
- - Log format includes timestamp, level, module, function, and message
43
- """
23
+ """Configure logging for the library."""
44
24
 
45
25
  global _logging_configured # noqa: PLW0603
46
26
 
47
- # Remove any existing handlers
48
27
  logger.remove()
49
-
50
- # Normalize level to string
51
28
  level_str = level.value if isinstance(level, LogLevel) else str(level).upper()
52
29
 
53
30
  if level_str == "DISABLED":
54
- # Logging disabled, add a null handler to suppress all output
55
31
  logger.disable("perplexity_webui_scraper")
56
32
  _logging_configured = False
57
- return
33
+ return None
58
34
 
59
- # Enable the logger
60
35
  logger.enable("perplexity_webui_scraper")
61
36
 
62
- # Console format - concise but informative
63
37
  console_format = (
64
38
  "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | "
65
39
  "<level>{level: <8}</level> | "
@@ -67,28 +41,25 @@ def configure_logging(
67
41
  "<level>{message}</level>"
68
42
  )
69
43
 
70
- # File format - detailed with extra context
71
44
  file_format = "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} | {message} | {extra}"
72
45
 
73
46
  if log_file is not None:
74
- # Log to file only (no console output)
75
47
  log_path = Path(log_file)
76
48
  logger.add(
77
49
  log_path,
78
50
  format=file_format,
79
51
  level=level_str,
80
- rotation=None, # Never rotate
81
- retention=None, # Never delete
82
- compression=None, # No compression
83
- mode="a", # Append mode
52
+ rotation=None,
53
+ retention=None,
54
+ compression=None,
55
+ mode="a",
84
56
  encoding="utf-8",
85
57
  filter="perplexity_webui_scraper",
86
- enqueue=True, # Thread-safe
58
+ enqueue=True,
87
59
  )
88
60
  else:
89
- # Log to console only (no file)
90
61
  logger.add(
91
- sys.stderr,
62
+ stderr,
92
63
  format=console_format,
93
64
  level=level_str,
94
65
  colorize=True,
@@ -99,40 +70,21 @@ def configure_logging(
99
70
 
100
71
 
101
72
  def get_logger(name: str) -> Any:
102
- """Get a logger instance bound to the given module name.
103
-
104
- Args:
105
- name: Module name (typically __name__).
106
-
107
- Returns:
108
- A loguru logger instance bound to the module.
109
- """
73
+ """Get a logger instance bound to the given module name."""
110
74
 
111
75
  return logger.bind(module=name)
112
76
 
113
77
 
114
- # Convenience shortcuts for common log operations
115
78
  def log_request(
116
79
  method: str,
117
80
  url: str,
118
81
  *,
119
82
  params: dict[str, Any] | None = None,
120
- headers: dict[str, str] | None = None,
121
83
  body_size: int | None = None,
122
84
  ) -> None:
123
- """
124
- Log an outgoing HTTP request with full details.
125
- """
85
+ """Log an outgoing HTTP request."""
126
86
 
127
- logger.debug(
128
- "HTTP request initiated | method={method} url={url} params={params} "
129
- "headers_count={headers_count} body_size={body_size}",
130
- method=method,
131
- url=url,
132
- params=params,
133
- headers_count=len(headers) if headers else 0,
134
- body_size=body_size,
135
- )
87
+ logger.debug(f"HTTP {method} {url} | params={params} body_size={body_size}")
136
88
 
137
89
 
138
90
  def log_response(
@@ -141,24 +93,11 @@ def log_response(
141
93
  status_code: int,
142
94
  *,
143
95
  elapsed_ms: float | None = None,
144
- content_length: int | None = None,
145
- headers: dict[str, str] | None = None,
146
96
  ) -> None:
147
- """
148
- Log an HTTP response with full details.
149
- """
97
+ """Log an HTTP response."""
150
98
 
151
99
  level = "DEBUG" if status_code < 400 else "WARNING"
152
- logger.log(
153
- level,
154
- "HTTP response received | method={method} url={url} status={status_code} "
155
- "elapsed_ms={elapsed_ms:.2f} content_length={content_length}",
156
- method=method,
157
- url=url,
158
- status_code=status_code,
159
- elapsed_ms=elapsed_ms or 0,
160
- content_length=content_length,
161
- )
100
+ logger.log(level, f"HTTP {method} {url} | status={status_code} elapsed_ms={(elapsed_ms or 0):.2f}")  # elapsed_ms defaults to None; fall back to 0 so the ".2f" spec cannot raise TypeError
162
101
 
163
102
 
164
103
  def log_retry(
@@ -167,112 +106,16 @@ def log_retry(
167
106
  exception: BaseException | None,
168
107
  wait_seconds: float,
169
108
  ) -> None:
170
- """
171
- Log a retry attempt.
172
- """
109
+ """Log a retry attempt."""
173
110
 
174
111
  logger.warning(
175
- "Retry attempt | attempt={attempt}/{max_attempts} exception={exception_type}: {exception_msg} "
176
- "wait_seconds={wait_seconds:.2f}",
177
- attempt=attempt,
178
- max_attempts=max_attempts,
179
- exception_type=type(exception).__name__ if exception else "None",
180
- exception_msg=str(exception) if exception else "None",
181
- wait_seconds=wait_seconds,
182
- )
183
-
184
-
185
- def log_cloudflare_detected(status_code: int, markers_found: list[str]) -> None:
186
- """
187
- Log Cloudflare challenge detection.
188
- """
189
-
190
- logger.warning(
191
- "Cloudflare challenge detected | status_code={status_code} markers={markers}",
192
- status_code=status_code,
193
- markers=markers_found,
194
- )
195
-
196
-
197
- def log_fingerprint_rotation(old_profile: str, new_profile: str) -> None:
198
- """
199
- Log browser fingerprint rotation.
200
- """
201
-
202
- logger.info(
203
- "Browser fingerprint rotated | old_profile={old} new_profile={new}",
204
- old=old_profile,
205
- new=new_profile,
206
- )
207
-
208
-
209
- def log_rate_limit(wait_seconds: float) -> None:
210
- """
211
- Log rate limiting wait.
212
- """
213
-
214
- logger.debug(
215
- "Rate limiter throttling | wait_seconds={wait_seconds:.3f}",
216
- wait_seconds=wait_seconds,
217
- )
218
-
219
-
220
- def log_session_created(impersonate: str, timeout: int) -> None:
221
- """
222
- Log HTTP session creation.
223
- """
224
-
225
- logger.info(
226
- "HTTP session created | browser_profile={profile} timeout={timeout}s",
227
- profile=impersonate,
228
- timeout=timeout,
229
- )
230
-
231
-
232
- def log_conversation_created(config_summary: str) -> None:
233
- """
234
- Log conversation creation.
235
- """
236
-
237
- logger.info(
238
- "Conversation created | config={config}",
239
- config=config_summary,
240
- )
241
-
242
-
243
- def log_query_sent(query: str, model: str, has_files: bool) -> None:
244
- """
245
- Log a query being sent.
246
- """
247
-
248
- logger.info(
249
- "Query sent | model={model} has_files={has_files} query_preview={query_preview}",
250
- model=model,
251
- has_files=has_files,
252
- query_preview=query[:100] + "..." if len(query) > 100 else query,
253
- )
254
-
255
-
256
- def log_stream_chunk(chunk_size: int, is_final: bool) -> None:
257
- """
258
- Log a streaming chunk received.
259
- """
260
-
261
- logger.debug(
262
- "Stream chunk received | size={size} is_final={is_final}",
263
- size=chunk_size,
264
- is_final=is_final,
112
+ f"Retry {attempt}/{max_attempts} | "
113
+ f"exception={type(exception).__name__ if exception else 'None'} "
114
+ f"wait={wait_seconds:.2f}s"
265
115
  )
266
116
 
267
117
 
268
118
  def log_error(error: Exception, context: str = "") -> None:
269
- """
270
- Log an error with full traceback.
271
- """
119
+ """Log an error with traceback."""
272
120
 
273
- logger.exception(
274
- "Error occurred | context={context} error_type={error_type} message={message}",
275
- context=context,
276
- error_type=type(error).__name__,
277
- message=str(error),
278
- )
121
+ logger.exception(f"Error | context={context} type={type(error).__name__} message={error}")
@@ -1,8 +1,4 @@
1
- """
2
- MCP (Model Context Protocol) server for Perplexity WebUI Scraper.
3
-
4
- This module provides an MCP server that exposes Perplexity AI search capabilities to AI assistants.
5
- """
1
+ """MCP server for Perplexity WebUI Scraper."""
6
2
 
7
3
  from __future__ import annotations
8
4
 
@@ -11,9 +7,7 @@ __all__: list[str] = ["run_server"]
11
7
 
12
8
 
13
9
  def run_server() -> None:
14
- """
15
- Run the MCP server.
16
- """
10
+ """Run the MCP server."""
17
11
 
18
12
  from .server import main # noqa: PLC0415
19
13
 
@@ -1,6 +1,4 @@
1
- """
2
- CLI entry point for MCP server.
3
- """
1
+ """CLI entry point for MCP server."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
@@ -1,6 +1,4 @@
1
- """
2
- MCP server implementation using FastMCP.
3
- """
1
+ """MCP server implementation using FastMCP."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
@@ -12,61 +10,18 @@ from fastmcp import FastMCP
12
10
  from perplexity_webui_scraper.config import ClientConfig, ConversationConfig
13
11
  from perplexity_webui_scraper.core import Perplexity
14
12
  from perplexity_webui_scraper.enums import CitationMode, SearchFocus, SourceFocus
15
- from perplexity_webui_scraper.models import Models
13
+ from perplexity_webui_scraper.models import Model, Models
16
14
 
17
15
 
18
- # Create FastMCP server
19
16
  mcp = FastMCP(
20
- "perplexity-webui-scraper-mcp",
17
+ "perplexity-webui-scraper",
21
18
  instructions=(
22
- "Search the web with Perplexity AI using the full range of premium models. "
23
- "Unlike the official Perplexity API, this tool provides access to GPT-5.2, Claude 4.5, "
24
- "Gemini 3, Grok 4.1, and other cutting-edge models with reasoning capabilities. "
25
- "Use for real-time web research, academic searches, financial data, and current events. "
26
- "Supports multiple source types: web, academic papers, social media, and SEC filings."
19
+ "Search the web with Perplexity AI using premium models. "
20
+ "Each tool uses a specific AI model - enable only the ones you need. "
21
+ "All tools support source_focus: web, academic, social, finance, all."
27
22
  ),
28
23
  )
29
24
 
30
- # Model name mapping to Model objects
31
- MODEL_MAP = {
32
- "best": Models.BEST,
33
- "research": Models.RESEARCH,
34
- "labs": Models.LABS,
35
- "sonar": Models.SONAR,
36
- "gpt52": Models.GPT_52,
37
- "gpt52_thinking": Models.GPT_52_THINKING,
38
- "claude_opus": Models.CLAUDE_45_OPUS,
39
- "claude_opus_thinking": Models.CLAUDE_45_OPUS_THINKING,
40
- "claude_sonnet": Models.CLAUDE_45_SONNET,
41
- "claude_sonnet_thinking": Models.CLAUDE_45_SONNET_THINKING,
42
- "gemini_pro": Models.GEMINI_3_PRO,
43
- "gemini_flash": Models.GEMINI_3_FLASH,
44
- "gemini_flash_thinking": Models.GEMINI_3_FLASH_THINKING,
45
- "grok": Models.GROK_41,
46
- "grok_thinking": Models.GROK_41_THINKING,
47
- "kimi_thinking": Models.KIMI_K2_THINKING,
48
- }
49
-
50
- ModelName = Literal[
51
- "best",
52
- "research",
53
- "labs",
54
- "sonar",
55
- "gpt52",
56
- "gpt52_thinking",
57
- "claude_opus",
58
- "claude_opus_thinking",
59
- "claude_sonnet",
60
- "claude_sonnet_thinking",
61
- "gemini_pro",
62
- "gemini_flash",
63
- "gemini_flash_thinking",
64
- "grok",
65
- "grok_thinking",
66
- "kimi_thinking",
67
- ]
68
-
69
- # Source focus mapping
70
25
  SOURCE_FOCUS_MAP = {
71
26
  "web": [SourceFocus.WEB],
72
27
  "academic": [SourceFocus.ACADEMIC],
@@ -77,16 +32,14 @@ SOURCE_FOCUS_MAP = {
77
32
 
78
33
  SourceFocusName = Literal["web", "academic", "social", "finance", "all"]
79
34
 
80
- # Client singleton
81
35
  _client: Perplexity | None = None
82
36
 
83
37
 
84
38
  def _get_client() -> Perplexity:
85
- """
86
- Get or create Perplexity client.
87
- """
39
+ """Get or create Perplexity client."""
88
40
 
89
41
  global _client # noqa: PLW0603
42
+
90
43
  if _client is None:
91
44
  token = environ.get("PERPLEXITY_SESSION_TOKEN", "")
92
45
 
@@ -95,41 +48,22 @@ def _get_client() -> Perplexity:
95
48
  "PERPLEXITY_SESSION_TOKEN environment variable is required. "
96
49
  "Set it with: export PERPLEXITY_SESSION_TOKEN='your_token_here'"
97
50
  )
51
+
98
52
  _client = Perplexity(token, config=ClientConfig())
99
53
 
100
54
  return _client
101
55
 
102
56
 
103
- @mcp.tool
104
- def perplexity_ask(
105
- query: str,
106
- model: ModelName = "best",
107
- source_focus: SourceFocusName = "web",
108
- ) -> str:
109
- """
110
- Ask a question and get AI-generated answers with real-time data from the internet.
111
-
112
- Returns up-to-date information from web sources. Use for factual queries, research,
113
- current events, news, library versions, documentation, or any question requiring
114
- the latest information.
115
-
116
- Args:
117
- query: The question to ask.
118
- model: AI model to use.
119
- source_focus: Type of sources to prioritize (web, academic, social, finance, all).
120
-
121
- Returns:
122
- AI-generated answer with inline citations and a Citations section.
123
- """
57
+ def _ask(query: str, model: Model, source_focus: SourceFocusName = "web") -> str:
58
+ """Execute a query with a specific model."""
124
59
 
125
60
  client = _get_client()
126
- selected_model = MODEL_MAP.get(model, Models.BEST)
127
61
  sources = SOURCE_FOCUS_MAP.get(source_focus, [SourceFocus.WEB])
128
62
 
129
63
  try:
130
64
  conversation = client.create_conversation(
131
65
  ConversationConfig(
132
- model=selected_model,
66
+ model=model,
133
67
  citation_mode=CitationMode.DEFAULT,
134
68
  search_focus=SearchFocus.WEB,
135
69
  source_focus=sources,
@@ -139,7 +73,6 @@ def perplexity_ask(
139
73
  conversation.ask(query)
140
74
  answer = conversation.answer or "No answer received"
141
75
 
142
- # Build response with Perplexity-style citations
143
76
  response_parts = [answer]
144
77
 
145
78
  if conversation.search_results:
@@ -150,14 +83,104 @@ def perplexity_ask(
150
83
  response_parts.append(f"\n[{i}]: {url}")
151
84
 
152
85
  return "".join(response_parts)
86
+
153
87
  except Exception as error:
154
88
  return f"Error: {error!s}"
155
89
 
156
90
 
91
+ @mcp.tool
92
+ def pplx_ask(query: str, source_focus: SourceFocusName = "web") -> str:
93
+ """Ask a question with real-time data from the internet (auto-selects best model)."""
94
+
95
+ return _ask(query, Models.BEST, source_focus)
96
+
97
+
98
+ @mcp.tool
99
+ def pplx_deep_research(query: str, source_focus: SourceFocusName = "web") -> str:
100
+ """Deep Research - In-depth reports with more sources, charts, and advanced reasoning."""
101
+
102
+ return _ask(query, Models.DEEP_RESEARCH, source_focus)
103
+
104
+
105
+ @mcp.tool
106
+ def pplx_sonar(query: str, source_focus: SourceFocusName = "web") -> str:
107
+ """Sonar - Perplexity's latest model."""
108
+
109
+ return _ask(query, Models.SONAR, source_focus)
110
+
111
+
112
+ @mcp.tool
113
+ def pplx_gpt52(query: str, source_focus: SourceFocusName = "web") -> str:
114
+ """GPT-5.2 - OpenAI's latest model."""
115
+
116
+ return _ask(query, Models.GPT_52, source_focus)
117
+
118
+
119
+ @mcp.tool
120
+ def pplx_gpt52_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
121
+ """GPT-5.2 Thinking - OpenAI's latest model with extended thinking."""
122
+
123
+ return _ask(query, Models.GPT_52_THINKING, source_focus)
124
+
125
+
126
+ @mcp.tool
127
+ def pplx_claude_sonnet(query: str, source_focus: SourceFocusName = "web") -> str:
128
+ """Claude Sonnet 4.5 - Anthropic's fast model."""
129
+
130
+ return _ask(query, Models.CLAUDE_45_SONNET, source_focus)
131
+
132
+
133
+ @mcp.tool
134
+ def pplx_claude_sonnet_think(query: str, source_focus: SourceFocusName = "web") -> str:
135
+ """Claude Sonnet 4.5 Thinking - Anthropic's fast model with extended thinking."""
136
+
137
+ return _ask(query, Models.CLAUDE_45_SONNET_THINKING, source_focus)
138
+
139
+
140
+ @mcp.tool
141
+ def pplx_gemini_flash(query: str, source_focus: SourceFocusName = "web") -> str:
142
+ """Gemini 3 Flash - Google's fast model."""
143
+
144
+ return _ask(query, Models.GEMINI_3_FLASH, source_focus)
145
+
146
+
147
+ @mcp.tool
148
+ def pplx_gemini_flash_think(query: str, source_focus: SourceFocusName = "web") -> str:
149
+ """Gemini 3 Flash Thinking - Google's fast model with extended thinking."""
150
+
151
+ return _ask(query, Models.GEMINI_3_FLASH_THINKING, source_focus)
152
+
153
+
154
+ @mcp.tool
155
+ def pplx_gemini_pro_think(query: str, source_focus: SourceFocusName = "web") -> str:
156
+ """Gemini 3 Pro Thinking - Google's most advanced model with extended thinking."""
157
+
158
+ return _ask(query, Models.GEMINI_3_PRO_THINKING, source_focus)
159
+
160
+
161
+ @mcp.tool
162
+ def pplx_grok(query: str, source_focus: SourceFocusName = "web") -> str:
163
+ """Grok 4.1 - xAI's latest model."""
164
+
165
+ return _ask(query, Models.GROK_41, source_focus)
166
+
167
+
168
+ @mcp.tool
169
+ def pplx_grok_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
170
+ """Grok 4.1 Thinking - xAI's latest model with extended thinking."""
171
+
172
+ return _ask(query, Models.GROK_41_THINKING, source_focus)
173
+
174
+
175
+ @mcp.tool
176
+ def pplx_kimi_thinking(query: str, source_focus: SourceFocusName = "web") -> str:
177
+ """Kimi K2.5 Thinking - Moonshot AI's latest model."""
178
+
179
+ return _ask(query, Models.KIMI_K25_THINKING, source_focus)
180
+
181
+
157
182
  def main() -> None:
158
- """
159
- Run the MCP server.
160
- """
183
+ """Run the MCP server."""
161
184
 
162
185
  mcp.run()
163
186