perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
1
- """
2
- AI model definitions for Perplexity WebUI Scraper.
3
- """
1
+ """AI model definitions."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
@@ -9,101 +7,59 @@ from dataclasses import dataclass
9
7
 
10
8
  @dataclass(frozen=True, slots=True)
11
9
  class Model:
12
- """
13
- AI model configuration.
14
-
15
- Attributes:
16
- identifier: Model identifier used by the API.
17
- mode: Model execution mode. Default: "copilot".
18
- """
10
+ """AI model configuration."""
19
11
 
20
12
  identifier: str
21
13
  mode: str = "copilot"
22
14
 
23
15
 
24
16
  class Models:
25
- """
26
- Available AI models with their configurations.
27
-
28
- All models use the "copilot" mode which enables web search.
29
- """
17
+ """Available AI models (all use copilot mode with web search)."""
30
18
 
31
- RESEARCH = Model(identifier="pplx_alpha")
32
- """
33
- Research - Fast and thorough for routine research.
34
- """
19
+ DEEP_RESEARCH = Model(identifier="pplx_alpha")
20
+ """Deep Research - Create in-depth reports with more sources, charts, and advanced reasoning."""
35
21
 
36
- LABS = Model(identifier="pplx_beta")
37
- """
38
- Labs - Multi-step tasks with advanced troubleshooting.
39
- """
22
+ CREATE_FILES_AND_APPS = Model(identifier="pplx_beta")
23
+ """Create files and apps (previously known as Labs) - Turn your ideas into docs, slides, dashboards, and more."""
40
24
 
41
- BEST = Model(identifier="pplx_pro_upgraded")
42
- """
43
- Best - Automatically selects the most responsive model based on the query.
44
- """
25
+ BEST = Model(identifier="pplx_pro")
26
+ """Best - Automatically selects the best model based on the query."""
45
27
 
46
28
  SONAR = Model(identifier="experimental")
47
- """
48
- Sonar - Perplexity's fast model.
49
- """
29
+ """Sonar - Perplexity's latest model."""
50
30
 
51
31
  GEMINI_3_FLASH = Model(identifier="gemini30flash")
52
- """
53
- Gemini 3 Flash - Google's fast reasoning model.
54
- """
32
+ """Gemini 3 Flash - Google's fast model."""
55
33
 
56
34
  GEMINI_3_FLASH_THINKING = Model(identifier="gemini30flash_high")
57
- """
58
- Gemini 3 Flash Thinking - Google's fast reasoning model with enhanced thinking.
59
- """
35
+ """Gemini 3 Flash Thinking - Google's fast model (thinking)."""
60
36
 
61
- GEMINI_3_PRO = Model(identifier="gemini30pro")
62
- """
63
- Gemini 3 Pro - Google's newest reasoning model.
64
- """
37
+ GEMINI_3_PRO_THINKING = Model(identifier="gemini30pro")
38
+ """Gemini 3 Pro Thinking - Google's most advanced model (thinking)."""
65
39
 
66
40
  GPT_52 = Model(identifier="gpt52")
67
- """
68
- GPT-5.2 - OpenAI's latest model.
69
- """
41
+ """GPT-5.2 - OpenAI's latest model."""
70
42
 
71
43
  GPT_52_THINKING = Model(identifier="gpt52_thinking")
72
- """
73
- GPT-5.2 Thinking - OpenAI's latest model with thinking.
74
- """
44
+ """GPT-5.2 Thinking - OpenAI's latest model (thinking)."""
75
45
 
76
46
  CLAUDE_45_SONNET = Model(identifier="claude45sonnet")
77
- """
78
- Claude Sonnet 4.5 - Anthropic's newest advanced model.
79
- """
47
+ """Claude Sonnet 4.5 - Anthropic's fast model."""
80
48
 
81
49
  CLAUDE_45_SONNET_THINKING = Model(identifier="claude45sonnetthinking")
82
- """
83
- Claude Sonnet 4.5 Thinking - Anthropic's newest reasoning model.
84
- """
50
+ """Claude Sonnet 4.5 Thinking - Anthropic's fast model (thinking)."""
85
51
 
86
- CLAUDE_45_OPUS = Model(identifier="claude45opus")
87
- """
88
- Claude Opus 4.5 - Anthropic's Opus reasoning model.
89
- """
52
+ CLAUDE_45_OPUS = Model(identifier="claude45opus") # TODO: check correct identifier
53
+ """Claude Opus 4.5 - Anthropic's Opus reasoning model."""
90
54
 
91
- CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking")
92
- """
93
- Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model with thinking.
94
- """
55
+ CLAUDE_45_OPUS_THINKING = Model(identifier="claude45opusthinking") # TODO: check correct identifier
56
+ """Claude Opus 4.5 Thinking - Anthropic's Opus reasoning model (thinking)."""
95
57
 
96
58
  GROK_41 = Model(identifier="grok41nonreasoning")
97
- """
98
- Grok 4.1 - xAI's latest advanced model.
99
- """
59
+ """Grok 4.1 - xAI's latest model."""
100
60
 
101
61
  GROK_41_THINKING = Model(identifier="grok41reasoning")
102
- """
103
- Grok 4.1 Thinking - xAI's latest reasoning model.
104
- """
105
-
106
- KIMI_K2_THINKING = Model(identifier="kimik2thinking")
107
- """
108
- Kimi K2 Thinking - Moonshot AI's latest reasoning model.
109
- """
62
+ """Grok 4.1 Thinking - xAI's latest model (thinking)."""
63
+
64
+ KIMI_K25_THINKING = Model(identifier="kimik25thinking")
65
+ """Kimi K2.5 Thinking - Moonshot AI's latest model."""
@@ -1,29 +1,24 @@
1
- """
2
- Resilience utilities for HTTP requests.
3
-
4
- Provides retry mechanisms, rate limiting, and Cloudflare bypass utilities
5
- using the tenacity library for robust retry handling.
6
- """
1
+ """Resilience utilities for HTTP requests."""
7
2
 
8
3
  from __future__ import annotations
9
4
 
10
- from collections.abc import Callable
11
5
  from dataclasses import dataclass, field
12
- import random
6
+ from random import choice
13
7
  from threading import Lock
14
- import time
15
- from typing import TYPE_CHECKING, Any, TypeVar
8
+ from time import monotonic, sleep
9
+ from typing import TYPE_CHECKING, TypeVar
16
10
 
17
- from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
11
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter
18
12
 
19
13
 
20
14
  if TYPE_CHECKING:
21
15
  from collections.abc import Callable
22
16
 
23
- T = TypeVar("T")
17
+ from tenacity import RetryCallState
24
18
 
25
19
 
26
- # Browser profiles supported by curl_cffi for fingerprint rotation
20
+ T = TypeVar("T")
21
+
27
22
  BROWSER_PROFILES: tuple[str, ...] = (
28
23
  "chrome",
29
24
  "chrome110",
@@ -41,29 +36,10 @@ BROWSER_PROFILES: tuple[str, ...] = (
41
36
  "safari17_2_ios",
42
37
  )
43
38
 
44
- # Cloudflare challenge detection markers
45
- CLOUDFLARE_MARKERS: tuple[str, ...] = (
46
- "cf-ray",
47
- "cf-mitigated",
48
- "__cf_chl_",
49
- "Checking your browser",
50
- "Just a moment...",
51
- "cloudflare",
52
- "Enable JavaScript and cookies to continue",
53
- "challenge-platform",
54
- )
55
-
56
39
 
57
40
  @dataclass(slots=True)
58
41
  class RetryConfig:
59
- """Configuration for retry behavior.
60
-
61
- Attributes:
62
- max_retries: Maximum number of retry attempts.
63
- base_delay: Initial delay in seconds before first retry.
64
- max_delay: Maximum delay between retries.
65
- jitter: Random jitter factor to add to delays (0-1).
66
- """
42
+ """Configuration for retry behavior."""
67
43
 
68
44
  max_retries: int = 3
69
45
  base_delay: float = 1.0
@@ -73,23 +49,17 @@ class RetryConfig:
73
49
 
74
50
  @dataclass
75
51
  class RateLimiter:
76
- """Token bucket rate limiter for throttling requests.
77
-
78
- Attributes:
79
- requests_per_second: Maximum requests allowed per second.
80
- """
52
+ """Token bucket rate limiter."""
81
53
 
82
54
  requests_per_second: float = 0.5
83
55
  _last_request: float = field(default=0.0, init=False)
84
56
  _lock: Lock = field(default_factory=Lock, init=False)
85
57
 
86
58
  def acquire(self) -> None:
87
- """
88
- Wait until a request can be made within rate limits.
89
- """
59
+ """Wait until a request can be made within rate limits."""
90
60
 
91
61
  with self._lock:
92
- now = time.monotonic()
62
+ now = monotonic()
93
63
  min_interval = 1.0 / self.requests_per_second
94
64
 
95
65
  if self._last_request > 0:
@@ -97,59 +67,15 @@ class RateLimiter:
97
67
  wait_time = min_interval - elapsed
98
68
 
99
69
  if wait_time > 0:
100
- time.sleep(wait_time)
70
+ sleep(wait_time)
101
71
 
102
- self._last_request = time.monotonic()
72
+ self._last_request = monotonic()
103
73
 
104
74
 
105
75
  def get_random_browser_profile() -> str:
106
- """Get a random browser profile for fingerprint rotation.
107
-
108
- Returns:
109
- A browser profile identifier compatible with curl_cffi.
110
- """
111
-
112
- return random.choice(BROWSER_PROFILES)
113
-
114
-
115
- def is_cloudflare_challenge(response_text: str, headers: dict[str, Any] | None = None) -> bool:
116
- """Detect if a response is a Cloudflare challenge page.
117
-
118
- Args:
119
- response_text: The response body text.
120
- headers: Optional response headers.
76
+ """Get a random browser profile for fingerprint rotation."""
121
77
 
122
- Returns:
123
- True if Cloudflare challenge markers are detected.
124
- """
125
-
126
- text_lower = response_text.lower()
127
-
128
- for marker in CLOUDFLARE_MARKERS:
129
- if marker.lower() in text_lower:
130
- return True
131
-
132
- if headers:
133
- for key in headers:
134
- key_lower = key.lower()
135
-
136
- if "cf-" in key_lower or "cloudflare" in key_lower:
137
- return True
138
-
139
- return False
140
-
141
-
142
- def is_cloudflare_status(status_code: int) -> bool:
143
- """Check if status code indicates a potential Cloudflare block.
144
-
145
- Args:
146
- status_code: HTTP status code.
147
-
148
- Returns:
149
- True if status code is commonly used by Cloudflare challenges.
150
- """
151
-
152
- return status_code in (403, 503, 520, 521, 522, 523, 524, 525, 526)
78
+ return choice(BROWSER_PROFILES)
153
79
 
154
80
 
155
81
  def create_retry_decorator(
@@ -157,16 +83,7 @@ def create_retry_decorator(
157
83
  retryable_exceptions: tuple[type[Exception], ...],
158
84
  on_retry: Callable[[RetryCallState], None] | None = None,
159
85
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
160
- """Create a tenacity retry decorator with the given configuration.
161
-
162
- Args:
163
- config: Retry configuration.
164
- retryable_exceptions: Tuple of exception types to retry on.
165
- on_retry: Optional callback to execute on each retry.
166
-
167
- Returns:
168
- A retry decorator configured with the given settings.
169
- """
86
+ """Create a tenacity retry decorator with the given configuration."""
170
87
 
171
88
  return retry(
172
89
  stop=stop_after_attempt(config.max_retries + 1),
@@ -1,54 +1,47 @@
1
- """
2
- Response types and data models.
3
- """
1
+ """Response types and data models."""
4
2
 
5
3
  from __future__ import annotations
6
4
 
7
- from dataclasses import dataclass, field
5
+ from dataclasses import dataclass
8
6
  from typing import Any
9
7
 
8
+ from pydantic import BaseModel, ConfigDict
10
9
 
11
- @dataclass(frozen=True, slots=True)
12
- class Coordinates:
13
- """
14
- Geographic coordinates (lat/lng).
15
- """
10
+
11
+ class Coordinates(BaseModel):
12
+ """Geographic coordinates (lat/lng)."""
13
+
14
+ model_config = ConfigDict(frozen=True)
16
15
 
17
16
  latitude: float
18
17
  longitude: float
19
18
 
20
19
 
21
- @dataclass(frozen=True, slots=True)
22
- class SearchResultItem:
23
- """
24
- A single search result.
25
- """
20
+ class SearchResultItem(BaseModel):
21
+ """A single search result."""
22
+
23
+ model_config = ConfigDict(frozen=True)
26
24
 
27
25
  title: str | None = None
28
26
  snippet: str | None = None
29
27
  url: str | None = None
30
28
 
31
29
 
32
- @dataclass(slots=True)
33
- class Response:
34
- """
35
- Response from Perplexity AI.
36
- """
30
+ class Response(BaseModel):
31
+ """Response from Perplexity AI."""
37
32
 
38
33
  title: str | None = None
39
34
  answer: str | None = None
40
- chunks: list[str] = field(default_factory=list)
35
+ chunks: list[str] = []
41
36
  last_chunk: str | None = None
42
- search_results: list[SearchResultItem] = field(default_factory=list)
37
+ search_results: list[SearchResultItem] = []
43
38
  conversation_uuid: str | None = None
44
- raw_data: dict[str, Any] = field(default_factory=dict)
39
+ raw_data: dict[str, Any] = {}
45
40
 
46
41
 
47
42
  @dataclass(frozen=True, slots=True)
48
43
  class _FileInfo:
49
- """
50
- Internal file info for uploads.
51
- """
44
+ """Internal file info for uploads."""
52
45
 
53
46
  path: str
54
47
  size: int