perplexity-webui-scraper 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +24 -3
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +21 -53
- perplexity_webui_scraper/config.py +12 -29
- perplexity_webui_scraper/constants.py +13 -51
- perplexity_webui_scraper/core.py +19 -155
- perplexity_webui_scraper/enums.py +26 -88
- perplexity_webui_scraper/exceptions.py +29 -50
- perplexity_webui_scraper/http.py +39 -332
- perplexity_webui_scraper/limits.py +6 -16
- perplexity_webui_scraper/logging.py +23 -180
- perplexity_webui_scraper/mcp/__init__.py +2 -8
- perplexity_webui_scraper/mcp/__main__.py +1 -3
- perplexity_webui_scraper/mcp/server.py +105 -82
- perplexity_webui_scraper/models.py +27 -71
- perplexity_webui_scraper/resilience.py +17 -100
- perplexity_webui_scraper/types.py +18 -25
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/METADATA +121 -102
- perplexity_webui_scraper-0.4.1.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/WHEEL +1 -1
- perplexity_webui_scraper-0.3.7.dist-info/RECORD +0 -21
- {perplexity_webui_scraper-0.3.7.dist-info → perplexity_webui_scraper-0.4.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Custom exceptions for Perplexity WebUI Scraper.
|
|
3
|
-
"""
|
|
1
|
+
"""Custom exceptions."""
|
|
4
2
|
|
|
5
3
|
from __future__ import annotations
|
|
6
4
|
|
|
7
5
|
|
|
8
6
|
__all__: list[str] = [
|
|
9
7
|
"AuthenticationError",
|
|
10
|
-
"CloudflareBlockError",
|
|
11
8
|
"FileUploadError",
|
|
12
9
|
"FileValidationError",
|
|
10
|
+
"HTTPError",
|
|
13
11
|
"PerplexityError",
|
|
14
12
|
"RateLimitError",
|
|
15
13
|
"ResearchClarifyingQuestionsError",
|
|
@@ -21,53 +19,50 @@ __all__: list[str] = [
|
|
|
21
19
|
class PerplexityError(Exception):
|
|
22
20
|
"""Base exception for all Perplexity-related errors."""
|
|
23
21
|
|
|
24
|
-
def __init__(self, message: str
|
|
22
|
+
def __init__(self, message: str) -> None:
|
|
25
23
|
self.message = message
|
|
24
|
+
super().__init__(message)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class HTTPError(PerplexityError):
|
|
28
|
+
"""Raised when an HTTP request fails."""
|
|
29
|
+
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
message: str,
|
|
33
|
+
status_code: int | None = None,
|
|
34
|
+
url: str | None = None,
|
|
35
|
+
response_body: str | None = None,
|
|
36
|
+
) -> None:
|
|
26
37
|
self.status_code = status_code
|
|
38
|
+
self.url = url
|
|
39
|
+
self.response_body = response_body[:500] if response_body and len(response_body) > 500 else response_body
|
|
27
40
|
super().__init__(message)
|
|
28
41
|
|
|
42
|
+
def __repr__(self) -> str:
|
|
43
|
+
return f"HTTPError(status={self.status_code}, url={self.url!r}, message={self.message!r})"
|
|
29
44
|
|
|
30
|
-
|
|
45
|
+
|
|
46
|
+
class AuthenticationError(HTTPError):
|
|
31
47
|
"""Raised when session token is invalid or expired (HTTP 403)."""
|
|
32
48
|
|
|
33
49
|
def __init__(self, message: str | None = None) -> None:
|
|
34
50
|
super().__init__(
|
|
35
|
-
message
|
|
36
|
-
or "Access forbidden (403). Your session token is invalid or expired. "
|
|
37
|
-
"Please obtain a new session token from your browser cookies.",
|
|
51
|
+
message or "Access forbidden (403). Session token invalid or expired.",
|
|
38
52
|
status_code=403,
|
|
39
53
|
)
|
|
40
54
|
|
|
41
55
|
|
|
42
|
-
class RateLimitError(
|
|
56
|
+
class RateLimitError(HTTPError):
|
|
43
57
|
"""Raised when rate limit is exceeded (HTTP 429)."""
|
|
44
58
|
|
|
45
59
|
def __init__(self, message: str | None = None) -> None:
|
|
46
60
|
super().__init__(
|
|
47
|
-
message or "Rate limit exceeded (429). Please wait
|
|
61
|
+
message or "Rate limit exceeded (429). Please wait before retrying.",
|
|
48
62
|
status_code=429,
|
|
49
63
|
)
|
|
50
64
|
|
|
51
65
|
|
|
52
|
-
class CloudflareBlockError(PerplexityError):
|
|
53
|
-
"""
|
|
54
|
-
Raised when Cloudflare blocks the request with a challenge page.
|
|
55
|
-
|
|
56
|
-
This typically means the request triggered Cloudflare's bot detection.
|
|
57
|
-
The client will automatically retry with fingerprint rotation, but if
|
|
58
|
-
this exception is raised, all retry attempts have failed.
|
|
59
|
-
"""
|
|
60
|
-
|
|
61
|
-
def __init__(self, message: str | None = None) -> None:
|
|
62
|
-
super().__init__(
|
|
63
|
-
message
|
|
64
|
-
or "Cloudflare challenge detected. The request was blocked by Cloudflare's "
|
|
65
|
-
"bot protection. Try waiting a few minutes before retrying, or obtain a "
|
|
66
|
-
"fresh session token.",
|
|
67
|
-
status_code=403,
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
|
|
71
66
|
class FileUploadError(PerplexityError):
|
|
72
67
|
"""Raised when file upload fails."""
|
|
73
68
|
|
|
@@ -85,34 +80,18 @@ class FileValidationError(PerplexityError):
|
|
|
85
80
|
|
|
86
81
|
|
|
87
82
|
class ResearchClarifyingQuestionsError(PerplexityError):
|
|
88
|
-
"""
|
|
89
|
-
Raised when Research mode requires clarifying questions.
|
|
90
|
-
|
|
91
|
-
This library does not support programmatic interaction with clarifying questions.
|
|
92
|
-
Consider rephrasing your query to be more specific.
|
|
93
|
-
|
|
94
|
-
Attributes:
|
|
95
|
-
questions: List of clarifying questions from the API.
|
|
96
|
-
"""
|
|
83
|
+
"""Raised when Research mode requires clarifying questions."""
|
|
97
84
|
|
|
98
85
|
def __init__(self, questions: list[str]) -> None:
|
|
99
86
|
self.questions = questions
|
|
100
|
-
questions_text = "\n".join(f" - {q}" for q in questions) if questions else " (
|
|
101
|
-
|
|
87
|
+
questions_text = "\n".join(f" - {q}" for q in questions) if questions else " (none)"
|
|
102
88
|
super().__init__(
|
|
103
|
-
f"Research mode
|
|
104
|
-
"Programmatic interaction with clarifying questions is not supported. "
|
|
105
|
-
"Please rephrase your query to be more specific."
|
|
89
|
+
f"Research mode requires clarification:\n{questions_text}\nPlease rephrase your query to be more specific."
|
|
106
90
|
)
|
|
107
91
|
|
|
108
92
|
|
|
109
93
|
class ResponseParsingError(PerplexityError):
|
|
110
|
-
"""
|
|
111
|
-
Raised when the API response cannot be parsed.
|
|
112
|
-
|
|
113
|
-
Attributes:
|
|
114
|
-
raw_data: The raw data that failed to parse.
|
|
115
|
-
"""
|
|
94
|
+
"""Raised when the API response cannot be parsed."""
|
|
116
95
|
|
|
117
96
|
def __init__(self, message: str, raw_data: str | None = None) -> None:
|
|
118
97
|
self.raw_data = raw_data
|
perplexity_webui_scraper/http.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
HTTP client wrapper for Perplexity API requests.
|
|
3
|
-
"""
|
|
1
|
+
"""HTTP client wrapper."""
|
|
4
2
|
|
|
5
3
|
from __future__ import annotations
|
|
6
4
|
|
|
@@ -12,28 +10,10 @@ from curl_cffi.requests import Response as CurlResponse
|
|
|
12
10
|
from curl_cffi.requests import Session
|
|
13
11
|
|
|
14
12
|
from .constants import API_BASE_URL, DEFAULT_HEADERS, ENDPOINT_ASK, ENDPOINT_SEARCH_INIT, SESSION_COOKIE_NAME
|
|
15
|
-
from .exceptions import AuthenticationError,
|
|
13
|
+
from .exceptions import AuthenticationError, HTTPError, PerplexityError, RateLimitError
|
|
16
14
|
from .limits import DEFAULT_TIMEOUT
|
|
17
|
-
from .logging import
|
|
18
|
-
|
|
19
|
-
log_cloudflare_detected,
|
|
20
|
-
log_error,
|
|
21
|
-
log_fingerprint_rotation,
|
|
22
|
-
log_rate_limit,
|
|
23
|
-
log_request,
|
|
24
|
-
log_response,
|
|
25
|
-
log_retry,
|
|
26
|
-
log_session_created,
|
|
27
|
-
)
|
|
28
|
-
from .resilience import (
|
|
29
|
-
CLOUDFLARE_MARKERS,
|
|
30
|
-
RateLimiter,
|
|
31
|
-
RetryConfig,
|
|
32
|
-
create_retry_decorator,
|
|
33
|
-
get_random_browser_profile,
|
|
34
|
-
is_cloudflare_challenge,
|
|
35
|
-
is_cloudflare_status,
|
|
36
|
-
)
|
|
15
|
+
from .logging import get_logger, log_request, log_response, log_retry
|
|
16
|
+
from .resilience import RateLimiter, RetryConfig, create_retry_decorator, get_random_browser_profile
|
|
37
17
|
|
|
38
18
|
|
|
39
19
|
if TYPE_CHECKING:
|
|
@@ -41,16 +21,12 @@ if TYPE_CHECKING:
|
|
|
41
21
|
|
|
42
22
|
from tenacity import RetryCallState
|
|
43
23
|
|
|
24
|
+
|
|
44
25
|
logger = get_logger(__name__)
|
|
45
26
|
|
|
46
27
|
|
|
47
28
|
class HTTPClient:
|
|
48
|
-
"""
|
|
49
|
-
HTTP client wrapper with error handling for Perplexity API.
|
|
50
|
-
|
|
51
|
-
Provides a unified interface for making HTTP requests with automatic
|
|
52
|
-
error handling, retry mechanisms, rate limiting, and Cloudflare bypass.
|
|
53
|
-
"""
|
|
29
|
+
"""HTTP client with retry, rate limiting, and error handling."""
|
|
54
30
|
|
|
55
31
|
__slots__ = (
|
|
56
32
|
"_impersonate",
|
|
@@ -74,33 +50,6 @@ class HTTPClient:
|
|
|
74
50
|
requests_per_second: float = 0.5,
|
|
75
51
|
rotate_fingerprint: bool = True,
|
|
76
52
|
) -> None:
|
|
77
|
-
"""Initialize the HTTP client.
|
|
78
|
-
|
|
79
|
-
Args:
|
|
80
|
-
session_token: Perplexity session cookie.
|
|
81
|
-
timeout: Request timeout in seconds.
|
|
82
|
-
impersonate: Browser profile to impersonate.
|
|
83
|
-
max_retries: Maximum retry attempts for failed requests.
|
|
84
|
-
retry_base_delay: Initial delay before first retry.
|
|
85
|
-
retry_max_delay: Maximum delay between retries.
|
|
86
|
-
retry_jitter: Random jitter factor for delays.
|
|
87
|
-
requests_per_second: Rate limit (0 to disable).
|
|
88
|
-
rotate_fingerprint: Whether to rotate browser fingerprint on retries.
|
|
89
|
-
"""
|
|
90
|
-
|
|
91
|
-
logger.debug(
|
|
92
|
-
"Initializing HTTPClient | "
|
|
93
|
-
f"session_token_length={len(session_token)} "
|
|
94
|
-
f"timeout={timeout}s "
|
|
95
|
-
f"impersonate={impersonate} "
|
|
96
|
-
f"max_retries={max_retries} "
|
|
97
|
-
f"retry_base_delay={retry_base_delay}s "
|
|
98
|
-
f"retry_max_delay={retry_max_delay}s "
|
|
99
|
-
f"retry_jitter={retry_jitter} "
|
|
100
|
-
f"requests_per_second={requests_per_second} "
|
|
101
|
-
f"rotate_fingerprint={rotate_fingerprint}"
|
|
102
|
-
)
|
|
103
|
-
|
|
104
53
|
self._session_token = session_token
|
|
105
54
|
self._timeout = timeout
|
|
106
55
|
self._impersonate = impersonate
|
|
@@ -113,29 +62,15 @@ class HTTPClient:
|
|
|
113
62
|
jitter=retry_jitter,
|
|
114
63
|
)
|
|
115
64
|
|
|
116
|
-
logger.debug(
|
|
117
|
-
"RetryConfig created | "
|
|
118
|
-
f"max_retries={self._retry_config.max_retries} "
|
|
119
|
-
f"base_delay={self._retry_config.base_delay}s "
|
|
120
|
-
f"max_delay={self._retry_config.max_delay}s "
|
|
121
|
-
f"jitter={self._retry_config.jitter}"
|
|
122
|
-
)
|
|
123
|
-
|
|
124
65
|
self._rate_limiter: RateLimiter | None = None
|
|
125
|
-
|
|
126
66
|
if requests_per_second > 0:
|
|
127
67
|
self._rate_limiter = RateLimiter(requests_per_second=requests_per_second)
|
|
128
|
-
logger.debug(f"RateLimiter enabled | requests_per_second={requests_per_second}")
|
|
129
|
-
else:
|
|
130
|
-
logger.debug("RateLimiter disabled | requests_per_second=0")
|
|
131
68
|
|
|
132
69
|
self._session = self._create_session(impersonate)
|
|
133
|
-
|
|
70
|
+
logger.debug(f"HTTPClient initialized | impersonate={impersonate}")
|
|
134
71
|
|
|
135
72
|
def _create_session(self, impersonate: str) -> Session:
|
|
136
|
-
"""Create a new HTTP session
|
|
137
|
-
|
|
138
|
-
logger.debug(f"Creating new HTTP session | browser_profile={impersonate}")
|
|
73
|
+
"""Create a new HTTP session."""
|
|
139
74
|
|
|
140
75
|
headers: dict[str, str] = {
|
|
141
76
|
**DEFAULT_HEADERS,
|
|
@@ -144,239 +79,98 @@ class HTTPClient:
|
|
|
144
79
|
}
|
|
145
80
|
cookies: dict[str, str] = {SESSION_COOKIE_NAME: self._session_token}
|
|
146
81
|
|
|
147
|
-
|
|
148
|
-
f"Session configuration | headers_count={len(headers)} cookies_count={len(cookies)} base_url={API_BASE_URL}"
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
session = Session(
|
|
82
|
+
return Session(
|
|
152
83
|
headers=headers,
|
|
153
84
|
cookies=cookies,
|
|
154
85
|
timeout=self._timeout,
|
|
155
86
|
impersonate=impersonate,
|
|
156
87
|
)
|
|
157
88
|
|
|
158
|
-
logger.debug(f"HTTP session created successfully | browser_profile={impersonate}")
|
|
159
|
-
|
|
160
|
-
return session
|
|
161
|
-
|
|
162
89
|
def _rotate_session(self) -> None:
|
|
163
|
-
"""Rotate
|
|
90
|
+
"""Rotate browser fingerprint."""
|
|
164
91
|
|
|
165
92
|
if self._rotate_fingerprint:
|
|
166
|
-
old_profile = self._impersonate
|
|
167
93
|
new_profile = get_random_browser_profile()
|
|
168
|
-
|
|
169
|
-
logger.debug(f"Rotating browser fingerprint | old={old_profile} new={new_profile}")
|
|
170
|
-
log_fingerprint_rotation(old_profile, new_profile)
|
|
94
|
+
logger.debug(f"Rotating fingerprint | old={self._impersonate} new={new_profile}")
|
|
171
95
|
|
|
172
96
|
with suppress(Exception):
|
|
173
97
|
self._session.close()
|
|
174
|
-
logger.debug("Previous session closed")
|
|
175
98
|
|
|
176
99
|
self._impersonate = new_profile
|
|
177
100
|
self._session = self._create_session(new_profile)
|
|
178
101
|
|
|
179
|
-
logger.debug(f"Browser fingerprint rotated successfully | new_profile={new_profile}")
|
|
180
|
-
|
|
181
102
|
def _on_retry(self, retry_state: RetryCallState) -> None:
|
|
182
|
-
"""
|
|
183
|
-
Callback executed before each retry attempt.
|
|
184
|
-
"""
|
|
103
|
+
"""Callback before each retry attempt."""
|
|
185
104
|
|
|
186
105
|
attempt = retry_state.attempt_number
|
|
187
106
|
exception = retry_state.outcome.exception() if retry_state.outcome else None
|
|
188
107
|
wait_time = retry_state.next_action.sleep if retry_state.next_action else 0
|
|
189
108
|
|
|
190
|
-
logger.warning(
|
|
191
|
-
f"Retry triggered | "
|
|
192
|
-
f"attempt={attempt}/{self._retry_config.max_retries} "
|
|
193
|
-
f"exception_type={type(exception).__name__ if exception else 'None'} "
|
|
194
|
-
f"exception_message={str(exception) if exception else 'None'} "
|
|
195
|
-
f"wait_seconds={wait_time:.2f}"
|
|
196
|
-
)
|
|
197
109
|
log_retry(attempt, self._retry_config.max_retries, exception, wait_time)
|
|
198
110
|
|
|
199
|
-
# Rotate fingerprint on retry to avoid detection
|
|
200
111
|
if self._rotate_fingerprint:
|
|
201
|
-
logger.debug("Rotating fingerprint due to retry")
|
|
202
112
|
self._rotate_session()
|
|
203
113
|
|
|
204
|
-
def _check_cloudflare(self, response: CurlResponse) -> None:
|
|
205
|
-
"""Check if response is a Cloudflare challenge and raise if so."""
|
|
206
|
-
|
|
207
|
-
logger.debug(f"Checking for Cloudflare challenge | status_code={response.status_code}")
|
|
208
|
-
|
|
209
|
-
if is_cloudflare_status(response.status_code):
|
|
210
|
-
logger.debug(f"Status code indicates potential Cloudflare block | status_code={response.status_code}")
|
|
211
|
-
|
|
212
|
-
try:
|
|
213
|
-
body = response.text
|
|
214
|
-
headers = dict(response.headers) if hasattr(response, "headers") else None
|
|
215
|
-
|
|
216
|
-
logger.debug(
|
|
217
|
-
f"Analyzing response for Cloudflare markers | "
|
|
218
|
-
f"body_length={len(body)} "
|
|
219
|
-
f"headers_count={len(headers) if headers else 0}"
|
|
220
|
-
)
|
|
221
|
-
|
|
222
|
-
if is_cloudflare_challenge(body, headers):
|
|
223
|
-
# Find which markers were detected
|
|
224
|
-
markers_found = [m for m in CLOUDFLARE_MARKERS if m.lower() in body.lower()]
|
|
225
|
-
logger.warning(
|
|
226
|
-
f"Cloudflare challenge detected | "
|
|
227
|
-
f"status_code={response.status_code} "
|
|
228
|
-
f"markers_found={markers_found}"
|
|
229
|
-
)
|
|
230
|
-
log_cloudflare_detected(response.status_code, markers_found)
|
|
231
|
-
raise CloudflareBlockError()
|
|
232
|
-
else:
|
|
233
|
-
logger.debug("No Cloudflare markers found in response")
|
|
234
|
-
except CloudflareBlockError as error:
|
|
235
|
-
raise error
|
|
236
|
-
except Exception as error:
|
|
237
|
-
logger.debug(f"Error checking Cloudflare response | error={error}")
|
|
238
|
-
|
|
239
114
|
def _handle_error(self, error: Exception, context: str = "") -> None:
|
|
240
|
-
"""Handle HTTP errors and raise appropriate
|
|
241
|
-
|
|
242
|
-
Args:
|
|
243
|
-
error: The original exception.
|
|
244
|
-
context: Additional context for the error message.
|
|
245
|
-
|
|
246
|
-
Raises:
|
|
247
|
-
AuthenticationError: If status code is 403 (not Cloudflare).
|
|
248
|
-
RateLimitError: If status code is 429.
|
|
249
|
-
CloudflareBlockError: If Cloudflare challenge detected.
|
|
250
|
-
PerplexityError: For other HTTP errors.
|
|
251
|
-
"""
|
|
252
|
-
|
|
253
|
-
logger.debug(f"Handling error | context={context} error_type={type(error).__name__} error={error}")
|
|
254
|
-
log_error(error, context)
|
|
115
|
+
"""Handle HTTP errors and raise appropriate exceptions."""
|
|
255
116
|
|
|
256
117
|
status_code = None
|
|
118
|
+
response_body = None
|
|
119
|
+
url = None
|
|
257
120
|
response = getattr(error, "response", None)
|
|
258
121
|
|
|
259
122
|
if response is not None:
|
|
260
123
|
status_code = getattr(response, "status_code", None)
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
try:
|
|
268
|
-
body = response.text if hasattr(response, "text") else ""
|
|
269
|
-
headers = dict(response.headers) if hasattr(response, "headers") else None
|
|
270
|
-
|
|
271
|
-
if is_cloudflare_challenge(body, headers):
|
|
272
|
-
markers_found = [m for m in CLOUDFLARE_MARKERS if m.lower() in body.lower()]
|
|
273
|
-
logger.warning(
|
|
274
|
-
f"Cloudflare challenge confirmed in error response | "
|
|
275
|
-
f"status_code={status_code} "
|
|
276
|
-
f"markers={markers_found}"
|
|
277
|
-
)
|
|
278
|
-
log_cloudflare_detected(status_code, markers_found)
|
|
279
|
-
raise CloudflareBlockError() from error
|
|
280
|
-
except CloudflareBlockError:
|
|
281
|
-
raise
|
|
124
|
+
url = getattr(response, "url", None)
|
|
125
|
+
try:
|
|
126
|
+
response_body = response.text if hasattr(response, "text") else None
|
|
127
|
+
except Exception:
|
|
128
|
+
response_body = None
|
|
282
129
|
|
|
283
130
|
if status_code == 403:
|
|
284
|
-
logger.error(f"Authentication error | status_code=403 context={context}")
|
|
285
131
|
raise AuthenticationError() from error
|
|
286
132
|
elif status_code == 429:
|
|
287
|
-
logger.warning(f"Rate limit exceeded | status_code=429 context={context}")
|
|
288
133
|
raise RateLimitError() from error
|
|
289
134
|
elif status_code is not None:
|
|
290
|
-
|
|
291
|
-
|
|
135
|
+
raise HTTPError(
|
|
136
|
+
f"{context}HTTP {status_code}: {error!s}",
|
|
137
|
+
status_code=status_code,
|
|
138
|
+
url=str(url) if url else None,
|
|
139
|
+
response_body=response_body,
|
|
140
|
+
) from error
|
|
292
141
|
else:
|
|
293
|
-
logger.error(f"Unknown error | context={context} error={error}")
|
|
294
142
|
raise PerplexityError(f"{context}{error!s}") from error
|
|
295
143
|
|
|
296
144
|
def _throttle(self) -> None:
|
|
297
|
-
"""Apply rate limiting
|
|
145
|
+
"""Apply rate limiting."""
|
|
298
146
|
|
|
299
147
|
if self._rate_limiter:
|
|
300
|
-
start_time = monotonic()
|
|
301
|
-
logger.debug("Acquiring rate limiter")
|
|
302
148
|
self._rate_limiter.acquire()
|
|
303
|
-
wait_time = monotonic() - start_time
|
|
304
|
-
|
|
305
|
-
if wait_time > 0.001: # Only log if we actually waited
|
|
306
|
-
logger.debug(f"Rate limiter throttled request | wait_seconds={wait_time:.3f}")
|
|
307
|
-
log_rate_limit(wait_time)
|
|
308
149
|
|
|
309
150
|
def get(self, endpoint: str, params: dict[str, Any] | None = None) -> CurlResponse:
|
|
310
|
-
"""Make a GET request with retry and rate limiting.
|
|
311
|
-
|
|
312
|
-
Args:
|
|
313
|
-
endpoint: The API endpoint (relative to BASE_URL).
|
|
314
|
-
params: Optional query parameters.
|
|
315
|
-
|
|
316
|
-
Returns:
|
|
317
|
-
The response object.
|
|
318
|
-
|
|
319
|
-
Raises:
|
|
320
|
-
AuthenticationError: If session token is invalid.
|
|
321
|
-
RateLimitError: If rate limit is exceeded.
|
|
322
|
-
CloudflareBlockError: If Cloudflare blocks the request.
|
|
323
|
-
PerplexityError: For other errors.
|
|
324
|
-
"""
|
|
151
|
+
"""Make a GET request with retry and rate limiting."""
|
|
325
152
|
|
|
326
153
|
url = f"{API_BASE_URL}{endpoint}" if endpoint.startswith("/") else endpoint
|
|
327
|
-
|
|
328
|
-
logger.debug(f"GET request initiated | endpoint={endpoint} url={url} params={params}")
|
|
329
154
|
log_request("GET", url, params=params)
|
|
330
155
|
|
|
331
|
-
|
|
332
|
-
retryable_exceptions = (RateLimitError, CloudflareBlockError, ConnectionError, TimeoutError)
|
|
156
|
+
retryable_exceptions = (RateLimitError, ConnectionError, TimeoutError)
|
|
333
157
|
|
|
334
158
|
@create_retry_decorator(self._retry_config, retryable_exceptions, self._on_retry)
|
|
335
159
|
def _do_get() -> CurlResponse:
|
|
336
160
|
self._throttle()
|
|
337
|
-
|
|
338
161
|
request_start = monotonic()
|
|
339
|
-
logger.debug(f"Executing GET request | url={url}")
|
|
340
162
|
|
|
341
163
|
try:
|
|
342
164
|
response = self._session.get(url, params=params)
|
|
343
165
|
elapsed_ms = (monotonic() - request_start) * 1000
|
|
166
|
+
log_response("GET", url, response.status_code, elapsed_ms=elapsed_ms)
|
|
344
167
|
|
|
345
|
-
logger.debug(
|
|
346
|
-
f"GET response received | "
|
|
347
|
-
f"status_code={response.status_code} "
|
|
348
|
-
f"elapsed_ms={elapsed_ms:.2f} "
|
|
349
|
-
f"content_length={len(response.content) if hasattr(response, 'content') else 'unknown'}"
|
|
350
|
-
)
|
|
351
|
-
log_response(
|
|
352
|
-
"GET",
|
|
353
|
-
url,
|
|
354
|
-
response.status_code,
|
|
355
|
-
elapsed_ms=elapsed_ms,
|
|
356
|
-
content_length=len(response.content) if hasattr(response, "content") else None,
|
|
357
|
-
)
|
|
358
|
-
|
|
359
|
-
self._check_cloudflare(response)
|
|
360
168
|
response.raise_for_status()
|
|
361
|
-
|
|
362
|
-
logger.debug(f"GET request successful | endpoint={endpoint}")
|
|
363
169
|
return response
|
|
364
170
|
except Exception as error:
|
|
365
|
-
|
|
366
|
-
logger.debug(
|
|
367
|
-
f"GET request failed | "
|
|
368
|
-
f"endpoint={endpoint} "
|
|
369
|
-
f"elapsed_ms={elapsed_ms:.2f} "
|
|
370
|
-
f"error_type={type(error).__name__} "
|
|
371
|
-
f"error={error}"
|
|
372
|
-
)
|
|
373
|
-
|
|
374
|
-
if isinstance(error, (CloudflareBlockError, RateLimitError)):
|
|
171
|
+
if isinstance(error, RateLimitError):
|
|
375
172
|
raise
|
|
376
|
-
|
|
377
173
|
self._handle_error(error, f"GET {endpoint}: ")
|
|
378
|
-
|
|
379
|
-
# Never reached but satisfies type checker
|
|
380
174
|
raise error
|
|
381
175
|
|
|
382
176
|
return _do_get()
|
|
@@ -387,147 +181,60 @@ class HTTPClient:
|
|
|
387
181
|
json: dict[str, Any] | None = None,
|
|
388
182
|
stream: bool = False,
|
|
389
183
|
) -> CurlResponse:
|
|
390
|
-
"""Make a POST request with retry and rate limiting.
|
|
391
|
-
|
|
392
|
-
Args:
|
|
393
|
-
endpoint: The API endpoint (relative to BASE_URL).
|
|
394
|
-
json: JSON data to send.
|
|
395
|
-
stream: Whether to stream the response.
|
|
396
|
-
|
|
397
|
-
Returns:
|
|
398
|
-
The response object.
|
|
399
|
-
|
|
400
|
-
Raises:
|
|
401
|
-
AuthenticationError: If session token is invalid.
|
|
402
|
-
RateLimitError: If rate limit is exceeded.
|
|
403
|
-
CloudflareBlockError: If Cloudflare blocks the request.
|
|
404
|
-
PerplexityError: For other errors.
|
|
405
|
-
"""
|
|
184
|
+
"""Make a POST request with retry and rate limiting."""
|
|
406
185
|
|
|
407
186
|
url = f"{API_BASE_URL}{endpoint}" if endpoint.startswith("/") else endpoint
|
|
408
|
-
body_size
|
|
409
|
-
|
|
410
|
-
logger.debug(f"POST request initiated | endpoint={endpoint} url={url} stream={stream} body_size={body_size}")
|
|
411
|
-
log_request("POST", url, body_size=body_size)
|
|
187
|
+
log_request("POST", url, body_size=len(str(json)) if json else 0)
|
|
412
188
|
|
|
413
|
-
retryable_exceptions = (RateLimitError,
|
|
189
|
+
retryable_exceptions = (RateLimitError, ConnectionError, TimeoutError)
|
|
414
190
|
|
|
415
191
|
@create_retry_decorator(self._retry_config, retryable_exceptions, self._on_retry)
|
|
416
192
|
def _do_post() -> CurlResponse:
|
|
417
193
|
self._throttle()
|
|
418
|
-
|
|
419
194
|
request_start = monotonic()
|
|
420
|
-
logger.debug(f"Executing POST request | url={url} stream={stream}")
|
|
421
195
|
|
|
422
196
|
try:
|
|
423
197
|
response = self._session.post(url, json=json, stream=stream)
|
|
424
198
|
elapsed_ms = (monotonic() - request_start) * 1000
|
|
425
|
-
|
|
426
|
-
logger.debug(
|
|
427
|
-
f"POST response received | "
|
|
428
|
-
f"status_code={response.status_code} "
|
|
429
|
-
f"elapsed_ms={elapsed_ms:.2f} "
|
|
430
|
-
f"stream={stream}"
|
|
431
|
-
)
|
|
432
199
|
log_response("POST", url, response.status_code, elapsed_ms=elapsed_ms)
|
|
433
200
|
|
|
434
|
-
self._check_cloudflare(response)
|
|
435
201
|
response.raise_for_status()
|
|
436
|
-
|
|
437
|
-
logger.debug(f"POST request successful | endpoint={endpoint}")
|
|
438
|
-
|
|
439
202
|
return response
|
|
440
203
|
except Exception as error:
|
|
441
|
-
|
|
442
|
-
logger.debug(
|
|
443
|
-
f"POST request failed | "
|
|
444
|
-
f"endpoint={endpoint} "
|
|
445
|
-
f"elapsed_ms={elapsed_ms:.2f} "
|
|
446
|
-
f"error_type={type(error).__name__} "
|
|
447
|
-
f"error={error}"
|
|
448
|
-
)
|
|
449
|
-
|
|
450
|
-
if isinstance(error, (CloudflareBlockError, RateLimitError)):
|
|
204
|
+
if isinstance(error, RateLimitError):
|
|
451
205
|
raise error
|
|
452
|
-
|
|
453
206
|
self._handle_error(error, f"POST {endpoint}: ")
|
|
454
|
-
|
|
455
|
-
# Never reached but satisfies type checker
|
|
456
207
|
raise error
|
|
457
208
|
|
|
458
209
|
return _do_post()
|
|
459
210
|
|
|
460
211
|
def stream_lines(self, endpoint: str, json: dict[str, Any]) -> Generator[bytes, None, None]:
|
|
461
|
-
"""Make a streaming POST request and yield lines.
|
|
462
|
-
|
|
463
|
-
Args:
|
|
464
|
-
endpoint: The API endpoint.
|
|
465
|
-
json: JSON data to send.
|
|
466
|
-
|
|
467
|
-
Yields:
|
|
468
|
-
Response lines as bytes.
|
|
469
|
-
|
|
470
|
-
Raises:
|
|
471
|
-
AuthenticationError: If session token is invalid.
|
|
472
|
-
RateLimitError: If rate limit is exceeded.
|
|
473
|
-
CloudflareBlockError: If Cloudflare blocks the request.
|
|
474
|
-
PerplexityError: For other errors.
|
|
475
|
-
"""
|
|
476
|
-
|
|
477
|
-
logger.debug(f"Starting streaming request | endpoint={endpoint}")
|
|
212
|
+
"""Make a streaming POST request and yield lines."""
|
|
478
213
|
|
|
479
214
|
response = self.post(endpoint, json=json, stream=True)
|
|
480
|
-
lines_count = 0
|
|
481
215
|
|
|
482
216
|
try:
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
for line in response.iter_lines():
|
|
486
|
-
lines_count += 1
|
|
487
|
-
yield line
|
|
488
|
-
|
|
489
|
-
logger.debug(f"Stream completed | total_lines={lines_count}")
|
|
217
|
+
yield from response.iter_lines()
|
|
490
218
|
finally:
|
|
491
219
|
response.close()
|
|
492
|
-
logger.debug(f"Stream response closed | lines_yielded={lines_count}")
|
|
493
220
|
|
|
494
221
|
def init_search(self, query: str) -> None:
|
|
495
|
-
"""Initialize a search session.
|
|
496
|
-
|
|
497
|
-
This is required before making a prompt request.
|
|
222
|
+
"""Initialize a search session (required before prompts)."""
|
|
498
223
|
|
|
499
|
-
Args:
|
|
500
|
-
query: The search query.
|
|
501
|
-
"""
|
|
502
|
-
|
|
503
|
-
logger.debug(f"Initializing search session | query_length={len(query)} query_preview={query[:50]}...")
|
|
504
224
|
self.get(ENDPOINT_SEARCH_INIT, params={"q": query})
|
|
505
|
-
logger.debug("Search session initialized successfully")
|
|
506
225
|
|
|
507
226
|
def stream_ask(self, payload: dict[str, Any]) -> Generator[bytes, None, None]:
|
|
508
|
-
"""Stream a prompt request to the ask endpoint.
|
|
509
|
-
|
|
510
|
-
Args:
|
|
511
|
-
payload: The request payload.
|
|
512
|
-
|
|
513
|
-
Yields:
|
|
514
|
-
Response lines as bytes.
|
|
515
|
-
"""
|
|
227
|
+
"""Stream a prompt request to the ask endpoint."""
|
|
516
228
|
|
|
517
|
-
logger.debug(f"Streaming ask request | payload_keys={list(payload.keys())}")
|
|
518
229
|
yield from self.stream_lines(ENDPOINT_ASK, json=payload)
|
|
519
230
|
|
|
520
231
|
def close(self) -> None:
|
|
521
232
|
"""Close the HTTP session."""
|
|
522
233
|
|
|
523
|
-
logger.debug("Closing HTTP client")
|
|
524
234
|
self._session.close()
|
|
525
|
-
logger.debug("HTTP client closed successfully")
|
|
526
235
|
|
|
527
236
|
def __enter__(self) -> HTTPClient:
|
|
528
|
-
logger.debug("Entering HTTPClient context manager")
|
|
529
237
|
return self
|
|
530
238
|
|
|
531
239
|
def __exit__(self, *args: Any) -> None:
|
|
532
|
-
logger.debug("Exiting HTTPClient context manager")
|
|
533
240
|
self.close()
|