esuls 0.1.10__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {esuls-0.1.10/src/esuls.egg-info → esuls-0.1.12}/PKG-INFO +2 -2
- {esuls-0.1.10 → esuls-0.1.12}/pyproject.toml +2 -2
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls/db_cli.py +15 -6
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls/request_cli.py +196 -125
- {esuls-0.1.10 → esuls-0.1.12/src/esuls.egg-info}/PKG-INFO +2 -2
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls.egg-info/requires.txt +1 -1
- {esuls-0.1.10 → esuls-0.1.12}/LICENSE +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/README.md +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/setup.cfg +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls/__init__.py +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls/download_icon.py +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls/utils.py +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls.egg-info/SOURCES.txt +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls.egg-info/dependency_links.txt +0 -0
- {esuls-0.1.10 → esuls-0.1.12}/src/esuls.egg-info/top_level.txt +0 -0
{esuls-0.1.10/src/esuls.egg-info → esuls-0.1.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: esuls
-Version: 0.1.10
+Version: 0.1.12
 Summary: Utility library for async database operations, HTTP requests, and parallel execution
 Author-email: IperGiove <ipergiove@gmail.com>
 License: MIT
@@ -15,7 +15,7 @@ License-File: LICENSE
 Requires-Dist: aiosqlite>=0.21.0
 Requires-Dist: curl-cffi>=0.13.0
 Requires-Dist: fake-useragent>=2.2.0
-Requires-Dist: httpx>=0.28.1
+Requires-Dist: httpx[http2]>=0.28.1
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: pillow>=12.0.0
 Requires-Dist: python-magic>=0.4.27
{esuls-0.1.10 → esuls-0.1.12}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "esuls"
-version = "0.1.10"
+version = "0.1.12"
 description = "Utility library for async database operations, HTTP requests, and parallel execution"
 readme = "README.md"
 requires-python = ">=3.14"
@@ -23,7 +23,7 @@ dependencies = [
     "aiosqlite>=0.21.0",
     "curl-cffi>=0.13.0",
     "fake-useragent>=2.2.0",
-    "httpx>=0.28.1",
+    "httpx[http2]>=0.28.1",
     "loguru>=0.7.3",
     "pillow>=12.0.0",
     "python-magic>=0.4.27",
{esuls-0.1.10 → esuls-0.1.12}/src/esuls/db_cli.py
@@ -23,25 +23,34 @@ class BaseModel:
 
 class AsyncDB(Generic[SchemaType]):
     """High-performance async SQLite with dataclass schema and reliable connection handling."""
-
+
     OPERATOR_MAP = {
-        'gt': '>', 'lt': '<', 'gte': '>=', 'lte': '<=',
+        'gt': '>', 'lt': '<', 'gte': '>=', 'lte': '<=',
         'neq': '!=', 'like': 'LIKE', 'in': 'IN', 'eq': '='
     }
-
+
+    # Shared write locks per database file (class-level)
+    _db_locks: dict[str, asyncio.Lock] = {}
+
     def __init__(self, db_path: Union[str, Path], table_name: str, schema_class: Type[SchemaType]):
         """Initialize AsyncDB with a path and schema dataclass."""
         if not is_dataclass(schema_class):
             raise TypeError(f"Schema must be a dataclass, got {schema_class}")
-
+
         self.db_path = Path(db_path).resolve()
         self.schema_class = schema_class
         self.table_name = table_name
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
-
+
         # Make schema initialization unique per instance
         self._db_key = f"{str(self.db_path)}:{self.table_name}:{self.schema_class.__name__}"
-
+
+        # Use shared lock per database file (not per instance)
+        db_path_str = str(self.db_path)
+        if db_path_str not in AsyncDB._db_locks:
+            AsyncDB._db_locks[db_path_str] = asyncio.Lock()
+        self._write_lock = AsyncDB._db_locks[db_path_str]
+
         self._type_hints = get_type_hints(schema_class)
 
         # Use a class-level set to track initialized schemas
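Note: with this change, two AsyncDB instances opened on the same SQLite file now share one asyncio.Lock instead of each holding a private one, so their writes are serialized. A minimal sketch of the pattern in isolation (the class and names below are illustrative, not part of esuls):

    import asyncio
    from pathlib import Path

    class Writer:
        # One lock per resolved database path, shared across all instances.
        _locks: dict[str, asyncio.Lock] = {}

        def __init__(self, db_path: str):
            key = str(Path(db_path).resolve())
            self._write_lock = Writer._locks.setdefault(key, asyncio.Lock())

    async def demo():
        a, b = Writer("data/app.db"), Writer("data/app.db")
        assert a._write_lock is b._write_lock  # same file -> same lock
        async with a._write_lock:
            pass  # SQLite allows one writer; serializing avoids "database is locked"

    asyncio.run(demo())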
{esuls-0.1.10 → esuls-0.1.12}/src/esuls/request_cli.py
@@ -1,24 +1,28 @@
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import TypeAlias, Union, Optional, Dict, Any, TypeVar, AsyncContextManager, Literal
+from typing import TypeAlias, Union, Optional, Dict, Any, AsyncContextManager, Literal
+from urllib.parse import urlparse
 import asyncio
 import json
+import random
 import ssl
 from loguru import logger
 import httpx
 from fake_useragent import UserAgent
 from curl_cffi.requests import AsyncSession
 
-# Type definitions
-ResponseT = TypeVar('ResponseT', bound='Response')
+# Type definitions
 JsonType: TypeAlias = Dict[str, Any]
 FileData: TypeAlias = tuple[str, Union[bytes, str], str]
 Headers: TypeAlias = Dict[str, str]
-HttpMethod: TypeAlias = Literal["GET", "POST",
-                                "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"]
+HttpMethod: TypeAlias = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"]
 
-#
-
+# Constants
+_FALLBACK_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+_SUCCESS_STATUS_RANGE = range(200, 300)
+
+# Global connection pool per domain to prevent "Too many open files" error
+_domain_clients: Dict[str, httpx.AsyncClient] = {}
 _client_lock = asyncio.Lock()
 
 # Global cached UserAgent to prevent file descriptor exhaustion
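Note: the dependency bump from httpx to httpx[http2] pairs with the new http2=True behavior below: httpx only speaks HTTP/2 when the optional h2 package (pulled in by the extra) is installed, and constructing a client with http2=True without it raises ImportError. A quick check:

    import httpx

    try:
        client = httpx.AsyncClient(http2=True)
    except ImportError as exc:
        # Raised when only plain `httpx` is installed, without the http2 extra.
        print(f"HTTP/2 support missing: {exc}")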
@@ -27,24 +31,21 @@ _user_agent_lock = asyncio.Lock()
 
 
 async def _get_user_agent() -> str:
-    """Get or create cached UserAgent instance to avoid file descriptor leaks"""
+    """Get or create cached UserAgent instance to avoid file descriptor leaks."""
     global _user_agent
     async with _user_agent_lock:
         if _user_agent is None:
             try:
                 _user_agent = UserAgent()
-            except
-
-
-                    f"Failed to initialize UserAgent, using fallback: {e}")
-                return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+            except (OSError, IOError) as e:
+                logger.warning(f"Failed to initialize UserAgent, using fallback: {e}")
+                return _FALLBACK_USER_AGENT
 
     try:
         return _user_agent.random
-    except
-        logger.warning(
-
-        return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    except (AttributeError, IndexError) as e:
+        logger.warning(f"Failed to get random user agent, using fallback: {e}")
+        return _FALLBACK_USER_AGENT
 
 
 @lru_cache(maxsize=1)
@@ -58,22 +59,37 @@ def _create_optimized_ssl_context() -> ssl.SSLContext:
     return ctx
 
 
-
-    """
-
+def _extract_domain(url: str) -> str:
+    """Extract domain from URL for connection pooling."""
+    parsed = urlparse(url)
+    return f"{parsed.scheme}://{parsed.netloc}"
+
+
+def _apply_jitter(delay: float, jitter: float) -> float:
+    """Add random jitter to delay to prevent thundering herd."""
+    if jitter <= 0:
+        return delay
+    return delay + random.uniform(0, delay * jitter)
+
+
+async def _get_domain_client(url: str, http2: bool = True) -> httpx.AsyncClient:
+    """Get or create HTTP client for a specific domain with connection pooling"""
+    domain = _extract_domain(url)
+    cache_key = f"{domain}:{'h2' if http2 else 'h1'}"
     async with _client_lock:
-        if
-
+        if cache_key not in _domain_clients or _domain_clients[cache_key].is_closed:
+            _domain_clients[cache_key] = httpx.AsyncClient(
                 verify=_create_optimized_ssl_context(),
                 timeout=60,
                 follow_redirects=True,
+                http2=http2,
                 limits=httpx.Limits(
                     max_connections=20,
                     max_keepalive_connections=10,
                     keepalive_expiry=30.0
                 )
             )
-        return
+        return _domain_clients[cache_key]
 
 
 @dataclass(frozen=True)
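Note: the cache key combines the domain from _extract_domain with the protocol flag, so HTTP/1.1 and HTTP/2 clients for the same host are pooled separately. A small standalone illustration of the keying and of what _apply_jitter does to a retry delay:

    import random
    from urllib.parse import urlparse

    url = "https://api.example.com/v1/users?page=2"   # illustrative URL
    parsed = urlparse(url)
    domain = f"{parsed.scheme}://{parsed.netloc}"     # "https://api.example.com"
    print(f"{domain}:h2", f"{domain}:h1")             # the two possible cache keys

    # A 10s delay with jitter=0.1 becomes a uniform draw from [10.0, 11.0),
    # so many concurrent callers don't all retry at the same instant.
    delay, jitter = 10.0, 0.1
    print(delay + random.uniform(0, delay * jitter))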
@@ -83,6 +99,7 @@ class Response:
     headers: Headers
     _content: bytes
     text: str
+    url: str = ""  # final URL after redirects
 
     @property
     def content(self) -> bytes:
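Note: with redirects being followed, the status line alone no longer tells you where the body came from; the new url field records the final URL. A hedged usage sketch, assuming the esuls.request_cli import path:

    import asyncio
    from esuls.request_cli import make_request  # assumed import path

    async def main():
        resp = await make_request("http://example.com", method="GET")
        if resp is not None:
            # resp.url holds the final URL after any redirects; it defaults
            # to "" for Response objects built without it.
            print(resp.status_code, resp.url)

    asyncio.run(main())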
@@ -93,22 +110,12 @@
 
 
 class AsyncRequest(AsyncContextManager['AsyncRequest']):
+    """Context manager for HTTP requests with automatic client lifecycle."""
+
     def __init__(self) -> None:
-
-        self._ssl_context = self._create_optimized_ssl_context()
+        self._ssl_context = _create_optimized_ssl_context()
         self._client: Optional[httpx.AsyncClient] = None
 
-    @staticmethod
-    @lru_cache(maxsize=1)
-    def _create_optimized_ssl_context() -> ssl.SSLContext:
-        """Create an SSL context optimized for performance"""
-        ctx = ssl._create_default_https_context()
-        ctx.check_hostname = False
-        ctx.verify_mode = ssl.CERT_NONE
-        ctx.set_alpn_protocols(['http/1.1'])
-        ctx.post_handshake_auth = True
-        return ctx
-
     async def request(
         self,
         url: str,
@@ -171,11 +178,12 @@ class AsyncRequest(AsyncContextManager['AsyncRequest']):
                     status_code=httpx_response.status_code,
                     headers=dict(httpx_response.headers),
                     _content=httpx_response.content,
-                    text=httpx_response.text
+                    text=httpx_response.text,
+                    url=str(httpx_response.url),
                 )
 
                 # Handle unsuccessful status codes
-                if response.status_code not in
+                if response.status_code not in _SUCCESS_STATUS_RANGE:
                     logger.warning(
                         f"Request: {response.status_code}\n"
                         f"Attempt {attempt + 1}/{max_attempt}\n"
@@ -185,9 +193,7 @@ class AsyncRequest(AsyncContextManager['AsyncRequest']):
                         f"Request data: {json_data}\n"
                     )
                 if skip_response:
-                    patterns = [skip_response] if isinstance(
-                        skip_response, str) else skip_response
-                    # Skip if patterns list is empty
+                    patterns = [skip_response] if isinstance(skip_response, str) else skip_response
                     if patterns and any(pattern in response.text for pattern in patterns if pattern):
                         return response if force_response else None
 
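Note: behavior is unchanged by this reflow; skip_response still accepts a single string or a list, empty patterns are ignored, and any match ends the retry loop. The predicate in isolation:

    from typing import Optional, Union

    def should_skip(text: str, skip_response: Optional[Union[str, list[str]]]) -> bool:
        # Mirrors the diff: normalize str -> [str], drop falsy patterns,
        # skip when any remaining pattern occurs in the response body.
        if not skip_response:
            return False
        patterns = [skip_response] if isinstance(skip_response, str) else skip_response
        return any(pattern in text for pattern in patterns if pattern)

    assert should_skip("error: captcha required", "captcha")
    assert not should_skip("all good", ["captcha", ""])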
@@ -196,7 +202,7 @@ class AsyncRequest(AsyncContextManager['AsyncRequest']):
 
                 # Exponential backoff for 429 (rate limit)
                 if response.status_code == 429:
-                    backoff = min(120, exception_sleep * (2 ** attempt))
+                    backoff = min(120.0, exception_sleep * (2 ** attempt))
                     logger.info(f"Rate limited (429), backing off for {backoff:.1f}s")
                     await asyncio.sleep(backoff)
                 else:
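Note: the 120 to 120.0 change only keeps the backoff a float. With the default exception_sleep of 10, the 429 backoff doubles each attempt and pins at the cap from attempt 4 onward:

    exception_sleep = 10.0
    for attempt in range(6):
        backoff = min(120.0, exception_sleep * (2 ** attempt))
        print(attempt, backoff)  # 10.0, 20.0, 40.0, 80.0, 120.0, 120.0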
@@ -220,9 +226,8 @@ class AsyncRequest(AsyncContextManager['AsyncRequest']):
 
                 return response
 
-            except
-                logger.error(
-                    f"Request error: {e} - {url} - attempt {attempt + 1}/{max_attempt}")
+            except (httpx.HTTPError, OSError) as e:
+                logger.error(f"Request error: {e} - {url} - attempt {attempt + 1}/{max_attempt}")
                 if attempt + 1 == max_attempt:
                     return None
                 await asyncio.sleep(exception_sleep)
@@ -241,6 +246,33 @@ class AsyncRequest(AsyncContextManager['AsyncRequest']):
         self._client = None
 
 
+async def close_shared_client() -> None:
+    """Close all domain HTTP clients to release resources"""
+    global _domain_clients
+    async with _client_lock:
+        for domain, client in list(_domain_clients.items()):
+            if not client.is_closed:
+                await client.aclose()
+        _domain_clients.clear()
+
+
+async def close_domain_client(url: str, http2: Optional[bool] = None) -> None:
+    """Close HTTP client for a specific domain. If http2 is None, closes both h1 and h2 clients."""
+    domain = _extract_domain(url)
+    async with _client_lock:
+        keys_to_close = []
+        if http2 is None:
+            keys_to_close = [f"{domain}:h1", f"{domain}:h2"]
+        else:
+            keys_to_close = [f"{domain}:{'h2' if http2 else 'h1'}"]
+
+        for key in keys_to_close:
+            if key in _domain_clients:
+                if not _domain_clients[key].is_closed:
+                    await _domain_clients[key].aclose()
+                del _domain_clients[key]
+
+
 async def make_request(
     url: str,
     method: HttpMethod = "GET",
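Note: the pooled clients added above live at module scope, so nothing closes them per-request; these two functions are the cleanup hooks. A hedged shutdown sketch, assuming the esuls.request_cli import path:

    import asyncio
    from esuls.request_cli import close_domain_client, close_shared_client, make_request

    async def main():
        await make_request("https://example.com")
        # Drop one host's pooled clients (http2=None closes both :h1 and :h2)...
        await close_domain_client("https://example.com")
        # ...or close every pooled client at application shutdown.
        await close_shared_client()

    asyncio.run(main())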
@@ -250,6 +282,7 @@ async def make_request(
     json_data: Optional[JsonType] = None,
     files: Optional[Dict[str, FileData]] = None,
     data: Optional[Union[str, bytes]] = None,
+    form_data: Optional[Dict[str, Any]] = None,
     proxy: Optional[str] = None,
     timeout_request: int = 60,
     max_attempt: int = 10,
@@ -259,102 +292,141 @@ async def make_request(
     skip_response: Optional[Union[str, list[str]]] = None,
     exception_sleep: float = 10,
     add_user_agent: bool = False,
+    follow_redirects: bool = True,
+    verify_ssl: bool = False,
+    no_retry_status_codes: Optional[list[int]] = None,
+    log_errors: bool = True,
+    http2: bool = True,
+    jitter: float = 0.1,
 ) -> Optional[Response]:
-    """
-    # Use
-
+    """Execute HTTP requests using per-domain client for connection reuse."""
+    # Use dedicated client if proxy is specified, otherwise use per-domain pooled client
+    own_client = None
+    if proxy:
+        ssl_context = _create_optimized_ssl_context() if not verify_ssl else True
+        own_client = httpx.AsyncClient(
+            verify=ssl_context,
+            timeout=timeout_request,
+            follow_redirects=follow_redirects,
+            proxy=proxy,
+            http2=http2,
+            limits=httpx.Limits(
+                max_connections=20,
+                max_keepalive_connections=10,
+                keepalive_expiry=30.0
+            )
+        )
+        client = own_client
+    else:
+        client = await _get_domain_client(url, http2=http2)
 
     # Prepare headers
-    request_headers =
+    request_headers = headers.copy() if headers else {}
     if add_user_agent:
         request_headers["User-Agent"] = await _get_user_agent()
 
     # Prepare files for multipart/form-data
-    files_dict =
-
-
-
-            files_dict[field_name] = (filename, content, content_type)
+    files_dict = {
+        field_name: (filename, content, content_type)
+        for field_name, (filename, content, content_type) in files.items()
+    } if files else None
 
+    # Filter empty params
     if params:
         params = {k: v for k, v in params.items() if v}
 
-
-
-            # Execute request with all necessary parameters
-            httpx_response = await client.request(
-                method=method,
-                url=url,
-                params=params,
-                json=json_data,
-                files=files_dict,
-                headers=request_headers,
-                timeout=timeout_request,
-                data=data,
-            )
+    # Determine data payload: form_data takes precedence over raw data
+    request_data = form_data if form_data else data
 
-
-
-
-
-
-
-
+    try:
+        for attempt in range(max_attempt):
+            try:
+                # Execute request with all necessary parameters
+                httpx_response = await client.request(
+                    method=method,
+                    url=url,
+                    params=params,
+                    json=json_data,
+                    files=files_dict,
+                    headers=request_headers,
+                    timeout=timeout_request,
+                    data=request_data,
+                    cookies=cookies,
+                    follow_redirects=follow_redirects,
+                )
 
-
-
-
-
-
-
-
-                    f"Response: {response.text[:1000]}\n"
-                    f"Request data: {json_data}\n"
+                # Create custom Response object
+                response = Response(
+                    status_code=httpx_response.status_code,
+                    headers=dict(httpx_response.headers),
+                    _content=httpx_response.content,
+                    text=httpx_response.text,
+                    url=str(httpx_response.url),
                 )
-
-
-
-            if
+
+                # Handle unsuccessful status codes
+                if response.status_code not in _SUCCESS_STATUS_RANGE:
+                    if log_errors:
+                        logger.warning(
+                            f"Request: {response.status_code}\n"
+                            f"Attempt {attempt + 1}/{max_attempt}\n"
+                            f"Url: {url}\n"
+                            f"Params: {params}\n"
+                            f"Response: {response.text[:1000]}\n"
+                            f"Request data: {json_data}\n"
+                        )
+
+                    # Exit immediately for specific status codes (no retry)
+                    if no_retry_status_codes and response.status_code in no_retry_status_codes:
                         return response if force_response else None
 
-
-
-
-
-
-
-
-
-
-
-
+                    if skip_response:
+                        patterns = [skip_response] if isinstance(skip_response, str) else skip_response
+                        if patterns and any(pattern in response.text for pattern in patterns if pattern):
+                            return response if force_response else None
+
+                    if attempt + 1 == max_attempt:
+                        return response if force_response else None
+
+                    # Exponential backoff for 429 (rate limit)
+                    if response.status_code == 429:
+                        backoff = min(120.0, exception_sleep * (2 ** attempt))
+                        if log_errors:
+                            logger.info(f"Rate limited (429), backing off for {backoff:.1f}s")
+                        await asyncio.sleep(_apply_jitter(backoff, jitter))
+                    else:
+                        await asyncio.sleep(_apply_jitter(exception_sleep, jitter))
+                    continue
 
-
-
-
-
-
+                # Validate JSON response
+                if json_response:
+                    try:
+                        response_data = response.json()
+                        if json_response_check and json_response_check not in response_data:
+                            if attempt + 1 == max_attempt:
+                                return None
+                            await asyncio.sleep(_apply_jitter(exception_sleep, jitter))
+                            continue
+                    except json.JSONDecodeError:
                        if attempt + 1 == max_attempt:
                            return None
-                       await asyncio.sleep(exception_sleep)
+                        await asyncio.sleep(_apply_jitter(exception_sleep, jitter))
                        continue
-            except json.JSONDecodeError:
-                if attempt + 1 == max_attempt:
-                    return None
-                await asyncio.sleep(exception_sleep)
-                continue
 
-
+                return response
 
-
-
-
-
-
-
-
+            except (httpx.HTTPError, OSError) as e:
+                if log_errors:
+                    logger.error(f"Request error: {e} - {url} - attempt {attempt + 1}/{max_attempt}")
+                if attempt + 1 == max_attempt:
+                    return None
+                await asyncio.sleep(_apply_jitter(exception_sleep, jitter))
+                continue
 
-
+        return None
+    finally:
+        if own_client:
+            await own_client.aclose()
 
 
 @lru_cache(maxsize=1)
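Note: pulling the new parameters together, a hedged example of calling the reworked make_request (names and defaults are read off the diff; the endpoint and import path are illustrative assumptions):

    import asyncio
    from esuls.request_cli import make_request  # assumed import path

    async def main():
        resp = await make_request(
            "https://api.example.com/login",           # illustrative endpoint
            method="POST",
            form_data={"user": "a", "password": "b"},  # preferred over raw `data`
            no_retry_status_codes=[401, 403],          # fail fast instead of retrying
            http2=True,                                # use the pooled per-domain HTTP/2 client
            jitter=0.1,                                # spread retry sleeps by up to +10%
            log_errors=False,                          # suppress warning/error logging
        )
        print(resp.status_code if resp else "request failed")

    asyncio.run(main())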
@@ -368,13 +440,12 @@ def _get_session_cffi() -> AsyncSession:
 
 
 async def make_request_cffi(url: str) -> Optional[str]:
-    """
+    """HTTP client using curl_cffi for browser impersonation."""
    try:
        response = await _get_session_cffi().get(url)
-        print(response)
        response.raise_for_status()
        return response.text
-    except
+    except (OSError, IOError):
        return None
 
 
@@ -393,6 +464,6 @@ async def test_make_request_cffi():
     print(r)
 
 if __name__ == "__main__":
-
+    print(asyncio.run(make_request("https://italiaonline.it", method="GET")))
     # asyncio.run(test_proxy())
-    asyncio.run(test_make_request_cffi())
+    # asyncio.run(test_make_request_cffi())
{esuls-0.1.10 → esuls-0.1.12/src/esuls.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: esuls
-Version: 0.1.10
+Version: 0.1.12
 Summary: Utility library for async database operations, HTTP requests, and parallel execution
 Author-email: IperGiove <ipergiove@gmail.com>
 License: MIT
@@ -15,7 +15,7 @@ License-File: LICENSE
 Requires-Dist: aiosqlite>=0.21.0
 Requires-Dist: curl-cffi>=0.13.0
 Requires-Dist: fake-useragent>=2.2.0
-Requires-Dist: httpx>=0.28.1
+Requires-Dist: httpx[http2]>=0.28.1
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: pillow>=12.0.0
 Requires-Dist: python-magic>=0.4.27