crawlee 1.0.1b9__py3-none-any.whl → 1.3.1b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- crawlee/__init__.py +2 -1
- crawlee/_browserforge_workaround.py +7 -3
- crawlee/_request.py +62 -32
- crawlee/_service_locator.py +4 -4
- crawlee/_types.py +52 -19
- crawlee/_utils/context.py +3 -3
- crawlee/_utils/file.py +8 -1
- crawlee/_utils/globs.py +4 -4
- crawlee/_utils/recoverable_state.py +32 -8
- crawlee/_utils/recurring_task.py +27 -3
- crawlee/_utils/robots.py +17 -5
- crawlee/_utils/sitemap.py +13 -6
- crawlee/_utils/system.py +27 -11
- crawlee/_utils/time.py +41 -1
- crawlee/_utils/urls.py +9 -2
- crawlee/browsers/_browser_pool.py +5 -2
- crawlee/browsers/_playwright_browser.py +2 -1
- crawlee/browsers/_playwright_browser_controller.py +1 -1
- crawlee/browsers/_playwright_browser_plugin.py +17 -3
- crawlee/browsers/_types.py +1 -1
- crawlee/configuration.py +3 -1
- crawlee/crawlers/__init__.py +5 -1
- crawlee/crawlers/_abstract_http/__init__.py +2 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +54 -16
- crawlee/crawlers/_adaptive_playwright/__init__.py +5 -2
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +21 -30
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
- crawlee/crawlers/_basic/_basic_crawler.py +160 -134
- crawlee/crawlers/_basic/_context_utils.py +24 -0
- crawlee/crawlers/_basic/_logging_utils.py +23 -4
- crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
- crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
- crawlee/crawlers/_parsel/_parsel_parser.py +1 -1
- crawlee/crawlers/_playwright/_playwright_crawler.py +68 -23
- crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
- crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
- crawlee/crawlers/_playwright/_types.py +12 -2
- crawlee/errors.py +4 -0
- crawlee/events/_event_manager.py +12 -6
- crawlee/fingerprint_suite/_browserforge_adapter.py +1 -1
- crawlee/fingerprint_suite/_header_generator.py +2 -2
- crawlee/http_clients/_base.py +4 -0
- crawlee/http_clients/_curl_impersonate.py +68 -14
- crawlee/http_clients/_httpx.py +16 -6
- crawlee/http_clients/_impit.py +25 -10
- crawlee/otel/crawler_instrumentor.py +4 -6
- crawlee/request_loaders/_sitemap_request_loader.py +23 -5
- crawlee/router.py +13 -3
- crawlee/sessions/_cookies.py +13 -8
- crawlee/sessions/_models.py +3 -3
- crawlee/sessions/_session_pool.py +1 -1
- crawlee/statistics/_error_snapshotter.py +1 -1
- crawlee/statistics/_models.py +51 -9
- crawlee/statistics/_statistics.py +24 -33
- crawlee/storage_clients/__init__.py +4 -0
- crawlee/storage_clients/_base/_dataset_client.py +2 -2
- crawlee/storage_clients/_base/_key_value_store_client.py +2 -2
- crawlee/storage_clients/_file_system/_dataset_client.py +8 -7
- crawlee/storage_clients/_file_system/_key_value_store_client.py +9 -6
- crawlee/storage_clients/_file_system/_request_queue_client.py +31 -12
- crawlee/storage_clients/_memory/_dataset_client.py +2 -2
- crawlee/storage_clients/_memory/_key_value_store_client.py +2 -2
- crawlee/storage_clients/_memory/_request_queue_client.py +2 -2
- crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee/storage_clients/_redis/_client_mixin.py +292 -0
- crawlee/storage_clients/_redis/_dataset_client.py +329 -0
- crawlee/storage_clients/_redis/_key_value_store_client.py +262 -0
- crawlee/storage_clients/_redis/_request_queue_client.py +583 -0
- crawlee/storage_clients/_redis/_storage_client.py +149 -0
- crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- crawlee/storage_clients/_redis/py.typed +0 -0
- crawlee/storage_clients/_sql/_client_mixin.py +1 -1
- crawlee/storage_clients/_sql/_dataset_client.py +2 -2
- crawlee/storage_clients/_sql/_db_models.py +1 -2
- crawlee/storage_clients/_sql/_key_value_store_client.py +5 -4
- crawlee/storage_clients/_sql/_request_queue_client.py +20 -6
- crawlee/storage_clients/_sql/_storage_client.py +1 -1
- crawlee/storage_clients/models.py +8 -3
- crawlee/storages/_base.py +3 -1
- crawlee/storages/_dataset.py +3 -0
- crawlee/storages/_key_value_store.py +8 -2
- crawlee/storages/_request_queue.py +3 -0
- crawlee/storages/_storage_instance_manager.py +109 -42
- crawlee/storages/_utils.py +11 -0
- {crawlee-1.0.1b9.dist-info → crawlee-1.3.1b3.dist-info}/METADATA +14 -16
- {crawlee-1.0.1b9.dist-info → crawlee-1.3.1b3.dist-info}/RECORD +93 -79
- {crawlee-1.0.1b9.dist-info → crawlee-1.3.1b3.dist-info}/WHEEL +1 -1
- {crawlee-1.0.1b9.dist-info → crawlee-1.3.1b3.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.1b9.dist-info → crawlee-1.3.1b3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from asyncio import Lock
|
|
3
4
|
from collections import defaultdict
|
|
4
5
|
from collections.abc import Coroutine, Hashable
|
|
5
6
|
from dataclasses import dataclass, field
|
|
6
7
|
from typing import TYPE_CHECKING, TypeVar
|
|
8
|
+
from weakref import WeakValueDictionary
|
|
7
9
|
|
|
8
10
|
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
9
11
|
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
|
|
10
12
|
|
|
13
|
+
from ._utils import validate_storage_name
|
|
14
|
+
|
|
11
15
|
if TYPE_CHECKING:
|
|
12
16
|
from ._base import Storage
|
|
13
17
|
|
|
@@ -74,6 +78,7 @@ class StorageInstanceManager:
|
|
|
74
78
|
|
|
75
79
|
def __init__(self) -> None:
|
|
76
80
|
self._cache: _StorageCache = _StorageCache()
|
|
81
|
+
self._opener_locks: WeakValueDictionary[tuple, Lock] = WeakValueDictionary()
|
|
77
82
|
|
|
78
83
|
async def open_storage_instance(
|
|
79
84
|
self,
|
|
@@ -90,7 +95,9 @@ class StorageInstanceManager:
|
|
|
90
95
|
Args:
|
|
91
96
|
cls: The storage class to instantiate.
|
|
92
97
|
id: Storage ID.
|
|
93
|
-
name: Storage name. (global scope, persists across runs).
|
|
98
|
+
name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
99
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
100
|
+
(e.g. "my-value-1").
|
|
94
101
|
alias: Storage alias (run scope, creates unnamed storage).
|
|
95
102
|
client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
|
|
96
103
|
storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
|
|
@@ -115,59 +122,71 @@ class StorageInstanceManager:
|
|
|
115
122
|
if not any([name, alias, id]):
|
|
116
123
|
alias = self._DEFAULT_STORAGE_ALIAS
|
|
117
124
|
|
|
118
|
-
# Check cache
|
|
119
|
-
if
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if isinstance(cached_instance, cls):
|
|
126
|
-
return cached_instance
|
|
127
|
-
raise RuntimeError('Cached instance type mismatch.')
|
|
128
|
-
|
|
129
|
-
if alias is not None and (
|
|
130
|
-
cached_instance := self._cache.by_alias[cls][alias].get(storage_client_cache_key)
|
|
125
|
+
# Check cache without lock first for performance.
|
|
126
|
+
if cached_instance := self._get_from_cache(
|
|
127
|
+
cls,
|
|
128
|
+
id=id,
|
|
129
|
+
name=name,
|
|
130
|
+
alias=alias,
|
|
131
|
+
storage_client_cache_key=storage_client_cache_key,
|
|
131
132
|
):
|
|
132
|
-
|
|
133
|
+
return cached_instance
|
|
134
|
+
|
|
135
|
+
# Validate storage name
|
|
136
|
+
if name is not None:
|
|
137
|
+
validate_storage_name(name)
|
|
138
|
+
|
|
139
|
+
# Acquire lock for this opener
|
|
140
|
+
opener_lock_key = (cls, str(id or name or alias), storage_client_cache_key)
|
|
141
|
+
if not (lock := self._opener_locks.get(opener_lock_key)):
|
|
142
|
+
lock = Lock()
|
|
143
|
+
self._opener_locks[opener_lock_key] = lock
|
|
144
|
+
|
|
145
|
+
async with lock:
|
|
146
|
+
# Another task could have created the storage while we were waiting for the lock - check if that
|
|
147
|
+
# happened
|
|
148
|
+
if cached_instance := self._get_from_cache(
|
|
149
|
+
cls,
|
|
150
|
+
id=id,
|
|
151
|
+
name=name,
|
|
152
|
+
alias=alias,
|
|
153
|
+
storage_client_cache_key=storage_client_cache_key,
|
|
154
|
+
):
|
|
133
155
|
return cached_instance
|
|
134
|
-
raise RuntimeError('Cached instance type mismatch.')
|
|
135
156
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
157
|
+
# Check for conflicts between named and alias storages
|
|
158
|
+
self._check_name_alias_conflict(
|
|
159
|
+
cls,
|
|
160
|
+
name=name,
|
|
161
|
+
alias=alias,
|
|
162
|
+
storage_client_cache_key=storage_client_cache_key,
|
|
141
163
|
)
|
|
142
164
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
f'Use a different name or drop the existing alias storage first.'
|
|
147
|
-
)
|
|
165
|
+
# Create new instance
|
|
166
|
+
client: KeyValueStoreClient | DatasetClient | RequestQueueClient
|
|
167
|
+
client = await client_opener_coro
|
|
148
168
|
|
|
149
|
-
|
|
150
|
-
client: KeyValueStoreClient | DatasetClient | RequestQueueClient
|
|
151
|
-
client = await client_opener_coro
|
|
169
|
+
metadata = await client.get_metadata()
|
|
152
170
|
|
|
153
|
-
|
|
171
|
+
instance = cls(client, metadata.id, metadata.name) # type: ignore[call-arg]
|
|
172
|
+
instance_name = getattr(instance, 'name', None)
|
|
154
173
|
|
|
155
|
-
|
|
156
|
-
|
|
174
|
+
# Cache the instance.
|
|
175
|
+
# Note: No awaits in this section. All cache entries must be written
|
|
176
|
+
# atomically to ensure pre-checks outside the lock see consistent state.
|
|
157
177
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
self._cache.by_id[cls][instance.id][storage_client_cache_key] = instance
|
|
178
|
+
# Always cache by id.
|
|
179
|
+
self._cache.by_id[cls][instance.id][storage_client_cache_key] = instance
|
|
161
180
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
181
|
+
# Cache named storage.
|
|
182
|
+
if instance_name is not None:
|
|
183
|
+
self._cache.by_name[cls][instance_name][storage_client_cache_key] = instance
|
|
165
184
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
185
|
+
# Cache unnamed storage.
|
|
186
|
+
if alias is not None:
|
|
187
|
+
self._cache.by_alias[cls][alias][storage_client_cache_key] = instance
|
|
169
188
|
|
|
170
|
-
|
|
189
|
+
return instance
|
|
171
190
|
|
|
172
191
|
finally:
|
|
173
192
|
# Make sure the client opener is closed.
|
|
@@ -185,3 +204,51 @@ class StorageInstanceManager:
|
|
|
185
204
|
def clear_cache(self) -> None:
|
|
186
205
|
"""Clear all cached storage instances."""
|
|
187
206
|
self._cache = _StorageCache()
|
|
207
|
+
|
|
208
|
+
def _get_from_cache(
|
|
209
|
+
self,
|
|
210
|
+
cls: type[T],
|
|
211
|
+
*,
|
|
212
|
+
id: str | None = None,
|
|
213
|
+
name: str | None = None,
|
|
214
|
+
alias: str | None = None,
|
|
215
|
+
storage_client_cache_key: Hashable = '',
|
|
216
|
+
) -> T | None:
|
|
217
|
+
"""Get a storage instance from the cache."""
|
|
218
|
+
if id is not None and (cached_instance := self._cache.by_id[cls][id].get(storage_client_cache_key)):
|
|
219
|
+
if isinstance(cached_instance, cls):
|
|
220
|
+
return cached_instance
|
|
221
|
+
raise RuntimeError('Cached instance type mismatch.')
|
|
222
|
+
|
|
223
|
+
if name is not None and (cached_instance := self._cache.by_name[cls][name].get(storage_client_cache_key)):
|
|
224
|
+
if isinstance(cached_instance, cls):
|
|
225
|
+
return cached_instance
|
|
226
|
+
raise RuntimeError('Cached instance type mismatch.')
|
|
227
|
+
|
|
228
|
+
if alias is not None and (cached_instance := self._cache.by_alias[cls][alias].get(storage_client_cache_key)):
|
|
229
|
+
if isinstance(cached_instance, cls):
|
|
230
|
+
return cached_instance
|
|
231
|
+
raise RuntimeError('Cached instance type mismatch.')
|
|
232
|
+
|
|
233
|
+
return None
|
|
234
|
+
|
|
235
|
+
def _check_name_alias_conflict(
|
|
236
|
+
self,
|
|
237
|
+
cls: type[T],
|
|
238
|
+
*,
|
|
239
|
+
name: str | None = None,
|
|
240
|
+
alias: str | None = None,
|
|
241
|
+
storage_client_cache_key: Hashable = '',
|
|
242
|
+
) -> None:
|
|
243
|
+
"""Check for conflicts between named and alias storages."""
|
|
244
|
+
if alias and (self._cache.by_name[cls][alias].get(storage_client_cache_key)):
|
|
245
|
+
raise ValueError(
|
|
246
|
+
f'Cannot create alias storage "{alias}" because a named storage with the same name already exists. '
|
|
247
|
+
f'Use a different alias or drop the existing named storage first.'
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
if name and (self._cache.by_alias[cls][name].get(storage_client_cache_key)):
|
|
251
|
+
raise ValueError(
|
|
252
|
+
f'Cannot create named storage "{name}" because an alias storage with the same name already exists. '
|
|
253
|
+
f'Use a different name or drop the existing alias storage first.'
|
|
254
|
+
)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def validate_storage_name(name: str | None) -> None:
|
|
7
|
+
if name and not NAME_REGEX.match(name):
|
|
8
|
+
raise ValueError(
|
|
9
|
+
f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through'
|
|
10
|
+
'"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
|
|
11
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.3.1b3
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -223,15 +223,17 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
223
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
226
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
228
|
Requires-Python: >=3.10
|
|
229
|
+
Requires-Dist: async-timeout>=5.0.1
|
|
228
230
|
Requires-Dist: cachetools>=5.5.0
|
|
229
231
|
Requires-Dist: colorama>=0.4.0
|
|
230
|
-
Requires-Dist: impit>=0.
|
|
232
|
+
Requires-Dist: impit>=0.8.0
|
|
231
233
|
Requires-Dist: more-itertools>=10.2.0
|
|
232
234
|
Requires-Dist: protego>=0.5.0
|
|
233
235
|
Requires-Dist: psutil>=6.0.0
|
|
234
|
-
Requires-Dist: pydantic-settings
|
|
236
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
235
237
|
Requires-Dist: pydantic>=2.11.0
|
|
236
238
|
Requires-Dist: pyee>=9.0.0
|
|
237
239
|
Requires-Dist: tldextract>=5.1.0
|
|
@@ -263,6 +265,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
263
265
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
264
266
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
265
267
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
268
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
266
269
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
267
270
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
268
271
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
@@ -296,6 +299,8 @@ Provides-Extra: playwright
|
|
|
296
299
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
297
300
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
298
301
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
302
|
+
Provides-Extra: redis
|
|
303
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
299
304
|
Provides-Extra: sql-postgres
|
|
300
305
|
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
301
306
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
@@ -319,19 +324,12 @@ Description-Content-Type: text/markdown
|
|
|
319
324
|
<a href="https://trendshift.io/repositories/11169" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11169" alt="apify%2Fcrawlee-python | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
|
320
325
|
</p>
|
|
321
326
|
|
|
322
|
-
<p align=center>
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
</a>
|
|
329
|
-
<a href="https://pypi.org/project/crawlee/" rel="nofollow">
|
|
330
|
-
<img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI - Python Version" style="max-width: 100%;">
|
|
331
|
-
</a>
|
|
332
|
-
<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
|
|
333
|
-
<img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
|
|
334
|
-
</a>
|
|
327
|
+
<p align="center">
|
|
328
|
+
<a href="https://badge.fury.io/py/crawlee" rel="nofollow"><img src="https://badge.fury.io/py/crawlee.svg" alt="PyPI package version"></a>
|
|
329
|
+
<a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/dm/crawlee" alt="PyPI package downloads"></a>
|
|
330
|
+
<a href="https://codecov.io/gh/apify/crawlee-python"><img src="https://codecov.io/gh/apify/crawlee-python/graph/badge.svg?token=cCju61iPQG" alt="Codecov report"></a>
|
|
331
|
+
<a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI Python version"></a>
|
|
332
|
+
<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow"><img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on Discord"></a>
|
|
335
333
|
</p>
|
|
336
334
|
|
|
337
335
|
Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
crawlee/__init__.py,sha256=
|
|
2
|
-
crawlee/_browserforge_workaround.py,sha256=
|
|
1
|
+
crawlee/__init__.py,sha256=ECFcNbLQp3HX-o6K4eMo38rZQ5NnZg7udvEEkjkqnuw,548
|
|
2
|
+
crawlee/_browserforge_workaround.py,sha256=jajzR5W6C2CBehLKsXH00ZhP0KeRu4_LOx13KkKb0Xw,1736
|
|
3
3
|
crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
|
|
4
4
|
crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
|
|
5
5
|
crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
|
|
6
|
-
crawlee/_request.py,sha256=
|
|
7
|
-
crawlee/_service_locator.py,sha256=
|
|
8
|
-
crawlee/_types.py,sha256=
|
|
9
|
-
crawlee/configuration.py,sha256=
|
|
10
|
-
crawlee/errors.py,sha256=
|
|
6
|
+
crawlee/_request.py,sha256=Esa4yMRu5WeeUHyy9Hfz1j9qWFkrkYU2lkXEsk-mHcI,17378
|
|
7
|
+
crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
|
|
8
|
+
crawlee/_types.py,sha256=LJbn4MBeTfU3RQN62BF8b7b5VwS9BqWH6HgUL8KXIfs,30495
|
|
9
|
+
crawlee/configuration.py,sha256=DWS2z1FC6Ua93W2tStK3R1ZKZbZjVQYWGiGFbZFaRtA,8064
|
|
10
|
+
crawlee/errors.py,sha256=fnAFpyvJKMDq3VDGr1iq1E-JqnfoOEI7cd8YjDaqb9s,4062
|
|
11
11
|
crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
|
|
12
12
|
crawlee/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
crawlee/router.py,sha256=
|
|
13
|
+
crawlee/router.py,sha256=HbKxE22r8ZVu93tIxBdGObMa3fGPcuSvKthqibimekU,4252
|
|
14
14
|
crawlee/_autoscaling/__init__.py,sha256=t6Z44gU488C0UmkBCTtwsgAR8iqJcv2g4ZlC4NYh0ZI,182
|
|
15
15
|
crawlee/_autoscaling/_types.py,sha256=xnrRHXYOVn7GwELLVHi_y7B-Ic7u3hPkYl3P-LT3Fhk,5453
|
|
16
16
|
crawlee/_autoscaling/autoscaled_pool.py,sha256=Bcu2jDgK2SYMnZN5xfjs8Oxti0ZxrktjydWv3J0Hz48,12214
|
|
@@ -21,57 +21,58 @@ crawlee/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
21
21
|
crawlee/_utils/blocked.py,sha256=sxN99AouFXMoe6uG1EvCTCmKMGk73DBMUk9nOkWK86I,863
|
|
22
22
|
crawlee/_utils/byte_size.py,sha256=zs4qWUEDgTGDqYfUJ7t5edWNYYJCG8Y1EyJ9GASfRL4,3744
|
|
23
23
|
crawlee/_utils/console.py,sha256=vAIM8AO7cT-HdXg44eR8zQyHAHk8X8G7J1KKFCBL2LY,2242
|
|
24
|
-
crawlee/_utils/context.py,sha256=
|
|
24
|
+
crawlee/_utils/context.py,sha256=nqTJGG46HKdBx4Pl0aWsTkCAFKPUxY0Hpi0GHLB3KT0,1713
|
|
25
25
|
crawlee/_utils/crypto.py,sha256=tYzn2z91KgV3ugxz4CKtSTcCjW-3FC8un7hpDNCl6rs,757
|
|
26
26
|
crawlee/_utils/docs.py,sha256=S09-3xAQAlUvrmPpBXVJpE8wblB8LtS6QduLNncfqdQ,1130
|
|
27
|
-
crawlee/_utils/file.py,sha256=
|
|
28
|
-
crawlee/_utils/globs.py,sha256=
|
|
27
|
+
crawlee/_utils/file.py,sha256=qK_0w0ddXUqaS_z_b8IFBOtNaZsfaKX-yNVPBITsX4Y,5732
|
|
28
|
+
crawlee/_utils/globs.py,sha256=g0W6tSVdghzesfNm0iKmkLnlcyanIIhPxQOBvWKfDKI,5324
|
|
29
29
|
crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZkY,902
|
|
30
30
|
crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
|
|
31
31
|
crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
|
|
32
|
-
crawlee/_utils/recoverable_state.py,sha256=
|
|
33
|
-
crawlee/_utils/recurring_task.py,sha256=
|
|
32
|
+
crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
|
|
33
|
+
crawlee/_utils/recurring_task.py,sha256=ZBuGbKoSs5lCLZIT8XWEJean2rHvoBizirQCnSmkfsU,2378
|
|
34
34
|
crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
|
|
35
|
-
crawlee/_utils/robots.py,sha256=
|
|
36
|
-
crawlee/_utils/sitemap.py,sha256=
|
|
37
|
-
crawlee/_utils/system.py,sha256=
|
|
38
|
-
crawlee/_utils/time.py,sha256=
|
|
35
|
+
crawlee/_utils/robots.py,sha256=DBU5ni4Y-p7bIKMbLd_ws8wgHSFc4K8zPVF3JvH_pkw,4661
|
|
36
|
+
crawlee/_utils/sitemap.py,sha256=aNaU3uMpNKoVRrZ0uEiK9qAeDW59xQFYi4URSCBExS8,16802
|
|
37
|
+
crawlee/_utils/system.py,sha256=bI3ptp11Zty0JvvgHT42aI7mKPUjYVJQAC1-EeK60yc,4941
|
|
38
|
+
crawlee/_utils/time.py,sha256=awyzUqMvoC3BrEDINHaGPaM-u98AW0UtQqQsVJPAnsA,3899
|
|
39
39
|
crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
|
|
40
|
-
crawlee/_utils/urls.py,sha256=
|
|
40
|
+
crawlee/_utils/urls.py,sha256=fEYXJxBT02f-DIYKF_h7PdaKAShfXBs99-dHDjDX03A,1725
|
|
41
41
|
crawlee/_utils/wait.py,sha256=RfiXhp5VUBxOEtEMtru7_jNfKDr2BJCcFge5qGg2gxk,2848
|
|
42
42
|
crawlee/_utils/web.py,sha256=nnKhg8pUSWz0RY64Qd-_GPNBX1fWI2hXS-gzcfQ-rig,364
|
|
43
43
|
crawlee/browsers/__init__.py,sha256=TghkrNSbI_k87UgVBlgNNcEm8Ot05pSLEAPRSv6YsUs,1064
|
|
44
44
|
crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6kcV-g3AXE0I,3068
|
|
45
45
|
crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
|
|
46
|
-
crawlee/browsers/_browser_pool.py,sha256=
|
|
47
|
-
crawlee/browsers/_playwright_browser.py,sha256=
|
|
48
|
-
crawlee/browsers/_playwright_browser_controller.py,sha256=
|
|
49
|
-
crawlee/browsers/_playwright_browser_plugin.py,sha256=
|
|
50
|
-
crawlee/browsers/_types.py,sha256=
|
|
46
|
+
crawlee/browsers/_browser_pool.py,sha256=MJTZludtka12xu1vtAfsruC2ON9HQjy1_H1ELTIxgNY,15866
|
|
47
|
+
crawlee/browsers/_playwright_browser.py,sha256=EijCGBrEHy0aE6mFOqR955_w_krfpxJ2fH6x1yV8O9s,3852
|
|
48
|
+
crawlee/browsers/_playwright_browser_controller.py,sha256=W6G5MjZpg9IcZoHts6lTML5VxSEpBTgzx5qeQ8XDigY,10216
|
|
49
|
+
crawlee/browsers/_playwright_browser_plugin.py,sha256=A1qa1nJhTSKNP9uOiO-oGzR7VGlnOMo0A0YNedccb2A,8869
|
|
50
|
+
crawlee/browsers/_types.py,sha256=ZnDgJHeQNSd_s_mXhgQnAf09c2smuiXC31VbawHHXUM,436
|
|
51
51
|
crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
-
crawlee/crawlers/__init__.py,sha256=
|
|
52
|
+
crawlee/crawlers/__init__.py,sha256=1cplBBySmWFVsUk-2bjlhrtkloQzbtFCVBRWlA84KQI,2529
|
|
53
53
|
crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
|
|
54
54
|
crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
crawlee/crawlers/_abstract_http/__init__.py,sha256=
|
|
56
|
-
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
|
|
55
|
+
crawlee/crawlers/_abstract_http/__init__.py,sha256=h8jVWcPbDXzWHill1Vm7J7iliJW0hIrea0gkg-Hkb-M,319
|
|
56
|
+
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=QYGfr-I0nb-0nUuoVj8GjM_ygm0WBpORLHJA5PGXEnU,13082
|
|
57
57
|
crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
|
|
58
58
|
crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
|
|
59
59
|
crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
-
crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=
|
|
61
|
-
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=
|
|
60
|
+
crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=C171YB0HFOF08-XADHH3B_7URs4FAU1clLtpCNcEsUc,1319
|
|
61
|
+
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=MJ84sUJEtA06DzK94RS2w70-DrGvdnRcvZH4ald4P8A,21776
|
|
62
62
|
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
|
|
63
|
-
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=
|
|
63
|
+
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=tejw-yfA8zVR8L-shIZOTFoMUQOI5Kt7FBJa8H0q4H0,10664
|
|
64
64
|
crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
|
|
65
65
|
crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
|
|
66
66
|
crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
|
|
67
67
|
crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
|
|
68
|
-
crawlee/crawlers/_basic/_basic_crawler.py,sha256=
|
|
68
|
+
crawlee/crawlers/_basic/_basic_crawler.py,sha256=L2RQZ3PWjIrX62LW4cxWc7XU06h3mqDfVqj4iqPT63E,73945
|
|
69
69
|
crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
|
|
70
70
|
crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
|
|
71
|
-
crawlee/crawlers/_basic/
|
|
71
|
+
crawlee/crawlers/_basic/_context_utils.py,sha256=EiJQidm5i9k_s7fDEZ6gT6XdjyDE515N6HBGKSkMnhw,656
|
|
72
|
+
crawlee/crawlers/_basic/_logging_utils.py,sha256=6Q206Sv0RzHztwu5y5XSdUpZhpqQ5-zSapQzUY9GxCo,4014
|
|
72
73
|
crawlee/crawlers/_basic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
74
|
crawlee/crawlers/_beautifulsoup/__init__.py,sha256=7pL273ashA7yYDrH6nokYZ7SAMUAezilGIWdfThi_Co,822
|
|
74
|
-
crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py,sha256=
|
|
75
|
+
crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py,sha256=Q8Sb_lflpdYIwDZ1fIeuquPzdDG2zCnKsrcj8fe8n6k,3056
|
|
75
76
|
crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawling_context.py,sha256=CMHQbYmXdhnXSo3hjtmAqWPH2lEaoVHzwPY2Ka85s70,1149
|
|
76
77
|
crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py,sha256=egCBcaE6RaQoJOFuxex_McItCd4Ddd3ZDE-2DVGictA,1661
|
|
77
78
|
crawlee/crawlers/_beautifulsoup/_utils.py,sha256=xUJM7Y0RGlTEe0TJe252uuIdjk8gwBFjuhQnRG7-jv4,3127
|
|
@@ -80,36 +81,36 @@ crawlee/crawlers/_http/__init__.py,sha256=BY6KKY1eD8CWDFfurJDIgcnFIfgXAVuMHKSL3w
|
|
|
80
81
|
crawlee/crawlers/_http/_http_crawler.py,sha256=HoCTku4FL9EN3WMlzbZNbb2BtXedP487CN0JlObsoGg,2041
|
|
81
82
|
crawlee/crawlers/_http/_http_parser.py,sha256=Bfe3TEz6i5UhONO2qfyw2p31QoVoXbvGci3T3HeObiU,1495
|
|
82
83
|
crawlee/crawlers/_parsel/__init__.py,sha256=c_FveWxCuaEaG2HAYlaN9N_aPEaVdpaYVMsIcCcTdnU,617
|
|
83
|
-
crawlee/crawlers/_parsel/_parsel_crawler.py,sha256=
|
|
84
|
+
crawlee/crawlers/_parsel/_parsel_crawler.py,sha256=Aolo96FU_U3nsEjGoxEothgF7pVuoXLkhRmNWgKzGYg,2717
|
|
84
85
|
crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOMPeolIN7apG76aqRmKQvEep8,1142
|
|
85
|
-
crawlee/crawlers/_parsel/_parsel_parser.py,sha256=
|
|
86
|
+
crawlee/crawlers/_parsel/_parsel_parser.py,sha256=qdsDK2rKRDgNFYs0f70xpjqKZQwpJQIxsWUm2mx4m7g,1532
|
|
86
87
|
crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
|
|
87
88
|
crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
|
|
88
|
-
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
|
|
89
|
+
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=Tr1qc316527Gf2JGwIXubc_1LUIW97B-30auWRZ9oss,26329
|
|
89
90
|
crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
|
|
90
|
-
crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=
|
|
91
|
-
crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=
|
|
92
|
-
crawlee/crawlers/_playwright/_types.py,sha256=
|
|
91
|
+
crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=4mvaCI9Zum7znbm0F-ZZ6T1FEqZ-N-cvPOk1iqtcUSo,4164
|
|
92
|
+
crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=NFenJKgXcPuifaVYc2sdU5AV2BX6836GUuqFTE2Q0lU,1545
|
|
93
|
+
crawlee/crawlers/_playwright/_types.py,sha256=D4MaRWgYdps1CwgNWURJRLKkJk_9Oyue70jvkHAxnEU,2534
|
|
93
94
|
crawlee/crawlers/_playwright/_utils.py,sha256=FQ_-LYo7DGHsNHRrTtWt3mC06VzQvQ2wkGqpA2wBzYU,3441
|
|
94
95
|
crawlee/events/__init__.py,sha256=YMgOXKI0LsXfImKQy06PZ2Vdjy-uD_-acioagHft1do,577
|
|
95
|
-
crawlee/events/_event_manager.py,sha256=
|
|
96
|
+
crawlee/events/_event_manager.py,sha256=LZ1x8ygEVZnGGLHLzdFzdzgDEZjatVRjXL4q1TO_BAk,11673
|
|
96
97
|
crawlee/events/_local_event_manager.py,sha256=CSiMJ6a_BwX0PPwtffEOtHm21dmALJz1zifo3AuMAk8,3708
|
|
97
98
|
crawlee/events/_types.py,sha256=MKsI014OOKKhjPJRrvWYrezIDGoLjGGhWXrkqYw26Ns,3313
|
|
98
99
|
crawlee/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
100
|
crawlee/fingerprint_suite/__init__.py,sha256=noY9qw80B0seZGj_B3bBvCDIDk2YWOSN-llIssLdY5c,550
|
|
100
|
-
crawlee/fingerprint_suite/_browserforge_adapter.py,sha256=
|
|
101
|
+
crawlee/fingerprint_suite/_browserforge_adapter.py,sha256=tDTONP8jlT6TQ-OSiKVmi16fziAvKsbMfgJsPjUjzo4,11871
|
|
101
102
|
crawlee/fingerprint_suite/_consts.py,sha256=SgykWfxD-pYvOpRp_ooQ4ZTPS0sQ2b3wDyyCjwU_8-w,258
|
|
102
103
|
crawlee/fingerprint_suite/_fingerprint_generator.py,sha256=Di4sDk1qioiFGx4ZcoVyHhtFHF8JXDhxQt8ZPug99k8,730
|
|
103
|
-
crawlee/fingerprint_suite/_header_generator.py,sha256=
|
|
104
|
+
crawlee/fingerprint_suite/_header_generator.py,sha256=9X9FbStehXdw-FZc_D0y-nLk1BUHXVYFxs7fv4dl9zU,3513
|
|
104
105
|
crawlee/fingerprint_suite/_types.py,sha256=7n2LJTiL2XvL-H4G-Y26Uoq5-ZXzH07Dq4o50uhMa-w,2423
|
|
105
106
|
crawlee/fingerprint_suite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
107
|
crawlee/http_clients/__init__.py,sha256=OQFhR9F8BrdlIaS5aRS7hvgQ0tKJPQ8FiyYPualyQcU,890
|
|
107
|
-
crawlee/http_clients/_base.py,sha256=
|
|
108
|
-
crawlee/http_clients/_curl_impersonate.py,sha256=
|
|
109
|
-
crawlee/http_clients/_httpx.py,sha256=
|
|
110
|
-
crawlee/http_clients/_impit.py,sha256=
|
|
108
|
+
crawlee/http_clients/_base.py,sha256=sz-NCgGyRKhNz2tPR7Zq10iYQ8Itig-fls4h7GMHhNM,7593
|
|
109
|
+
crawlee/http_clients/_curl_impersonate.py,sha256=tmRPkNDby-8GRuyUKLY5bv9YosVMcJgZErDoY1zPB8g,12948
|
|
110
|
+
crawlee/http_clients/_httpx.py,sha256=abRe2YjQUfIrtmOwOAB72viwd8CF0V2mAXQceLuIWvo,12375
|
|
111
|
+
crawlee/http_clients/_impit.py,sha256=wEfDSOGIOmfLUkmLXAmZkiM5wp4PltUv-oH7zq2NMfQ,9434
|
|
111
112
|
crawlee/otel/__init__.py,sha256=g5y1tJfpDKfcIPGcKBztMgP6sptum-vJrtemeR8_-co,108
|
|
112
|
-
crawlee/otel/crawler_instrumentor.py,sha256
|
|
113
|
+
crawlee/otel/crawler_instrumentor.py,sha256=XYWylvS9VtT1X3qX_oKqwzoxFerh-Lga6JTJzizOQQ4,6796
|
|
113
114
|
crawlee/project_template/cookiecutter.json,sha256=dJeYxLx5QEy2DCzXsDpqJQJlIJ3nw42lJrclZFoSZ8w,622
|
|
114
115
|
crawlee/project_template/hooks/post_gen_project.py,sha256=EBNgb_-eodDxaYJljYYznZGASnduxJ54RDO_b4ofm4M,1296
|
|
115
116
|
crawlee/project_template/hooks/pre_gen_project.py,sha256=AqvHJRTOTQzKFX10Zt8uKt8UFHczDJ1ogPPHQVdY2ZU,1175
|
|
@@ -137,57 +138,70 @@ crawlee/request_loaders/_request_list.py,sha256=SIalHBMuFanE5GLnFocI0QCppWUiJQjr
|
|
|
137
138
|
crawlee/request_loaders/_request_loader.py,sha256=2Bg-AWWkIV1W-Dwjqo91dPY8nmc7H3teQy7d6OSgliQ,3620
|
|
138
139
|
crawlee/request_loaders/_request_manager.py,sha256=qFizyJuV2meIb9iiPfuii7ciuERMrp4SldAufiH46dc,3000
|
|
139
140
|
crawlee/request_loaders/_request_manager_tandem.py,sha256=lv-s94KPsoQAqx1KaXFch96ejhO147uOflF3UK5ORTk,4058
|
|
140
|
-
crawlee/request_loaders/_sitemap_request_loader.py,sha256=
|
|
141
|
+
crawlee/request_loaders/_sitemap_request_loader.py,sha256=9nSC-s3bD6e9zv7LXILs6zUUOACBzRryZlAB0OUxghc,16584
|
|
141
142
|
crawlee/sessions/__init__.py,sha256=dJdelbL-6MK5sW4SMU4QrjFbb9kRZ9uRnN-VS3R5-8Y,190
|
|
142
|
-
crawlee/sessions/_cookies.py,sha256=
|
|
143
|
-
crawlee/sessions/_models.py,sha256=
|
|
143
|
+
crawlee/sessions/_cookies.py,sha256=fcFlZYQW-OBfmorWfMWpv2VDiC0kxFxm0g1CMHvzGYk,9563
|
|
144
|
+
crawlee/sessions/_models.py,sha256=bw6eqe0b6SwfaRrIvT1SA3oS8CUfYb-XF0RIqGzP7jg,2820
|
|
144
145
|
crawlee/sessions/_session.py,sha256=cMXVf6QjfGJDgdLUB4MhUP-zTm3pEDHRs-W5SBA4JFI,9638
|
|
145
|
-
crawlee/sessions/_session_pool.py,sha256
|
|
146
|
+
crawlee/sessions/_session_pool.py,sha256=_FYTXRANDiREK09DSoHIu-536sSUj9wazbrcl6iKdFM,9631
|
|
146
147
|
crawlee/sessions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
148
|
crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY0,154
|
|
148
|
-
crawlee/statistics/_error_snapshotter.py,sha256=
|
|
149
|
+
crawlee/statistics/_error_snapshotter.py,sha256=g-roZgkJ-glyStZL7gXrOhrpdZvZ686W9lR43uZjPao,3279
|
|
149
150
|
crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
|
|
150
|
-
crawlee/statistics/_models.py,sha256=
|
|
151
|
-
crawlee/statistics/_statistics.py,sha256=
|
|
152
|
-
crawlee/storage_clients/__init__.py,sha256=
|
|
153
|
-
crawlee/storage_clients/models.py,sha256=
|
|
151
|
+
crawlee/statistics/_models.py,sha256=DtlJtm57XZ4MGGtbni3VBj_PypXDPpCRSeNO1dztvDo,6895
|
|
152
|
+
crawlee/statistics/_statistics.py,sha256=mSgnCnV7q2buJdyuXBxbUU9MQEUjxaLST_NO4ej3XRw,12341
|
|
153
|
+
crawlee/storage_clients/__init__.py,sha256=X3M6Z_WAOJ3M9I8JhGhJDnrtbCOmM27DpGAzgt87R2A,874
|
|
154
|
+
crawlee/storage_clients/models.py,sha256=c8CHlMvmjC22RKRPrsQc861pq3LLl0Ec5lK8edpg5xc,6825
|
|
154
155
|
crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
156
|
crawlee/storage_clients/_base/__init__.py,sha256=-f0VIaGjw7Oo6HAgEK9ABtmKnnSRwzkA3WUQZMX5w0w,307
|
|
156
|
-
crawlee/storage_clients/_base/_dataset_client.py,sha256=
|
|
157
|
-
crawlee/storage_clients/_base/_key_value_store_client.py,sha256=
|
|
157
|
+
crawlee/storage_clients/_base/_dataset_client.py,sha256=BzFtEvp0sM-tdVfL4635hoQ4ZzkbLFMsI0u9BGBQYhY,3028
|
|
158
|
+
crawlee/storage_clients/_base/_key_value_store_client.py,sha256=5fx29Bp3NobZvDjfuOiMc1cPLYQtEN8Agsy5wjnJpIw,3388
|
|
158
159
|
crawlee/storage_clients/_base/_request_queue_client.py,sha256=cgM4yk6xJwgfzP-xaN9ApqJn32sh0FrSEPIdxN7kujw,4926
|
|
159
160
|
crawlee/storage_clients/_base/_storage_client.py,sha256=RvmKCV1U9_KxyG7n8xhClm2vwD2SKChWIiBLk6cuqw0,3523
|
|
160
161
|
crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
162
|
crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
|
|
162
|
-
crawlee/storage_clients/_file_system/_dataset_client.py,sha256=
|
|
163
|
-
crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=
|
|
164
|
-
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=
|
|
163
|
+
crawlee/storage_clients/_file_system/_dataset_client.py,sha256=Yvd-kGA4m6aXlshSH1kwq3f9176v_1ufmd4A5gn4ksI,17820
|
|
164
|
+
crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=3RrI5CrL6fPCjf5N34Y3qpNs7_7iq7yS0UlncIEeDGc,18841
|
|
165
|
+
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=XZRLbzHBpOxdmBluDphpUlUq1C6LbI8g0902vTf4R9k,34086
|
|
165
166
|
crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
|
|
166
167
|
crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
168
|
crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
169
|
crawlee/storage_clients/_memory/__init__.py,sha256=WHyBhckxdw2k0epkM_B3ymNASebNTOCU_NrvfzUAn14,355
|
|
169
|
-
crawlee/storage_clients/_memory/_dataset_client.py,sha256=
|
|
170
|
-
crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=
|
|
171
|
-
crawlee/storage_clients/_memory/_request_queue_client.py,sha256=
|
|
170
|
+
crawlee/storage_clients/_memory/_dataset_client.py,sha256=wxuhyVnWyi9jUfI2eY4kSgD6vVje4gL2Bursto348Ps,8827
|
|
171
|
+
crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=rBXRggALe-0kBAe03sdLVkABhkEFmHqXRabR28IugUE,6472
|
|
172
|
+
crawlee/storage_clients/_memory/_request_queue_client.py,sha256=hPI78S1sOopVKFKDRW7ndkha7TVcJfwdd4onXgfb4Pk,13096
|
|
172
173
|
crawlee/storage_clients/_memory/_storage_client.py,sha256=EyiH-MgM_6iBItjmy2SkWAdjVbviacnxr3la-yiGfIw,2724
|
|
173
174
|
crawlee/storage_clients/_memory/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
175
|
+
crawlee/storage_clients/_redis/__init__.py,sha256=HrtfC0mRgz2dCNe8olCWjwe2NXnFFDPoXOuBIDBFlIg,328
|
|
176
|
+
crawlee/storage_clients/_redis/_client_mixin.py,sha256=QvHZ2lvkH9mKBhjiKyeUkCnwWTSfQY5S4aJLxEUghEs,12263
|
|
177
|
+
crawlee/storage_clients/_redis/_dataset_client.py,sha256=E424nkgnCtUIUZ9bsPD-5DCBriMFKsxahI_akTJiCHg,11928
|
|
178
|
+
crawlee/storage_clients/_redis/_key_value_store_client.py,sha256=Cmm-m81wvmxjHxDZghrVnj0imFYwilzj5pmfd1ZGKmI,10428
|
|
179
|
+
crawlee/storage_clients/_redis/_request_queue_client.py,sha256=TMwHf6CZOKaYvFZ8PN0KH9VcXyNh3ER3rHPj2-VrPF8,24715
|
|
180
|
+
crawlee/storage_clients/_redis/_storage_client.py,sha256=N8USUyeJg53O-LKKHl3xJzUPsS5z3z18b5uRn02Ky5Q,5520
|
|
181
|
+
crawlee/storage_clients/_redis/_utils.py,sha256=0SY9W0EGm2TaC2RMJ5KCvOS1u_Kh4xog-fc3mY3NimY,705
|
|
182
|
+
crawlee/storage_clients/_redis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua,sha256=gYtZfjXp-D-vvlRZBzf7J4OCP2_js_oNnzAkbPfK3Bg,1054
|
|
184
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua,sha256=66aJjIR9f6hQcmVhOA8Km_7PaduJFqOdsisieWDIn6E,1351
|
|
185
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua,sha256=coxBfdPMc0X9rpszMCvdZXEbQzrRc7qU4flhN3MKwjc,1123
|
|
186
|
+
crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua,sha256=gvDTseg5vo-YNVvftip6f_unCdM7TaM_Btc9sj-5AOw,896
|
|
174
187
|
crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-dgXAdci6Y2M,312
|
|
175
|
-
crawlee/storage_clients/_sql/_client_mixin.py,sha256=
|
|
176
|
-
crawlee/storage_clients/_sql/_dataset_client.py,sha256=
|
|
177
|
-
crawlee/storage_clients/_sql/_db_models.py,sha256=
|
|
178
|
-
crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=
|
|
179
|
-
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=
|
|
180
|
-
crawlee/storage_clients/_sql/_storage_client.py,sha256=
|
|
188
|
+
crawlee/storage_clients/_sql/_client_mixin.py,sha256=IryX6b14IiJ1pGPSCTBd36qpKLEISSnQ8H0bv9ROi0Q,15960
|
|
189
|
+
crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
|
|
190
|
+
crawlee/storage_clients/_sql/_db_models.py,sha256=KzA-R_L6zv9gqQg7B27mF-fERNJuMUEnewV9iofmTnI,9812
|
|
191
|
+
crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
|
|
192
|
+
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
|
|
193
|
+
crawlee/storage_clients/_sql/_storage_client.py,sha256=ITtMpwfotIW4SZjO4rycB5wfMKaqTAJgMvzcUZxckrk,10905
|
|
181
194
|
crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
195
|
crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
|
|
183
|
-
crawlee/storages/_base.py,sha256=
|
|
184
|
-
crawlee/storages/_dataset.py,sha256=
|
|
185
|
-
crawlee/storages/_key_value_store.py,sha256=
|
|
186
|
-
crawlee/storages/_request_queue.py,sha256=
|
|
187
|
-
crawlee/storages/_storage_instance_manager.py,sha256=
|
|
196
|
+
crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
|
|
197
|
+
crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
|
|
198
|
+
crawlee/storages/_key_value_store.py,sha256=xdkMJYdH3zXzwB3jtkijq-YkMlwBtfXxDFIUlpvpXAE,10298
|
|
199
|
+
crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
|
|
200
|
+
crawlee/storages/_storage_instance_manager.py,sha256=XX8qcQVcHwy9s-TO8--XdMXuHZfKt0WnTpLQF4YVxjA,10345
|
|
201
|
+
crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
|
|
188
202
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
189
|
-
crawlee-1.
|
|
190
|
-
crawlee-1.
|
|
191
|
-
crawlee-1.
|
|
192
|
-
crawlee-1.
|
|
193
|
-
crawlee-1.
|
|
203
|
+
crawlee-1.3.1b3.dist-info/METADATA,sha256=6Kn8rGr2XFObvOmIAZbeyBlN6WfGjiZnrDwf7yTcz3A,29526
|
|
204
|
+
crawlee-1.3.1b3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
205
|
+
crawlee-1.3.1b3.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
206
|
+
crawlee-1.3.1b3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
207
|
+
crawlee-1.3.1b3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|