crawlee 0.6.13b43__py3-none-any.whl → 1.1.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- crawlee/_request.py +32 -21
- crawlee/_service_locator.py +4 -4
- crawlee/_types.py +67 -24
- crawlee/_utils/raise_if_too_many_kwargs.py +12 -0
- crawlee/_utils/recoverable_state.py +32 -8
- crawlee/_utils/recurring_task.py +15 -0
- crawlee/_utils/robots.py +17 -5
- crawlee/_utils/sitemap.py +1 -1
- crawlee/_utils/urls.py +9 -2
- crawlee/browsers/_browser_pool.py +4 -1
- crawlee/browsers/_playwright_browser_controller.py +21 -15
- crawlee/browsers/_playwright_browser_plugin.py +17 -3
- crawlee/browsers/_types.py +1 -1
- crawlee/configuration.py +3 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +38 -14
- crawlee/crawlers/_basic/_basic_crawler.py +51 -14
- crawlee/crawlers/_playwright/_playwright_crawler.py +16 -4
- crawlee/events/_event_manager.py +3 -1
- crawlee/fingerprint_suite/_header_generator.py +2 -2
- crawlee/otel/crawler_instrumentor.py +3 -3
- crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
- crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
- crawlee/request_loaders/_sitemap_request_loader.py +22 -4
- crawlee/sessions/_session_pool.py +1 -1
- crawlee/statistics/_error_snapshotter.py +1 -1
- crawlee/statistics/_models.py +32 -1
- crawlee/statistics/_statistics.py +24 -33
- crawlee/storage_clients/__init__.py +16 -0
- crawlee/storage_clients/_base/_storage_client.py +5 -4
- crawlee/storage_clients/_file_system/_dataset_client.py +4 -5
- crawlee/storage_clients/_file_system/_key_value_store_client.py +4 -5
- crawlee/storage_clients/_file_system/_request_queue_client.py +28 -12
- crawlee/storage_clients/_file_system/_storage_client.py +2 -2
- crawlee/storage_clients/_memory/_dataset_client.py +4 -5
- crawlee/storage_clients/_memory/_key_value_store_client.py +4 -5
- crawlee/storage_clients/_memory/_request_queue_client.py +4 -5
- crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- crawlee/storage_clients/_redis/py.typed +0 -0
- crawlee/storage_clients/_sql/__init__.py +6 -0
- crawlee/storage_clients/_sql/_client_mixin.py +385 -0
- crawlee/storage_clients/_sql/_dataset_client.py +310 -0
- crawlee/storage_clients/_sql/_db_models.py +268 -0
- crawlee/storage_clients/_sql/_key_value_store_client.py +300 -0
- crawlee/storage_clients/_sql/_request_queue_client.py +720 -0
- crawlee/storage_clients/_sql/_storage_client.py +291 -0
- crawlee/storage_clients/_sql/py.typed +0 -0
- crawlee/storage_clients/models.py +10 -10
- crawlee/storages/_base.py +3 -1
- crawlee/storages/_dataset.py +5 -3
- crawlee/storages/_key_value_store.py +11 -6
- crawlee/storages/_request_queue.py +5 -3
- crawlee/storages/_storage_instance_manager.py +54 -68
- crawlee/storages/_utils.py +11 -0
- {crawlee-0.6.13b43.dist-info → crawlee-1.1.1b1.dist-info}/METADATA +16 -5
- {crawlee-0.6.13b43.dist-info → crawlee-1.1.1b1.dist-info}/RECORD +69 -47
- {crawlee-0.6.13b43.dist-info → crawlee-1.1.1b1.dist-info}/WHEEL +0 -0
- {crawlee-0.6.13b43.dist-info → crawlee-1.1.1b1.dist-info}/entry_points.txt +0 -0
- {crawlee-0.6.13b43.dist-info → crawlee-1.1.1b1.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,45 +5,62 @@ from collections.abc import Coroutine, Hashable
|
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
6
|
from typing import TYPE_CHECKING, TypeVar
|
|
7
7
|
|
|
8
|
+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
8
9
|
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
from crawlee.storage_clients import StorageClient
|
|
11
|
+
from ._utils import validate_storage_name
|
|
12
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
13
14
|
from ._base import Storage
|
|
14
15
|
|
|
15
16
|
T = TypeVar('T', bound='Storage')
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
@dataclass
|
|
19
|
-
class
|
|
20
|
-
"""Cache for
|
|
21
|
-
|
|
22
|
-
Example:
|
|
23
|
-
Storage=Dataset, id='123', additional_cache_key="some_path" will be located in
|
|
24
|
-
storage = by_id[Dataset]['123'][some_path]
|
|
25
|
-
"""
|
|
20
|
+
class _StorageCache:
|
|
21
|
+
"""Cache for storage instances."""
|
|
26
22
|
|
|
27
23
|
by_id: defaultdict[type[Storage], defaultdict[str, defaultdict[Hashable, Storage]]] = field(
|
|
28
24
|
default_factory=lambda: defaultdict(lambda: defaultdict(lambda: defaultdict()))
|
|
29
25
|
)
|
|
30
|
-
"""Cache for storage instances by ID
|
|
26
|
+
"""Cache for storage instances by ID. Example: by_id[Dataset]['some_id']['some_additional_cache_key']."""
|
|
31
27
|
|
|
32
28
|
by_name: defaultdict[type[Storage], defaultdict[str, defaultdict[Hashable, Storage]]] = field(
|
|
33
29
|
default_factory=lambda: defaultdict(lambda: defaultdict(lambda: defaultdict()))
|
|
34
30
|
)
|
|
35
|
-
"""Cache for storage instances by name
|
|
31
|
+
"""Cache for storage instances by name. Example: by_name[Dataset]['some_name']['some_additional_cache_key']"""
|
|
36
32
|
|
|
37
33
|
by_alias: defaultdict[type[Storage], defaultdict[str, defaultdict[Hashable, Storage]]] = field(
|
|
38
34
|
default_factory=lambda: defaultdict(lambda: defaultdict(lambda: defaultdict()))
|
|
39
35
|
)
|
|
40
|
-
"""Cache for storage instances by alias
|
|
36
|
+
"""Cache for storage instances by alias. Example: by_alias[Dataset]['some_alias']['some_additional_cache_key']"""
|
|
37
|
+
|
|
38
|
+
def remove_from_cache(self, storage_instance: Storage) -> None:
|
|
39
|
+
"""Remove a storage instance from the cache.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
storage_instance: The storage instance to remove.
|
|
43
|
+
"""
|
|
44
|
+
storage_type = type(storage_instance)
|
|
45
|
+
|
|
46
|
+
# Remove from ID cache
|
|
47
|
+
for additional_key in self.by_id[storage_type][storage_instance.id]:
|
|
48
|
+
del self.by_id[storage_type][storage_instance.id][additional_key]
|
|
49
|
+
break
|
|
41
50
|
|
|
51
|
+
# Remove from name cache or alias cache. It can never be in both.
|
|
52
|
+
if storage_instance.name is not None:
|
|
53
|
+
for additional_key in self.by_name[storage_type][storage_instance.name]:
|
|
54
|
+
del self.by_name[storage_type][storage_instance.name][additional_key]
|
|
55
|
+
break
|
|
56
|
+
else:
|
|
57
|
+
for alias_key in self.by_alias[storage_type]:
|
|
58
|
+
for additional_key in self.by_alias[storage_type][alias_key]:
|
|
59
|
+
del self.by_alias[storage_type][alias_key][additional_key]
|
|
60
|
+
break
|
|
42
61
|
|
|
43
|
-
StorageClientType = DatasetClient | KeyValueStoreClient | RequestQueueClient
|
|
44
|
-
"""Type alias for the storage client types."""
|
|
45
62
|
|
|
46
|
-
ClientOpenerCoro = Coroutine[None, None,
|
|
63
|
+
ClientOpenerCoro = Coroutine[None, None, DatasetClient | KeyValueStoreClient | RequestQueueClient]
|
|
47
64
|
"""Type alias for the client opener function."""
|
|
48
65
|
|
|
49
66
|
|
|
@@ -58,7 +75,7 @@ class StorageInstanceManager:
|
|
|
58
75
|
"""Reserved alias for default unnamed storage."""
|
|
59
76
|
|
|
60
77
|
def __init__(self) -> None:
|
|
61
|
-
self.
|
|
78
|
+
self._cache: _StorageCache = _StorageCache()
|
|
62
79
|
|
|
63
80
|
async def open_storage_instance(
|
|
64
81
|
self,
|
|
@@ -67,20 +84,20 @@ class StorageInstanceManager:
|
|
|
67
84
|
id: str | None,
|
|
68
85
|
name: str | None,
|
|
69
86
|
alias: str | None,
|
|
70
|
-
storage_client_type: type[StorageClient],
|
|
71
87
|
client_opener_coro: ClientOpenerCoro,
|
|
72
|
-
|
|
88
|
+
storage_client_cache_key: Hashable = '',
|
|
73
89
|
) -> T:
|
|
74
90
|
"""Open a storage instance with caching support.
|
|
75
91
|
|
|
76
92
|
Args:
|
|
77
93
|
cls: The storage class to instantiate.
|
|
78
94
|
id: Storage ID.
|
|
79
|
-
name: Storage name. (global scope, persists across runs).
|
|
95
|
+
name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
96
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
97
|
+
(e.g. "my-value-1").
|
|
80
98
|
alias: Storage alias (run scope, creates unnamed storage).
|
|
81
|
-
storage_client_type: Type of storage client to use.
|
|
82
99
|
client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
|
|
83
|
-
|
|
100
|
+
storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
|
|
84
101
|
|
|
85
102
|
Returns:
|
|
86
103
|
The storage instance.
|
|
@@ -95,60 +112,48 @@ class StorageInstanceManager:
|
|
|
95
112
|
)
|
|
96
113
|
|
|
97
114
|
# Validate input parameters.
|
|
98
|
-
|
|
99
|
-
if specified_params > 1:
|
|
100
|
-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
|
|
115
|
+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
|
|
101
116
|
|
|
102
117
|
# Auto-set alias='default' when no parameters are specified.
|
|
103
118
|
# Default unnamed storage is equal to alias=default unnamed storage.
|
|
104
|
-
if
|
|
119
|
+
if not any([name, alias, id]):
|
|
105
120
|
alias = self._DEFAULT_STORAGE_ALIAS
|
|
106
121
|
|
|
107
122
|
# Check cache
|
|
108
|
-
if id is not None and (
|
|
109
|
-
cached_instance := self._cache_by_storage_client[storage_client_type]
|
|
110
|
-
.by_id[cls][id]
|
|
111
|
-
.get(additional_cache_key)
|
|
112
|
-
):
|
|
123
|
+
if id is not None and (cached_instance := self._cache.by_id[cls][id].get(storage_client_cache_key)):
|
|
113
124
|
if isinstance(cached_instance, cls):
|
|
114
125
|
return cached_instance
|
|
115
126
|
raise RuntimeError('Cached instance type mismatch.')
|
|
116
127
|
|
|
117
|
-
if name is not None and (
|
|
118
|
-
cached_instance := self._cache_by_storage_client[storage_client_type]
|
|
119
|
-
.by_name[cls][name]
|
|
120
|
-
.get(additional_cache_key)
|
|
121
|
-
):
|
|
128
|
+
if name is not None and (cached_instance := self._cache.by_name[cls][name].get(storage_client_cache_key)):
|
|
122
129
|
if isinstance(cached_instance, cls):
|
|
123
130
|
return cached_instance
|
|
124
131
|
raise RuntimeError('Cached instance type mismatch.')
|
|
125
132
|
|
|
126
133
|
if alias is not None and (
|
|
127
|
-
cached_instance := self.
|
|
128
|
-
.by_alias[cls][alias]
|
|
129
|
-
.get(additional_cache_key)
|
|
134
|
+
cached_instance := self._cache.by_alias[cls][alias].get(storage_client_cache_key)
|
|
130
135
|
):
|
|
131
136
|
if isinstance(cached_instance, cls):
|
|
132
137
|
return cached_instance
|
|
133
138
|
raise RuntimeError('Cached instance type mismatch.')
|
|
134
139
|
|
|
135
140
|
# Check for conflicts between named and alias storages
|
|
136
|
-
if alias and (
|
|
137
|
-
self._cache_by_storage_client[storage_client_type].by_name[cls][alias].get(additional_cache_key)
|
|
138
|
-
):
|
|
141
|
+
if alias and (self._cache.by_name[cls][alias].get(storage_client_cache_key)):
|
|
139
142
|
raise ValueError(
|
|
140
143
|
f'Cannot create alias storage "{alias}" because a named storage with the same name already exists. '
|
|
141
144
|
f'Use a different alias or drop the existing named storage first.'
|
|
142
145
|
)
|
|
143
146
|
|
|
144
|
-
if name and (
|
|
145
|
-
self._cache_by_storage_client[storage_client_type].by_alias[cls][name].get(additional_cache_key)
|
|
146
|
-
):
|
|
147
|
+
if name and (self._cache.by_alias[cls][name].get(storage_client_cache_key)):
|
|
147
148
|
raise ValueError(
|
|
148
149
|
f'Cannot create named storage "{name}" because an alias storage with the same name already exists. '
|
|
149
150
|
f'Use a different name or drop the existing alias storage first.'
|
|
150
151
|
)
|
|
151
152
|
|
|
153
|
+
# Validate storage name
|
|
154
|
+
if name is not None:
|
|
155
|
+
validate_storage_name(name)
|
|
156
|
+
|
|
152
157
|
# Create new instance
|
|
153
158
|
client: KeyValueStoreClient | DatasetClient | RequestQueueClient
|
|
154
159
|
client = await client_opener_coro
|
|
@@ -160,17 +165,15 @@ class StorageInstanceManager:
|
|
|
160
165
|
|
|
161
166
|
# Cache the instance.
|
|
162
167
|
# Always cache by id.
|
|
163
|
-
self.
|
|
168
|
+
self._cache.by_id[cls][instance.id][storage_client_cache_key] = instance
|
|
164
169
|
|
|
165
170
|
# Cache named storage.
|
|
166
171
|
if instance_name is not None:
|
|
167
|
-
self.
|
|
168
|
-
instance
|
|
169
|
-
)
|
|
172
|
+
self._cache.by_name[cls][instance_name][storage_client_cache_key] = instance
|
|
170
173
|
|
|
171
174
|
# Cache unnamed storage.
|
|
172
175
|
if alias is not None:
|
|
173
|
-
self.
|
|
176
|
+
self._cache.by_alias[cls][alias][storage_client_cache_key] = instance
|
|
174
177
|
|
|
175
178
|
return instance
|
|
176
179
|
|
|
@@ -185,25 +188,8 @@ class StorageInstanceManager:
|
|
|
185
188
|
Args:
|
|
186
189
|
storage_instance: The storage instance to remove.
|
|
187
190
|
"""
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
for storage_client_cache in self._cache_by_storage_client.values():
|
|
191
|
-
# Remove from ID cache
|
|
192
|
-
for additional_key in storage_client_cache.by_id[storage_type][storage_instance.id]:
|
|
193
|
-
del storage_client_cache.by_id[storage_type][storage_instance.id][additional_key]
|
|
194
|
-
break
|
|
195
|
-
|
|
196
|
-
# Remove from name cache or alias cache. It can never be in both.
|
|
197
|
-
if storage_instance.name is not None:
|
|
198
|
-
for additional_key in storage_client_cache.by_name[storage_type][storage_instance.name]:
|
|
199
|
-
del storage_client_cache.by_name[storage_type][storage_instance.name][additional_key]
|
|
200
|
-
break
|
|
201
|
-
else:
|
|
202
|
-
for alias_key in storage_client_cache.by_alias[storage_type]:
|
|
203
|
-
for additional_key in storage_client_cache.by_alias[storage_type][alias_key]:
|
|
204
|
-
del storage_client_cache.by_alias[storage_type][alias_key][additional_key]
|
|
205
|
-
break
|
|
191
|
+
self._cache.remove_from_cache(storage_instance)
|
|
206
192
|
|
|
207
193
|
def clear_cache(self) -> None:
|
|
208
194
|
"""Clear all cached storage instances."""
|
|
209
|
-
self.
|
|
195
|
+
self._cache = _StorageCache()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def validate_storage_name(name: str | None) -> None:
|
|
7
|
+
if name and not NAME_REGEX.match(name):
|
|
8
|
+
raise ValueError(
|
|
9
|
+
f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through'
|
|
10
|
+
'"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
|
|
11
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 0.6.13b43
|
|
3
|
+
Version: 1.1.1b1
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -223,15 +223,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
223
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
226
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
228
|
Requires-Python: >=3.10
|
|
228
229
|
Requires-Dist: cachetools>=5.5.0
|
|
229
230
|
Requires-Dist: colorama>=0.4.0
|
|
230
|
-
Requires-Dist: impit>=0.
|
|
231
|
+
Requires-Dist: impit>=0.8.0
|
|
231
232
|
Requires-Dist: more-itertools>=10.2.0
|
|
232
233
|
Requires-Dist: protego>=0.5.0
|
|
233
234
|
Requires-Dist: psutil>=6.0.0
|
|
234
|
-
Requires-Dist: pydantic-settings
|
|
235
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
235
236
|
Requires-Dist: pydantic>=2.11.0
|
|
236
237
|
Requires-Dist: pyee>=9.0.0
|
|
237
238
|
Requires-Dist: tldextract>=5.1.0
|
|
@@ -244,7 +245,9 @@ Requires-Dist: jaro-winkler>=2.0.3; extra == 'adaptive-crawler'
|
|
|
244
245
|
Requires-Dist: playwright>=1.27.0; extra == 'adaptive-crawler'
|
|
245
246
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
|
|
246
247
|
Provides-Extra: all
|
|
248
|
+
Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
|
|
247
249
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
|
|
250
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
|
|
248
251
|
Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
|
|
249
252
|
Requires-Dist: browserforge>=1.2.3; extra == 'all'
|
|
250
253
|
Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
|
|
@@ -261,8 +264,10 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
261
264
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
262
265
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
263
266
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
267
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
264
268
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
265
269
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
270
|
+
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
266
271
|
Requires-Dist: typer>=0.12.0; extra == 'all'
|
|
267
272
|
Requires-Dist: wrapt>=1.17.0; extra == 'all'
|
|
268
273
|
Provides-Extra: beautifulsoup
|
|
@@ -293,6 +298,14 @@ Provides-Extra: playwright
|
|
|
293
298
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
294
299
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
295
300
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
301
|
+
Provides-Extra: redis
|
|
302
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
303
|
+
Provides-Extra: sql-postgres
|
|
304
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
|
|
305
|
+
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
306
|
+
Provides-Extra: sql-sqlite
|
|
307
|
+
Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
|
|
308
|
+
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-sqlite'
|
|
296
309
|
Description-Content-Type: text/markdown
|
|
297
310
|
|
|
298
311
|
<h1 align="center">
|
|
@@ -327,8 +340,6 @@ Description-Content-Type: text/markdown
|
|
|
327
340
|
|
|
328
341
|
Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
|
|
329
342
|
|
|
330
|
-
> 🚀 Crawlee for Python is open to early adopters!
|
|
331
|
-
|
|
332
343
|
Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.
|
|
333
344
|
|
|
334
345
|
> 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
|
|
@@ -3,10 +3,10 @@ crawlee/_browserforge_workaround.py,sha256=FYQaqpqfZGYkx-A8evF9nsHnj4KK4IMtjNq3L
|
|
|
3
3
|
crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
|
|
4
4
|
crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
|
|
5
5
|
crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
|
|
6
|
-
crawlee/_request.py,sha256=
|
|
7
|
-
crawlee/_service_locator.py,sha256=
|
|
8
|
-
crawlee/_types.py,sha256=
|
|
9
|
-
crawlee/configuration.py,sha256=
|
|
6
|
+
crawlee/_request.py,sha256=fnUofyFMV3HJwfcLjYr2BCZ5K9mEwl6vZd8Pr309wCE,16458
|
|
7
|
+
crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
|
|
8
|
+
crawlee/_types.py,sha256=DAmfSv5W1dt3nJhJ8z-02gDaE06fdEizNKUlHpsd2_A,29129
|
|
9
|
+
crawlee/configuration.py,sha256=DWS2z1FC6Ua93W2tStK3R1ZKZbZjVQYWGiGFbZFaRtA,8064
|
|
10
10
|
crawlee/errors.py,sha256=RhFNA_uT615nVBHf9TylpX5YWwtDuHUUEV8LPT4CYa4,3878
|
|
11
11
|
crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
|
|
12
12
|
crawlee/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -28,43 +28,44 @@ crawlee/_utils/file.py,sha256=sGcGsV9zOJ-fKsMuYV_xwTRkOqoEiHVCqPpbprho5t4,5282
|
|
|
28
28
|
crawlee/_utils/globs.py,sha256=SGX2J35Kqw7yZnSS5c4mLz9UD8c77PF0IoCgXQM5uiw,5310
|
|
29
29
|
crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZkY,902
|
|
30
30
|
crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
|
|
31
|
-
crawlee/_utils/
|
|
32
|
-
crawlee/_utils/
|
|
31
|
+
crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
|
|
32
|
+
crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
|
|
33
|
+
crawlee/_utils/recurring_task.py,sha256=sQMiURuDXbwwfAcIXK8V4NXncSxIBxsqN1cZWX7DLyg,2128
|
|
33
34
|
crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
|
|
34
|
-
crawlee/_utils/robots.py,sha256=
|
|
35
|
-
crawlee/_utils/sitemap.py,sha256=
|
|
35
|
+
crawlee/_utils/robots.py,sha256=DBU5ni4Y-p7bIKMbLd_ws8wgHSFc4K8zPVF3JvH_pkw,4661
|
|
36
|
+
crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
|
|
36
37
|
crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
|
|
37
38
|
crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
|
|
38
39
|
crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
|
|
39
|
-
crawlee/_utils/urls.py,sha256=
|
|
40
|
+
crawlee/_utils/urls.py,sha256=fEYXJxBT02f-DIYKF_h7PdaKAShfXBs99-dHDjDX03A,1725
|
|
40
41
|
crawlee/_utils/wait.py,sha256=RfiXhp5VUBxOEtEMtru7_jNfKDr2BJCcFge5qGg2gxk,2848
|
|
41
42
|
crawlee/_utils/web.py,sha256=nnKhg8pUSWz0RY64Qd-_GPNBX1fWI2hXS-gzcfQ-rig,364
|
|
42
43
|
crawlee/browsers/__init__.py,sha256=TghkrNSbI_k87UgVBlgNNcEm8Ot05pSLEAPRSv6YsUs,1064
|
|
43
44
|
crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6kcV-g3AXE0I,3068
|
|
44
45
|
crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
|
|
45
|
-
crawlee/browsers/_browser_pool.py,sha256=
|
|
46
|
+
crawlee/browsers/_browser_pool.py,sha256=n1GTVS220yxo-aMaKDVfQO571_AqEV5pMawWbr0zUHk,15832
|
|
46
47
|
crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
|
|
47
|
-
crawlee/browsers/_playwright_browser_controller.py,sha256=
|
|
48
|
-
crawlee/browsers/_playwright_browser_plugin.py,sha256=
|
|
49
|
-
crawlee/browsers/_types.py,sha256=
|
|
48
|
+
crawlee/browsers/_playwright_browser_controller.py,sha256=W6G5MjZpg9IcZoHts6lTML5VxSEpBTgzx5qeQ8XDigY,10216
|
|
49
|
+
crawlee/browsers/_playwright_browser_plugin.py,sha256=A1qa1nJhTSKNP9uOiO-oGzR7VGlnOMo0A0YNedccb2A,8869
|
|
50
|
+
crawlee/browsers/_types.py,sha256=ZnDgJHeQNSd_s_mXhgQnAf09c2smuiXC31VbawHHXUM,436
|
|
50
51
|
crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
52
|
crawlee/crawlers/__init__.py,sha256=9VmFahav3rjE-2Bxa5PAhBgkYXP0k5SSAEpdG2xMZ7c,2340
|
|
52
53
|
crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
|
|
53
54
|
crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
55
|
crawlee/crawlers/_abstract_http/__init__.py,sha256=QCjn8x7jpo8FwEeSRw10TVj_0La2v9mLEiQWdk2RoTw,273
|
|
55
|
-
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
|
|
56
|
+
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=DEiErZi7j2FHMgyVELPy09GyHo5Gx4UDpuiN6D3sGNk,11553
|
|
56
57
|
crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
|
|
57
58
|
crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
|
|
58
59
|
crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
60
|
crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=LREq9WR9BKsE8S8lSsEhlCoNjQaLhlJ9yo8y_6a8o4c,1072
|
|
60
|
-
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=
|
|
61
|
+
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=nPFB9Q_3xQDJprb24NIQO53gf56J8wXjbM9C-58iiZ8,22862
|
|
61
62
|
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
|
|
62
63
|
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=9FlHIUC05IzUhJsVldQvpnDnj1jk8GJpqC98mPLN_fw,10431
|
|
63
64
|
crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
|
|
64
65
|
crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
|
|
65
66
|
crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
|
|
66
67
|
crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
|
|
67
|
-
crawlee/crawlers/_basic/_basic_crawler.py,sha256
|
|
68
|
+
crawlee/crawlers/_basic/_basic_crawler.py,sha256=-lo9yMjPkT8fU4ndOMaeEiwWHgu-DHw9Fny6f1kPPkk,73004
|
|
68
69
|
crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
|
|
69
70
|
crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
|
|
70
71
|
crawlee/crawlers/_basic/_logging_utils.py,sha256=jp5mEwSq5a_BgzUhNPJ9WrIDcoIeYGbeHstcRqCcP0s,3093
|
|
@@ -84,14 +85,14 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
|
|
|
84
85
|
crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
|
|
85
86
|
crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
|
|
86
87
|
crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
|
|
87
|
-
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
|
|
88
|
+
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=53iytj5LZHw19QOYqYNlZL4ApPlhbWn9Ds-DPTKANhQ,24158
|
|
88
89
|
crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
|
|
89
90
|
crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=Nfm69dqX85k68jN1p3ljZWbn8egqDWPIPRykXyXsoQs,3977
|
|
90
91
|
crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=fEI2laWhmJdWiGoMF5JBLBsim9NtENfagZt6FFd2Rgo,1387
|
|
91
92
|
crawlee/crawlers/_playwright/_types.py,sha256=hMKA9K9gjzQuwwbnmmfJsQrwR-kq235jH-WBXWeqkGo,2174
|
|
92
93
|
crawlee/crawlers/_playwright/_utils.py,sha256=FQ_-LYo7DGHsNHRrTtWt3mC06VzQvQ2wkGqpA2wBzYU,3441
|
|
93
94
|
crawlee/events/__init__.py,sha256=YMgOXKI0LsXfImKQy06PZ2Vdjy-uD_-acioagHft1do,577
|
|
94
|
-
crawlee/events/_event_manager.py,sha256=
|
|
95
|
+
crawlee/events/_event_manager.py,sha256=wjZTYIKBI8daKUkOVxUrbPHuU8LnFpUtWStdkts7r3U,11588
|
|
95
96
|
crawlee/events/_local_event_manager.py,sha256=CSiMJ6a_BwX0PPwtffEOtHm21dmALJz1zifo3AuMAk8,3708
|
|
96
97
|
crawlee/events/_types.py,sha256=MKsI014OOKKhjPJRrvWYrezIDGoLjGGhWXrkqYw26Ns,3313
|
|
97
98
|
crawlee/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -99,7 +100,7 @@ crawlee/fingerprint_suite/__init__.py,sha256=noY9qw80B0seZGj_B3bBvCDIDk2YWOSN-ll
|
|
|
99
100
|
crawlee/fingerprint_suite/_browserforge_adapter.py,sha256=bsGebBjjHawM-FiINgqkZW5I9a9Fnv3SGwdKgaVWiRI,11934
|
|
100
101
|
crawlee/fingerprint_suite/_consts.py,sha256=SgykWfxD-pYvOpRp_ooQ4ZTPS0sQ2b3wDyyCjwU_8-w,258
|
|
101
102
|
crawlee/fingerprint_suite/_fingerprint_generator.py,sha256=Di4sDk1qioiFGx4ZcoVyHhtFHF8JXDhxQt8ZPug99k8,730
|
|
102
|
-
crawlee/fingerprint_suite/_header_generator.py,sha256=
|
|
103
|
+
crawlee/fingerprint_suite/_header_generator.py,sha256=9X9FbStehXdw-FZc_D0y-nLk1BUHXVYFxs7fv4dl9zU,3513
|
|
103
104
|
crawlee/fingerprint_suite/_types.py,sha256=7n2LJTiL2XvL-H4G-Y26Uoq5-ZXzH07Dq4o50uhMa-w,2423
|
|
104
105
|
crawlee/fingerprint_suite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
106
|
crawlee/http_clients/__init__.py,sha256=OQFhR9F8BrdlIaS5aRS7hvgQ0tKJPQ8FiyYPualyQcU,890
|
|
@@ -108,7 +109,7 @@ crawlee/http_clients/_curl_impersonate.py,sha256=EBaoJZzBDgsfhLRgZwu-mFzKqFJ8BDF
|
|
|
108
109
|
crawlee/http_clients/_httpx.py,sha256=pMVyL5P1UU49svqRr7hqcxInIuT4MT3R7UBmkJ07xMc,11780
|
|
109
110
|
crawlee/http_clients/_impit.py,sha256=nGSe5NuZmsJjQzsfUGIZdbZFC48_Gpu8aU3I8kpq6O4,8833
|
|
110
111
|
crawlee/otel/__init__.py,sha256=g5y1tJfpDKfcIPGcKBztMgP6sptum-vJrtemeR8_-co,108
|
|
111
|
-
crawlee/otel/crawler_instrumentor.py,sha256
|
|
112
|
+
crawlee/otel/crawler_instrumentor.py,sha256=yC367A1NnAdhOanvym2zfiu4H4BskUslrib0GcHiVJs,6865
|
|
112
113
|
crawlee/project_template/cookiecutter.json,sha256=dJeYxLx5QEy2DCzXsDpqJQJlIJ3nw42lJrclZFoSZ8w,622
|
|
113
114
|
crawlee/project_template/hooks/post_gen_project.py,sha256=EBNgb_-eodDxaYJljYYznZGASnduxJ54RDO_b4ofm4M,1296
|
|
114
115
|
crawlee/project_template/hooks/pre_gen_project.py,sha256=AqvHJRTOTQzKFX10Zt8uKt8UFHczDJ1ogPPHQVdY2ZU,1175
|
|
@@ -125,8 +126,8 @@ crawlee/project_template/templates/routes_playwright_camoufox.py,sha256=XtXWbPZ4
|
|
|
125
126
|
crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore,sha256=PCDXvENlrMmYleuQULduBiw8ipXmE_iYJtCmeZVuz1I,6
|
|
126
127
|
crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile,sha256=NRSdHgEnCjBWE0lU3y-qHNEUJg_OH3zhoo0fPzkIs58,4138
|
|
127
128
|
crawlee/project_template/{{cookiecutter.project_name}}/README.md,sha256=kEwhjWKqnSbg3gtGsuEiqWFGoqMdf4W7TZ0Lu0omwHk,1753
|
|
128
|
-
crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml,sha256=
|
|
129
|
-
crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt,sha256=
|
|
129
|
+
crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml,sha256=rfzarAQB8H93clog9xnqVThCIR7ltKqasMHX0-9PIMw,970
|
|
130
|
+
crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt,sha256=HTiM50HxLguBgqKKLFR9DjsfrWgo13n8cAAJ9xhEmw8,460
|
|
130
131
|
crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
132
|
crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py,sha256=7YQVjE3HdCnoU055kLcKUcqXvbp3C2rtAY2TSJaItts,867
|
|
132
133
|
crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py,sha256=mb3Wo_FpXGdIzWABJ0Y6CE-eXKxqSM_k__tnYYM1rF4,55
|
|
@@ -136,49 +137,70 @@ crawlee/request_loaders/_request_list.py,sha256=SIalHBMuFanE5GLnFocI0QCppWUiJQjr
|
|
|
136
137
|
crawlee/request_loaders/_request_loader.py,sha256=2Bg-AWWkIV1W-Dwjqo91dPY8nmc7H3teQy7d6OSgliQ,3620
|
|
137
138
|
crawlee/request_loaders/_request_manager.py,sha256=qFizyJuV2meIb9iiPfuii7ciuERMrp4SldAufiH46dc,3000
|
|
138
139
|
crawlee/request_loaders/_request_manager_tandem.py,sha256=lv-s94KPsoQAqx1KaXFch96ejhO147uOflF3UK5ORTk,4058
|
|
139
|
-
crawlee/request_loaders/_sitemap_request_loader.py,sha256=
|
|
140
|
+
crawlee/request_loaders/_sitemap_request_loader.py,sha256=W1_k_Szrtk0iE2LJBkHrrFeDtcKReXzr3DG32EnQaQE,16565
|
|
140
141
|
crawlee/sessions/__init__.py,sha256=dJdelbL-6MK5sW4SMU4QrjFbb9kRZ9uRnN-VS3R5-8Y,190
|
|
141
142
|
crawlee/sessions/_cookies.py,sha256=ihYbmpXfCzClzXDT7M2wefB_3KVzcMUdIzTZo6uGk6Y,9356
|
|
142
143
|
crawlee/sessions/_models.py,sha256=JMRQgDUP30XUdZ32isncHowOsXvK9jC_m9QYegbBI1E,2916
|
|
143
144
|
crawlee/sessions/_session.py,sha256=cMXVf6QjfGJDgdLUB4MhUP-zTm3pEDHRs-W5SBA4JFI,9638
|
|
144
|
-
crawlee/sessions/_session_pool.py,sha256
|
|
145
|
+
crawlee/sessions/_session_pool.py,sha256=_FYTXRANDiREK09DSoHIu-536sSUj9wazbrcl6iKdFM,9631
|
|
145
146
|
crawlee/sessions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
146
147
|
crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY0,154
|
|
147
|
-
crawlee/statistics/_error_snapshotter.py,sha256=
|
|
148
|
+
crawlee/statistics/_error_snapshotter.py,sha256=g-roZgkJ-glyStZL7gXrOhrpdZvZ686W9lR43uZjPao,3279
|
|
148
149
|
crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
|
|
149
|
-
crawlee/statistics/_models.py,sha256=
|
|
150
|
-
crawlee/statistics/_statistics.py,sha256=
|
|
151
|
-
crawlee/storage_clients/__init__.py,sha256=
|
|
152
|
-
crawlee/storage_clients/models.py,sha256=
|
|
150
|
+
crawlee/statistics/_models.py,sha256=n4sT35D4dqNPYREl8Q_YXANZtxaWC0HaZizobA4qK_c,6674
|
|
151
|
+
crawlee/statistics/_statistics.py,sha256=mSgnCnV7q2buJdyuXBxbUU9MQEUjxaLST_NO4ej3XRw,12341
|
|
152
|
+
crawlee/storage_clients/__init__.py,sha256=X3M6Z_WAOJ3M9I8JhGhJDnrtbCOmM27DpGAzgt87R2A,874
|
|
153
|
+
crawlee/storage_clients/models.py,sha256=gfW_kpSCOBuoTBIW0N7tb3FUv7BgD3keZADS7pyT4_I,6586
|
|
153
154
|
crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
154
155
|
crawlee/storage_clients/_base/__init__.py,sha256=-f0VIaGjw7Oo6HAgEK9ABtmKnnSRwzkA3WUQZMX5w0w,307
|
|
155
156
|
crawlee/storage_clients/_base/_dataset_client.py,sha256=0JBiXVXZlPzd2o3r2uc_skl6aYEer2-wcvOv3ZgIQls,3049
|
|
156
157
|
crawlee/storage_clients/_base/_key_value_store_client.py,sha256=eTZae1pIINnbs__FLvwvX8jTraEKMjdZuA69IrwLzfk,3409
|
|
157
158
|
crawlee/storage_clients/_base/_request_queue_client.py,sha256=cgM4yk6xJwgfzP-xaN9ApqJn32sh0FrSEPIdxN7kujw,4926
|
|
158
|
-
crawlee/storage_clients/_base/_storage_client.py,sha256=
|
|
159
|
+
crawlee/storage_clients/_base/_storage_client.py,sha256=RvmKCV1U9_KxyG7n8xhClm2vwD2SKChWIiBLk6cuqw0,3523
|
|
159
160
|
crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
160
161
|
crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
|
|
161
|
-
crawlee/storage_clients/_file_system/_dataset_client.py,sha256=
|
|
162
|
-
crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=
|
|
163
|
-
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=
|
|
164
|
-
crawlee/storage_clients/_file_system/_storage_client.py,sha256=
|
|
162
|
+
crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
|
|
163
|
+
crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
|
|
164
|
+
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=ETwy6eODf3dlBqy2RPM3nr2_oEm2ht37WpoTlFxn4A8,33970
|
|
165
|
+
crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
|
|
165
166
|
crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
167
|
crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
168
|
crawlee/storage_clients/_memory/__init__.py,sha256=WHyBhckxdw2k0epkM_B3ymNASebNTOCU_NrvfzUAn14,355
|
|
168
|
-
crawlee/storage_clients/_memory/_dataset_client.py,sha256=
|
|
169
|
-
crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=
|
|
170
|
-
crawlee/storage_clients/_memory/_request_queue_client.py,sha256=
|
|
169
|
+
crawlee/storage_clients/_memory/_dataset_client.py,sha256=wxuhyVnWyi9jUfI2eY4kSgD6vVje4gL2Bursto348Ps,8827
|
|
170
|
+
crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=rBXRggALe-0kBAe03sdLVkABhkEFmHqXRabR28IugUE,6472
|
|
171
|
+
crawlee/storage_clients/_memory/_request_queue_client.py,sha256=hPI78S1sOopVKFKDRW7ndkha7TVcJfwdd4onXgfb4Pk,13096
|
|
171
172
|
crawlee/storage_clients/_memory/_storage_client.py,sha256=EyiH-MgM_6iBItjmy2SkWAdjVbviacnxr3la-yiGfIw,2724
|
|
172
173
|
crawlee/storage_clients/_memory/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
|
+
crawlee/storage_clients/_redis/__init__.py,sha256=HrtfC0mRgz2dCNe8olCWjwe2NXnFFDPoXOuBIDBFlIg,328
|
|
175
|
+
crawlee/storage_clients/_redis/_client_mixin.py,sha256=DIPW4_En-w5z7tVbSUDa1jn5DFbLlG_6vW5naoeg_wY,12435
|
|
176
|
+
crawlee/storage_clients/_redis/_dataset_client.py,sha256=7D31bVcpmtyhRjZuzSvSKSMev8xoiX3J6f_NPc7wlUc,11562
|
|
177
|
+
crawlee/storage_clients/_redis/_key_value_store_client.py,sha256=EnR99uRJUOxqbELS-73vEDNw4WDJzF6-UG2ESHGrD6M,10394
|
|
178
|
+
crawlee/storage_clients/_redis/_request_queue_client.py,sha256=WNms5wHhDo7Jf2xG_8sV3KgtXEVDP-rBarOF-EUS92E,24958
|
|
179
|
+
crawlee/storage_clients/_redis/_storage_client.py,sha256=oeHLD1_vDDrgN6uUUT4nqGP-6z0YAM3_SnyxmFOvW58,5404
|
|
180
|
+
crawlee/storage_clients/_redis/_utils.py,sha256=fEdQ5b5Zex_bSXcKhXakfQBpXmo5wKnEK_MeeT-q6Bc,700
|
|
181
|
+
crawlee/storage_clients/_redis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua,sha256=gYtZfjXp-D-vvlRZBzf7J4OCP2_js_oNnzAkbPfK3Bg,1054
|
|
183
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua,sha256=66aJjIR9f6hQcmVhOA8Km_7PaduJFqOdsisieWDIn6E,1351
|
|
184
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua,sha256=coxBfdPMc0X9rpszMCvdZXEbQzrRc7qU4flhN3MKwjc,1123
|
|
185
|
+
crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua,sha256=gvDTseg5vo-YNVvftip6f_unCdM7TaM_Btc9sj-5AOw,896
|
|
186
|
+
crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-dgXAdci6Y2M,312
|
|
187
|
+
crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay5t9fTfPXXI_4dwHY,15988
|
|
188
|
+
crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
|
|
189
|
+
crawlee/storage_clients/_sql/_db_models.py,sha256=KzA-R_L6zv9gqQg7B27mF-fERNJuMUEnewV9iofmTnI,9812
|
|
190
|
+
crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
|
|
191
|
+
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
|
|
192
|
+
crawlee/storage_clients/_sql/_storage_client.py,sha256=okONooE_1Z5hxhJaXl7jEDXEryKqXlnFx6F1EuxyP2s,11317
|
|
193
|
+
crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
194
|
crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
|
|
174
|
-
crawlee/storages/_base.py,sha256=
|
|
175
|
-
crawlee/storages/_dataset.py,sha256=
|
|
176
|
-
crawlee/storages/_key_value_store.py,sha256=
|
|
177
|
-
crawlee/storages/_request_queue.py,sha256=
|
|
178
|
-
crawlee/storages/_storage_instance_manager.py,sha256=
|
|
195
|
+
crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
|
|
196
|
+
crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
|
|
197
|
+
crawlee/storages/_key_value_store.py,sha256=xdkMJYdH3zXzwB3jtkijq-YkMlwBtfXxDFIUlpvpXAE,10298
|
|
198
|
+
crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
|
|
199
|
+
crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
|
|
200
|
+
crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
|
|
179
201
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
|
-
crawlee-
|
|
181
|
-
crawlee-
|
|
182
|
-
crawlee-
|
|
183
|
-
crawlee-
|
|
184
|
-
crawlee-
|
|
202
|
+
crawlee-1.1.1b1.dist-info/METADATA,sha256=_N3gsCxqVz8gVU3wD6jT5ujI10qqDL3TbVlGRsrKMQY,29532
|
|
203
|
+
crawlee-1.1.1b1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
204
|
+
crawlee-1.1.1b1.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
205
|
+
crawlee-1.1.1b1.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
206
|
+
crawlee-1.1.1b1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|