crawlee 1.0.5b18__py3-none-any.whl → 1.2.2b24__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
- crawlee/__init__.py +2 -1
- crawlee/_browserforge_workaround.py +7 -3
- crawlee/_request.py +32 -13
- crawlee/_types.py +44 -5
- crawlee/_utils/context.py +3 -3
- crawlee/_utils/file.py +8 -1
- crawlee/_utils/globs.py +4 -4
- crawlee/_utils/recurring_task.py +12 -3
- crawlee/_utils/sitemap.py +12 -5
- crawlee/_utils/system.py +27 -11
- crawlee/_utils/time.py +41 -1
- crawlee/browsers/_browser_pool.py +1 -1
- crawlee/browsers/_playwright_browser.py +2 -1
- crawlee/crawlers/__init__.py +5 -1
- crawlee/crawlers/_abstract_http/__init__.py +2 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +53 -17
- crawlee/crawlers/_adaptive_playwright/__init__.py +5 -2
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +20 -49
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
- crawlee/crawlers/_basic/_basic_crawler.py +138 -124
- crawlee/crawlers/_basic/_context_utils.py +24 -0
- crawlee/crawlers/_basic/_logging_utils.py +23 -4
- crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py +2 -2
- crawlee/crawlers/_parsel/_parsel_crawler.py +2 -2
- crawlee/crawlers/_parsel/_parsel_parser.py +1 -1
- crawlee/crawlers/_playwright/_playwright_crawler.py +60 -22
- crawlee/crawlers/_playwright/_playwright_http_client.py +7 -1
- crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
- crawlee/crawlers/_playwright/_types.py +12 -2
- crawlee/errors.py +4 -0
- crawlee/events/_event_manager.py +12 -6
- crawlee/fingerprint_suite/_browserforge_adapter.py +1 -1
- crawlee/http_clients/_base.py +4 -0
- crawlee/http_clients/_curl_impersonate.py +68 -14
- crawlee/http_clients/_httpx.py +16 -6
- crawlee/http_clients/_impit.py +25 -10
- crawlee/otel/crawler_instrumentor.py +1 -3
- crawlee/request_loaders/_sitemap_request_loader.py +18 -5
- crawlee/router.py +13 -3
- crawlee/sessions/_cookies.py +13 -8
- crawlee/sessions/_models.py +3 -3
- crawlee/statistics/_models.py +51 -9
- crawlee/statistics/_statistics.py +2 -21
- crawlee/storage_clients/_base/_dataset_client.py +2 -2
- crawlee/storage_clients/_base/_key_value_store_client.py +2 -2
- crawlee/storage_clients/_file_system/_dataset_client.py +6 -5
- crawlee/storage_clients/_file_system/_key_value_store_client.py +7 -4
- crawlee/storage_clients/_file_system/_request_queue_client.py +5 -4
- crawlee/storage_clients/_redis/_client_mixin.py +1 -4
- crawlee/storage_clients/_redis/_dataset_client.py +6 -2
- crawlee/storage_clients/_redis/_key_value_store_client.py +3 -5
- crawlee/storage_clients/_redis/_request_queue_client.py +5 -8
- crawlee/storage_clients/_redis/_storage_client.py +12 -9
- crawlee/storage_clients/_redis/_utils.py +1 -1
- crawlee/storage_clients/_sql/_client_mixin.py +1 -1
- crawlee/storage_clients/_sql/_storage_client.py +0 -9
- crawlee/storage_clients/models.py +8 -3
- crawlee/storages/_storage_instance_manager.py +103 -44
- {crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/METADATA +10 -16
- {crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/RECORD +63 -62
- {crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/WHEEL +1 -1
- {crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/licenses/LICENSE +0 -0
**crawlee/storage_clients/_redis/_request_queue_client.py**

```diff
@@ -247,7 +247,6 @@ class RedisRequestQueueClient(RequestQueueClient, RedisClientMixin):
         *,
         forefront: bool = False,
     ) -> AddRequestsResponse:
-        # Mypy workaround
        if self._add_requests_script is None:
            raise RuntimeError('Scripts not loaded. Call _ensure_scripts_loaded() before using the client.')
 
@@ -264,8 +263,8 @@ class RedisRequestQueueClient(RequestQueueClient, RedisClientMixin):
             await await_redis_response(pipe.smismember(self._pending_set_key, unique_keys))
             await await_redis_response(pipe.smismember(self._handled_set_key, unique_keys))
         elif self._dedup_strategy == 'bloom':
-            await await_redis_response(pipe.bf().mexists(self._added_filter_key, *unique_keys))
-            await await_redis_response(pipe.bf().mexists(self._handled_filter_key, *unique_keys))
+            await await_redis_response(pipe.bf().mexists(self._added_filter_key, *unique_keys))
+            await await_redis_response(pipe.bf().mexists(self._handled_filter_key, *unique_keys))
 
         pipe_results = await pipe.execute()
 
@@ -353,7 +352,6 @@ class RedisRequestQueueClient(RequestQueueClient, RedisClientMixin):
         if self._pending_fetch_cache:
             return self._pending_fetch_cache.popleft()
 
-        # Mypy workaround
        if self._fetch_script is None:
            raise RuntimeError('Scripts not loaded. Call _ensure_scripts_loaded() before using the client.')
 
@@ -399,7 +397,7 @@ class RedisRequestQueueClient(RequestQueueClient, RedisClientMixin):
             await await_redis_response(pipe.sadd(self._handled_set_key, request.unique_key))
             await await_redis_response(pipe.srem(self._pending_set_key, request.unique_key))
         elif self._dedup_strategy == 'bloom':
-            await await_redis_response(pipe.bf().add(self._handled_filter_key, request.unique_key))
+            await await_redis_response(pipe.bf().add(self._handled_filter_key, request.unique_key))
 
         await await_redis_response(pipe.hdel(self._in_progress_key, request.unique_key))
         await await_redis_response(pipe.hdel(self._data_key, request.unique_key))
@@ -499,17 +497,16 @@ class RedisRequestQueueClient(RequestQueueClient, RedisClientMixin):
             await await_redis_response(
                 pipeline.bf().create(
                     self._added_filter_key, errorRate=self._bloom_error_rate, capacity=100000, expansion=10
-                )
+                )
             )
             await await_redis_response(
                 pipeline.bf().create(
                     self._handled_filter_key, errorRate=self._bloom_error_rate, capacity=100000, expansion=10
-                )
+                )
             )
 
     async def _reclaim_stale_requests(self) -> None:
         """Reclaim requests that have been in progress for too long."""
-        # Mypy workaround
        if self._reclaim_stale_script is None:
            raise RuntimeError('Scripts not loaded. Call _ensure_scripts_loaded() before using the client.')
 
```
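The `bloom` deduplication strategy goes through redis-py's `bf()` command group, which requires a Redis server with the RedisBloom module loaded. Below is a rough standalone sketch of the three calls used in these hunks (`create`, `add`, `mexists`); the key name, URL, and 0.001 error rate are illustrative, not crawlee defaults:

```python
import asyncio

from redis.asyncio import Redis


async def main() -> None:
    r = Redis.from_url('redis://localhost:6379')

    # Mirrors the creation call above: pre-sized for 100k keys, expanding 10x when full.
    await r.bf().create('queue:added', errorRate=0.001, capacity=100_000, expansion=10)

    # Mark a request as seen.
    await r.bf().add('queue:added', 'https://example.com')

    # mexists returns one 0/1 flag per probed item. False positives are possible,
    # false negatives are not - an acceptable trade-off for request deduplication.
    print(await r.bf().mexists('queue:added', 'https://example.com', 'https://other.example'))

    await r.aclose()


asyncio.run(main())
```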
**crawlee/storage_clients/_redis/_storage_client.py**

```diff
@@ -57,16 +57,19 @@ class RedisStorageClient(StorageClient):
         queue_bloom_error_rate: Desired false positive rate for Bloom filter deduplication. Only relevant if
             `queue_dedup_strategy` is set to 'bloom'.
         """
-        match (redis, connection_string):
-            case (None, None):
-                raise ValueError('Either redis or connection_string must be provided.')
-            case (Redis(), None):
-                self._redis = redis
-            case (None, str()):
-                self._redis = Redis.from_url(connection_string)
-            case (Redis(), str()):
-                raise ValueError('Either redis or connection_string must be provided, not both.')
+        if redis is None and connection_string is None:
+            raise ValueError('Either redis or connection_string must be provided.')
 
+        if redis is not None and connection_string is not None:
+            raise ValueError('Either redis or connection_string must be provided, not both.')
+
+        if isinstance(redis, Redis) and connection_string is None:
+            self._redis = redis
+
+        if isinstance(connection_string, str) and redis is None:
+            self._redis = Redis.from_url(connection_string)
+
+        self._redis: Redis  # to help type checker
         self._queue_dedup_strategy = queue_dedup_strategy
         self._queue_bloom_error_rate = queue_bloom_error_rate
 
```
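With the `match` statement gone, the constructor contract is unchanged: exactly one of `redis` or `connection_string` must be given. A usage sketch, assuming `RedisStorageClient` is re-exported from `crawlee.storage_clients` and a local Redis server is reachable:

```python
from redis.asyncio import Redis

from crawlee.storage_clients import RedisStorageClient

# Option 1: hand over a pre-configured asyncio Redis client.
client = RedisStorageClient(redis=Redis.from_url('redis://localhost:6379'))

# Option 2: let the storage client build the connection itself.
client = RedisStorageClient(connection_string='redis://localhost:6379')

# Passing both (or neither) raises ValueError, per the checks above.
```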
**crawlee/storage_clients/_redis/_utils.py**

```diff
@@ -19,5 +19,5 @@ async def await_redis_response(response: Awaitable[T] | T) -> T:
 def read_lua_script(script_name: str) -> str:
     """Read a Lua script from a file."""
     file_path = Path(__file__).parent / 'lua_scripts' / script_name
-    with file_path.open('r', encoding='utf-8') as file:
+    with file_path.open(mode='r', encoding='utf-8') as file:
         return file.read()
```
**crawlee/storage_clients/_sql/_client_mixin.py**

```diff
@@ -105,7 +105,7 @@ class SqlClientMixin(ABC):
             else:
                 stmt = select(cls._METADATA_TABLE).where(cls._METADATA_TABLE.internal_name == internal_name)
                 result = await session.execute(stmt)
-                orm_metadata = result.scalar_one_or_none()
+                orm_metadata = result.scalar_one_or_none()
 
             if orm_metadata:
                 client = cls(id=orm_metadata.id, storage_client=storage_client)
```
**crawlee/storage_clients/_sql/_storage_client.py**

```diff
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 import warnings
 from datetime import timedelta
 from pathlib import Path
@@ -269,14 +268,6 @@ class SqlStorageClient(StorageClient):
                 'Unsupported database. Supported: sqlite, postgresql. Consider using a different database.'
             )
 
-        # TODO: https://github.com/apify/crawlee-python/issues/1555
-        if 'postgresql' in connection_string and sys.version_info >= (3, 14):
-            raise ValueError(
-                'SqlStorageClient cannot use PostgreSQL with Python 3.14 '
-                'due to asyncpg compatibility limitations. '
-                'Please use Python 3.13 or earlier, or switch to SQLite.'
-            )
-
         self._engine = create_async_engine(
             connection_string,
             future=True,
```
**crawlee/storage_clients/models.py**

```diff
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Annotated, Any, Generic
+from typing import TYPE_CHECKING, Annotated, Any, Generic
 
 from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
 from typing_extensions import TypeVar
@@ -127,8 +127,13 @@ class DatasetItemsListPage(BaseModel):
     desc: Annotated[bool, Field(default=False)]
     """Indicates if the returned list is in descending order."""
 
-    items: Annotated[list[dict], Field(default_factory=list)]
-    """The list of dataset items returned on this page."""
+    # Workaround for Pydantic and type checkers when using Annotated with default_factory
+    if TYPE_CHECKING:
+        items: list[dict] = []
+        """The list of dataset items returned on this page."""
+    else:
+        items: Annotated[list[dict], Field(default_factory=list)]
+        """The list of dataset items returned on this page."""
 
 
 @docs_group('Storage data')
```
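The `TYPE_CHECKING` split works around type checkers that treat `Annotated[..., Field(default_factory=...)]` as a required field, while Pydantic still needs the factory so each instance gets its own list. A minimal sketch of the pattern in isolation (`PageModel` is illustrative, not crawlee API):

```python
from typing import TYPE_CHECKING, Annotated

from pydantic import BaseModel, Field


class PageModel(BaseModel):
    if TYPE_CHECKING:
        # Type checkers see a plain attribute with a default, so PageModel()
        # type-checks without an explicit items argument.
        items: list[dict] = []
    else:
        # At runtime Pydantic sees the default_factory, so every instance
        # gets a fresh list instead of a shared mutable default.
        items: Annotated[list[dict], Field(default_factory=list)]


first, second = PageModel(), PageModel()
first.items.append({'a': 1})
print(second.items)  # [] - the defaults are independent
```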
**crawlee/storages/_storage_instance_manager.py**

```diff
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+from asyncio import Lock
 from collections import defaultdict
 from collections.abc import Coroutine, Hashable
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, TypeVar
+from weakref import WeakValueDictionary
 
 from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
 from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
@@ -76,6 +78,7 @@ class StorageInstanceManager:
 
     def __init__(self) -> None:
         self._cache: _StorageCache = _StorageCache()
+        self._opener_locks: WeakValueDictionary[tuple, Lock] = WeakValueDictionary()
 
     async def open_storage_instance(
         self,
@@ -119,63 +122,71 @@ class StorageInstanceManager:
            if not any([name, alias, id]):
                alias = self._DEFAULT_STORAGE_ALIAS
 
-            # Check cache
-            if
-
-
-
+            # Check cache without lock first for performance.
+            if cached_instance := self._get_from_cache(
+                cls,
+                id=id,
+                name=name,
+                alias=alias,
+                storage_client_cache_key=storage_client_cache_key,
+            ):
+                return cached_instance
 
-
-
-
-                raise RuntimeError('Cached instance type mismatch.')
+            # Validate storage name
+            if name is not None:
+                validate_storage_name(name)
 
-
-
-            ):
-
+            # Acquire lock for this opener
+            opener_lock_key = (cls, str(id or name or alias), storage_client_cache_key)
+            if not (lock := self._opener_locks.get(opener_lock_key)):
+                lock = Lock()
+                self._opener_locks[opener_lock_key] = lock
+
+            async with lock:
+                # Another task could have created the storage while we were waiting for the lock - check if that
+                # happened
+                if cached_instance := self._get_from_cache(
+                    cls,
+                    id=id,
+                    name=name,
+                    alias=alias,
+                    storage_client_cache_key=storage_client_cache_key,
+                ):
                     return cached_instance
-                raise RuntimeError('Cached instance type mismatch.')
 
-
-
-
-
-
+                # Check for conflicts between named and alias storages
+                self._check_name_alias_conflict(
+                    cls,
+                    name=name,
+                    alias=alias,
+                    storage_client_cache_key=storage_client_cache_key,
                 )
 
-
-
-
-                    f'Use a different name or drop the existing alias storage first.'
-                )
+                # Create new instance
+                client: KeyValueStoreClient | DatasetClient | RequestQueueClient
+                client = await client_opener_coro
 
-
-            if name is not None:
-                validate_storage_name(name)
-
-            # Create new instance
-            client: KeyValueStoreClient | DatasetClient | RequestQueueClient
-            client = await client_opener_coro
+                metadata = await client.get_metadata()
 
-
+                instance = cls(client, metadata.id, metadata.name)  # type: ignore[call-arg]
+                instance_name = getattr(instance, 'name', None)
 
-
-
+                # Cache the instance.
+                # Note: No awaits in this section. All cache entries must be written
+                # atomically to ensure pre-checks outside the lock see consistent state.
 
-
-
-            self._cache.by_id[cls][instance.id][storage_client_cache_key] = instance
+                # Always cache by id.
+                self._cache.by_id[cls][instance.id][storage_client_cache_key] = instance
 
-
-
-
+                # Cache named storage.
+                if instance_name is not None:
+                    self._cache.by_name[cls][instance_name][storage_client_cache_key] = instance
 
-
-
-
+                # Cache unnamed storage.
+                if alias is not None:
+                    self._cache.by_alias[cls][alias][storage_client_cache_key] = instance
 
-
+                return instance
 
         finally:
             # Make sure the client opener is closed.
```
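Taken together, the new code is a double-checked locking pattern keyed per opener: a lock-free cache probe, a per-key `asyncio.Lock` kept in a `WeakValueDictionary` so released locks can be garbage-collected, and a second probe once the lock is held. A condensed, self-contained sketch of the same pattern; the names (`AsyncKeyedCache`, `get_or_create`) are illustrative, not crawlee API:

```python
import asyncio
from weakref import WeakValueDictionary


class AsyncKeyedCache:
    """Illustrative stand-in for the manager's cache and opener-lock bookkeeping."""

    def __init__(self) -> None:
        self._cache: dict[str, object] = {}
        # Weak values let a lock disappear once no task references it,
        # so per-key locks do not accumulate forever.
        self._locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()

    async def get_or_create(self, key: str) -> object:
        # 1. Lock-free fast path.
        if (instance := self._cache.get(key)) is not None:
            return instance

        # 2. Fetch or create the per-key lock. No await between get() and the
        #    store, so this step is atomic on a single event loop.
        if not (lock := self._locks.get(key)):
            lock = asyncio.Lock()
            self._locks[key] = lock

        async with lock:
            # 3. Re-check: another task may have created the instance while
            #    we were waiting for the lock.
            if (instance := self._cache.get(key)) is not None:
                return instance
            await asyncio.sleep(0)  # stands in for the real async creation work
            instance = object()
            self._cache[key] = instance
            return instance


async def main() -> None:
    cache = AsyncKeyedCache()
    a, b = await asyncio.gather(*(cache.get_or_create('kvs:default') for _ in range(2)))
    print(a is b)  # True: the second task reused the first task's instance


asyncio.run(main())
```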
**crawlee/storages/_storage_instance_manager.py**

```diff
@@ -193,3 +204,51 @@ class StorageInstanceManager:
     def clear_cache(self) -> None:
         """Clear all cached storage instances."""
         self._cache = _StorageCache()
+
+    def _get_from_cache(
+        self,
+        cls: type[T],
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        storage_client_cache_key: Hashable = '',
+    ) -> T | None:
+        """Get a storage instance from the cache."""
+        if id is not None and (cached_instance := self._cache.by_id[cls][id].get(storage_client_cache_key)):
+            if isinstance(cached_instance, cls):
+                return cached_instance
+            raise RuntimeError('Cached instance type mismatch.')
+
+        if name is not None and (cached_instance := self._cache.by_name[cls][name].get(storage_client_cache_key)):
+            if isinstance(cached_instance, cls):
+                return cached_instance
+            raise RuntimeError('Cached instance type mismatch.')
+
+        if alias is not None and (cached_instance := self._cache.by_alias[cls][alias].get(storage_client_cache_key)):
+            if isinstance(cached_instance, cls):
+                return cached_instance
+            raise RuntimeError('Cached instance type mismatch.')
+
+        return None
+
+    def _check_name_alias_conflict(
+        self,
+        cls: type[T],
+        *,
+        name: str | None = None,
+        alias: str | None = None,
+        storage_client_cache_key: Hashable = '',
+    ) -> None:
+        """Check for conflicts between named and alias storages."""
+        if alias and (self._cache.by_name[cls][alias].get(storage_client_cache_key)):
+            raise ValueError(
+                f'Cannot create alias storage "{alias}" because a named storage with the same name already exists. '
+                f'Use a different alias or drop the existing named storage first.'
+            )
+
+        if name and (self._cache.by_alias[cls][name].get(storage_client_cache_key)):
+            raise ValueError(
+                f'Cannot create named storage "{name}" because an alias storage with the same name already exists. '
+                f'Use a different name or drop the existing alias storage first.'
+            )
```
**{crawlee-1.0.5b18.dist-info → crawlee-1.2.2b24.dist-info}/METADATA**

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.0.5b18
+Version: 1.2.2b24
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -226,6 +226,7 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Software Development :: Libraries
 Requires-Python: >=3.10
+Requires-Dist: async-timeout>=5.0.1
 Requires-Dist: cachetools>=5.5.0
 Requires-Dist: colorama>=0.4.0
 Requires-Dist: impit>=0.8.0
@@ -247,7 +248,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
 Provides-Extra: all
 Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
 Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
-Requires-Dist: asyncpg>=0.24.0;
+Requires-Dist: asyncpg>=0.24.0; extra == 'all'
 Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
 Requires-Dist: browserforge>=1.2.3; extra == 'all'
 Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
@@ -301,7 +302,7 @@ Requires-Dist: playwright>=1.27.0; extra == 'playwright'
 Provides-Extra: redis
 Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
 Provides-Extra: sql-postgres
-Requires-Dist: asyncpg>=0.24.0;
+Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
 Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
 Provides-Extra: sql-sqlite
 Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
@@ -323,19 +324,12 @@ Description-Content-Type: text/markdown
     <a href="https://trendshift.io/repositories/11169" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11169" alt="apify%2Fcrawlee-python | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
 </p>
 
-<p align=center>
-
-
-
-
-
-    </a>
-    <a href="https://pypi.org/project/crawlee/" rel="nofollow">
-        <img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI - Python Version" style="max-width: 100%;">
-    </a>
-    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
-        <img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
-    </a>
+<p align="center">
+    <a href="https://badge.fury.io/py/crawlee" rel="nofollow"><img src="https://badge.fury.io/py/crawlee.svg" alt="PyPI package version"></a>
+    <a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/dm/crawlee" alt="PyPI package downloads"></a>
+    <a href="https://codecov.io/gh/apify/crawlee-python"><img src="https://codecov.io/gh/apify/crawlee-python/graph/badge.svg?token=cCju61iPQG" alt="Codecov report"></a>
+    <a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI Python version"></a>
+    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow"><img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on Discord"></a>
 </p>
 
 Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
```