crawlee 1.0.3b6__py3-none-any.whl → 1.0.5b18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlee/_service_locator.py +4 -4
- crawlee/_utils/recoverable_state.py +32 -8
- crawlee/_utils/recurring_task.py +15 -0
- crawlee/_utils/robots.py +17 -5
- crawlee/_utils/sitemap.py +1 -1
- crawlee/_utils/urls.py +9 -2
- crawlee/browsers/_browser_pool.py +4 -1
- crawlee/browsers/_playwright_browser_controller.py +1 -1
- crawlee/browsers/_playwright_browser_plugin.py +17 -3
- crawlee/browsers/_types.py +1 -1
- crawlee/configuration.py +3 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -13
- crawlee/crawlers/_basic/_basic_crawler.py +23 -12
- crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
- crawlee/fingerprint_suite/_header_generator.py +2 -2
- crawlee/otel/crawler_instrumentor.py +3 -3
- crawlee/request_loaders/_sitemap_request_loader.py +5 -0
- crawlee/sessions/_session_pool.py +1 -1
- crawlee/statistics/_error_snapshotter.py +1 -1
- crawlee/statistics/_statistics.py +41 -31
- crawlee/storage_clients/__init__.py +4 -0
- crawlee/storage_clients/_file_system/_request_queue_client.py +24 -6
- crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- crawlee/storage_clients/_redis/py.typed +0 -0
- crawlee/storage_clients/_sql/_db_models.py +1 -2
- crawlee/storage_clients/_sql/_storage_client.py +9 -0
- crawlee/storages/_key_value_store.py +5 -2
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/METADATA +9 -5
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/RECORD +43 -31
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/WHEEL +0 -0
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from redis.asyncio import Redis
|
|
7
|
+
from typing_extensions import override
|
|
8
|
+
|
|
9
|
+
from crawlee._utils.docs import docs_group
|
|
10
|
+
from crawlee.configuration import Configuration
|
|
11
|
+
from crawlee.storage_clients._base import StorageClient
|
|
12
|
+
|
|
13
|
+
from ._dataset_client import RedisDatasetClient
|
|
14
|
+
from ._key_value_store_client import RedisKeyValueStoreClient
|
|
15
|
+
from ._request_queue_client import RedisRequestQueueClient
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@docs_group('Storage clients')
|
|
19
|
+
class RedisStorageClient(StorageClient):
|
|
20
|
+
"""Redis implementation of the storage client.
|
|
21
|
+
|
|
22
|
+
This storage client provides access to datasets, key-value stores, and request queues that persist data
|
|
23
|
+
to a Redis database v8.0+. Each storage type uses Redis-specific data structures and key patterns for
|
|
24
|
+
efficient storage and retrieval.
|
|
25
|
+
|
|
26
|
+
The client accepts either a Redis connection string or a pre-configured Redis client instance.
|
|
27
|
+
Exactly one of these parameters must be provided during initialization.
|
|
28
|
+
|
|
29
|
+
Storage types use the following Redis data structures:
|
|
30
|
+
- **Datasets**: Redis JSON arrays for item storage with metadata in JSON objects
|
|
31
|
+
- **Key-value stores**: Redis hashes for key-value pairs with separate metadata storage
|
|
32
|
+
- **Request queues**: Redis lists for FIFO queuing, hashes for request data and in-progress tracking,
|
|
33
|
+
and Bloom filters for request deduplication
|
|
34
|
+
|
|
35
|
+
Warning:
|
|
36
|
+
This is an experimental feature. The behavior and interface may change in future versions.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
*,
|
|
42
|
+
connection_string: str | None = None,
|
|
43
|
+
redis: Redis | None = None,
|
|
44
|
+
queue_dedup_strategy: Literal['default', 'bloom'] = 'default',
|
|
45
|
+
queue_bloom_error_rate: float = 1e-7,
|
|
46
|
+
) -> None:
|
|
47
|
+
"""Initialize the Redis storage client.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
connection_string: Redis connection string (e.g., "redis://localhost:6379").
|
|
51
|
+
Supports standard Redis URL format with optional database selection.
|
|
52
|
+
redis: Pre-configured Redis client instance.
|
|
53
|
+
queue_dedup_strategy: Strategy for request queue deduplication. Options are:
|
|
54
|
+
- 'default': Uses Redis sets for exact deduplication.
|
|
55
|
+
- 'bloom': Uses Redis Bloom filters for probabilistic deduplication with lower memory usage. When using
|
|
56
|
+
this approach, approximately 1 in 1e-7 requests will be falsely considered duplicate.
|
|
57
|
+
queue_bloom_error_rate: Desired false positive rate for Bloom filter deduplication. Only relevant if
|
|
58
|
+
`queue_dedup_strategy` is set to 'bloom'.
|
|
59
|
+
"""
|
|
60
|
+
match (redis, connection_string):
|
|
61
|
+
case (None, None):
|
|
62
|
+
raise ValueError('Either redis or connection_string must be provided.')
|
|
63
|
+
case (Redis(), None):
|
|
64
|
+
self._redis = redis
|
|
65
|
+
case (None, str()):
|
|
66
|
+
self._redis = Redis.from_url(connection_string)
|
|
67
|
+
case (Redis(), str()):
|
|
68
|
+
raise ValueError('Either redis or connection_string must be provided, not both.')
|
|
69
|
+
|
|
70
|
+
self._queue_dedup_strategy = queue_dedup_strategy
|
|
71
|
+
self._queue_bloom_error_rate = queue_bloom_error_rate
|
|
72
|
+
|
|
73
|
+
# Call the notification only once
|
|
74
|
+
warnings.warn(
|
|
75
|
+
(
|
|
76
|
+
'RedisStorageClient is experimental and its API, behavior, and key structure may change in future '
|
|
77
|
+
'releases.'
|
|
78
|
+
),
|
|
79
|
+
category=UserWarning,
|
|
80
|
+
stacklevel=2,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
@override
|
|
84
|
+
async def create_dataset_client(
|
|
85
|
+
self,
|
|
86
|
+
*,
|
|
87
|
+
id: str | None = None,
|
|
88
|
+
name: str | None = None,
|
|
89
|
+
alias: str | None = None,
|
|
90
|
+
configuration: Configuration | None = None,
|
|
91
|
+
) -> RedisDatasetClient:
|
|
92
|
+
configuration = configuration or Configuration.get_global_configuration()
|
|
93
|
+
|
|
94
|
+
client = await RedisDatasetClient.open(
|
|
95
|
+
id=id,
|
|
96
|
+
name=name,
|
|
97
|
+
alias=alias,
|
|
98
|
+
redis=self._redis,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
await self._purge_if_needed(client, configuration)
|
|
102
|
+
return client
|
|
103
|
+
|
|
104
|
+
@override
|
|
105
|
+
async def create_kvs_client(
|
|
106
|
+
self,
|
|
107
|
+
*,
|
|
108
|
+
id: str | None = None,
|
|
109
|
+
name: str | None = None,
|
|
110
|
+
alias: str | None = None,
|
|
111
|
+
configuration: Configuration | None = None,
|
|
112
|
+
) -> RedisKeyValueStoreClient:
|
|
113
|
+
configuration = configuration or Configuration.get_global_configuration()
|
|
114
|
+
|
|
115
|
+
client = await RedisKeyValueStoreClient.open(
|
|
116
|
+
id=id,
|
|
117
|
+
name=name,
|
|
118
|
+
alias=alias,
|
|
119
|
+
redis=self._redis,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
await self._purge_if_needed(client, configuration)
|
|
123
|
+
return client
|
|
124
|
+
|
|
125
|
+
@override
|
|
126
|
+
async def create_rq_client(
|
|
127
|
+
self,
|
|
128
|
+
*,
|
|
129
|
+
id: str | None = None,
|
|
130
|
+
name: str | None = None,
|
|
131
|
+
alias: str | None = None,
|
|
132
|
+
configuration: Configuration | None = None,
|
|
133
|
+
) -> RedisRequestQueueClient:
|
|
134
|
+
configuration = configuration or Configuration.get_global_configuration()
|
|
135
|
+
|
|
136
|
+
client = await RedisRequestQueueClient.open(
|
|
137
|
+
id=id,
|
|
138
|
+
name=name,
|
|
139
|
+
alias=alias,
|
|
140
|
+
redis=self._redis,
|
|
141
|
+
dedup_strategy=self._queue_dedup_strategy,
|
|
142
|
+
bloom_error_rate=self._queue_bloom_error_rate,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
await self._purge_if_needed(client, configuration)
|
|
146
|
+
return client
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from collections.abc import Awaitable
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import TypeVar, overload
|
|
4
|
+
|
|
5
|
+
T = TypeVar('T')
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@overload
async def await_redis_response(response: Awaitable[T]) -> T: ...
@overload
async def await_redis_response(response: T) -> T: ...


async def await_redis_response(response: Awaitable[T] | T) -> T:
    """Return the plain value of a redis result whose typing is ambiguous.

    The argument may be either a value or an awaitable producing that value; an awaitable is awaited
    first, anything else is returned unchanged.
    """
    if isinstance(response, Awaitable):
        return await response
    return response
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def read_lua_script(script_name: str) -> str:
    """Return the contents of a Lua script from the `lua_scripts` directory next to this module.

    Args:
        script_name: File name of the script inside `lua_scripts`.

    Returns:
        The script text, decoded as UTF-8.
    """
    scripts_dir = Path(__file__).parent / 'lua_scripts'
    return (scripts_dir / script_name).read_text(encoding='utf-8')
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
-- Atomically enqueue requests, deduplicating them through a Bloom filter.
--
-- KEYS[1]: Bloom filter recording every unique key added so far
-- KEYS[2]: list holding the queue of pending unique keys
-- KEYS[3]: hash mapping unique key -> request data
-- ARGV[1]: '1' to push to the head of the queue, anything else to append to the tail
-- ARGV[2]: JSON array of unique keys
-- ARGV[3]: JSON object mapping unique key -> request data
-- Returns: JSON encoding of the unique keys that were newly added
local bloom_key = KEYS[1]
local pending_list_key = KEYS[2]
local request_hash_key = KEYS[3]

local to_front = ARGV[1] == '1'
local candidate_keys = cjson.decode(ARGV[2])
local payload_by_key = cjson.decode(ARGV[3])

-- BF.MADD inserts every candidate and reports, per item, whether it was new
local insert_flags = redis.call('bf.madd', bloom_key, unpack(candidate_keys))

local fresh_keys = {}
local hash_fields = {}

-- Keep only the candidates the filter had not seen before
for idx, candidate in ipairs(candidate_keys) do
    if insert_flags[idx] == 1 then
        hash_fields[#hash_fields + 1] = candidate
        hash_fields[#hash_fields + 1] = payload_by_key[candidate]
        fresh_keys[#fresh_keys + 1] = candidate
    end
end

-- Store the data and enqueue only what is actually new
if #fresh_keys > 0 then
    redis.call('hset', request_hash_key, unpack(hash_fields))

    local push_command = to_front and 'lpush' or 'rpush'
    redis.call(push_command, pending_list_key, unpack(fresh_keys))
end

return cjson.encode(fresh_keys)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
-- Atomically pop a batch of requests from the queue and mark them as in progress.
--
-- KEYS[1]: list holding the queue of pending unique keys
-- KEYS[2]: hash of in-progress requests (unique key -> JSON with client_id and blocked_until_timestamp)
-- KEYS[3]: hash mapping unique key -> request data
-- ARGV[1]: id of the client fetching the requests
-- ARGV[2]: timestamp until which the fetched requests stay blocked
-- ARGV[3]: maximum number of requests to pop
-- Returns: list of request data for the popped keys, or nil when nothing could be popped
local queue_key = KEYS[1]
local in_progress_key = KEYS[2]
local data_key = KEYS[3]
local client_id = ARGV[1]
local blocked_until_timestamp = ARGV[2]
local batch_size = tonumber(ARGV[3])

-- Pop a batch of unique keys from the head of the queue
local batch_result = redis.call('LMPOP', 1, queue_key, 'LEFT', 'COUNT', batch_size)
if not batch_result then
    return nil
end
-- LMPOP replies with {list name, popped elements}
local unique_keys = batch_result[2]

-- Get requests data
local requests_data = redis.call('HMGET', data_key, unpack(unique_keys))
if not requests_data then
    -- Data missing, skip this request
    return nil
end

-- Prepare results and update in_progress
local final_result = {}
local in_progress_fields = {}
-- Every request of the batch shares the same in-progress marker
local in_progress_data = cjson.encode({
    client_id = client_id,
    blocked_until_timestamp = tonumber(blocked_until_timestamp)
})
for i = 1, #unique_keys do
    local unique_key = unique_keys[i]
    local request_data = requests_data[i]

    -- NOTE: a popped key without stored data is neither returned nor re-queued
    if request_data then
        -- Add to in_progress hash
        table.insert(in_progress_fields, unique_key)
        table.insert(in_progress_fields, in_progress_data)

        table.insert(final_result, request_data)
    end
end

-- Update in_progress hash (HSET accepts multiple field/value pairs; HMSET is deprecated)
if #in_progress_fields > 0 then
    redis.call('HSET', in_progress_key, unpack(in_progress_fields))
end

-- Return result with requests data
return final_result
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
-- Atomically enqueue requests, deduplicating them through a Redis set.
--
-- KEYS[1]: set recording every unique key added so far
-- KEYS[2]: list holding the queue of pending unique keys
-- KEYS[3]: hash mapping unique key -> request data
-- ARGV[1]: '1' to push to the head of the queue, anything else to append to the tail
-- ARGV[2]: JSON array of unique keys
-- ARGV[3]: JSON object mapping unique key -> request data
-- Returns: JSON encoding of the unique keys that were newly added
local seen_set_key = KEYS[1]
local pending_list_key = KEYS[2]
local request_hash_key = KEYS[3]

local to_front = ARGV[1] == '1'
local candidate_keys = cjson.decode(ARGV[2])
local payload_by_key = cjson.decode(ARGV[3])

local fresh_keys = {}
local hash_fields = {}

-- SADD replies 1 when the member was inserted and 0 when it was already present
for _, candidate in ipairs(candidate_keys) do
    local inserted = redis.call('sadd', seen_set_key, candidate)

    if inserted == 1 then
        hash_fields[#hash_fields + 1] = candidate
        hash_fields[#hash_fields + 1] = payload_by_key[candidate]
        fresh_keys[#fresh_keys + 1] = candidate
    end
end

-- Store the data and enqueue only what is actually new
if #fresh_keys > 0 then
    redis.call('hset', request_hash_key, unpack(hash_fields))

    local push_command = to_front and 'lpush' or 'rpush'
    redis.call(push_command, pending_list_key, unpack(fresh_keys))
end

return cjson.encode(fresh_keys)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
-- Move timed-out in-progress requests back to the tail of the queue.
--
-- KEYS[1]: hash of in-progress requests (unique key -> JSON with blocked_until_timestamp)
-- KEYS[2]: list holding the queue of pending unique keys
-- KEYS[3]: request data hash (bound below but not read by this script)
-- ARGV[1]: current time, compared against each entry's blocked_until_timestamp
-- Returns: number of requests put back into the queue (at most MAX_RECLAIM per call)
local in_progress_key = KEYS[1]
local queue_key = KEYS[2]
local data_key = KEYS[3]
local now = tonumber(ARGV[1])

local MAX_RECLAIM = 1000
local SCAN_PAGE_HINT = 100

local scan_cursor = "0"
local reclaimed = 0

while true do
    local page = redis.call('hscan', in_progress_key, scan_cursor, 'COUNT', SCAN_PAGE_HINT)
    scan_cursor = page[1]
    -- HSCAN returns a flat list: field, value, field, value, ...
    local flat = page[2]
    local flat_len = #flat

    local pos = 1
    while pos <= flat_len and reclaimed < MAX_RECLAIM do
        local unique_key = flat[pos]
        local entry = cjson.decode(flat[pos + 1])

        -- Timed out: drop the in-progress marker and re-queue the key
        if now > entry.blocked_until_timestamp then
            redis.call('hdel', in_progress_key, unique_key)
            redis.call('rpush', queue_key, unique_key)
            reclaimed = reclaimed + 1
        end

        pos = pos + 2
    end

    -- Stop once the scan wrapped around or the per-call cap was reached
    if scan_cursor == "0" or reclaimed >= MAX_RECLAIM then
        break
    end
end

return reclaimed
|
|
File without changes
|
|
@@ -205,9 +205,8 @@ class RequestDb(Base):
|
|
|
205
205
|
'idx_fetch_available',
|
|
206
206
|
'request_queue_id',
|
|
207
207
|
'is_handled',
|
|
208
|
-
'time_blocked_until',
|
|
209
208
|
'sequence_number',
|
|
210
|
-
postgresql_where=text('is_handled
|
|
209
|
+
postgresql_where=text('is_handled is false'),
|
|
211
210
|
),
|
|
212
211
|
)
|
|
213
212
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import sys
|
|
3
4
|
import warnings
|
|
4
5
|
from datetime import timedelta
|
|
5
6
|
from pathlib import Path
|
|
@@ -268,6 +269,14 @@ class SqlStorageClient(StorageClient):
|
|
|
268
269
|
'Unsupported database. Supported: sqlite, postgresql. Consider using a different database.'
|
|
269
270
|
)
|
|
270
271
|
|
|
272
|
+
# TODO: https://github.com/apify/crawlee-python/issues/1555
|
|
273
|
+
if 'postgresql' in connection_string and sys.version_info >= (3, 14):
|
|
274
|
+
raise ValueError(
|
|
275
|
+
'SqlStorageClient cannot use PostgreSQL with Python 3.14 '
|
|
276
|
+
'due to asyncpg compatibility limitations. '
|
|
277
|
+
'Please use Python 3.13 or earlier, or switch to SQLite.'
|
|
278
|
+
)
|
|
279
|
+
|
|
271
280
|
self._engine = create_async_engine(
|
|
272
281
|
connection_string,
|
|
273
282
|
future=True,
|
|
@@ -281,11 +281,14 @@ class KeyValueStore(Storage):
|
|
|
281
281
|
if key in cache:
|
|
282
282
|
return cache[key].current_value.root
|
|
283
283
|
|
|
284
|
+
async def kvs_factory() -> KeyValueStore:
|
|
285
|
+
return self
|
|
286
|
+
|
|
284
287
|
cache[key] = recoverable_state = RecoverableState(
|
|
285
288
|
default_state=AutosavedValue(default_value),
|
|
286
|
-
persistence_enabled=True,
|
|
287
|
-
persist_state_kvs_id=self.id,
|
|
288
289
|
persist_state_key=key,
|
|
290
|
+
persistence_enabled=True,
|
|
291
|
+
persist_state_kvs_factory=kvs_factory,
|
|
289
292
|
logger=logger,
|
|
290
293
|
)
|
|
291
294
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crawlee
|
|
3
|
-
Version: 1.0.3b6
|
|
3
|
+
Version: 1.0.5b18
|
|
4
4
|
Summary: Crawlee for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
|
|
@@ -223,15 +223,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
223
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
226
227
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
228
|
Requires-Python: >=3.10
|
|
228
229
|
Requires-Dist: cachetools>=5.5.0
|
|
229
230
|
Requires-Dist: colorama>=0.4.0
|
|
230
|
-
Requires-Dist: impit>=0.
|
|
231
|
+
Requires-Dist: impit>=0.8.0
|
|
231
232
|
Requires-Dist: more-itertools>=10.2.0
|
|
232
233
|
Requires-Dist: protego>=0.5.0
|
|
233
234
|
Requires-Dist: psutil>=6.0.0
|
|
234
|
-
Requires-Dist: pydantic-settings
|
|
235
|
+
Requires-Dist: pydantic-settings>=2.12.0
|
|
235
236
|
Requires-Dist: pydantic>=2.11.0
|
|
236
237
|
Requires-Dist: pyee>=9.0.0
|
|
237
238
|
Requires-Dist: tldextract>=5.1.0
|
|
@@ -246,7 +247,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
|
|
|
246
247
|
Provides-Extra: all
|
|
247
248
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
|
|
248
249
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
|
|
249
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'all'
|
|
250
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
|
|
250
251
|
Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
|
|
251
252
|
Requires-Dist: browserforge>=1.2.3; extra == 'all'
|
|
252
253
|
Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
|
|
@@ -263,6 +264,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1; extra == 'all'
|
|
|
263
264
|
Requires-Dist: opentelemetry-semantic-conventions>=0.54; extra == 'all'
|
|
264
265
|
Requires-Dist: parsel>=1.10.0; extra == 'all'
|
|
265
266
|
Requires-Dist: playwright>=1.27.0; extra == 'all'
|
|
267
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'all'
|
|
266
268
|
Requires-Dist: rich>=13.9.0; extra == 'all'
|
|
267
269
|
Requires-Dist: scikit-learn>=1.6.0; extra == 'all'
|
|
268
270
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'all'
|
|
@@ -296,8 +298,10 @@ Provides-Extra: playwright
|
|
|
296
298
|
Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'playwright'
|
|
297
299
|
Requires-Dist: browserforge>=1.2.3; extra == 'playwright'
|
|
298
300
|
Requires-Dist: playwright>=1.27.0; extra == 'playwright'
|
|
301
|
+
Provides-Extra: redis
|
|
302
|
+
Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
|
|
299
303
|
Provides-Extra: sql-postgres
|
|
300
|
-
Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
|
|
304
|
+
Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
|
|
301
305
|
Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
|
|
302
306
|
Provides-Extra: sql-sqlite
|
|
303
307
|
Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'
|
|
@@ -4,9 +4,9 @@ crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
|
|
|
4
4
|
crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
|
|
5
5
|
crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
|
|
6
6
|
crawlee/_request.py,sha256=XliqiaL5Gp3fIDqHVVw0GF35VydXOtg6wJIkeaLcAwk,16458
|
|
7
|
-
crawlee/_service_locator.py,sha256=
|
|
7
|
+
crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
|
|
8
8
|
crawlee/_types.py,sha256=DAmfSv5W1dt3nJhJ8z-02gDaE06fdEizNKUlHpsd2_A,29129
|
|
9
|
-
crawlee/configuration.py,sha256=
|
|
9
|
+
crawlee/configuration.py,sha256=DWS2z1FC6Ua93W2tStK3R1ZKZbZjVQYWGiGFbZFaRtA,8064
|
|
10
10
|
crawlee/errors.py,sha256=RhFNA_uT615nVBHf9TylpX5YWwtDuHUUEV8LPT4CYa4,3878
|
|
11
11
|
crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
|
|
12
12
|
crawlee/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -29,43 +29,43 @@ crawlee/_utils/globs.py,sha256=SGX2J35Kqw7yZnSS5c4mLz9UD8c77PF0IoCgXQM5uiw,5310
|
|
|
29
29
|
crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZkY,902
|
|
30
30
|
crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
|
|
31
31
|
crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
|
|
32
|
-
crawlee/_utils/recoverable_state.py,sha256=
|
|
33
|
-
crawlee/_utils/recurring_task.py,sha256=
|
|
32
|
+
crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
|
|
33
|
+
crawlee/_utils/recurring_task.py,sha256=sQMiURuDXbwwfAcIXK8V4NXncSxIBxsqN1cZWX7DLyg,2128
|
|
34
34
|
crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
|
|
35
|
-
crawlee/_utils/robots.py,sha256=
|
|
36
|
-
crawlee/_utils/sitemap.py,sha256=
|
|
35
|
+
crawlee/_utils/robots.py,sha256=DBU5ni4Y-p7bIKMbLd_ws8wgHSFc4K8zPVF3JvH_pkw,4661
|
|
36
|
+
crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
|
|
37
37
|
crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
|
|
38
38
|
crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
|
|
39
39
|
crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
|
|
40
|
-
crawlee/_utils/urls.py,sha256=
|
|
40
|
+
crawlee/_utils/urls.py,sha256=fEYXJxBT02f-DIYKF_h7PdaKAShfXBs99-dHDjDX03A,1725
|
|
41
41
|
crawlee/_utils/wait.py,sha256=RfiXhp5VUBxOEtEMtru7_jNfKDr2BJCcFge5qGg2gxk,2848
|
|
42
42
|
crawlee/_utils/web.py,sha256=nnKhg8pUSWz0RY64Qd-_GPNBX1fWI2hXS-gzcfQ-rig,364
|
|
43
43
|
crawlee/browsers/__init__.py,sha256=TghkrNSbI_k87UgVBlgNNcEm8Ot05pSLEAPRSv6YsUs,1064
|
|
44
44
|
crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6kcV-g3AXE0I,3068
|
|
45
45
|
crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
|
|
46
|
-
crawlee/browsers/_browser_pool.py,sha256=
|
|
46
|
+
crawlee/browsers/_browser_pool.py,sha256=n1GTVS220yxo-aMaKDVfQO571_AqEV5pMawWbr0zUHk,15832
|
|
47
47
|
crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
|
|
48
|
-
crawlee/browsers/_playwright_browser_controller.py,sha256=
|
|
49
|
-
crawlee/browsers/_playwright_browser_plugin.py,sha256=
|
|
50
|
-
crawlee/browsers/_types.py,sha256=
|
|
48
|
+
crawlee/browsers/_playwright_browser_controller.py,sha256=W6G5MjZpg9IcZoHts6lTML5VxSEpBTgzx5qeQ8XDigY,10216
|
|
49
|
+
crawlee/browsers/_playwright_browser_plugin.py,sha256=A1qa1nJhTSKNP9uOiO-oGzR7VGlnOMo0A0YNedccb2A,8869
|
|
50
|
+
crawlee/browsers/_types.py,sha256=ZnDgJHeQNSd_s_mXhgQnAf09c2smuiXC31VbawHHXUM,436
|
|
51
51
|
crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
52
|
crawlee/crawlers/__init__.py,sha256=9VmFahav3rjE-2Bxa5PAhBgkYXP0k5SSAEpdG2xMZ7c,2340
|
|
53
53
|
crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
|
|
54
54
|
crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
55
|
crawlee/crawlers/_abstract_http/__init__.py,sha256=QCjn8x7jpo8FwEeSRw10TVj_0La2v9mLEiQWdk2RoTw,273
|
|
56
|
-
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
|
|
56
|
+
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=DEiErZi7j2FHMgyVELPy09GyHo5Gx4UDpuiN6D3sGNk,11553
|
|
57
57
|
crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
|
|
58
58
|
crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
|
|
59
59
|
crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
60
|
crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=LREq9WR9BKsE8S8lSsEhlCoNjQaLhlJ9yo8y_6a8o4c,1072
|
|
61
|
-
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=
|
|
61
|
+
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=nPFB9Q_3xQDJprb24NIQO53gf56J8wXjbM9C-58iiZ8,22862
|
|
62
62
|
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
|
|
63
63
|
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=9FlHIUC05IzUhJsVldQvpnDnj1jk8GJpqC98mPLN_fw,10431
|
|
64
64
|
crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
|
|
65
65
|
crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
|
|
66
66
|
crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
|
|
67
67
|
crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
|
|
68
|
-
crawlee/crawlers/_basic/_basic_crawler.py,sha256=
|
|
68
|
+
crawlee/crawlers/_basic/_basic_crawler.py,sha256=yZ_A_l9Dux9Y2eYa9XbN3c7h-3YO7MgGmJbzCMbCplg,73257
|
|
69
69
|
crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
|
|
70
70
|
crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
|
|
71
71
|
crawlee/crawlers/_basic/_logging_utils.py,sha256=jp5mEwSq5a_BgzUhNPJ9WrIDcoIeYGbeHstcRqCcP0s,3093
|
|
@@ -85,7 +85,7 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
|
|
|
85
85
|
crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
|
|
86
86
|
crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
|
|
87
87
|
crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
|
|
88
|
-
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
|
|
88
|
+
crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=53iytj5LZHw19QOYqYNlZL4ApPlhbWn9Ds-DPTKANhQ,24158
|
|
89
89
|
crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
|
|
90
90
|
crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=Nfm69dqX85k68jN1p3ljZWbn8egqDWPIPRykXyXsoQs,3977
|
|
91
91
|
crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=fEI2laWhmJdWiGoMF5JBLBsim9NtENfagZt6FFd2Rgo,1387
|
|
@@ -100,7 +100,7 @@ crawlee/fingerprint_suite/__init__.py,sha256=noY9qw80B0seZGj_B3bBvCDIDk2YWOSN-ll
|
|
|
100
100
|
crawlee/fingerprint_suite/_browserforge_adapter.py,sha256=bsGebBjjHawM-FiINgqkZW5I9a9Fnv3SGwdKgaVWiRI,11934
|
|
101
101
|
crawlee/fingerprint_suite/_consts.py,sha256=SgykWfxD-pYvOpRp_ooQ4ZTPS0sQ2b3wDyyCjwU_8-w,258
|
|
102
102
|
crawlee/fingerprint_suite/_fingerprint_generator.py,sha256=Di4sDk1qioiFGx4ZcoVyHhtFHF8JXDhxQt8ZPug99k8,730
|
|
103
|
-
crawlee/fingerprint_suite/_header_generator.py,sha256=
|
|
103
|
+
crawlee/fingerprint_suite/_header_generator.py,sha256=9X9FbStehXdw-FZc_D0y-nLk1BUHXVYFxs7fv4dl9zU,3513
|
|
104
104
|
crawlee/fingerprint_suite/_types.py,sha256=7n2LJTiL2XvL-H4G-Y26Uoq5-ZXzH07Dq4o50uhMa-w,2423
|
|
105
105
|
crawlee/fingerprint_suite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
crawlee/http_clients/__init__.py,sha256=OQFhR9F8BrdlIaS5aRS7hvgQ0tKJPQ8FiyYPualyQcU,890
|
|
@@ -109,7 +109,7 @@ crawlee/http_clients/_curl_impersonate.py,sha256=EBaoJZzBDgsfhLRgZwu-mFzKqFJ8BDF
|
|
|
109
109
|
crawlee/http_clients/_httpx.py,sha256=pMVyL5P1UU49svqRr7hqcxInIuT4MT3R7UBmkJ07xMc,11780
|
|
110
110
|
crawlee/http_clients/_impit.py,sha256=nGSe5NuZmsJjQzsfUGIZdbZFC48_Gpu8aU3I8kpq6O4,8833
|
|
111
111
|
crawlee/otel/__init__.py,sha256=g5y1tJfpDKfcIPGcKBztMgP6sptum-vJrtemeR8_-co,108
|
|
112
|
-
crawlee/otel/crawler_instrumentor.py,sha256
|
|
112
|
+
crawlee/otel/crawler_instrumentor.py,sha256=yC367A1NnAdhOanvym2zfiu4H4BskUslrib0GcHiVJs,6865
|
|
113
113
|
crawlee/project_template/cookiecutter.json,sha256=dJeYxLx5QEy2DCzXsDpqJQJlIJ3nw42lJrclZFoSZ8w,622
|
|
114
114
|
crawlee/project_template/hooks/post_gen_project.py,sha256=EBNgb_-eodDxaYJljYYznZGASnduxJ54RDO_b4ofm4M,1296
|
|
115
115
|
crawlee/project_template/hooks/pre_gen_project.py,sha256=AqvHJRTOTQzKFX10Zt8uKt8UFHczDJ1ogPPHQVdY2ZU,1175
|
|
@@ -137,19 +137,19 @@ crawlee/request_loaders/_request_list.py,sha256=SIalHBMuFanE5GLnFocI0QCppWUiJQjr
|
|
|
137
137
|
crawlee/request_loaders/_request_loader.py,sha256=2Bg-AWWkIV1W-Dwjqo91dPY8nmc7H3teQy7d6OSgliQ,3620
|
|
138
138
|
crawlee/request_loaders/_request_manager.py,sha256=qFizyJuV2meIb9iiPfuii7ciuERMrp4SldAufiH46dc,3000
|
|
139
139
|
crawlee/request_loaders/_request_manager_tandem.py,sha256=lv-s94KPsoQAqx1KaXFch96ejhO147uOflF3UK5ORTk,4058
|
|
140
|
-
crawlee/request_loaders/_sitemap_request_loader.py,sha256=
|
|
140
|
+
crawlee/request_loaders/_sitemap_request_loader.py,sha256=s65D_N0mZxeIrGJEjqUYfu1uYj2AXSOkmErSnfAHv2A,15554
|
|
141
141
|
crawlee/sessions/__init__.py,sha256=dJdelbL-6MK5sW4SMU4QrjFbb9kRZ9uRnN-VS3R5-8Y,190
|
|
142
142
|
crawlee/sessions/_cookies.py,sha256=ihYbmpXfCzClzXDT7M2wefB_3KVzcMUdIzTZo6uGk6Y,9356
|
|
143
143
|
crawlee/sessions/_models.py,sha256=JMRQgDUP30XUdZ32isncHowOsXvK9jC_m9QYegbBI1E,2916
|
|
144
144
|
crawlee/sessions/_session.py,sha256=cMXVf6QjfGJDgdLUB4MhUP-zTm3pEDHRs-W5SBA4JFI,9638
|
|
145
|
-
crawlee/sessions/_session_pool.py,sha256
|
|
145
|
+
crawlee/sessions/_session_pool.py,sha256=_FYTXRANDiREK09DSoHIu-536sSUj9wazbrcl6iKdFM,9631
|
|
146
146
|
crawlee/sessions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
147
|
crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY0,154
|
|
148
|
-
crawlee/statistics/_error_snapshotter.py,sha256=
|
|
148
|
+
crawlee/statistics/_error_snapshotter.py,sha256=g-roZgkJ-glyStZL7gXrOhrpdZvZ686W9lR43uZjPao,3279
|
|
149
149
|
crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
|
|
150
150
|
crawlee/statistics/_models.py,sha256=SFWYpT3r1c4XugU8nrm0epTpcM5_0fS1mXi9fnbhGJ8,5237
|
|
151
|
-
crawlee/statistics/_statistics.py,sha256=
|
|
152
|
-
crawlee/storage_clients/__init__.py,sha256=
|
|
151
|
+
crawlee/statistics/_statistics.py,sha256=AnxbVq6w8fuiRumUJMznhTtQmtONyF4pzqrFYgO4yjo,13076
|
|
152
|
+
crawlee/storage_clients/__init__.py,sha256=X3M6Z_WAOJ3M9I8JhGhJDnrtbCOmM27DpGAzgt87R2A,874
|
|
153
153
|
crawlee/storage_clients/models.py,sha256=gfW_kpSCOBuoTBIW0N7tb3FUv7BgD3keZADS7pyT4_I,6586
|
|
154
154
|
crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
155
|
crawlee/storage_clients/_base/__init__.py,sha256=-f0VIaGjw7Oo6HAgEK9ABtmKnnSRwzkA3WUQZMX5w0w,307
|
|
@@ -161,7 +161,7 @@ crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
161
161
|
crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
|
|
162
162
|
crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
|
|
163
163
|
crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
|
|
164
|
-
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=
|
|
164
|
+
crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=ETwy6eODf3dlBqy2RPM3nr2_oEm2ht37WpoTlFxn4A8,33970
|
|
165
165
|
crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
|
|
166
166
|
crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
167
|
crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -171,24 +171,36 @@ crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=rBXRggALe-0kBA
|
|
|
171
171
|
crawlee/storage_clients/_memory/_request_queue_client.py,sha256=hPI78S1sOopVKFKDRW7ndkha7TVcJfwdd4onXgfb4Pk,13096
|
|
172
172
|
crawlee/storage_clients/_memory/_storage_client.py,sha256=EyiH-MgM_6iBItjmy2SkWAdjVbviacnxr3la-yiGfIw,2724
|
|
173
173
|
crawlee/storage_clients/_memory/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
|
+
crawlee/storage_clients/_redis/__init__.py,sha256=HrtfC0mRgz2dCNe8olCWjwe2NXnFFDPoXOuBIDBFlIg,328
|
|
175
|
+
crawlee/storage_clients/_redis/_client_mixin.py,sha256=DIPW4_En-w5z7tVbSUDa1jn5DFbLlG_6vW5naoeg_wY,12435
|
|
176
|
+
crawlee/storage_clients/_redis/_dataset_client.py,sha256=7D31bVcpmtyhRjZuzSvSKSMev8xoiX3J6f_NPc7wlUc,11562
|
|
177
|
+
crawlee/storage_clients/_redis/_key_value_store_client.py,sha256=EnR99uRJUOxqbELS-73vEDNw4WDJzF6-UG2ESHGrD6M,10394
|
|
178
|
+
crawlee/storage_clients/_redis/_request_queue_client.py,sha256=WNms5wHhDo7Jf2xG_8sV3KgtXEVDP-rBarOF-EUS92E,24958
|
|
179
|
+
crawlee/storage_clients/_redis/_storage_client.py,sha256=oeHLD1_vDDrgN6uUUT4nqGP-6z0YAM3_SnyxmFOvW58,5404
|
|
180
|
+
crawlee/storage_clients/_redis/_utils.py,sha256=fEdQ5b5Zex_bSXcKhXakfQBpXmo5wKnEK_MeeT-q6Bc,700
|
|
181
|
+
crawlee/storage_clients/_redis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua,sha256=gYtZfjXp-D-vvlRZBzf7J4OCP2_js_oNnzAkbPfK3Bg,1054
|
|
183
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua,sha256=66aJjIR9f6hQcmVhOA8Km_7PaduJFqOdsisieWDIn6E,1351
|
|
184
|
+
crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua,sha256=coxBfdPMc0X9rpszMCvdZXEbQzrRc7qU4flhN3MKwjc,1123
|
|
185
|
+
crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua,sha256=gvDTseg5vo-YNVvftip6f_unCdM7TaM_Btc9sj-5AOw,896
|
|
174
186
|
crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-dgXAdci6Y2M,312
|
|
175
187
|
crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay5t9fTfPXXI_4dwHY,15988
|
|
176
188
|
crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
|
|
177
|
-
crawlee/storage_clients/_sql/_db_models.py,sha256=
|
|
189
|
+
crawlee/storage_clients/_sql/_db_models.py,sha256=KzA-R_L6zv9gqQg7B27mF-fERNJuMUEnewV9iofmTnI,9812
|
|
178
190
|
crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
|
|
179
191
|
crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
|
|
180
|
-
crawlee/storage_clients/_sql/_storage_client.py,sha256=
|
|
192
|
+
crawlee/storage_clients/_sql/_storage_client.py,sha256=okONooE_1Z5hxhJaXl7jEDXEryKqXlnFx6F1EuxyP2s,11317
|
|
181
193
|
crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
194
|
crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
|
|
183
195
|
crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
|
|
184
196
|
crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
|
|
185
|
-
crawlee/storages/_key_value_store.py,sha256=
|
|
197
|
+
crawlee/storages/_key_value_store.py,sha256=xdkMJYdH3zXzwB3jtkijq-YkMlwBtfXxDFIUlpvpXAE,10298
|
|
186
198
|
crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
|
|
187
199
|
crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
|
|
188
200
|
crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
|
|
189
201
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
|
-
crawlee-1.0.
|
|
191
|
-
crawlee-1.0.
|
|
192
|
-
crawlee-1.0.
|
|
193
|
-
crawlee-1.0.
|
|
194
|
-
crawlee-1.0.
|
|
202
|
+
crawlee-1.0.5b18.dist-info/METADATA,sha256=Gi6jjwizwwxv2xNVnQky1WQ5pqNd06XDlnXmjXFVDgQ,29533
|
|
203
|
+
crawlee-1.0.5b18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
204
|
+
crawlee-1.0.5b18.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
205
|
+
crawlee-1.0.5b18.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
206
|
+
crawlee-1.0.5b18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|