crawlee 1.1.1__py3-none-any.whl → 1.1.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlee/_types.py +1 -20
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +2 -8
- crawlee/crawlers/_basic/_basic_crawler.py +44 -60
- crawlee/crawlers/_playwright/_playwright_crawler.py +3 -6
- crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- crawlee/storage_clients/_file_system/_key_value_store_client.py +3 -3
- crawlee/storage_clients/_file_system/_request_queue_client.py +3 -3
- {crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/METADATA +1 -1
- {crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/RECORD +12 -12
- {crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/WHEEL +1 -1
- {crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/entry_points.txt +0 -0
- {crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/licenses/LICENSE +0 -0
crawlee/_types.py
CHANGED
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
 import re
 from collections.abc import Callable, Coroutine, Sequence

-from typing_extensions import NotRequired, Required,
+from typing_extensions import NotRequired, Required, Unpack

 from crawlee import Glob, Request
 from crawlee._request import RequestOptions
@@ -643,25 +643,6 @@ class BasicCrawlingContext:
         """Return hash of the context. Each context is considered unique."""
         return id(self)

-    def create_modified_copy(
-        self,
-        push_data: PushDataFunction | None = None,
-        add_requests: AddRequestsFunction | None = None,
-        get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
-    ) -> Self:
-        """Create a modified copy of the crawling context with specified changes."""
-        original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
-        modified_fields = {
-            key: value
-            for key, value in {
-                'push_data': push_data,
-                'add_requests': add_requests,
-                'get_key_value_store': get_key_value_store,
-            }.items()
-            if value
-        }
-        return self.__class__(**{**original_fields, **modified_fields})
-

 class GetDataKwargs(TypedDict):
     """Keyword arguments for dataset's `get_data` method."""
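The `create_modified_copy` helper removed above merges non-empty overrides into a copy of the dataclass instance. For reference only, the same pattern can be expressed with the standard library's `dataclasses.replace`; this is an illustrative sketch, not crawlee code, and the `Context` class below is a made-up stand-in:

import dataclasses


@dataclasses.dataclass(frozen=True)
class Context:
    """Stand-in for a crawling context whose helper callables can be swapped out."""
    url: str
    push_data: object | None = None
    add_requests: object | None = None


ctx = Context(url='https://example.com')
# Keep every existing field and override only the helpers that were explicitly provided.
overrides = {key: value for key, value in {'push_data': print, 'add_requests': None}.items() if value}
print(dataclasses.replace(ctx, **overrides))  # only push_data is replaced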
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
CHANGED
@@ -167,15 +167,9 @@ class AbstractHttpCrawler(
             kwargs.setdefault('strategy', 'same-hostname')

             links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-
-
-            extracted_base_urls = list(self._parser.find_links(parsed_content, 'base[href]'))
-            base_url: str = (
-                str(extracted_base_urls[0])
-                if extracted_base_urls
-                else context.request.loaded_url or context.request.url
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
             )
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)

             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
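For orientation: the hunk above drops the `base[href]` lookup and resolves every extracted link against the request's loaded URL instead. A minimal sketch of that resolution step using only the standard library (an illustration, not crawlee's `to_absolute_url_iterator`; the helper name `absolutize` is invented here):

from collections.abc import Iterator
from urllib.parse import urljoin


def absolutize(base_url: str, links: Iterator[str]) -> Iterator[str]:
    """Yield absolute URLs by resolving each href against the page's final URL."""
    for href in links:
        yield urljoin(base_url, href)


links = iter(['intro', '/api', 'https://other.example/'])
print(list(absolutize('https://example.com/docs/', links)))
# ['https://example.com/docs/intro', 'https://example.com/api', 'https://other.example/']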
crawlee/crawlers/_basic/_basic_crawler.py
CHANGED
@@ -2,7 +2,6 @@
 from __future__ import annotations

 import asyncio
-import functools
 import logging
 import signal
 import sys
@@ -15,7 +14,7 @@ from contextlib import AsyncExitStack, suppress
 from datetime import timedelta
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generic, Literal,
+from typing import TYPE_CHECKING, Any, Generic, Literal, cast
 from urllib.parse import ParseResult, urlparse
 from weakref import WeakKeyDictionary

@@ -97,9 +96,6 @@ if TYPE_CHECKING:
 TCrawlingContext = TypeVar('TCrawlingContext', bound=BasicCrawlingContext, default=BasicCrawlingContext)
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 TRequestIterator = TypeVar('TRequestIterator', str, Request)
-TParams = ParamSpec('TParams')
-T = TypeVar('T')
-
 ErrorHandler = Callable[[TCrawlingContext, Exception], Awaitable[Request | None]]
 FailedRequestHandler = Callable[[TCrawlingContext, Exception], Awaitable[None]]
 SkippedRequestCallback = Callable[[str, SkippedReason], Awaitable[None]]
@@ -524,24 +520,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self._logger.info(f'Crawler.stop() was called with following reason: {reason}.')
         self._unexpected_stop = True

-    def _wrap_handler_with_error_context(
-        self, handler: Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]
-    ) -> Callable[[TCrawlingContext | BasicCrawlingContext, Exception], Awaitable[T]]:
-        """Decorate error handlers to make their context helpers usable."""
-
-        @functools.wraps(handler)
-        async def wrapped_handler(context: TCrawlingContext | BasicCrawlingContext, exception: Exception) -> T:
-            # Original context helpers that are from `RequestHandlerRunResult` will not be commited as the request
-            # failed. Modified context provides context helpers with direct access to the storages.
-            error_context = context.create_modified_copy(
-                push_data=self._push_data,
-                get_key_value_store=self.get_key_value_store,
-                add_requests=functools.partial(self._add_requests, context),
-            )
-            return await handler(error_context, exception)
-
-        return wrapped_handler
-
     def _stop_if_max_requests_count_exceeded(self) -> None:
         """Call `stop` when the maximum number of requests to crawl has been reached."""
         if self._max_requests_per_crawl is None:
@@ -640,7 +618,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

         The error handler is invoked after a request handler error occurs and before a retry attempt.
         """
-        self._error_handler =
+        self._error_handler = handler
         return handler

     def failed_request_handler(
@@ -650,7 +628,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

         The failed request handler is invoked when a request has failed all retry attempts.
         """
-        self._failed_request_handler =
+        self._failed_request_handler = handler
         return handler

     def on_skipped_request(self, callback: SkippedRequestCallback) -> SkippedRequestCallback:
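The two methods above register the hooks; a hedged usage sketch of the decorator-based registration they implement (the handler bodies below are illustrative only, not from crawlee):

from crawlee.crawlers import BasicCrawler, BasicCrawlingContext

crawler = BasicCrawler()


@crawler.error_handler
async def retry_hook(context: BasicCrawlingContext, error: Exception) -> None:
    # Runs after a request handler error, before the request is retried.
    context.log.warning(f'Retrying {context.request.url} after error: {error}')


@crawler.failed_request_handler
async def give_up_hook(context: BasicCrawlingContext, error: Exception) -> None:
    # Runs once a request has exhausted all retry attempts.
    context.log.error(f'Giving up on {context.request.url}: {error}')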
@@ -1278,46 +1256,52 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         else:
             yield Request.from_url(url)

-    async def _add_requests(
-        self,
-        context: BasicCrawlingContext,
-        requests: Sequence[str | Request],
-        rq_id: str | None = None,
-        rq_name: str | None = None,
-        rq_alias: str | None = None,
-        **kwargs: Unpack[EnqueueLinksKwargs],
-    ) -> None:
-        """Add requests method aware of the crawling context."""
-        if rq_id or rq_name or rq_alias:
-            request_manager: RequestManager = await RequestQueue.open(
-                id=rq_id,
-                name=rq_name,
-                alias=rq_alias,
-                storage_client=self._service_locator.get_storage_client(),
-                configuration=self._service_locator.get_configuration(),
-            )
-        else:
-            request_manager = await self.get_request_manager()
-
-        context_aware_requests = list[Request]()
-        base_url = kwargs.get('base_url') or context.request.loaded_url or context.request.url
-        requests_iterator = self._convert_url_to_request_iterator(requests, base_url)
-        filter_requests_iterator = self._enqueue_links_filter_iterator(requests_iterator, context.request.url, **kwargs)
-        for dst_request in filter_requests_iterator:
-            # Update the crawl depth of the request.
-            dst_request.crawl_depth = context.request.crawl_depth + 1
-
-            if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
-                context_aware_requests.append(dst_request)
-
-        return await request_manager.add_requests(context_aware_requests)
-
     async def _commit_request_handler_result(self, context: BasicCrawlingContext) -> None:
         """Commit request handler result for the input `context`. Result is taken from `_context_result_map`."""
         result = self._context_result_map[context]

+        base_request_manager = await self.get_request_manager()
+
+        origin = context.request.loaded_url or context.request.url
+
         for add_requests_call in result.add_requests_calls:
-
+            rq_id = add_requests_call.get('rq_id')
+            rq_name = add_requests_call.get('rq_name')
+            rq_alias = add_requests_call.get('rq_alias')
+            specified_params = sum(1 for param in [rq_id, rq_name, rq_alias] if param is not None)
+            if specified_params > 1:
+                raise ValueError('You can only provide one of `rq_id`, `rq_name` or `rq_alias` arguments.')
+            if rq_id or rq_name or rq_alias:
+                request_manager: RequestManager | RequestQueue = await RequestQueue.open(
+                    id=rq_id,
+                    name=rq_name,
+                    alias=rq_alias,
+                    storage_client=self._service_locator.get_storage_client(),
+                    configuration=self._service_locator.get_configuration(),
+                )
+            else:
+                request_manager = base_request_manager
+
+            requests = list[Request]()
+
+            base_url = url if (url := add_requests_call.get('base_url')) else origin
+
+            requests_iterator = self._convert_url_to_request_iterator(add_requests_call['requests'], base_url)
+
+            enqueue_links_kwargs: EnqueueLinksKwargs = {k: v for k, v in add_requests_call.items() if k != 'requests'}  # type: ignore[assignment]
+
+            filter_requests_iterator = self._enqueue_links_filter_iterator(
+                requests_iterator, context.request.url, **enqueue_links_kwargs
+            )
+
+            for dst_request in filter_requests_iterator:
+                # Update the crawl depth of the request.
+                dst_request.crawl_depth = context.request.crawl_depth + 1
+
+                if self._max_crawl_depth is None or dst_request.crawl_depth <= self._max_crawl_depth:
+                    requests.append(dst_request)
+
+            await request_manager.add_requests(requests)

         for push_data_call in result.push_data_calls:
             await self._push_data(**push_data_call)
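In the reworked `_commit_request_handler_result` above, each recorded `add_requests` call may carry at most one of `rq_id`, `rq_name`, or `rq_alias`, and a dedicated request queue is opened only when one of them is set. A hedged usage sketch of the handler side (illustrative only; it assumes the public `add_requests` context helper forwards these keyword arguments, as the committed call dictionaries suggest, and the queue name 'follow-up' is made up):

import asyncio

from crawlee.crawlers import BasicCrawler, BasicCrawlingContext


async def main() -> None:
    crawler = BasicCrawler(max_requests_per_crawl=10)

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # The call is only recorded while the handler runs; it is committed afterwards
        # by _commit_request_handler_result, shown in the hunk above.
        await context.add_requests(
            ['https://crawlee.dev/docs'],
            rq_name='follow-up',  # at most one of rq_id / rq_name / rq_alias
        )

    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())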
crawlee/crawlers/_playwright/_playwright_crawler.py
CHANGED
@@ -369,12 +369,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             links_iterator: Iterator[str] = iter(
                 [url for element in elements if (url := await element.get_attribute('href')) is not None]
             )
-
-
-
-            base_url: str = extracted_base_url or context.request.loaded_url or context.request.url
-
-            links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+            )

             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
crawlee/storage_clients/_file_system/_dataset_client.py
CHANGED
@@ -134,7 +134,7 @@ class FileSystemDatasetClient(DatasetClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = DatasetMetadata(**file_content)
@@ -163,7 +163,7 @@ class FileSystemDatasetClient(DatasetClient):

         # If the dataset directory exists, reconstruct the client from the metadata file.
         if path_to_dataset.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
crawlee/storage_clients/_file_system/_key_value_store_client.py
CHANGED
@@ -133,7 +133,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = KeyValueStoreMetadata(**file_content)
@@ -162,7 +162,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):

         # If the key-value store directory exists, reconstruct the client from the metadata file.
         if path_to_kvs.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -239,7 +239,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
         # Read the metadata file
         async with self._lock:
             try:
-                file = await asyncio.to_thread(open, record_metadata_filepath
+                file = await asyncio.to_thread(open, record_metadata_filepath)
             except FileNotFoundError:
                 logger.warning(f'Metadata file disappeared for key "{key}", aborting get_value')
                 return None
crawlee/storage_clients/_file_system/_request_queue_client.py
CHANGED
@@ -197,7 +197,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
                 continue

             try:
-                file = await asyncio.to_thread(path_to_metadata.open
+                file = await asyncio.to_thread(path_to_metadata.open)
                 try:
                     file_content = json.load(file)
                     metadata = RequestQueueMetadata(**file_content)
@@ -232,7 +232,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):

         # If the RQ directory exists, reconstruct the client from the metadata file.
         if path_to_rq.exists() and path_to_metadata.exists():
-            file = await asyncio.to_thread(open, path_to_metadata
+            file = await asyncio.to_thread(open, path_to_metadata)
             try:
                 file_content = json.load(file)
             finally:
@@ -775,7 +775,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         """
         # Open the request file.
         try:
-            file = await asyncio.to_thread(open, file_path
+            file = await asyncio.to_thread(open, file_path)
         except FileNotFoundError:
             logger.warning(f'Request file "{file_path}" not found.')
             return None
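The storage-client hunks above all settle on `await asyncio.to_thread(open, path)` (or `path.open`), which runs the blocking `open()` call in a worker thread so the event loop is not stalled. A standalone sketch of the pattern (generic; `metadata.json` is just an example file created for the demo, not one of crawlee's storage files):

import asyncio
import json
from pathlib import Path


async def read_json(path: Path) -> dict:
    """Load a JSON file without blocking the event loop."""
    # open() touches the filesystem, so it is offloaded to a thread.
    file = await asyncio.to_thread(path.open)
    try:
        # json.load reads from the file object and is blocking as well.
        return await asyncio.to_thread(json.load, file)
    finally:
        await asyncio.to_thread(file.close)


if __name__ == '__main__':
    path = Path('metadata.json')
    path.write_text(json.dumps({'name': 'example', 'item_count': 0}))
    print(asyncio.run(read_json(path)))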
{crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/RECORD
CHANGED
@@ -5,7 +5,7 @@ crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
 crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
 crawlee/_request.py,sha256=fnUofyFMV3HJwfcLjYr2BCZ5K9mEwl6vZd8Pr309wCE,16458
 crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
-crawlee/_types.py,sha256=
+crawlee/_types.py,sha256=DAmfSv5W1dt3nJhJ8z-02gDaE06fdEizNKUlHpsd2_A,29129
 crawlee/configuration.py,sha256=DWS2z1FC6Ua93W2tStK3R1ZKZbZjVQYWGiGFbZFaRtA,8064
 crawlee/errors.py,sha256=RhFNA_uT615nVBHf9TylpX5YWwtDuHUUEV8LPT4CYa4,3878
 crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
@@ -53,7 +53,7 @@ crawlee/crawlers/__init__.py,sha256=9VmFahav3rjE-2Bxa5PAhBgkYXP0k5SSAEpdG2xMZ7c,
 crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
 crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_abstract_http/__init__.py,sha256=QCjn8x7jpo8FwEeSRw10TVj_0La2v9mLEiQWdk2RoTw,273
-crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
+crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=DEiErZi7j2FHMgyVELPy09GyHo5Gx4UDpuiN6D3sGNk,11553
 crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
 crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
 crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -65,7 +65,7 @@ crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkb
 crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
 crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
 crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
-crawlee/crawlers/_basic/_basic_crawler.py,sha256=
+crawlee/crawlers/_basic/_basic_crawler.py,sha256=w8JSPF1zw2QG_1Phek5dENL_d5BC2N4dSZ6oR0MoUSY,73052
 crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
 crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
 crawlee/crawlers/_basic/_logging_utils.py,sha256=jp5mEwSq5a_BgzUhNPJ9WrIDcoIeYGbeHstcRqCcP0s,3093
@@ -85,7 +85,7 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
 crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
 crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
 crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
-crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
+crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=53iytj5LZHw19QOYqYNlZL4ApPlhbWn9Ds-DPTKANhQ,24158
 crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
 crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=Nfm69dqX85k68jN1p3ljZWbn8egqDWPIPRykXyXsoQs,3977
 crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=fEI2laWhmJdWiGoMF5JBLBsim9NtENfagZt6FFd2Rgo,1387
@@ -159,9 +159,9 @@ crawlee/storage_clients/_base/_request_queue_client.py,sha256=cgM4yk6xJwgfzP-xaN
 crawlee/storage_clients/_base/_storage_client.py,sha256=RvmKCV1U9_KxyG7n8xhClm2vwD2SKChWIiBLk6cuqw0,3523
 crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
-crawlee/storage_clients/_file_system/_dataset_client.py,sha256=
-crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=
-crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=
+crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
+crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
+crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=ETwy6eODf3dlBqy2RPM3nr2_oEm2ht37WpoTlFxn4A8,33970
 crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
 crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -199,8 +199,8 @@ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKp
 crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
 crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
 crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee-1.1.
-crawlee-1.1.
-crawlee-1.1.
-crawlee-1.1.
-crawlee-1.1.
+crawlee-1.1.1b2.dist-info/METADATA,sha256=LMwCP8_OXNKMiOwWfP1tF-6gy2OTkjJdrjtfKHRfdLs,29532
+crawlee-1.1.1b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+crawlee-1.1.1b2.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
+crawlee-1.1.1b2.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+crawlee-1.1.1b2.dist-info/RECORD,,

{crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/entry_points.txt
File without changes

{crawlee-1.1.1.dist-info → crawlee-1.1.1b2.dist-info}/licenses/LICENSE
File without changes