crawlee 1.0.3b9__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

crawlee/_utils/recurring_task.py CHANGED
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from collections.abc import Callable
     from datetime import timedelta
+    from types import TracebackType
+
+    from typing_extensions import Self
 
 logger = getLogger(__name__)
 
@@ -26,6 +29,18 @@ class RecurringTask:
         self.delay = delay
         self.task: asyncio.Task | None = None
 
+    async def __aenter__(self) -> Self:
+        self.start()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ) -> None:
+        await self.stop()
+
     async def _wrapper(self) -> None:
         """Continuously execute the provided function with the specified delay.
 
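The new __aenter__/__aexit__ pair lets a RecurringTask be driven by `async with`, pairing start() and stop() automatically. A minimal usage sketch, assuming the existing `RecurringTask(func, delay)` constructor; the callback and delay below are illustrative, not from the diff:

    import asyncio
    from datetime import timedelta

    from crawlee._utils.recurring_task import RecurringTask


    async def main() -> None:
        async def heartbeat() -> None:
            print('tick')

        # Entering the block calls start(); leaving it awaits stop().
        async with RecurringTask(heartbeat, delay=timedelta(seconds=1)):
            await asyncio.sleep(3)  # the heartbeat fires roughly once per second meanwhile


    asyncio.run(main())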
crawlee/_utils/urls.py CHANGED
@@ -7,6 +7,7 @@ from yarl import URL
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
+    from logging import Logger
 
 
 def is_url_absolute(url: str) -> bool:
@@ -22,13 +23,19 @@ def convert_to_absolute_url(base_url: str, relative_url: str) -> str:
     return str(URL(base_url).join(URL(relative_url)))
 
 
-def to_absolute_url_iterator(base_url: str, urls: Iterator[str]) -> Iterator[str]:
+def to_absolute_url_iterator(base_url: str, urls: Iterator[str], logger: Logger | None = None) -> Iterator[str]:
     """Convert an iterator of relative URLs to absolute URLs using a base URL."""
     for url in urls:
         if is_url_absolute(url):
             yield url
         else:
-            yield convert_to_absolute_url(base_url, url)
+            converted_url = convert_to_absolute_url(base_url, url)
+            # Skip the URL if conversion fails, probably due to an incorrect format, such as 'mailto:'.
+            if not is_url_absolute(converted_url):
+                if logger:
+                    logger.debug(f'Could not convert URL "{url}" to absolute using base URL "{base_url}". Skipping it.')
+                continue
+            yield converted_url
 
 
 _http_url_adapter = TypeAdapter(AnyHttpUrl)
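With the new optional logger argument, URLs that still are not absolute after joining with the base, such as mailto: links, are skipped (and reported at debug level) instead of being yielded. A small sketch of the resulting behaviour when the helper is called directly; the example URLs and expected output are illustrative:

    from logging import getLogger

    from crawlee._utils.urls import to_absolute_url_iterator

    logger = getLogger(__name__)

    urls = ['/docs', 'https://example.com/about', 'mailto:hello@example.com']
    absolute = list(to_absolute_url_iterator('https://example.com', iter(urls), logger=logger))
    # Expected: ['https://example.com/docs', 'https://example.com/about'];
    # the mailto: entry is dropped and a debug message is logged for it.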
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py CHANGED
@@ -167,7 +167,9 @@ class AbstractHttpCrawler(
             kwargs.setdefault('strategy', 'same-hostname')
 
             links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-            links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+            )
 
             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py CHANGED
@@ -149,10 +149,6 @@ class AdaptivePlaywrightCrawler(
                 non-default configuration.
             kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
         """
-        # Some sub crawler kwargs are internally modified. Prepare copies.
-        basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
-        basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
-
         # Adaptive crawling related.
         self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
         self.result_checker = result_checker or (lambda _: True)
@@ -170,11 +166,11 @@ class AdaptivePlaywrightCrawler(
         # Each sub crawler will use custom logger .
         static_logger = getLogger('Subcrawler_static')
         static_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_static_crawler['_logger'] = static_logger
+        basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}
 
         pw_logger = getLogger('Subcrawler_playwright')
         pw_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_pw_crawler['_logger'] = pw_logger
+        basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}
 
         # Initialize sub crawlers to create their pipelines.
         static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
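Instead of deep-copying the shared kwargs and mutating the copies, each sub-crawler now gets a freshly built options mapping with its own logger merged in (typed as _BasicCrawlerOptions in the source). The pattern in isolation, shown with a plain dict and illustrative values:

    import logging
    from logging import getLogger

    kwargs = {'max_request_retries': 1}  # hypothetical options shared by both sub-crawlers

    static_logger = getLogger('Subcrawler_static')
    static_logger.setLevel(logging.ERROR)
    static_options = {'_logger': static_logger, **kwargs}

    pw_logger = getLogger('Subcrawler_playwright')
    pw_logger.setLevel(logging.ERROR)
    pw_options = {'_logger': pw_logger, **kwargs}

    # Each mapping is independent and the original kwargs stay untouched.
    assert static_options is not pw_options and '_logger' not in kwargs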
@@ -319,7 +315,7 @@ class AdaptivePlaywrightCrawler(
                 ),
                 logger=self._logger,
             )
-            return SubCrawlerRun(result=result)
+            return SubCrawlerRun(result=result, run_context=context_linked_to_result)
 
         except Exception as e:
             return SubCrawlerRun(exception=e)
@@ -375,7 +371,8 @@ class AdaptivePlaywrightCrawler(
             self.track_http_only_request_handler_runs()
 
             static_run = await self._crawl_one(rendering_type='static', context=context)
-            if static_run.result and self.result_checker(static_run.result):
+            if static_run.result and static_run.run_context and self.result_checker(static_run.result):
+                self._update_context_from_copy(context, static_run.run_context)
                 self._context_result_map[context] = static_run.result
                 return
             if static_run.exception:
@@ -406,13 +403,10 @@ class AdaptivePlaywrightCrawler(
         if pw_run.exception is not None:
             raise pw_run.exception
 
-        if pw_run.result:
-            self._context_result_map[context] = pw_run.result
-
+        if pw_run.result and pw_run.run_context:
             if should_detect_rendering_type:
                 detection_result: RenderingType
                 static_run = await self._crawl_one('static', context=context, state=old_state_copy)
-
                 if static_run.result and self.result_comparator(static_run.result, pw_run.result):
                     detection_result = 'static'
                 else:
@@ -421,6 +415,9 @@ class AdaptivePlaywrightCrawler(
                 context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
                 self.rendering_type_predictor.store_result(context.request, detection_result)
 
+            self._update_context_from_copy(context, pw_run.run_context)
+            self._context_result_map[context] = pw_run.result
+
     def pre_navigation_hook(
         self,
         hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
@@ -455,8 +452,32 @@ class AdaptivePlaywrightCrawler(
     def track_rendering_type_mispredictions(self) -> None:
         self.statistics.state.rendering_type_mispredictions += 1
 
+    def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
+        """Update mutable fields of `context` from `context_copy`.
+
+        Uses object.__setattr__ to bypass frozen dataclass restrictions,
+        allowing state synchronization after isolated crawler execution.
+        """
+        updating_attributes = {
+            'request': ('headers', 'user_data'),
+            'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
+        }
+
+        for attr, sub_attrs in updating_attributes.items():
+            original_sub_obj = getattr(context, attr)
+            copy_sub_obj = getattr(context_copy, attr)
+
+            # Check that both sub objects are not None
+            if original_sub_obj is None or copy_sub_obj is None:
+                continue
+
+            for sub_attr in sub_attrs:
+                new_value = getattr(copy_sub_obj, sub_attr)
+                object.__setattr__(original_sub_obj, sub_attr, new_value)
+
 
 @dataclass(frozen=True)
 class SubCrawlerRun:
     result: RequestHandlerRunResult | None = None
     exception: Exception | None = None
+    run_context: BasicCrawlingContext | None = None
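SubCrawlerRun now also carries the context the sub-crawler actually ran with, and _update_context_from_copy copies the mutable pieces (request headers and user data, session internals) back onto the original frozen context with object.__setattr__. A self-contained sketch of that pattern on a generic frozen dataclass; the field names here are illustrative, not crawlee's:

    from dataclasses import dataclass, field


    @dataclass(frozen=True)
    class Ctx:
        # Frozen dataclass: normal attribute assignment raises FrozenInstanceError.
        headers: dict[str, str] = field(default_factory=dict)
        user_data: dict[str, str] = field(default_factory=dict)


    original = Ctx(headers={'accept': '*/*'})
    updated_copy = Ctx(headers={'accept': '*/*', 'cookie': 'a=1'}, user_data={'seen': 'yes'})

    # object.__setattr__ bypasses the frozen __setattr__, which is how the crawler syncs
    # state collected by an isolated sub-crawler run back into the shared context.
    for attr in ('headers', 'user_data'):
        object.__setattr__(original, attr, getattr(updated_copy, attr))

    assert original.user_data == {'seen': 'yes'}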
crawlee/crawlers/_basic/_basic_crawler.py CHANGED
@@ -56,7 +56,7 @@ from crawlee.errors import (
     SessionError,
     UserDefinedErrorHandlerError,
 )
-from crawlee.events._types import Event, EventCrawlerStatusData
+from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
 from crawlee.http_clients import ImpitHttpClient
 from crawlee.router import Router
 from crawlee.sessions import SessionPool
@@ -437,14 +437,23 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self._statistics_log_format = statistics_log_format
 
         # Statistics
-        self._statistics = statistics or cast(
-            'Statistics[TStatisticsState]',
-            Statistics.with_default_state(
-                periodic_message_logger=self._logger,
-                statistics_log_format=self._statistics_log_format,
-                log_message='Current request statistics:',
-            ),
-        )
+        if statistics:
+            self._statistics = statistics
+        else:
+
+            async def persist_state_factory() -> KeyValueStore:
+                return await self.get_key_value_store()
+
+            self._statistics = cast(
+                'Statistics[TStatisticsState]',
+                Statistics.with_default_state(
+                    persistence_enabled=True,
+                    periodic_message_logger=self._logger,
+                    statistics_log_format=self._statistics_log_format,
+                    log_message='Current request statistics:',
+                    persist_state_kvs_factory=persist_state_factory,
+                ),
+            )
 
         # Additional context managers to enter and exit
         self._additional_context_managers = _additional_context_managers or []
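A BasicCrawler built without an explicit statistics argument now creates its Statistics with persistence enabled and backed by the crawler's own key-value store (via the inline persist_state_factory). Callers who want the previous non-persistent behaviour can still pass their own instance; a hedged sketch:

    from crawlee.crawlers import BasicCrawler
    from crawlee.statistics import Statistics

    # Supplying an explicit Statistics instance bypasses the new persistent default.
    crawler = BasicCrawler(statistics=Statistics.with_default_state(persistence_enabled=False))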
@@ -689,7 +698,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         except CancelledError:
             pass
         finally:
-            await self._crawler_state_rec_task.stop()
             if threading.current_thread() is threading.main_thread():
                 with suppress(NotImplementedError):
                     asyncio.get_running_loop().remove_signal_handler(signal.SIGINT)
@@ -721,8 +729,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
     async def _run_crawler(self) -> None:
         event_manager = self._service_locator.get_event_manager()
 
-        self._crawler_state_rec_task.start()
-
         # Collect the context managers to be entered. Context managers that are already active are excluded,
         # as they were likely entered by the caller, who will also be responsible for exiting them.
         contexts_to_enter = [
@@ -733,6 +739,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
                 self._statistics,
                 self._session_pool if self._use_session_pool else None,
                 self._http_client,
+                self._crawler_state_rec_task,
                 *self._additional_context_managers,
             )
             if cm and getattr(cm, 'active', False) is False
@@ -744,6 +751,9 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
         await self._autoscaled_pool.run()
 
+        # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
+        event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
+
     async def add_requests(
         self,
         requests: Sequence[str | Request],
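Because the crawler now emits Event.PERSIST_STATE after _autoscaled_pool.run() completes, listeners registered on the event manager get one final chance to flush their state at shutdown. A sketch of registering such a listener; reaching the event manager through crawlee.service_locator is an assumption about typical user code, not part of this diff:

    from crawlee import service_locator
    from crawlee.events._types import Event, EventPersistStateData


    async def on_persist_state(event_data: EventPersistStateData) -> None:
        # Flush any in-memory state here; this also fires periodically while the crawler runs.
        print(f'persist requested, is_migrating={event_data.is_migrating}')


    event_manager = service_locator.get_event_manager()
    event_manager.on(event=Event.PERSIST_STATE, listener=on_persist_state)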
@@ -972,6 +982,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
                 label=label,
                 user_data=user_data,
                 transform_request_function=transform_request_function,
+                **kwargs,
             ),
             rq_id=rq_id,
             rq_name=rq_name,
crawlee/crawlers/_playwright/_playwright_crawler.py CHANGED
@@ -366,7 +366,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             links_iterator: Iterator[str] = iter(
                 [url for element in elements if (url := await element.get_attribute('href')) is not None]
             )
-            links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
+            links_iterator = to_absolute_url_iterator(
+                context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+            )
 
             if robots_txt_file:
                 skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
crawlee/statistics/_statistics.py CHANGED
@@ -96,7 +96,7 @@ class Statistics(Generic[TStatisticsState]):
 
         self._state = RecoverableState(
             default_state=state_model(stats_id=self._id),
-            persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
+            persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_kvs_factory=persist_state_kvs_factory,
@@ -130,6 +130,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -141,6 +142,7 @@ class Statistics(Generic[TStatisticsState]):
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_key=persist_state_key,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             log_message=log_message,
             periodic_message_logger=periodic_message_logger,
             log_interval=log_interval,
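The new persist_state_kvs_factory parameter lets callers choose which KeyValueStore the statistics persist into; BasicCrawler uses it to point the statistics at its own store (see the constructor change above). A standalone sketch with an explicitly named store; the store name is illustrative:

    from crawlee.statistics import Statistics
    from crawlee.storages import KeyValueStore


    async def open_stats_store() -> KeyValueStore:
        # Any coroutine returning a KeyValueStore can serve as the factory.
        return await KeyValueStore.open(name='crawler-statistics')


    statistics = Statistics.with_default_state(
        persistence_enabled=True,
        persist_state_kvs_factory=open_stats_store,
    )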
@@ -187,7 +189,10 @@ class Statistics(Generic[TStatisticsState]):
         if not self._active:
             raise RuntimeError(f'The {self.__class__.__name__} is not active.')
 
-        self._state.current_value.crawler_finished_at = datetime.now(timezone.utc)
+        if not self.state.crawler_last_started_at:
+            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
+        self.state.crawler_finished_at = datetime.now(timezone.utc)
+        self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at
 
         await self._state.teardown()
 
@@ -255,8 +260,7 @@ class Statistics(Generic[TStatisticsState]):
         if self._instance_start is None:
             raise RuntimeError('The Statistics object is not initialized')
 
-        crawler_runtime = datetime.now(timezone.utc) - self._instance_start
-        total_minutes = crawler_runtime.total_seconds() / 60
+        total_minutes = self.state.crawler_runtime.total_seconds() / 60
         state = self._state.current_value
         serialized_state = state.model_dump(by_alias=False)
 
@@ -267,7 +271,7 @@ class Statistics(Generic[TStatisticsState]):
             requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
             request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
             requests_total=state.requests_failed + state.requests_finished,
-            crawler_runtime=crawler_runtime,
+            crawler_runtime=state.crawler_runtime,
             requests_finished=state.requests_finished,
             requests_failed=state.requests_failed,
             retry_histogram=serialized_state['request_retry_histogram'],
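Taken together, these changes make crawler_runtime an accumulated, persisted value: each run adds crawler_finished_at - crawler_last_started_at to the stored total instead of recomputing the runtime from _instance_start, so the per-minute rates survive pauses and resumed runs. A toy illustration of the bookkeeping, using plain datetime arithmetic rather than crawlee API:

    from datetime import datetime, timedelta, timezone

    crawler_runtime = timedelta(0)

    # First run: 90 seconds.
    last_started_at = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
    finished_at = last_started_at + timedelta(seconds=90)
    crawler_runtime += finished_at - last_started_at

    # Resumed run an hour later: 30 more seconds; the persisted total keeps growing.
    last_started_at = datetime(2024, 1, 1, 13, 0, 0, tzinfo=timezone.utc)
    finished_at = last_started_at + timedelta(seconds=30)
    crawler_runtime += finished_at - last_started_at

    assert crawler_runtime == timedelta(seconds=120)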
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.0.3b9
+Version: 1.0.4
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -30,14 +30,14 @@ crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZk
 crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
 crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
 crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
-crawlee/_utils/recurring_task.py,sha256=sA0n4Cf9pYLQyBD9PZ7QbR6m6KphlbkACaT2GdbLfs4,1757
+crawlee/_utils/recurring_task.py,sha256=sQMiURuDXbwwfAcIXK8V4NXncSxIBxsqN1cZWX7DLyg,2128
 crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
 crawlee/_utils/robots.py,sha256=k3Yi2OfKT0H04MPkP-OBGGV7fEePgOqb60awltjMYWY,4346
 crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
 crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
 crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
 crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
-crawlee/_utils/urls.py,sha256=NN27TA6KMU5V_j5TCZ4o33UIXw4pB9a-wGlmDQtYT8E,1294
+crawlee/_utils/urls.py,sha256=fEYXJxBT02f-DIYKF_h7PdaKAShfXBs99-dHDjDX03A,1725
 crawlee/_utils/wait.py,sha256=RfiXhp5VUBxOEtEMtru7_jNfKDr2BJCcFge5qGg2gxk,2848
 crawlee/_utils/web.py,sha256=nnKhg8pUSWz0RY64Qd-_GPNBX1fWI2hXS-gzcfQ-rig,364
 crawlee/browsers/__init__.py,sha256=TghkrNSbI_k87UgVBlgNNcEm8Ot05pSLEAPRSv6YsUs,1064
@@ -53,19 +53,19 @@ crawlee/crawlers/__init__.py,sha256=9VmFahav3rjE-2Bxa5PAhBgkYXP0k5SSAEpdG2xMZ7c,
 crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
 crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_abstract_http/__init__.py,sha256=QCjn8x7jpo8FwEeSRw10TVj_0La2v9mLEiQWdk2RoTw,273
-crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=ZG6A4DNGZYbOBXi0Th6K6CHDi2SqWO5VpxcnjypDO-A,11503
+crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=DEiErZi7j2FHMgyVELPy09GyHo5Gx4UDpuiN6D3sGNk,11553
 crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
 crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
 crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=LREq9WR9BKsE8S8lSsEhlCoNjQaLhlJ9yo8y_6a8o4c,1072
-crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=j-7lm5vTI_14JrEQQDUFQ3iWnidTaca376UbSZ-uiTk,21731
+crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=ME90JLkScWj_ynUymA59f832vEvvVpkP01cYfEc8m-Y,22895
 crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
 crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=9FlHIUC05IzUhJsVldQvpnDnj1jk8GJpqC98mPLN_fw,10431
 crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
 crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
 crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
 crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
-crawlee/crawlers/_basic/_basic_crawler.py,sha256=7qnDAO3t9qIn_RF2dCYgqTzr7rTg3namul3o3dsyES4,72690
+crawlee/crawlers/_basic/_basic_crawler.py,sha256=yZ_A_l9Dux9Y2eYa9XbN3c7h-3YO7MgGmJbzCMbCplg,73257
 crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
 crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
 crawlee/crawlers/_basic/_logging_utils.py,sha256=jp5mEwSq5a_BgzUhNPJ9WrIDcoIeYGbeHstcRqCcP0s,3093
@@ -85,7 +85,7 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
 crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
 crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
 crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
-crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=YI_EvJApfabuBY5TZq7OdBI-45ASiDE2GfsIC4qpd8A,23756
+crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=QfZVWj6A0H1idC0yQT-WAxlWTk7janB4TtKDtf8htt8,23806
 crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
 crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=Nfm69dqX85k68jN1p3ljZWbn8egqDWPIPRykXyXsoQs,3977
 crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=fEI2laWhmJdWiGoMF5JBLBsim9NtENfagZt6FFd2Rgo,1387
@@ -148,7 +148,7 @@ crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY
 crawlee/statistics/_error_snapshotter.py,sha256=ChBBG0gIMWcSeyEzs3jQf3mSnHLZUHcD284wEDan1Js,3278
 crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
 crawlee/statistics/_models.py,sha256=SFWYpT3r1c4XugU8nrm0epTpcM5_0fS1mXi9fnbhGJ8,5237
-crawlee/statistics/_statistics.py,sha256=vp8swl1yt4lBi2W0YyaI_xKCrRku0remI4BLx90q7-Y,12455
+crawlee/statistics/_statistics.py,sha256=d6z5XxXm-an4M_8TierOPpSB78vxqxwvUFCewIEmiK4,12786
 crawlee/storage_clients/__init__.py,sha256=RCnutWMOqs_kUQpzfLVT5jgpHGWakLv557c6UIYFQsA,754
 crawlee/storage_clients/models.py,sha256=gfW_kpSCOBuoTBIW0N7tb3FUv7BgD3keZADS7pyT4_I,6586
 crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -187,8 +187,8 @@ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKp
 crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
 crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
 crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee-1.0.3b9.dist-info/METADATA,sha256=CyjByjVQZw9Ys3xmTIa4ZEUV6hcQGt0aeltxUG6w0Pw,29314
-crawlee-1.0.3b9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-crawlee-1.0.3b9.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
-crawlee-1.0.3b9.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
-crawlee-1.0.3b9.dist-info/RECORD,,
+crawlee-1.0.4.dist-info/METADATA,sha256=8mrJxwsKWy8I_uBRXstDhgXKm_Ic25h3xS9EHTnL5jY,29312
+crawlee-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+crawlee-1.0.4.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
+crawlee-1.0.4.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+crawlee-1.0.4.dist-info/RECORD,,