crawlee 1.1.2b4-py3-none-any.whl → 1.2.1b7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlee/__init__.py +2 -1
- crawlee/_request.py +29 -10
- crawlee/_types.py +22 -1
- crawlee/_utils/context.py +2 -2
- crawlee/_utils/recurring_task.py +2 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +6 -2
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +10 -33
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +6 -2
- crawlee/crawlers/_basic/_basic_crawler.py +31 -20
- crawlee/crawlers/_basic/_context_utils.py +24 -0
- crawlee/crawlers/_basic/_logging_utils.py +23 -4
- crawlee/crawlers/_playwright/_playwright_crawler.py +19 -8
- crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py +4 -1
- crawlee/crawlers/_playwright/_types.py +12 -2
- crawlee/errors.py +4 -0
- crawlee/events/_event_manager.py +1 -3
- crawlee/router.py +13 -3
- {crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/METADATA +7 -14
- {crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/RECORD +22 -21
- {crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/WHEEL +0 -0
- {crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/entry_points.txt +0 -0
- {crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/licenses/LICENSE +0 -0
crawlee/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 from importlib import metadata
 
-from ._request import Request, RequestOptions
+from ._request import Request, RequestOptions, RequestState
 from ._service_locator import service_locator
 from ._types import ConcurrencySettings, EnqueueStrategy, HttpHeaders, RequestTransformAction, SkippedReason
 from ._utils.globs import Glob
@@ -14,6 +14,7 @@ __all__ = [
     'HttpHeaders',
     'Request',
     'RequestOptions',
+    'RequestState',
    'RequestTransformAction',
    'SkippedReason',
    'service_locator',
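The practical effect of this hunk is that `RequestState` is re-exported from the package root, so user code no longer has to reach into the private `crawlee._request` module. A minimal sketch (the URL is a placeholder; the `UNPROCESSED` default comes from the `_request.py` hunk below):

```python
from crawlee import Request, RequestState

# Fresh requests start in the UNPROCESSED state and move through the lifecycle
# (BEFORE_NAV, REQUEST_HANDLER, AFTER_NAV, DONE, ...) as the crawler processes them.
request = Request.from_url('https://crawlee.dev')
print(request.state is RequestState.UNPROCESSED)  # expected: True
```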
crawlee/_request.py
CHANGED
@@ -34,14 +34,14 @@ class RequestState(IntEnum):
 class CrawleeRequestData(BaseModel):
     """Crawlee-specific configuration stored in the `user_data`."""
 
-    max_retries: Annotated[int | None, Field(alias='maxRetries')] = None
+    max_retries: Annotated[int | None, Field(alias='maxRetries', frozen=True)] = None
     """Maximum number of retries for this request. Allows to override the global `max_request_retries` option of
     `BasicCrawler`."""
 
     enqueue_strategy: Annotated[EnqueueStrategy | None, Field(alias='enqueueStrategy')] = None
     """The strategy that was used for enqueuing the request."""
 
-    state: RequestState
+    state: RequestState = RequestState.UNPROCESSED
     """Describes the request's current lifecycle state."""
 
     session_rotation_count: Annotated[int | None, Field(alias='sessionRotationCount')] = None
@@ -137,6 +137,8 @@ class RequestOptions(TypedDict):
     always_enqueue: NotRequired[bool]
     user_data: NotRequired[dict[str, JsonSerializable]]
     no_retry: NotRequired[bool]
+    enqueue_strategy: NotRequired[EnqueueStrategy]
+    max_retries: NotRequired[int | None]
 
 
 @docs_group('Storage data')
@@ -166,7 +168,7 @@ class Request(BaseModel):
 
     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
 
-    unique_key: Annotated[str, Field(alias='uniqueKey')]
+    unique_key: Annotated[str, Field(alias='uniqueKey', frozen=True)]
     """A unique key identifying the request. Two requests with the same `unique_key` are considered as pointing
     to the same URL.
 
@@ -178,17 +180,18 @@
     and specify which URLs shall be considered equal.
     """
 
-    url: Annotated[str, BeforeValidator(validate_http_url), Field()]
+    url: Annotated[str, BeforeValidator(validate_http_url), Field(frozen=True)]
     """The URL of the web page to crawl. Must be a valid HTTP or HTTPS URL, and may include query parameters
     and fragments."""
 
-    method: HttpMethod = 'GET'
+    method: Annotated[HttpMethod, Field(frozen=True)] = 'GET'
     """HTTP request method."""
 
     payload: Annotated[
         HttpPayload | None,
         BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
         PlainSerializer(lambda v: v.decode() if isinstance(v, bytes) else v),
+        Field(frozen=True),
     ] = None
     """HTTP request payload."""
 
@@ -250,6 +253,8 @@
         keep_url_fragment: bool = False,
         use_extended_unique_key: bool = False,
         always_enqueue: bool = False,
+        enqueue_strategy: EnqueueStrategy | None = None,
+        max_retries: int | None = None,
         **kwargs: Any,
     ) -> Self:
         """Create a new `Request` instance from a URL.
@@ -277,6 +282,9 @@
                 `unique_key` computation. This is only relevant when `unique_key` is not provided.
             always_enqueue: If set to `True`, the request will be enqueued even if it is already present in the queue.
                 Using this is not allowed when a custom `unique_key` is also provided and will result in a `ValueError`.
+            enqueue_strategy: The strategy that will be used for enqueuing the request.
+            max_retries: Maximum number of retries for this request. Allows to override the global `max_request_retries`
+                option of `BasicCrawler`.
             **kwargs: Additional request properties.
         """
         if unique_key is not None and always_enqueue:
@@ -301,12 +309,27 @@
         if always_enqueue:
             unique_key = f'{crypto_random_object_id()}|{unique_key}'
 
+        user_data_dict = kwargs.pop('user_data', {}) or {}
+        crawlee_data_dict = user_data_dict.get('__crawlee', {})
+
+        if max_retries is not None:
+            crawlee_data_dict['maxRetries'] = max_retries
+
+        if enqueue_strategy is not None:
+            crawlee_data_dict['enqueueStrategy'] = enqueue_strategy
+
+        crawlee_data = CrawleeRequestData(**crawlee_data_dict)
+
+        if crawlee_data:
+            user_data_dict['__crawlee'] = crawlee_data
+
         request = cls(
             url=url,
             unique_key=unique_key,
             method=method,
             headers=headers,
             payload=payload,
+            user_data=user_data_dict,
             **kwargs,
         )
 
@@ -352,7 +375,7 @@
         self.crawlee_data.crawl_depth = new_value
 
     @property
-    def state(self) -> RequestState
+    def state(self) -> RequestState:
         """Crawlee-specific request handling state."""
         return self.crawlee_data.state
 
@@ -365,10 +388,6 @@
         """Crawlee-specific limit on the number of retries of the request."""
         return self.crawlee_data.max_retries
 
-    @max_retries.setter
-    def max_retries(self, new_max_retries: int) -> None:
-        self.crawlee_data.max_retries = new_max_retries
-
     @property
     def session_rotation_count(self) -> int | None:
         """Crawlee-specific number of finished session rotations for the request."""
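A hedged usage sketch of the new `from_url` keyword arguments; the URL and values are placeholders. Note that the `max_retries` setter is removed above, so the per-request retry limit now has to be supplied at creation time:

```python
from crawlee import Request

# Both values end up in the '__crawlee' section of user_data, which is what the
# new CrawleeRequestData handling inside from_url() builds.
request = Request.from_url(
    'https://crawlee.dev',
    max_retries=2,                     # overrides BasicCrawler's max_request_retries for this request
    enqueue_strategy='same-hostname',  # same literal values as enqueue_links(strategy=...)
)

print(request.max_retries)       # 2
print(request.enqueue_strategy)  # 'same-hostname'
```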
crawlee/_types.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import dataclasses
 from collections.abc import Callable, Iterator, Mapping
+from copy import deepcopy
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload
 
@@ -260,12 +261,24 @@ class KeyValueStoreChangeRecords:
 class RequestHandlerRunResult:
     """Record of calls to storage-related context helpers."""
 
-    def __init__(
+    def __init__(
+        self,
+        *,
+        key_value_store_getter: GetKeyValueStoreFunction,
+        request: Request,
+    ) -> None:
         self._key_value_store_getter = key_value_store_getter
         self.add_requests_calls = list[AddRequestsKwargs]()
         self.push_data_calls = list[PushDataFunctionCall]()
         self.key_value_store_changes = dict[tuple[str | None, str | None, str | None], KeyValueStoreChangeRecords]()
 
+        # Isolated copies for handler execution
+        self._request = deepcopy(request)
+
+    @property
+    def request(self) -> Request:
+        return self._request
+
     async def add_requests(
         self,
         requests: Sequence[str | Request],
@@ -315,6 +328,14 @@ class RequestHandlerRunResult:
 
         return self.key_value_store_changes[id, name, alias]
 
+    def apply_request_changes(self, target: Request) -> None:
+        """Apply tracked changes from handler copy to original request."""
+        if self.request.user_data != target.user_data:
+            target.user_data = self.request.user_data
+
+        if self.request.headers != target.headers:
+            target.headers = self.request.headers
+
 
 @docs_group('Functions')
 class AddRequestsFunction(Protocol):
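A hedged sketch of the new isolation behaviour. `RequestHandlerRunResult` is an internal class, so the private import path and the placeholder getter below are for illustration only (the getter is stored but never invoked in this sketch):

```python
from crawlee import Request
from crawlee._types import RequestHandlerRunResult


async def kvs_getter_placeholder(*args, **kwargs):  # hypothetical stand-in, never called here
    raise NotImplementedError


original = Request.from_url('https://crawlee.dev')
result = RequestHandlerRunResult(key_value_store_getter=kvs_getter_placeholder, request=original)

# Handlers now receive an isolated deep copy of the request...
result.request.user_data['seen'] = True
assert 'seen' not in original.user_data

# ...and the crawler syncs mutated user_data / headers back afterwards.
result.apply_request_changes(target=original)
assert original.user_data['seen'] is True
```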
crawlee/_utils/context.py
CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import asyncio
+import inspect
 from collections.abc import Callable
 from functools import wraps
 from typing import Any, TypeVar
@@ -44,4 +44,4 @@ def ensure_context(method: T) -> T:
 
         return await method(self, *args, **kwargs)
 
-    return async_wrapper if asyncio.iscoroutinefunction(method) else sync_wrapper  # type: ignore[return-value]
+    return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper  # type: ignore[return-value]
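For context, the decorator keeps the usual sync/async dispatch pattern, now keyed on `inspect.iscoroutinefunction` instead of the `asyncio` variant (newer Python versions also teach `inspect.iscoroutinefunction` about `unittest.mock.AsyncMock`, per the CPython issue cited in the removed `_event_manager.py` note further below, which is presumably the motivation). A minimal re-creation of the pattern with illustrative names, not the library's:

```python
import inspect
from functools import wraps


def dispatch_by_coroutine(method):
    """Pick a sync or async wrapper at decoration time, like ensure_context does."""

    @wraps(method)
    def sync_wrapper(self, *args, **kwargs):
        return method(self, *args, **kwargs)

    @wraps(method)
    async def async_wrapper(self, *args, **kwargs):
        return await method(self, *args, **kwargs)

    # The decision is made once, when the method is decorated.
    return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper
```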
crawlee/_utils/recurring_task.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import inspect
 from logging import getLogger
 from typing import TYPE_CHECKING
 
@@ -49,7 +50,7 @@ class RecurringTask:
         """
         sleep_time_secs = self.delay.total_seconds()
         while True:
-            await self.func() if asyncio.iscoroutinefunction(self.func) else self.func()
+            await self.func() if inspect.iscoroutinefunction(self.func) else self.func()
             await asyncio.sleep(sleep_time_secs)
 
     def start(self) -> None:
crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
CHANGED
@@ -10,7 +10,7 @@ from more_itertools import partition
 from pydantic import ValidationError
 from typing_extensions import NotRequired, TypeVar
 
-from crawlee._request import Request, RequestOptions
+from crawlee._request import Request, RequestOptions, RequestState
 from crawlee._utils.docs import docs_group
 from crawlee._utils.time import SharedTimeout
 from crawlee._utils.urls import to_absolute_url_iterator
@@ -191,6 +191,7 @@ class AbstractHttpCrawler(
         robots_txt_file = await self._get_robots_txt_file_for_url(context.request.url)
 
         kwargs.setdefault('strategy', 'same-hostname')
+        strategy = kwargs.get('strategy', 'same-hostname')
 
         links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
 
@@ -209,7 +210,9 @@ class AbstractHttpCrawler(
             skipped = iter([])
 
         for url in self._enqueue_links_filter_iterator(links_iterator, context.request.url, **kwargs):
-            request_options = RequestOptions(
+            request_options = RequestOptions(
+                url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
+            )
 
             if transform_request_function:
                 transform_request_options = transform_request_function(request_options)
@@ -257,6 +260,7 @@ class AbstractHttpCrawler(
             timeout=remaining_timeout,
         )
 
+        context.request.state = RequestState.AFTER_NAV
         yield HttpCrawlingContext.from_basic_crawling_context(context=context, http_response=result.http_response)
 
     async def _handle_status_code_response(
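The user-visible effect is that the strategy passed to `enqueue_links` is now recorded on every request it produces, via the new `enqueue_strategy` key of `RequestOptions`. A hedged sketch with one of the HTTP-based crawlers; the URL and limits are placeholders:

```python
import asyncio

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext


async def main() -> None:
    crawler = ParselCrawler(max_requests_per_crawl=10)

    @crawler.router.default_handler
    async def handler(context: ParselCrawlingContext) -> None:
        # 'same-hostname' is both the default strategy and the value now recorded
        # in each enqueued request's Crawlee metadata.
        await context.enqueue_links(strategy='same-hostname')

    await crawler.run(['https://crawlee.dev'])


asyncio.run(main())
```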
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py
CHANGED
@@ -290,11 +290,14 @@
            use_state_function = context.use_state

            # New result is created and injected to newly created context. This is done to ensure isolation of sub crawlers.
-            result = RequestHandlerRunResult(
+            result = RequestHandlerRunResult(
+                key_value_store_getter=self.get_key_value_store,
+                request=context.request,
+            )
            context_linked_to_result = BasicCrawlingContext(
-                request=
-                session=
-                proxy_info=
+                request=result.request,
+                session=context.session,
+                proxy_info=context.proxy_info,
                send_request=context.send_request,
                add_requests=result.add_requests,
                push_data=result.push_data,
@@ -314,7 +317,7 @@ class AdaptivePlaywrightCrawler(
                ),
                logger=self._logger,
            )
-            return SubCrawlerRun(result=result
+            return SubCrawlerRun(result=result)
        except Exception as e:
            return SubCrawlerRun(exception=e)

@@ -370,8 +373,7 @@ class AdaptivePlaywrightCrawler(
            self.track_http_only_request_handler_runs()

            static_run = await self._crawl_one(rendering_type='static', context=context)
-            if static_run.result and
-                self._update_context_from_copy(context, static_run.run_context)
+            if static_run.result and self.result_checker(static_run.result):
                self._context_result_map[context] = static_run.result
                return
            if static_run.exception:
@@ -402,7 +404,7 @@ class AdaptivePlaywrightCrawler(
        if pw_run.exception is not None:
            raise pw_run.exception

-        if pw_run.result
+        if pw_run.result:
            if should_detect_rendering_type:
                detection_result: RenderingType
                static_run = await self._crawl_one('static', context=context, state=old_state_copy)
@@ -414,7 +416,6 @@ class AdaptivePlaywrightCrawler(
                context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
                self.rendering_type_predictor.store_result(context.request, detection_result)

-            self._update_context_from_copy(context, pw_run.run_context)
            self._context_result_map[context] = pw_run.result

    def pre_navigation_hook(
@@ -451,32 +452,8 @@ class AdaptivePlaywrightCrawler(
    def track_rendering_type_mispredictions(self) -> None:
        self.statistics.state.rendering_type_mispredictions += 1

-    def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
-        """Update mutable fields of `context` from `context_copy`.
-
-        Uses object.__setattr__ to bypass frozen dataclass restrictions,
-        allowing state synchronization after isolated crawler execution.
-        """
-        updating_attributes = {
-            'request': ('headers', 'user_data'),
-            'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
-        }
-
-        for attr, sub_attrs in updating_attributes.items():
-            original_sub_obj = getattr(context, attr)
-            copy_sub_obj = getattr(context_copy, attr)
-
-            # Check that both sub objects are not None
-            if original_sub_obj is None or copy_sub_obj is None:
-                continue
-
-            for sub_attr in sub_attrs:
-                new_value = getattr(copy_sub_obj, sub_attr)
-                object.__setattr__(original_sub_obj, sub_attr, new_value)
-

 @dataclass(frozen=True)
 class SubCrawlerRun:
     result: RequestHandlerRunResult | None = None
     exception: Exception | None = None
-    run_context: BasicCrawlingContext | None = None
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py
CHANGED
@@ -17,7 +17,7 @@ if TYPE_CHECKING:
     from playwright.async_api import Page, Response
     from typing_extensions import Self
 
-    from crawlee.crawlers._playwright._types import BlockRequestsFunction
+    from crawlee.crawlers._playwright._types import BlockRequestsFunction, GotoOptions
 
 
 TStaticParseResult = TypeVar('TStaticParseResult')
@@ -190,8 +190,9 @@ class AdaptivePlaywrightCrawlingContext(
         http_response = await PlaywrightHttpResponse.from_playwright_response(
             response=context.response, protocol=protocol_guess or ''
         )
-        # block_requests
+        # block_requests and goto_options are useful only on pre-navigation contexts. It is useless here.
         context_kwargs.pop('block_requests')
+        context_kwargs.pop('goto_options')
         return cls(
             parsed_content=await parser.parse(http_response),
             http_response=http_response,
@@ -212,6 +213,9 @@ class AdaptivePlaywrightPreNavCrawlingContext(BasicCrawlingContext):
     block_requests: BlockRequestsFunction | None = None
     """Blocks network requests matching specified URL patterns."""
 
+    goto_options: GotoOptions | None = None
+    """Additional options to pass to Playwright's `Page.goto()` method. The `timeout` option is not supported."""
+
     @property
     def page(self) -> Page:
         """The Playwright `Page` object for the current page.
crawlee/crawlers/_basic/_basic_crawler.py
CHANGED
@@ -59,6 +59,7 @@ from crawlee.errors import (
     RequestHandlerError,
     SessionError,
     UserDefinedErrorHandlerError,
+    UserHandlerTimeoutError,
 )
 from crawlee.events._types import Event, EventCrawlerStatusData
 from crawlee.http_clients import ImpitHttpClient
@@ -68,6 +69,7 @@ from crawlee.statistics import Statistics, StatisticsState
 from crawlee.storages import Dataset, KeyValueStore, RequestQueue
 
 from ._context_pipeline import ContextPipeline
+from ._context_utils import swaped_context
 from ._logging_utils import (
     get_one_line_error_summary_if_possible,
     reduce_asyncio_timeout_error_to_relevant_traceback_parts,
@@ -1037,7 +1039,12 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         warning_flag = True
 
         for request in request_iterator:
-
+            if isinstance(request, Request):
+                if request.enqueue_strategy != strategy:
+                    request.enqueue_strategy = strategy
+                target_url = request.url
+            else:
+                target_url = request
             parsed_target_url = urlparse(target_url)
 
             if warning_flag and strategy != 'all' and not parsed_target_url.hostname:
@@ -1134,7 +1141,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             request.retry_count += 1
             reduced_error = str(error).split('\n')[0]
             self.log.warning(
-                f'Retrying request to {context.request.url} due to: {reduced_error}'
+                f'Retrying request to {context.request.url} due to: {reduced_error}. '
                 f'{get_one_line_error_summary_if_possible(error)}'
             )
             await self._statistics.error_tracker.add(error=error, context=context)
@@ -1152,6 +1159,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
             await request_manager.reclaim_request(request)
         else:
+            request.state = RequestState.ERROR
             await self._mark_request_as_handled(request)
             await self._handle_failed_request(context, error)
             self._statistics.record_request_processing_failure(request.unique_key)
@@ -1167,8 +1175,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
                 f'{self._internal_timeout.total_seconds()} seconds',
                 logger=self._logger,
             )
-
-            context.request.state = RequestState.DONE
         except UserDefinedErrorHandlerError:
             context.request.state = RequestState.ERROR
             raise
@@ -1201,8 +1207,8 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self, request: Request | str, reason: SkippedReason, *, need_mark: bool = False
     ) -> None:
         if need_mark and isinstance(request, Request):
-            await self._mark_request_as_handled(request)
             request.state = RequestState.SKIPPED
+            await self._mark_request_as_handled(request)
 
         url = request.url if isinstance(request, Request) else request
 
@@ -1222,10 +1228,11 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
         if (
             isinstance(error, asyncio.exceptions.TimeoutError)
+            and traceback_parts
             and self._request_handler_timeout_text in traceback_parts[-1]
-        ):
+        ) or isinstance(error, UserHandlerTimeoutError):
             used_traceback_parts = reduce_asyncio_timeout_error_to_relevant_traceback_parts(error)
-            used_traceback_parts.
+            used_traceback_parts.extend(traceback_parts[-1:])
 
         return ''.join(used_traceback_parts).strip('\n')
 
@@ -1320,6 +1327,8 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
 
         await self._commit_key_value_store_changes(result, get_kvs=self.get_key_value_store)
 
+        result.apply_request_changes(target=context.request)
+
     @staticmethod
     async def _commit_key_value_store_changes(
         result: RequestHandlerRunResult, get_kvs: GetKeyValueStoreFromRequestHandlerFunction
@@ -1385,10 +1394,10 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         else:
             session = await self._get_session()
         proxy_info = await self._get_proxy_info(request, session)
-        result = RequestHandlerRunResult(key_value_store_getter=self.get_key_value_store)
+        result = RequestHandlerRunResult(key_value_store_getter=self.get_key_value_store, request=request)
 
         context = BasicCrawlingContext(
-            request=request,
+            request=result.request,
             session=session,
             proxy_info=proxy_info,
             send_request=self._prepare_send_request_function(session, proxy_info),
@@ -1405,26 +1414,26 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         try:
             request.state = RequestState.REQUEST_HANDLER
 
-            self._check_request_collision(context.request, context.session)
-
             try:
-
+                with swaped_context(context, request):
+                    self._check_request_collision(request, session)
+                    await self._run_request_handler(context=context)
             except asyncio.TimeoutError as e:
                 raise RequestHandlerError(e, context) from e
 
             await self._commit_request_handler_result(context)
 
-            await self._mark_request_as_handled(request)
-
             request.state = RequestState.DONE
 
-
-
+            await self._mark_request_as_handled(request)
+
+            if session and session.is_usable:
+                session.mark_good()
 
             self._statistics.record_request_processing_finish(request.unique_key)
 
         except RequestCollisionError as request_error:
-
+            request.no_retry = True
             await self._handle_request_error(context, request_error)
 
         except RequestHandlerError as primary_error:
@@ -1439,7 +1448,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             await self._handle_request_error(primary_error.crawling_context, primary_error.wrapped_exception)
 
         except SessionError as session_error:
-            if not
+            if not session:
                 raise RuntimeError('SessionError raised in a crawling context without a session') from session_error
 
             if self._error_handler:
@@ -1449,10 +1458,11 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             exc_only = ''.join(traceback.format_exception_only(session_error)).strip()
             self._logger.warning('Encountered "%s", rotating session and retrying...', exc_only)
 
-
+            if session:
+                session.retire()
 
             # Increment session rotation count.
-
+            request.session_rotation_count = (request.session_rotation_count or 0) + 1
 
             await request_manager.reclaim_request(request)
             await self._statistics.error_tracker_retry.add(error=session_error, context=context)
@@ -1483,6 +1493,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             raise
 
     async def _run_request_handler(self, context: BasicCrawlingContext) -> None:
+        context.request.state = RequestState.BEFORE_NAV
         await self._context_pipeline(
             context,
             lambda final_context: wait_for(
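One observable consequence of the reshuffled state handling is that `Request.state` is now maintained across the whole pipeline (`BEFORE_NAV` before the context pipeline runs, `REQUEST_HANDLER` while a handler executes, and `DONE`, `ERROR`, or `SKIPPED` at the end). A hedged sketch of reading it from a handler; the URL is a placeholder:

```python
import asyncio

from crawlee import RequestState
from crawlee.crawlers import BasicCrawler, BasicCrawlingContext


async def main() -> None:
    crawler = BasicCrawler(max_request_retries=1)

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # The router flips the state right before invoking us, so this is
        # expected to log RequestState.REQUEST_HANDLER.
        context.log.info(f'state={context.request.state!r} (expected {RequestState.REQUEST_HANDLER!r})')

    await crawler.run(['https://crawlee.dev'])


asyncio.run(main())
```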
crawlee/crawlers/_basic/_context_utils.py
ADDED
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    from crawlee._request import Request
+
+    from ._basic_crawling_context import BasicCrawlingContext
+
+
+@contextmanager
+def swaped_context(
+    context: BasicCrawlingContext,
+    request: Request,
+) -> Iterator[None]:
+    """Replace context's isolated copies with originals after handler execution."""
+    try:
+        yield
+    finally:
+        # Restore original context state to avoid side effects between different handlers.
+        object.__setattr__(context, 'request', request)
crawlee/crawlers/_basic/_logging_utils.py
CHANGED
@@ -2,9 +2,21 @@ import asyncio
 import re
 import traceback
 
+import crawlee.errors
+
 
 def _get_only_innermost_exception(error: BaseException) -> BaseException:
-    """Get innermost exception by following __cause__ and __context__ attributes of exception.
+    """Get innermost exception by following __cause__ and __context__ attributes of exception.
+
+    If the innermost exception is UserHandlerTimeoutError, return whatever caused that if possible.
+    """
+    if type(error) is crawlee.errors.UserHandlerTimeoutError:
+        if error.__cause__:
+            return error.__cause__
+        if error.__context__:
+            return error.__context__
+        return error
+
     if error.__cause__:
         return _get_only_innermost_exception(error.__cause__)
     if error.__context__:
@@ -34,7 +46,7 @@ def _strip_pep657_highlighting(traceback_part: str) -> str:
 
 
 def reduce_asyncio_timeout_error_to_relevant_traceback_parts(
-    timeout_error: asyncio.exceptions.TimeoutError,
+    timeout_error: asyncio.exceptions.TimeoutError | crawlee.errors.UserHandlerTimeoutError,
 ) -> list[str]:
     innermost_error_traceback_parts = _get_traceback_parts_for_innermost_exception(timeout_error)
     return _get_filtered_traceback_parts_for_asyncio_timeout_error(innermost_error_traceback_parts)
@@ -43,13 +55,20 @@
 def _get_traceback_parts_for_innermost_exception(error: Exception) -> list[str]:
     innermost_error = _get_only_innermost_exception(error)
     return traceback.format_exception(
-        type(innermost_error), value=innermost_error, tb=innermost_error.__traceback__, chain=
+        type(innermost_error), value=innermost_error, tb=innermost_error.__traceback__, chain=False
     )
 
 
 def get_one_line_error_summary_if_possible(error: Exception) -> str:
     if isinstance(error, asyncio.exceptions.TimeoutError):
-
+        relevant_part = reduce_asyncio_timeout_error_to_relevant_traceback_parts(error)
+        most_relevant_part = (',' + relevant_part[-1]) if len(relevant_part) else ''
+    elif isinstance(error, crawlee.errors.UserHandlerTimeoutError):
+        # Error is user defined handler. First two lines should be location of the `UserHandlerTimeoutError` in crawlee
+        # code and third line the topmost user error
+        traceback_parts = _get_traceback_parts_for_innermost_exception(error)
+        relevant_index_from_start = 3
+        most_relevant_part = traceback_parts[2] if len(traceback_parts) >= relevant_index_from_start else ''
     elif 'playwright._impl._errors.Error' in str(error.__class__):
         # Playwright autogenerated errors are often very long, so we do not try to summarize them at all as they anyway
         # point to deep internals.
crawlee/crawlers/_playwright/_playwright_crawler.py
CHANGED
@@ -13,7 +13,7 @@ from pydantic import ValidationError
 from typing_extensions import NotRequired, TypedDict, TypeVar
 
 from crawlee import service_locator
-from crawlee._request import Request, RequestOptions
+from crawlee._request import Request, RequestOptions, RequestState
 from crawlee._types import (
     BasicCrawlingContext,
     ConcurrencySettings,
@@ -35,6 +35,7 @@ from crawlee.statistics import StatisticsState
 from ._playwright_crawling_context import PlaywrightCrawlingContext
 from ._playwright_http_client import PlaywrightHttpClient, browser_page_context
 from ._playwright_pre_nav_crawling_context import PlaywrightPreNavCrawlingContext
+from ._types import GotoOptions
 from ._utils import block_requests, infinite_scroll
 
 TCrawlingContext = TypeVar('TCrawlingContext', bound=PlaywrightCrawlingContext)
@@ -108,6 +109,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         user_data_dir: str | Path | None = None,
         browser_launch_options: Mapping[str, Any] | None = None,
         browser_new_context_options: Mapping[str, Any] | None = None,
+        goto_options: GotoOptions | None = None,
         fingerprint_generator: FingerprintGenerator | None | Literal['default'] = 'default',
         headless: bool | None = None,
         use_incognito_pages: bool | None = None,
@@ -142,6 +144,8 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
                 This option should not be used if `browser_pool` is provided.
             navigation_timeout: Timeout for navigation (the process between opening a Playwright page and calling
                 the request handler)
+            goto_options: Additional options to pass to Playwright's `Page.goto()` method. The `timeout` option is
+                not supported, use `navigation_timeout` instead.
             kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
         """
         configuration = kwargs.pop('configuration', None)
@@ -213,6 +217,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             kwargs['concurrency_settings'] = ConcurrencySettings(desired_concurrency=1)
 
         self._navigation_timeout = navigation_timeout or timedelta(minutes=1)
+        self._goto_options = goto_options or GotoOptions()
 
         super().__init__(**kwargs)
 
@@ -238,6 +243,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             log=context.log,
             page=crawlee_page.page,
             block_requests=partial(block_requests, page=crawlee_page.page),
+            goto_options=GotoOptions(**self._goto_options),
         )
 
         context_id = id(pre_navigation_context)
@@ -321,8 +327,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         try:
             async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
                 response = await context.page.goto(
-                    context.request.url, timeout=remaining_timeout.total_seconds() * 1000
+                    context.request.url, timeout=remaining_timeout.total_seconds() * 1000, **context.goto_options
                 )
+                context.request.state = RequestState.AFTER_NAV
         except playwright.async_api.TimeoutError as exc:
             raise asyncio.TimeoutError from exc
 
@@ -351,6 +358,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             extract_links=extract_links,
             enqueue_links=self._create_enqueue_links_function(context, extract_links),
             block_requests=partial(block_requests, page=context.page),
+            goto_options=context.goto_options,
         )
 
         if context.session:
@@ -391,6 +399,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         robots_txt_file = await self._get_robots_txt_file_for_url(context.request.url)
 
         kwargs.setdefault('strategy', 'same-hostname')
+        strategy = kwargs.get('strategy', 'same-hostname')
 
         elements = await context.page.query_selector_all(selector)
         links_iterator: Iterator[str] = iter(
@@ -409,17 +418,19 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             skipped = iter([])
 
         for url in self._enqueue_links_filter_iterator(links_iterator, context.request.url, **kwargs):
-
+            request_options = RequestOptions(
+                url=url, user_data={**base_user_data}, label=label, enqueue_strategy=strategy
+            )
 
             if transform_request_function:
-
-                if
+                transform_request_options = transform_request_function(request_options)
+                if transform_request_options == 'skip':
                     continue
-                if
-
+                if transform_request_options != 'unchanged':
+                    request_options = transform_request_options
 
             try:
-                request = Request.from_url(**
+                request = Request.from_url(**request_options)
             except ValidationError as exc:
                 context.log.debug(
                     f'Skipping URL "{url}" due to invalid format: {exc}. '
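A hedged sketch of the new `goto_options` parameter (Playwright must be installed; the URL is a placeholder). `GotoOptions` is a `TypedDict`, so a plain dict with the same keys is accepted; per the docstring above, use `navigation_timeout` rather than a `timeout` key:

```python
import asyncio

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


async def main() -> None:
    crawler = PlaywrightCrawler(
        headless=True,
        # Forwarded to page.goto() on every navigation, on top of the timeout
        # derived from navigation_timeout.
        goto_options={'wait_until': 'domcontentloaded', 'referer': 'https://crawlee.dev'},
    )

    @crawler.router.default_handler
    async def handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Loaded {context.request.url}')

    await crawler.run(['https://crawlee.dev'])


asyncio.run(main())
```

The same options are also exposed on the pre-navigation context (`context.goto_options`, see the next file), so a pre-navigation hook can presumably tweak them per request before `page.goto()` runs.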
crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py
CHANGED
@@ -9,7 +9,7 @@ from crawlee._utils.docs import docs_group
 if TYPE_CHECKING:
     from playwright.async_api import Page
 
-    from ._types import BlockRequestsFunction
+    from ._types import BlockRequestsFunction, GotoOptions
 
 
 @dataclass(frozen=True)
@@ -26,6 +26,9 @@ class PlaywrightPreNavCrawlingContext(BasicCrawlingContext):
     block_requests: BlockRequestsFunction
     """Blocks network requests matching specified URL patterns."""
 
+    goto_options: GotoOptions
+    """Additional options to pass to Playwright's `Page.goto()` method. The `timeout` option is not supported."""
+
     async def get_snapshot(self) -> PageSnapshot:
         """Get snapshot of crawled page."""
         html = None
crawlee/crawlers/_playwright/_types.py
CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Protocol
+from typing import TYPE_CHECKING, Literal, Protocol, TypedDict
 
 from crawlee import HttpHeaders
 from crawlee._utils.docs import docs_group
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
     from collections.abc import AsyncGenerator
 
     from playwright.async_api import APIResponse, Response
-    from typing_extensions import Self
+    from typing_extensions import NotRequired, Self
 
 
 @docs_group('Functions')
@@ -58,3 +58,13 @@ class PlaywrightHttpResponse:
         _content = await response.body()
 
         return cls(http_version=http_version, status_code=status_code, headers=headers, _content=_content)
+
+
+class GotoOptions(TypedDict):
+    """Keyword arguments for Playwright's `Page.goto()` method."""
+
+    wait_until: NotRequired[Literal['domcontentloaded', 'load', 'networkidle', 'commit']]
+    """When to consider operation succeeded, defaults to 'load' event."""
+
+    referer: NotRequired[str]
+    """Referer header value."""
crawlee/errors.py
CHANGED
@@ -29,6 +29,10 @@ class UserDefinedErrorHandlerError(Exception):
     """Wraps an exception thrown from an user-defined error handler."""
 
 
+class UserHandlerTimeoutError(UserDefinedErrorHandlerError):
+    """Raised when a router fails due to user raised timeout. This is different from user-defined handler timing out."""
+
+
 @docs_group('Errors')
 class SessionError(Exception):
     """Errors of `SessionError` type will trigger a session rotation.
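A hedged sketch of the distinction the new class draws: a timeout raised inside the user's own handler code (simulated here with `asyncio.wait_for`) is re-raised by the router as `UserHandlerTimeoutError`, so logging can tell it apart from the user-defined handler itself timing out; the URL is a placeholder:

```python
import asyncio

from crawlee.crawlers import BasicCrawler, BasicCrawlingContext


async def main() -> None:
    crawler = BasicCrawler(max_request_retries=1)

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # This TimeoutError originates in user code, not in the crawler's own
        # timeout machinery, so the router wraps it in UserHandlerTimeoutError.
        await asyncio.wait_for(asyncio.sleep(1), timeout=0.01)

    await crawler.run(['https://crawlee.dev'])


asyncio.run(main())
```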
crawlee/events/_event_manager.py
CHANGED
@@ -174,11 +174,9 @@ class EventManager:
         # to avoid blocking the event loop
         coro = (
             listener(*bound_args.args, **bound_args.kwargs)
-            if asyncio.iscoroutinefunction(listener)
+            if inspect.iscoroutinefunction(listener)
             else asyncio.to_thread(cast('Callable[..., None]', listener), *bound_args.args, **bound_args.kwargs)
         )
-        # Note: use `asyncio.iscoroutinefunction` rather then `inspect.iscoroutinefunction` since it works with
-        # unittests.mock.AsyncMock. See https://github.com/python/cpython/issues/84753.
 
         listener_task = asyncio.create_task(coro, name=f'Task-{event.value}-{listener.__name__}')
         self._listener_tasks.add(listener_task)
crawlee/router.py
CHANGED
@@ -1,13 +1,17 @@
 from __future__ import annotations
 
+import asyncio
 from collections.abc import Awaitable, Callable
 from typing import Generic, TypeVar
 
+from crawlee._request import RequestState
 from crawlee._types import BasicCrawlingContext
 from crawlee._utils.docs import docs_group
 
 __all__ = ['Router']
 
+from crawlee.errors import UserHandlerTimeoutError
+
 TCrawlingContext = TypeVar('TCrawlingContext', bound=BasicCrawlingContext)
 RequestHandler = Callable[[TCrawlingContext], Awaitable[None]]
 
@@ -89,13 +93,19 @@ class Router(Generic[TCrawlingContext]):
 
     async def __call__(self, context: TCrawlingContext) -> None:
         """Invoke a request handler that matches the request label (or the default)."""
+        context.request.state = RequestState.REQUEST_HANDLER
         if context.request.label is None or context.request.label not in self._handlers_by_label:
             if self._default_handler is None:
                 raise RuntimeError(
                     f'No handler matches label `{context.request.label}` and no default handler is configured'
                 )
 
-
+            user_defined_handler = self._default_handler
+        else:
+            user_defined_handler = self._handlers_by_label[context.request.label]
 
-
-
+        try:
+            return await user_defined_handler(context)
+        except asyncio.TimeoutError as e:
+            # Timeout in handler, but not timeout of handler.
+            raise UserHandlerTimeoutError('Timeout raised by user defined handler') from e
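For reference, a hedged sketch of how the router is typically wired up, which is the code path the hunk above changes; the labels, URL, and limits are placeholders:

```python
import asyncio

from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
from crawlee.router import Router

router = Router[ParselCrawlingContext]()


@router.default_handler
async def list_handler(context: ParselCrawlingContext) -> None:
    # Requests without a label (or with an unknown one) land here.
    await context.enqueue_links(label='DETAIL', strategy='same-hostname')


@router.handler('DETAIL')
async def detail_handler(context: ParselCrawlingContext) -> None:
    await context.push_data({'url': context.request.url})


async def main() -> None:
    crawler = ParselCrawler(request_handler=router, max_requests_per_crawl=20)
    await crawler.run(['https://crawlee.dev'])


asyncio.run(main())
```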
{crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.1.2b4
+Version: 1.2.1b7
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -324,19 +324,12 @@ Description-Content-Type: text/markdown
     <a href="https://trendshift.io/repositories/11169" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11169" alt="apify%2Fcrawlee-python | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
 </p>
 
-<p align=center>
-
-
-
-
-
-    </a>
-    <a href="https://pypi.org/project/crawlee/" rel="nofollow">
-        <img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI - Python Version" style="max-width: 100%;">
-    </a>
-    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
-        <img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
-    </a>
+<p align="center">
+    <a href="https://badge.fury.io/py/crawlee" rel="nofollow"><img src="https://badge.fury.io/py/crawlee.svg" alt="PyPI package version"></a>
+    <a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/dm/crawlee" alt="PyPI package downloads"></a>
+    <a href="https://codecov.io/gh/apify/crawlee-python"><img src="https://codecov.io/gh/apify/crawlee-python/graph/badge.svg?token=cCju61iPQG" alt="Codecov report"></a>
+    <a href="https://pypi.org/project/crawlee/" rel="nofollow"><img src="https://img.shields.io/pypi/pyversions/crawlee" alt="PyPI Python version"></a>
+    <a href="https://discord.gg/jyEM2PRvMU" rel="nofollow"><img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on Discord"></a>
 </p>
 
 Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
{crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/RECORD
CHANGED
@@ -1,16 +1,16 @@
-crawlee/__init__.py,sha256=
+crawlee/__init__.py,sha256=ECFcNbLQp3HX-o6K4eMo38rZQ5NnZg7udvEEkjkqnuw,548
 crawlee/_browserforge_workaround.py,sha256=FYQaqpqfZGYkx-A8evF9nsHnj4KK4IMtjNq3LtmX_vA,1664
 crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
 crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
 crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
-crawlee/_request.py,sha256=
+crawlee/_request.py,sha256=M8hTSs5dJTBBW0JIDh0QSUhWyEWarEg86Un9kX12qy4,17374
 crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
-crawlee/_types.py,sha256=
+crawlee/_types.py,sha256=_CQyq1BmvuHr0p25NFn6rHbgsiuR65o8gLxCCuQWfAg,30534
 crawlee/configuration.py,sha256=DWS2z1FC6Ua93W2tStK3R1ZKZbZjVQYWGiGFbZFaRtA,8064
-crawlee/errors.py,sha256=
+crawlee/errors.py,sha256=fnAFpyvJKMDq3VDGr1iq1E-JqnfoOEI7cd8YjDaqb9s,4062
 crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
 crawlee/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee/router.py,sha256=
+crawlee/router.py,sha256=HbKxE22r8ZVu93tIxBdGObMa3fGPcuSvKthqibimekU,4252
 crawlee/_autoscaling/__init__.py,sha256=t6Z44gU488C0UmkBCTtwsgAR8iqJcv2g4ZlC4NYh0ZI,182
 crawlee/_autoscaling/_types.py,sha256=xnrRHXYOVn7GwELLVHi_y7B-Ic7u3hPkYl3P-LT3Fhk,5453
 crawlee/_autoscaling/autoscaled_pool.py,sha256=Bcu2jDgK2SYMnZN5xfjs8Oxti0ZxrktjydWv3J0Hz48,12214
@@ -21,7 +21,7 @@ crawlee/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/_utils/blocked.py,sha256=sxN99AouFXMoe6uG1EvCTCmKMGk73DBMUk9nOkWK86I,863
 crawlee/_utils/byte_size.py,sha256=zs4qWUEDgTGDqYfUJ7t5edWNYYJCG8Y1EyJ9GASfRL4,3744
 crawlee/_utils/console.py,sha256=vAIM8AO7cT-HdXg44eR8zQyHAHk8X8G7J1KKFCBL2LY,2242
-crawlee/_utils/context.py,sha256=
+crawlee/_utils/context.py,sha256=LFIXjJQBhv94j1prbK-2yjH3EXg5jPOfVqW8P6cwNIY,1726
 crawlee/_utils/crypto.py,sha256=tYzn2z91KgV3ugxz4CKtSTcCjW-3FC8un7hpDNCl6rs,757
 crawlee/_utils/docs.py,sha256=S09-3xAQAlUvrmPpBXVJpE8wblB8LtS6QduLNncfqdQ,1130
 crawlee/_utils/file.py,sha256=FJHTC25qSWQs3ZhCZrLgs0cUwA9K81MlQRGEmcWKAQU,5758
@@ -30,7 +30,7 @@ crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZk
 crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
 crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
 crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
-crawlee/_utils/recurring_task.py,sha256=
+crawlee/_utils/recurring_task.py,sha256=_injmSsvG4p0xS4nBtoZZIR02syBG8JcLkuwgNDL8Nc,2143
 crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
 crawlee/_utils/robots.py,sha256=DBU5ni4Y-p7bIKMbLd_ws8wgHSFc4K8zPVF3JvH_pkw,4661
 crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
@@ -53,22 +53,23 @@ crawlee/crawlers/__init__.py,sha256=jNFMsPizSgCN0ARYSmHs9Ppk8yvGgjUH5PxUeDchljE,
 crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
 crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_abstract_http/__init__.py,sha256=h8jVWcPbDXzWHill1Vm7J7iliJW0hIrea0gkg-Hkb-M,319
-crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
+crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=Y12SBNAiF8QNJH83s6pPoao1W5ZSUhxHRHHKjE0qZhk,13174
 crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
 crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
 crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=LREq9WR9BKsE8S8lSsEhlCoNjQaLhlJ9yo8y_6a8o4c,1072
-crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=
+crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=qAtZUwzGMwASwl5NKLAOsYnVA03IpZkk-BLKm3SwHoM,21588
 crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
-crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=
+crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=tejw-yfA8zVR8L-shIZOTFoMUQOI5Kt7FBJa8H0q4H0,10664
 crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
 crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
 crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
 crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
-crawlee/crawlers/_basic/_basic_crawler.py,sha256=
+crawlee/crawlers/_basic/_basic_crawler.py,sha256=wTZW_1vM2A1x14VADRBsUr0TJzKfGoJODeHX0gOZnnY,73914
 crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
 crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
-crawlee/crawlers/_basic/
+crawlee/crawlers/_basic/_context_utils.py,sha256=U1s0nl7EW9k-JrZA2VM7d_aWnE7Je3lXK04RFrXvRC4,655
+crawlee/crawlers/_basic/_logging_utils.py,sha256=6Q206Sv0RzHztwu5y5XSdUpZhpqQ5-zSapQzUY9GxCo,4014
 crawlee/crawlers/_basic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_beautifulsoup/__init__.py,sha256=7pL273ashA7yYDrH6nokYZ7SAMUAezilGIWdfThi_Co,822
 crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py,sha256=Q8Sb_lflpdYIwDZ1fIeuquPzdDG2zCnKsrcj8fe8n6k,3056
@@ -85,14 +86,14 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
 crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
 crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
 crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
-crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
+crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=WappMIb0w-AnS745vlJpQNxwibKS7ok6_5a6iAcoTDs,26207
 crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
 crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=4mvaCI9Zum7znbm0F-ZZ6T1FEqZ-N-cvPOk1iqtcUSo,4164
-crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=
-crawlee/crawlers/_playwright/_types.py,sha256=
+crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=NFenJKgXcPuifaVYc2sdU5AV2BX6836GUuqFTE2Q0lU,1545
+crawlee/crawlers/_playwright/_types.py,sha256=D4MaRWgYdps1CwgNWURJRLKkJk_9Oyue70jvkHAxnEU,2534
 crawlee/crawlers/_playwright/_utils.py,sha256=FQ_-LYo7DGHsNHRrTtWt3mC06VzQvQ2wkGqpA2wBzYU,3441
 crawlee/events/__init__.py,sha256=YMgOXKI0LsXfImKQy06PZ2Vdjy-uD_-acioagHft1do,577
-crawlee/events/_event_manager.py,sha256=
+crawlee/events/_event_manager.py,sha256=M8nKPc2BJo8RIBVHaG9BYuks0jwt5v3BFYQLA7IvolI,11380
 crawlee/events/_local_event_manager.py,sha256=CSiMJ6a_BwX0PPwtffEOtHm21dmALJz1zifo3AuMAk8,3708
 crawlee/events/_types.py,sha256=MKsI014OOKKhjPJRrvWYrezIDGoLjGGhWXrkqYw26Ns,3313
 crawlee/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -199,8 +200,8 @@ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKp
 crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
 crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
 crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee-1.
-crawlee-1.
-crawlee-1.
-crawlee-1.
-crawlee-1.
+crawlee-1.2.1b7.dist-info/METADATA,sha256=YPjeW0r_pqD_lHRtFfJ8GL84Z4t1IvEgu1uBtxc4IuY,29526
+crawlee-1.2.1b7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+crawlee-1.2.1b7.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
+crawlee-1.2.1b7.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+crawlee-1.2.1b7.dist-info/RECORD,,
{crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/WHEEL
File without changes
{crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/entry_points.txt
File without changes
{crawlee-1.1.2b4.dist-info → crawlee-1.2.1b7.dist-info}/licenses/LICENSE
File without changes