apify 1.7.1b1__py3-none-any.whl → 2.2.0b14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of apify might be problematic.
- apify/__init__.py +19 -4
- apify/_actor.py +1030 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.0b14.dist-info/METADATA +211 -0
- apify-2.2.0b14.dist-info/RECORD +38 -0
- {apify-1.7.1b1.dist-info → apify-2.2.0b14.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.1b1.dist-info/METADATA +0 -149
- apify-1.7.1b1.dist-info/RECORD +0 -41
- apify-1.7.1b1.dist-info/top_level.txt +0 -1
- {apify-1.7.1b1.dist-info → apify-2.2.0b14.dist-info}/LICENSE +0 -0
apify/log.py
CHANGED
@@ -1,16 +1,9 @@
 from __future__ import annotations

-import json
 import logging
-import textwrap
-import traceback
-from typing import Any

 from apify_shared.utils import ignore_docs
-from colorama import Fore, Style, just_fix_windows_console
-
-just_fix_windows_console()
-
+from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level

 # Name of the logger used throughout the library (resolves to 'apify')
 logger_name = __name__.split('.')[0]

@@ -18,107 +11,31 @@ logger_name = __name__.split('.')[0]
 # Logger used throughout the library
 logger = logging.getLogger(logger_name)

-_LOG_NAME_COLOR = Fore.LIGHTBLACK_EX
-
-_LOG_LEVEL_COLOR = {
-    logging.DEBUG: Fore.BLUE,
-    logging.INFO: Fore.GREEN,
-    logging.WARNING: Fore.YELLOW,
-    logging.ERROR: Fore.RED,
-    logging.CRITICAL: Fore.RED,
-}
-
-_LOG_LEVEL_SHORT_ALIAS = {
-    logging.DEBUG: 'DEBUG',
-    logging.INFO: 'INFO ',
-    logging.WARNING: 'WARN ',
-    logging.ERROR: 'ERROR',
-}
-
-# So that all the log messages have the same alignment
-_LOG_MESSAGE_INDENT = ' ' * 6
-
-
-class ActorLogFormatter(logging.Formatter):
-    """Log formatter that prints out the log message nicely formatted, with colored level and stringified extra fields.
-
-    It formats the log records so that they:
-    - start with the level (colorized, and padded to 5 chars so that it is nicely aligned)
-    - then have the actual log message, if it's multiline then it's nicely indented
-    - then have the stringified extra log fields
-    - then, if an exception is a part of the log record, prints the formatted exception.
-    """
-
-    # The fields that are added to the log record with `logger.log(..., extra={...})`
-    # are just merged in the log record with the other log record properties, and you can't get them in some nice, isolated way.
-    # So, to get the extra fields, we just compare all the properties present in the log record
-    # with properties present in an empty log record,
-    # and extract all the extra ones not present in the empty log record
-    empty_record = logging.LogRecord('dummy', 0, 'dummy', 0, 'dummy', None, None)
-
-    def __init__(
-        self: ActorLogFormatter,
-        include_logger_name: bool = False,  # noqa: FBT001, FBT002
-        *args: Any,
-        **kwargs: Any,
-    ) -> None:
-        """Create an instance of the ActorLogFormatter.
-
-        Args:
-            include_logger_name: Include logger name at the beginning of the log line. Defaults to False.
-            args: Arguments passed to the parent class.
-            kwargs: Keyword arguments passed to the parent class.
-        """
-        super().__init__(*args, **kwargs)
-        self.include_logger_name = include_logger_name
-
-    def _get_extra_fields(self: ActorLogFormatter, record: logging.LogRecord) -> dict[str, Any]:
-        extra_fields: dict[str, Any] = {}
-        for key, value in record.__dict__.items():
-            if key not in self.empty_record.__dict__:
-                extra_fields[key] = value
-
-        return extra_fields
-
-    @ignore_docs
-    def format(self: ActorLogFormatter, record: logging.LogRecord) -> str:
-        """Format the log record nicely.
-
-        - then has the stringified extra log fields
-        - then, if an exception is a part of the log record, prints the formatted exception.
-        """
-        logger_name_string = f'{_LOG_NAME_COLOR}[{record.name}]{Style.RESET_ALL} '
+@ignore_docs
+class ActorLogFormatter(CrawleeLogFormatter):  # noqa: D101 (Inherited from parent class)
+    pass

-        # Colorize the log level, and shorten it to 6 chars tops
-        level_color_code = _LOG_LEVEL_COLOR.get(record.levelno, '')
-        level_short_alias = _LOG_LEVEL_SHORT_ALIAS.get(record.levelno, record.levelname)
-        level_string = f'{level_color_code}{level_short_alias}{Style.RESET_ALL} '

-        if record.exc_info:
-            exc_info = record.exc_info
-            record.exc_info = None
-            exception_string = ''.join(traceback.format_exception(*exc_info)).rstrip()
-            exception_string = '\n' + textwrap.indent(exception_string, _LOG_MESSAGE_INDENT)
+def _configure_logging() -> None:
+    apify_client_logger = logging.getLogger('apify_client')
+    configure_logger(apify_client_logger, remove_old_handlers=True)

-        # Just stringify them to JSON and color them gray
-        extra_string = ''
-        extra = self._get_extra_fields(record)
-        if extra:
-            extra_string = f' {Fore.LIGHTBLACK_EX}({json.dumps(extra, ensure_ascii=False, default=str)}){Style.RESET_ALL}'
+    level = get_configured_log_level()

+    # Keep apify_client logger quiet unless debug logging is requested
+    if level > logging.DEBUG:
+        apify_client_logger.setLevel(logging.INFO)
+    else:
+        apify_client_logger.setLevel(level)

+    # Silence HTTPX logger unless debug logging is requested
+    httpx_logger = logging.getLogger('httpx')
+    if level > logging.DEBUG:
+        httpx_logger.setLevel(logging.WARNING)
+    else:
+        httpx_logger.setLevel(level)

+    # Use configured log level for apify logger
+    apify_logger = logging.getLogger('apify')
+    configure_logger(apify_logger, remove_old_handlers=True)
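Note: the formatter is now a thin alias over crawlee's CrawleeLogFormatter, and _configure_logging() wires up the 'apify', 'apify_client', and 'httpx' loggers. For reference, a minimal sketch (not part of the diff) of attaching the formatter to a handler by hand, e.g. outside a managed Actor run where the SDK does not configure logging for you:

    import logging

    from apify.log import ActorLogFormatter

    # Attach the Apify formatter to a plain stream handler.
    handler = logging.StreamHandler()
    handler.setFormatter(ActorLogFormatter())

    apify_logger = logging.getLogger('apify')
    apify_logger.setLevel(logging.DEBUG)
    apify_logger.addHandler(handler)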
apify/scrapy/__init__.py
CHANGED
@@ -1,3 +1,11 @@
-from .requests import to_apify_request, to_scrapy_request
-from .scheduler import ApifyScheduler
-from .utils import get_basic_auth_header, get_running_event_loop_id
+from apify.scrapy.requests import to_apify_request, to_scrapy_request
+from apify.scrapy.scheduler import ApifyScheduler
+from apify.scrapy.utils import get_basic_auth_header, get_running_event_loop_id
+
+__all__ = [
+    'ApifyScheduler',
+    'get_basic_auth_header',
+    'get_running_event_loop_id',
+    'to_apify_request',
+    'to_scrapy_request',
+]
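With __all__ defined and the relative imports replaced by absolute ones, downstream projects import the Scrapy integration directly from the subpackage. A small illustrative snippet (the SCHEDULER value is the usual way to plug the scheduler into a Scrapy project and is an assumption, not part of this diff):

    # settings.py of a Scrapy project integrated with the Apify SDK (illustrative)
    SCHEDULER = 'apify.scrapy.scheduler.ApifyScheduler'

    # Elsewhere, the helpers are importable from the package root.
    from apify.scrapy import ApifyScheduler, to_apify_request, to_scrapy_request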
apify/scrapy/middlewares/apify_proxy.py
CHANGED

@@ -1,19 +1,20 @@
 from __future__ import annotations

+from typing import TYPE_CHECKING
 from urllib.parse import ParseResult, urlparse

 try:
+    if TYPE_CHECKING:
+        from scrapy import Request, Spider
+        from scrapy.crawler import Crawler
     from scrapy.core.downloader.handlers.http11 import TunnelError
-    from scrapy.crawler import Crawler  # noqa: TCH002
     from scrapy.exceptions import NotConfigured
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
     ) from exc

-from apify
-from apify.proxy_configuration import ProxyConfiguration
+from apify import Actor, ProxyConfiguration
 from apify.scrapy.utils import get_basic_auth_header

@@ -28,7 +29,7 @@ class ApifyHttpProxyMiddleware:
     proxy_settings = {'useApifyProxy': true, 'apifyProxyGroups': []}
     """

-    def __init__(self
+    def __init__(self, proxy_settings: dict) -> None:
         """Create a new instance.

         Args:

@@ -43,29 +44,31 @@ class ApifyHttpProxyMiddleware:
         """Create an instance of ApifyHttpProxyMiddleware from a Scrapy Crawler.

         Args:
-            cls: Class type.
             crawler: Scrapy Crawler object.

-        Returns:
-            ApifyHttpProxyMiddleware: Instance of the class.
+        Returns: Instance of the class.
         """
         proxy_settings: dict | None = crawler.settings.get('APIFY_PROXY_SETTINGS')

         if proxy_settings is None:
-            Actor.log.warning(
+            Actor.log.warning(
+                'ApifyHttpProxyMiddleware is not going to be used. Object "proxyConfiguration" is probably missing '
+                ' in the Actor input.'
+            )
             raise NotConfigured

         use_apify_proxy = proxy_settings.get('useApifyProxy', False)

         if use_apify_proxy is not True:
             Actor.log.warning(
-                'ApifyHttpProxyMiddleware is not going to be used. Actor input field
+                'ApifyHttpProxyMiddleware is not going to be used. Actor input field '
+                '"proxyConfiguration.useApifyProxy" is probably set to False.'
             )
             raise NotConfigured

         return cls(proxy_settings)

-    async def process_request(self
+    async def process_request(self, request: Request, spider: Spider) -> None:
         """Process a Scrapy request by assigning a new proxy.

         Args:

@@ -74,9 +77,6 @@ class ApifyHttpProxyMiddleware:
         Raises:
             ValueError: If username and password are not provided in the proxy URL.
-
-        Returns:
-            None: The request is processed and middleware pipeline can continue.
         """
         Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}, spider={spider}')
         url = await self._get_new_proxy_url()

@@ -91,11 +91,11 @@ class ApifyHttpProxyMiddleware:
         Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')

     def process_exception(
-        self
+        self,
         request: Request,
         exception: Exception,
         spider: Spider,
-    ) -> None
+    ) -> None:
         """Process an exception that occurs during request processing.

         Args:

@@ -104,27 +104,27 @@ class ApifyHttpProxyMiddleware:
         spider: Scrapy Spider object.

         Returns:
+            Returning None, meaning Scrapy will continue processing this exception, executing any other
+            process_exception() methods of installed middleware, until no middleware is left and the default
+            exception handling kicks in.
         """
         Actor.log.debug(
             f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}, spider={spider}',
         )

         if isinstance(exception, TunnelError):
-            Actor.log.warning(
+            Actor.log.warning(
+                f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '
+                'reason="{exception}", skipping...'
+            )

-    async def _get_new_proxy_url(self
+    async def _get_new_proxy_url(self) -> ParseResult:
         """Get a new proxy URL.

         Raises:
             NotConfigured: If creation of the proxy configuration fails.

-        Returns:
-            ParseResult: New proxy URL.
+        Returns: New proxy URL.
         """
         # Get proxy configuration, creating it if necessary
         proxy_cfg = (

@@ -136,7 +136,9 @@ class ApifyHttpProxyMiddleware:
         # If the proxy configuration is still not available, raise an error. However, this should not happen due
         # to the checks in the `from_crawler` method.
         if proxy_cfg is None:
-            Actor.log.error(
+            Actor.log.error(
+                'Creation of proxy configuration failed. Check the field "proxyConfiguration" in the Actor input.'
+            )
             raise NotConfigured

         # Store the proxy configuration for future use

@@ -144,4 +146,4 @@ class ApifyHttpProxyMiddleware:
         # Get a new proxy URL and return it
         new_url = await proxy_cfg.new_url()
-        return urlparse(new_url)
+        return urlparse(str(new_url))
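The middleware reads its configuration from the APIFY_PROXY_SETTINGS Scrapy setting and refuses to activate (raises NotConfigured) unless useApifyProxy is set. A hedged sketch of enabling it in a project's settings.py — the dotted path assumes the class is re-exported from apify.scrapy.middlewares, and the priority value 950 is illustrative, not mandated by this diff:

    # settings.py (illustrative)
    DOWNLOADER_MIDDLEWARES = {
        'apify.scrapy.middlewares.ApifyHttpProxyMiddleware': 950,
    }

    # Mirrors the proxyConfiguration object from the Actor input.
    APIFY_PROXY_SETTINGS = {'useApifyProxy': True, 'apifyProxyGroups': []}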
apify/scrapy/middlewares/py.typed
ADDED
File without changes
apify/scrapy/pipelines/actor_dataset_push.py
CHANGED

@@ -1,15 +1,18 @@
 from __future__ import annotations

+from typing import TYPE_CHECKING
+
 from itemadapter.adapter import ItemAdapter

 try:
+    if TYPE_CHECKING:
+        from scrapy import Item, Spider
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
     ) from exc

-from apify
+from apify import Actor


 class ActorDatasetPushPipeline:

@@ -19,7 +22,7 @@ class ActorDatasetPushPipeline:
     """

     async def process_item(
-        self
+        self,
         item: Item,
         spider: Spider,
     ) -> Item:
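ActorDatasetPushPipeline pushes each scraped item to the Actor's default dataset via ItemAdapter. A sketch of enabling it — the dotted path assumes the class is re-exported from apify.scrapy.pipelines, and the priority 1000 is illustrative:

    # settings.py (illustrative)
    ITEM_PIPELINES = {
        'apify.scrapy.pipelines.ActorDatasetPushPipeline': 1000,
    }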
apify/scrapy/pipelines/py.typed
ADDED
File without changes
apify/scrapy/py.typed
ADDED
File without changes
apify/scrapy/requests.py
CHANGED
@@ -2,6 +2,9 @@ from __future__ import annotations
 import codecs
 import pickle
+from typing import Any, cast
+
+from apify_shared.utils import ignore_docs

 try:
     from scrapy import Request, Spider

@@ -12,9 +15,12 @@ except ImportError as exc:
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
     ) from exc

-from
-from
-from
+from crawlee import Request as CrawleeRequest
+from crawlee._types import HttpHeaders
+from crawlee._utils.crypto import crypto_random_object_id
+from crawlee._utils.requests import compute_unique_key, unique_key_to_request_id
+
+from apify import Actor


 def _is_request_produced_by_middleware(scrapy_request: Request) -> bool:

@@ -25,7 +31,8 @@ def _is_request_produced_by_middleware(scrapy_request: Request) -> bool:
     return bool(scrapy_request.meta.get('redirect_times')) or bool(scrapy_request.meta.get('retry_times'))


+@ignore_docs
+def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest | None:
     """Convert a Scrapy request to an Apify request.

     Args:

@@ -36,46 +43,50 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict | None:
         The converted Apify request if the conversion was successful, otherwise None.
     """
     if not isinstance(scrapy_request, Request):
-        Actor.log.warning(
+        Actor.log.warning(  # type: ignore[unreachable]
+            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
+        )
         return None

     call_id = crypto_random_object_id(8)
     Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...')

     try:
-        apify_request = {
-            'url': scrapy_request.url,
-            'method': scrapy_request.method,
-            'payload': scrapy_request.body,
-            'userData': scrapy_request.meta.get('userData', {}),
-        }
-
-        # Convert Scrapy's headers to a dictionary and store them in the apify_request
-        if isinstance(scrapy_request.headers, Headers):
-            apify_request['headers'] = dict(scrapy_request.headers.to_unicode_dict())
-        else:
-            Actor.log.warning(f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}')
-
-        # If the request was produced by the middleware (e.g. retry or redirect), we must compute the unique key here
         if _is_request_produced_by_middleware(scrapy_request):
+            unique_key = compute_unique_key(
                 url=scrapy_request.url,
-                method=scrapy_request.method,
+                method=scrapy_request.method,  # type: ignore[arg-type]  # str vs literal
                 payload=scrapy_request.body,
                 use_extended_unique_key=True,
             )
+        elif scrapy_request.dont_filter:
+            unique_key = crypto_random_object_id(8)
+        elif scrapy_request.meta.get('apify_request_unique_key'):
+            unique_key = scrapy_request.meta['apify_request_unique_key']
         else:
-            apify_request['id'] = scrapy_request.meta['apify_request_id']
+            unique_key = crypto_random_object_id(8)

+        if scrapy_request.meta.get('apify_request_id'):
+            request_id = scrapy_request.meta['apify_request_id']
+        else:
+            request_id = unique_key_to_request_id(unique_key)
+
+        apify_request = CrawleeRequest(
+            url=scrapy_request.url,
+            method=scrapy_request.method,
+            payload=scrapy_request.body,
+            user_data=scrapy_request.meta.get('userData', {}),
+            unique_key=unique_key,
+            id=request_id,
+        )

-        #
+        # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
+        if isinstance(scrapy_request.headers, Headers):
+            apify_request.headers = HttpHeaders(dict(scrapy_request.headers.to_unicode_dict()))
+        else:
+            Actor.log.warning(  # type: ignore[unreachable]
+                f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
+            )

         # Serialize the Scrapy Request and store it in the apify_request.
         # - This process involves converting the Scrapy Request object into a dictionary, encoding it to base64,

@@ -83,7 +94,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict | None:
         # - The serialization process can be referenced at: https://stackoverflow.com/questions/30469575/.
         scrapy_request_dict = scrapy_request.to_dict(spider=spider)
         scrapy_request_dict_encoded = codecs.encode(pickle.dumps(scrapy_request_dict), 'base64').decode()
-        apify_request['
+        apify_request.user_data['scrapy_request'] = scrapy_request_dict_encoded

     except Exception as exc:
         Actor.log.warning(f'Conversion of Scrapy request {scrapy_request} to Apify request failed; {exc}')

@@ -93,7 +104,8 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict | None:
     return apify_request


+@ignore_docs
+def to_scrapy_request(apify_request: CrawleeRequest, spider: Spider) -> Request:
     """Convert an Apify request to a Scrapy request.

     Args:

@@ -101,32 +113,26 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
         spider: The Scrapy spider that the request is associated with.

     Raises:
-        TypeError: If the apify_request is not a
+        TypeError: If the apify_request is not a crawlee request.
         ValueError: If the apify_request does not contain the required keys.

     Returns:
         The converted Scrapy request.
     """
-    if not isinstance(apify_request,
-        raise TypeError('apify_request must be a
-
-    required_keys = ['url', 'method', 'id', 'uniqueKey']
-    missing_keys = [key for key in required_keys if key not in apify_request]
-
-    if missing_keys:
-        raise ValueError(f'apify_request must contain {", ".join(map(repr, missing_keys))} key(s)')
+    if not isinstance(cast(Any, apify_request), CrawleeRequest):
+        raise TypeError('apify_request must be a crawlee.Request instance')

     call_id = crypto_random_object_id(8)
     Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')

     # If the apify_request comes from the Scrapy
-    if '
+    if 'scrapy_request' in apify_request.user_data:
         # Deserialize the Scrapy Request from the apify_request.
         # - This process involves decoding the base64-encoded request data and reconstructing
         #   the Scrapy Request object from its dictionary representation.
         Actor.log.debug(f'[{call_id}]: Restoring the Scrapy Request from the apify_request...')

-        scrapy_request_dict_encoded = apify_request['
+        scrapy_request_dict_encoded = apify_request.user_data['scrapy_request']
         if not isinstance(scrapy_request_dict_encoded, str):
             raise TypeError('scrapy_request_dict_encoded must be a string')

@@ -142,34 +148,30 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
         # Update the meta field with the meta field from the apify_request
         meta = scrapy_request.meta or {}
-        meta.update({'apify_request_id': apify_request
+        meta.update({'apify_request_id': apify_request.id, 'apify_request_unique_key': apify_request.unique_key})
+        # scrapy_request.meta is a property, so we have to set it like this
+        scrapy_request._meta = meta  # noqa: SLF001

     # If the apify_request comes directly from the Request Queue, typically start URLs
     else:
         Actor.log.debug(f'[{call_id}]: gonna create a new Scrapy Request (cannot be restored)')

         scrapy_request = Request(
-            url=apify_request
-            method=apify_request
+            url=apify_request.url,
+            method=apify_request.method,
             meta={
-                'apify_request_id': apify_request
-                'apify_request_unique_key': apify_request
+                'apify_request_id': apify_request.id,
+                'apify_request_unique_key': apify_request.unique_key,
             },
         )

     # Add optional 'headers' field
-    if
-        scrapy_request.headers = Headers(apify_request['headers'])
-    else:
-        Actor.log.warning(
-            f'apify_request[headers] is not an instance of the dict class, apify_request[headers] = {apify_request["headers"]}',
-        )
+    if apify_request.headers:
+        scrapy_request.headers |= Headers(apify_request.headers)

     # Add optional 'userData' field
-    if
-        scrapy_request.meta['userData'] = apify_request
+    if apify_request.user_data:
+        scrapy_request.meta['userData'] = apify_request.user_data

     Actor.log.debug(f'[{call_id}]: an apify_request was converted to the scrapy_request={scrapy_request}')
     return scrapy_request
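The converters now round-trip between scrapy.Request and crawlee.Request objects instead of plain dicts. A minimal round-trip sketch based on the signatures above; it assumes a Spider instance is available (e.g. inside a running crawl) and is not part of the diff:

    from scrapy import Request, Spider

    from apify.scrapy.requests import to_apify_request, to_scrapy_request

    def roundtrip(spider: Spider) -> None:
        scrapy_request = Request(url='https://example.com', method='GET')

        # scrapy.Request -> crawlee.Request; returns None if the conversion fails.
        apify_request = to_apify_request(scrapy_request, spider=spider)
        if apify_request is None:
            return

        # crawlee.Request -> scrapy.Request, restoring the pickled original from user_data.
        restored = to_scrapy_request(apify_request, spider=spider)
        assert restored.url == scrapy_request.url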