apify 2.0.2b7.tar.gz → 2.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of apify might be problematic.

Files changed (40)
  1. {apify-2.0.2b7 → apify-2.1.0}/PKG-INFO +3 -3
  2. {apify-2.0.2b7 → apify-2.1.0}/pyproject.toml +20 -16
  3. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_actor.py +5 -2
  4. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_configuration.py +4 -2
  5. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_models.py +8 -1
  6. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_platform_event_manager.py +12 -1
  7. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_proxy_configuration.py +4 -1
  8. apify-2.1.0/src/apify/_utils.py +58 -0
  9. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_apify_storage_client.py +2 -0
  10. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/middlewares/apify_proxy.py +4 -2
  11. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/pipelines/actor_dataset_push.py +4 -1
  12. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/requests.py +7 -5
  13. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/scheduler.py +4 -1
  14. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/utils.py +4 -1
  15. apify-2.1.0/src/apify/storages/__init__.py +5 -0
  16. apify-2.1.0/src/apify/storages/_request_list.py +150 -0
  17. apify-2.0.2b7/src/apify/_utils.py +0 -26
  18. apify-2.0.2b7/src/apify/storages/__init__.py +0 -3
  19. {apify-2.0.2b7 → apify-2.1.0}/LICENSE +0 -0
  20. {apify-2.0.2b7 → apify-2.1.0}/README.md +0 -0
  21. {apify-2.0.2b7 → apify-2.1.0}/src/apify/__init__.py +1 -1
  22. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_consts.py +0 -0
  23. {apify-2.0.2b7 → apify-2.1.0}/src/apify/_crypto.py +0 -0
  24. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/__init__.py +0 -0
  25. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_dataset_client.py +0 -0
  26. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
  27. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
  28. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
  29. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
  30. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
  31. {apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/py.typed +0 -0
  32. {apify-2.0.2b7 → apify-2.1.0}/src/apify/log.py +0 -0
  33. {apify-2.0.2b7 → apify-2.1.0}/src/apify/py.typed +0 -0
  34. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/__init__.py +2 -2
  35. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/middlewares/__init__.py +0 -0
  36. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/middlewares/py.typed +0 -0
  37. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/pipelines/__init__.py +0 -0
  38. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/pipelines/py.typed +0 -0
  39. {apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/py.typed +0 -0
  40. {apify-2.0.2b7 → apify-2.1.0}/src/apify/storages/py.typed +0 -0

{apify-2.0.2b7 → apify-2.1.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apify
-Version: 2.0.2b7
+Version: 2.1.0
 Summary: Apify SDK for Python
 License: Apache-2.0
 Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
@@ -21,9 +21,9 @@ Classifier: Topic :: Software Development :: Libraries
 Provides-Extra: scrapy
 Requires-Dist: apify-client (>=1.8.1)
 Requires-Dist: apify-shared (>=1.1.2)
-Requires-Dist: crawlee (>=0.3.9)
+Requires-Dist: crawlee (>=0.4.0,<0.5.0)
 Requires-Dist: cryptography (>=42.0.0)
-Requires-Dist: httpx (>=0.27.0)
+Requires-Dist: httpx (>=0.27.0,<0.28.0)
 Requires-Dist: lazy-object-proxy (>=1.10.0)
 Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
 Requires-Dist: typing-extensions (>=4.1.0)

{apify-2.0.2b7 → apify-2.1.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "apify"
-version = "2.0.2b7"
+version = "2.1.0"
 description = "Apify SDK for Python"
 authors = ["Apify Technologies s.r.o. <support@apify.com>"]
 license = "Apache-2.0"
@@ -41,16 +41,15 @@ keywords = [
 "Issue Tracker" = "https://github.com/apify/apify-sdk-python/issues"
 "Repository" = "https://github.com/apify/apify-sdk-python"
 
-# We use inclusive ordered comparison clauses for external packages intentionally in order to enhance SDK's
-# compatibility with external packages. This decision was discussed in detail in the following PR:
-# https://github.com/apify/apify-sdk-python/pull/154.
 [tool.poetry.dependencies]
 python = "^3.9"
 apify-client = ">=1.8.1"
 apify-shared = ">=1.1.2"
-crawlee = ">=0.3.9"
+crawlee = "~0.4.0"
 cryptography = ">=42.0.0"
-httpx = ">=0.27.0"
+# TODO: relax the upper bound once the issue is resolved:
+# https://github.com/apify/apify-sdk-python/issues/348
+httpx = "~0.27.0"
 lazy-object-proxy = ">=1.10.0"
 scrapy = { version = ">=2.11.0", optional = true }
 typing-extensions = ">=4.1.0"
@@ -65,13 +64,13 @@ pre-commit = "~4.0.0"
 pydoc-markdown = "~4.8.0"
 pytest = "~8.3.0"
 pytest-asyncio = "~0.24.0"
-pytest-cov = "~5.0.0"
+pytest-cov = "~6.0.0"
 pytest-only = "~2.1.0"
 pytest-timeout = "~2.3.0"
 pytest-xdist = "~3.6.0"
 respx = "~0.21.0"
-ruff = "~0.7.0"
-setuptools = "~75.0.0" # setuptools are used by pytest but not explicitly required
+ruff = "~0.8.0"
+setuptools = "~75.6.0" # setuptools are used by pytest but not explicitly required
 
 [tool.poetry.extras]
 scrapy = ["scrapy"]
@@ -82,8 +81,6 @@ line-length = 120
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
-    "ANN101", # Missing type annotation for `self` in method
-    "ANN102", # Missing type annotation for `{name}` in classmethod
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {filename}
     "ASYNC109", # Async function definition with a `timeout` parameter
     "BLE001", # Do not catch blind exception
@@ -96,7 +93,6 @@ ignore = [
     "G004", # Logging statement uses f-string
     "ISC001", # This rule may cause conflicts when used with the formatter
     "FIX", # flake8-fixme
-    "PGH003", # Use specific rule codes when ignoring type issues
     "PLR0911", # Too many return statements
     "PLR0913", # Too many arguments in function definition
     "PLR0915", # Too many statements
@@ -141,6 +137,12 @@ indent-style = "space"
 docstring-quotes = "double"
 inline-quotes = "single"
 
+[tool.ruff.lint.flake8-type-checking]
+runtime-evaluated-base-classes = [
+    "pydantic.BaseModel",
+    "crawlee.configuration.Configuration",
+]
+
 [tool.ruff.lint.flake8-builtins]
 builtins-ignorelist = ["id"]
 
@@ -180,15 +182,17 @@ exclude = []
 module = ['scrapy', 'scrapy.*', 'lazy_object_proxy']
 ignore_missing_imports = true
 
+[tool.basedpyright]
+pythonVersion = "3.9"
+typeCheckingMode = "standard"
+include = ["src", "tests"]
+
 [tool.coverage.report]
 exclude_lines = [
     "pragma: no cover",
     "if TYPE_CHECKING:",
-    "assert_never()"
+    "assert_never()",
 ]
 
-[tool.basedpyright]
-typeCheckingMode = "standard"
-
 [tool.ipdb]
 context = 7
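
The new `[tool.ruff.lint.flake8-type-checking]` table tells Ruff that Pydantic-based classes evaluate their annotations at runtime, so imports used only in those annotations must not be relocated under `if TYPE_CHECKING:`. It also explains why the file-level `# ruff: noqa: TCH00x` pragmas are dropped later in this diff. A minimal sketch of the failure the setting prevents (the `RetryPolicy` model is hypothetical):

```python
from __future__ import annotations

from datetime import timedelta  # must stay a real runtime import

from pydantic import BaseModel


class RetryPolicy(BaseModel):
    # Pydantic resolves the string annotation when the class is built, so
    # `timedelta` has to be importable at runtime; hiding the import behind
    # `if TYPE_CHECKING:` would make model creation fail.
    backoff: timedelta = timedelta(seconds=30)
```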

{apify-2.0.2b7 → apify-2.1.0}/src/apify/_actor.py
@@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast
 
 from lazy_object_proxy import Proxy
 from pydantic import AliasChoices
-from typing_extensions import Self
 
 from apify_client import ApifyClientAsync
 from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
@@ -22,7 +21,7 @@ from apify._crypto import decrypt_input_secrets, load_private_key
 from apify._models import ActorRun
 from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
 from apify._proxy_configuration import ProxyConfiguration
-from apify._utils import get_system_info, is_running_in_ipython
+from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
 from apify.apify_storage_client import ApifyStorageClient
 from apify.log import _configure_logging, logger
 from apify.storages import Dataset, KeyValueStore, RequestQueue
@@ -31,6 +30,8 @@ if TYPE_CHECKING:
     import logging
     from types import TracebackType
 
+    from typing_extensions import Self
+
     from crawlee.proxy_configuration import _NewUrlFunction
 
     from apify._models import Webhook
@@ -39,6 +40,8 @@ if TYPE_CHECKING:
 MainReturnType = TypeVar('MainReturnType')
 
 
+@docs_name('Actor')
+@docs_group('Classes')
 class _ActorType:
     """The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
 

{apify-2.0.2b7 → apify-2.1.0}/src/apify/_configuration.py
@@ -1,4 +1,3 @@
-# ruff: noqa: TCH001 TCH002 TCH003 (so that pydantic annotations work)
 from __future__ import annotations
 
 from datetime import datetime, timedelta
@@ -11,7 +10,10 @@ from crawlee._utils.models import timedelta_ms
 from crawlee._utils.urls import validate_http_url
 from crawlee.configuration import Configuration as CrawleeConfiguration
 
+from apify._utils import docs_group
 
+
+@docs_group('Classes')
 class Configuration(CrawleeConfiguration):
     """A class for specifying the configuration of an Actor.
 
@@ -321,4 +323,4 @@
 
 
 # Monkey-patch the base class so that it works with the extended configuration
-CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
+CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore[method-assign]
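
The monkey-patch above means that crawlee-internal code asking the base class for the global configuration receives the Apify-extended `Configuration`. A rough illustration of the effect (assumed behaviour, not part of the diff):

```python
from apify._configuration import Configuration
from crawlee.configuration import Configuration as CrawleeConfiguration

# After the patch, the base-class accessor resolves to the Apify subclass,
# so platform-specific fields are available wherever crawlee looks them up.
config = CrawleeConfiguration.get_global_configuration()
assert isinstance(config, Configuration)
```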

{apify-2.0.2b7 → apify-2.1.0}/src/apify/_models.py
@@ -1,4 +1,3 @@
-# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic)
 from __future__ import annotations
 
 from datetime import datetime, timedelta
@@ -10,7 +9,10 @@ from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType
 from crawlee._utils.models import timedelta_ms
 from crawlee._utils.urls import validate_http_url
 
+from apify._utils import docs_group
 
+
+@docs_group('Data structures')
 class Webhook(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -29,12 +31,14 @@ class Webhook(BaseModel):
     ] = None
 
 
+@docs_group('Data structures')
 class ActorRunMeta(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
     origin: Annotated[MetaOrigin, Field()]
 
 
+@docs_group('Data structures')
 class ActorRunStats(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -55,6 +59,7 @@ class ActorRunStats(BaseModel):
     compute_units: Annotated[float, Field(alias='computeUnits')]
 
 
+@docs_group('Data structures')
 class ActorRunOptions(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -64,6 +69,7 @@ class ActorRunOptions(BaseModel):
     disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
 
 
+@docs_group('Data structures')
 class ActorRunUsage(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 
@@ -81,6 +87,7 @@ class ActorRunUsage(BaseModel):
     proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
 
 
+@docs_group('Data structures')
 class ActorRun(BaseModel):
     __model_config__ = ConfigDict(populate_by_name=True)
 

{apify-2.0.2b7 → apify-2.1.0}/src/apify/_platform_event_manager.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import asyncio
-from datetime import datetime  # noqa: TCH003
+from datetime import datetime
 from typing import TYPE_CHECKING, Annotated, Any, Literal, Union
 
 import websockets.client
@@ -19,6 +19,7 @@ from crawlee.events._types import (
     EventSystemInfoData,
 )
 
+from apify._utils import docs_group
 from apify.log import logger
 
 if TYPE_CHECKING:
@@ -30,11 +31,13 @@ if TYPE_CHECKING:
 __all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
 
 
+@docs_group('Data structures')
 class PersistStateEvent(BaseModel):
     name: Literal[Event.PERSIST_STATE]
     data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
 
 
+@docs_group('Data structures')
 class SystemInfoEventData(BaseModel):
     mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
     mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
@@ -61,26 +64,31 @@ class SystemInfoEventData(BaseModel):
         )
 
 
+@docs_group('Data structures')
 class SystemInfoEvent(BaseModel):
     name: Literal[Event.SYSTEM_INFO]
     data: SystemInfoEventData
 
 
+@docs_group('Data structures')
 class MigratingEvent(BaseModel):
     name: Literal[Event.MIGRATING]
     data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
 
 
+@docs_group('Data structures')
 class AbortingEvent(BaseModel):
     name: Literal[Event.ABORTING]
     data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
 
 
+@docs_group('Data structures')
 class ExitEvent(BaseModel):
     name: Literal[Event.EXIT]
     data: Annotated[EventExitData, Field(default_factory=EventExitData)]
 
 
+@docs_group('Data structures')
 class EventWithoutData(BaseModel):
     name: Literal[
         Event.SESSION_RETIRED,
@@ -93,11 +101,13 @@ class EventWithoutData(BaseModel):
     ]
     data: Any = None
 
 
+@docs_group('Data structures')
 class DeprecatedEvent(BaseModel):
     name: Literal['cpuInfo']
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
 
 
+@docs_group('Data structures')
 class UnknownEvent(BaseModel):
     name: str
     data: Annotated[dict[str, Any], Field(default_factory=dict)]
@@ -125,6 +135,7 @@ event_data_adapter: TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent] =
 )
 
 
+@docs_group('Classes')
 class PlatformEventManager(EventManager):
     """A class for managing Actor events.
 

{apify-2.0.2b7 → apify-2.1.0}/src/apify/_proxy_configuration.py
@@ -16,6 +16,7 @@ from crawlee.proxy_configuration import ProxyInfo as CrawleeProxyInfo
 from crawlee.proxy_configuration import _NewUrlFunction
 
 from apify._configuration import Configuration
+from apify._utils import docs_group
 from apify.log import logger
 
 if TYPE_CHECKING:
@@ -68,6 +69,7 @@ def _check(
         raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
 
 
+@docs_group('Classes')
 @dataclass
 class ProxyInfo(CrawleeProxyInfo):
     """Provides information about a proxy connection that is used for requests."""
@@ -87,6 +89,7 @@ class ProxyInfo(CrawleeProxyInfo):
     """
 
 
+@docs_group('Classes')
 class ProxyConfiguration(CrawleeProxyConfiguration):
     """Configures a connection to a proxy server with the provided options.
 
@@ -277,7 +280,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
             return
 
         status = None
-        async with httpx.AsyncClient(proxies=proxy_info.url, timeout=10) as client:
+        async with httpx.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
             for _ in range(2):
                 try:
                     response = await client.get(proxy_status_url)
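
The `proxies=` to `proxy=` rename follows the httpx API: recent httpx releases accept a single `proxy=` argument and deprecate the plural `proxies=` mapping, which is also why the dependency is pinned to `~0.27.0` above. A hedged, self-contained sketch (the URLs are placeholders):

```python
import asyncio

import httpx


async def check_via_proxy(proxy_url: str) -> int:
    # A single proxy URL is passed via `proxy=`; the older `proxies=` dict
    # form is deprecated in current httpx versions.
    async with httpx.AsyncClient(proxy=proxy_url, timeout=10) as client:
        response = await client.get('https://example.com')
        return response.status_code


# asyncio.run(check_via_proxy('http://user:password@proxy.example.com:8000'))
```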

apify-2.1.0/src/apify/_utils.py (new file)
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+import builtins
+import sys
+from importlib import metadata
+from typing import Callable, Literal
+
+
+def get_system_info() -> dict:
+    python_version = '.'.join([str(x) for x in sys.version_info[:3]])
+
+    system_info: dict[str, str | bool] = {
+        'apify_sdk_version': metadata.version('apify'),
+        'apify_client_version': metadata.version('apify-client'),
+        'crawlee_version': metadata.version('crawlee'),
+        'python_version': python_version,
+        'os': sys.platform,
+    }
+
+    if is_running_in_ipython():
+        system_info['is_running_in_ipython'] = True
+
+    return system_info
+
+
+def is_running_in_ipython() -> bool:
+    return getattr(builtins, '__IPYTHON__', False)
+
+
+GroupName = Literal['Classes', 'Abstract classes', 'Data structures', 'Errors', 'Functions']
+
+
+def docs_group(group_name: GroupName) -> Callable:  # noqa: ARG001
+    """Decorator to mark symbols for rendering and grouping in documentation.
+
+    This decorator is used purely for documentation purposes and does not alter the behavior
+    of the decorated callable.
+    """
+
+    def wrapper(func: Callable) -> Callable:
+        return func
+
+    return wrapper
+
+
+def docs_name(symbol_name: str) -> Callable:  # noqa: ARG001
+    """Decorator for renaming symbols in documentation.
+
+    This changes the rendered name of the symbol only in the rendered web documentation.
+
+    This decorator is used purely for documentation purposes and does not alter the behavior
+    of the decorated callable.
+    """
+
+    def wrapper(func: Callable) -> Callable:
+        return func
+
+    return wrapper
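
Both helpers are documentation-only markers: they take the group or display name, ignore it at runtime, and hand the decorated object back unchanged. Usage mirrors what `_actor.py` does elsewhere in this diff (sketch):

```python
from apify._utils import docs_group, docs_name


@docs_name('Actor')      # name rendered in the web documentation
@docs_group('Classes')   # section the symbol is grouped under
class _ActorType:
    """Behaves exactly as if it were undecorated - the wrappers are no-ops."""
```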

{apify-2.0.2b7 → apify-2.1.0}/src/apify/apify_storage_client/_apify_storage_client.py
@@ -5,6 +5,7 @@ from crawlee._utils.crypto import crypto_random_object_id
 from crawlee.base_storage_client import BaseStorageClient
 
 from apify._configuration import Configuration
+from apify._utils import docs_group
 from apify.apify_storage_client._dataset_client import DatasetClient
 from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
 from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
@@ -13,6 +14,7 @@ from apify.apify_storage_client._request_queue_client import RequestQueueClient
 from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
 
 
+@docs_group('Classes')
 class ApifyStorageClient(BaseStorageClient):
     """A storage client implementation based on the Apify platform storage."""
 

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/middlewares/apify_proxy.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
 from urllib.parse import ParseResult, urlparse
 
 try:
-    from scrapy import Request, Spider  # noqa: TCH002
+    if TYPE_CHECKING:
+        from scrapy import Request, Spider
+        from scrapy.crawler import Crawler
     from scrapy.core.downloader.handlers.http11 import TunnelError
-    from scrapy.crawler import Crawler  # noqa: TCH002
     from scrapy.exceptions import NotConfigured
 except ImportError as exc:
     raise ImportError(

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/pipelines/actor_dataset_push.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 from itemadapter.adapter import ItemAdapter
 
 try:
-    from scrapy import Item, Spider  # noqa: TCH002
+    if TYPE_CHECKING:
+        from scrapy import Item, Spider
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/requests.py
@@ -42,8 +42,10 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     Returns:
         The converted Apify request if the conversion was successful, otherwise None.
     """
-    if not isinstance(cast(Any, scrapy_request), Request):
-        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
+    if not isinstance(scrapy_request, Request):
+        Actor.log.warning(  # type: ignore[unreachable]
+            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
+        )
         return None
 
     call_id = crypto_random_object_id(8)
@@ -53,7 +55,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     if _is_request_produced_by_middleware(scrapy_request):
         unique_key = compute_unique_key(
             url=scrapy_request.url,
-            method=scrapy_request.method,
+            method=scrapy_request.method,  # type: ignore[arg-type]  # str vs literal
             payload=scrapy_request.body,
             use_extended_unique_key=True,
         )
@@ -80,9 +82,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
 
     # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
+        apify_request.headers = HttpHeaders(dict(scrapy_request.headers.to_unicode_dict()))
     else:
-        Actor.log.warning(
+        Actor.log.warning(  # type: ignore[unreachable]
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
         )
 

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/scheduler.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import traceback
+from typing import TYPE_CHECKING
 
 from apify._configuration import Configuration
 from apify.apify_storage_client import ApifyStorageClient
@@ -8,8 +9,10 @@ from apify.apify_storage_client import ApifyStorageClient
 try:
     from scrapy import Spider
     from scrapy.core.scheduler import BaseScheduler
-    from scrapy.http.request import Request  # noqa: TCH002
     from scrapy.utils.reactor import is_asyncio_reactor_installed
+
+    if TYPE_CHECKING:
+        from scrapy.http.request import Request
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/utils.py
@@ -2,14 +2,17 @@ from __future__ import annotations
 
 import asyncio
 from base64 import b64encode
+from typing import TYPE_CHECKING
 from urllib.parse import unquote
 
 from apify_shared.utils import ignore_docs
 
 try:
-    from scrapy.settings import Settings  # noqa: TCH002
     from scrapy.utils.project import get_project_settings
     from scrapy.utils.python import to_bytes
+
+    if TYPE_CHECKING:
+        from scrapy.settings import Settings
 except ImportError as exc:
     raise ImportError(
         'To use this module, you need to install the "scrapy" extra. For example, if you use pip, run '
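
All of the Scrapy integration modules in this release share the same refactor: imports needed only for type annotations move under `if TYPE_CHECKING:` (replacing the old `# noqa: TCH002` pragmas), while at least one genuine import stays in the `try:` block so a missing "scrapy" extra still raises the friendly `ImportError`. The shape of the pattern, reduced to a sketch (the `load_settings` helper is hypothetical):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

try:
    # A real runtime import keeps the ImportError guard working.
    from scrapy.utils.project import get_project_settings

    if TYPE_CHECKING:
        # Needed only for annotations; skipped entirely at runtime.
        from scrapy.settings import Settings
except ImportError as exc:
    raise ImportError(
        'To use this module, you need to install the "scrapy" extra.',
    ) from exc


def load_settings() -> Settings:
    return get_project_settings()
```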

apify-2.1.0/src/apify/storages/__init__.py (new file)
@@ -0,0 +1,5 @@
+from crawlee.storages import Dataset, KeyValueStore, RequestQueue
+
+from ._request_list import RequestList
+
+__all__ = ['Dataset', 'KeyValueStore', 'RequestList', 'RequestQueue']

apify-2.1.0/src/apify/storages/_request_list.py (new file)
@@ -0,0 +1,150 @@
+from __future__ import annotations
+
+import asyncio
+import re
+from asyncio import Task
+from functools import partial
+from typing import Annotated, Any, Union
+
+from pydantic import BaseModel, Field, TypeAdapter
+
+from crawlee import Request
+from crawlee._types import HttpMethod
+from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
+from crawlee.storages import RequestList as CrawleeRequestList
+
+from apify._utils import docs_group
+
+URL_NO_COMMAS_REGEX = re.compile(
+    r'https?:\/\/(www\.)?([^\W_]|[^\W_][-\w0-9@:%._+~#=]{0,254}[^\W_])\.[a-z]{2,63}(:\d{1,5})?(\/[-\w@:%+.~#?&/=()]*)?'
+)
+
+
+class _RequestDetails(BaseModel):
+    method: HttpMethod = 'GET'
+    payload: str = ''
+    headers: Annotated[dict[str, str], Field(default_factory=dict)] = {}
+    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')] = {}
+
+
+class _RequestsFromUrlInput(_RequestDetails):
+    requests_from_url: str = Field(alias='requestsFromUrl')
+
+
+class _SimpleUrlInput(_RequestDetails):
+    url: str
+
+
+url_input_adapter = TypeAdapter(list[Union[_RequestsFromUrlInput, _SimpleUrlInput]])
+
+
+@docs_group('Classes')
+class RequestList(CrawleeRequestList):
+    """Extends crawlee RequestList.
+
+    Method open is used to create RequestList from actor's requestListSources input.
+    """
+
+    @staticmethod
+    async def open(
+        name: str | None = None,
+        request_list_sources_input: list[dict[str, Any]] | None = None,
+        http_client: BaseHttpClient | None = None,
+    ) -> RequestList:
+        """Creates RequestList from Actor input requestListSources.
+
+        Args:
+            name: Name of the returned RequestList.
+            request_list_sources_input: List of dicts with either url key or requestsFromUrl key.
+            http_client: Client that will be used to send get request to urls defined by value of requestsFromUrl keys.
+
+        Returns:
+            RequestList created from request_list_sources_input.
+
+        ### Usage
+
+        ```python
+        example_input = [
+            # Gather urls from response body.
+            {'requestsFromUrl': 'https://crawlee.dev/file.txt', 'method': 'GET'},
+            # Directly include this url.
+            {'url': 'https://crawlee.dev', 'method': 'GET'}
+        ]
+        request_list = await RequestList.open(request_list_sources_input=example_input)
+        ```
+        """
+        request_list_sources_input = request_list_sources_input or []
+        return await RequestList._create_request_list(name, request_list_sources_input, http_client)
+
+    @staticmethod
+    async def _create_request_list(
+        name: str | None, request_list_sources_input: list[dict[str, Any]], http_client: BaseHttpClient | None
+    ) -> RequestList:
+        if not http_client:
+            http_client = HttpxHttpClient()
+
+        url_inputs = url_input_adapter.validate_python(request_list_sources_input)
+
+        simple_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _SimpleUrlInput)]
+        remote_url_inputs = [url_input for url_input in url_inputs if isinstance(url_input, _RequestsFromUrlInput)]
+
+        simple_url_requests = RequestList._create_requests_from_input(simple_url_inputs)
+        remote_url_requests = await RequestList._fetch_requests_from_url(remote_url_inputs, http_client=http_client)
+
+        return RequestList(name=name, requests=simple_url_requests + remote_url_requests)
+
+    @staticmethod
+    def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]:
+        return [
+            Request.from_url(
+                method=request_input.method,
+                url=request_input.url,
+                payload=request_input.payload.encode('utf-8'),
+                headers=request_input.headers,
+                user_data=request_input.user_data,
+            )
+            for request_input in simple_url_inputs
+        ]
+
+    @staticmethod
+    async def _fetch_requests_from_url(
+        remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: BaseHttpClient
+    ) -> list[Request]:
+        """Crete list of requests from url.
+
+        Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
+        callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from
+        collected links and additional inputs stored in other attributes of each remote_url_requests_inputs.
+        """
+        created_requests: list[Request] = []
+
+        def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
+            """Callback to scrape response body with regexp and create Requests from matches."""
+            matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
+            created_requests.extend(
+                [
+                    Request.from_url(
+                        match.group(0),
+                        method=request_input.method,
+                        payload=request_input.payload.encode('utf-8'),
+                        headers=request_input.headers,
+                        user_data=request_input.user_data,
+                    )
+                    for match in matches
+                ]
+            )
+
+        remote_url_requests = []
+        for remote_url_requests_input in remote_url_requests_inputs:
+            get_response_task = asyncio.create_task(
+                http_client.send_request(
+                    method='GET',
+                    url=remote_url_requests_input.requests_from_url,
+                )
+            )
+
+            get_response_task.add_done_callback(partial(create_requests_from_response, remote_url_requests_input))
+            remote_url_requests.append(get_response_task)
+
+        await asyncio.gather(*remote_url_requests)
+        return created_requests
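
A hedged sketch of wiring the new `RequestList` to Actor input; the `requestListSources` field name follows the class docstring above, and the surrounding input schema is an assumption:

```python
from apify import Actor
from apify.storages import RequestList


async def main() -> None:
    async with Actor:
        actor_input = await Actor.get_input() or {}
        # Plain {'url': ...} entries are used directly; {'requestsFromUrl': ...}
        # entries are fetched and scanned for URLs before requests are created.
        request_list = await RequestList.open(
            name='start-urls',
            request_list_sources_input=actor_input.get('requestListSources', []),
        )
        Actor.log.info(f'Request list ready: {request_list}')
```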

apify-2.0.2b7/src/apify/_utils.py (removed)
@@ -1,26 +0,0 @@
-from __future__ import annotations
-
-import builtins
-import sys
-from importlib import metadata
-
-
-def get_system_info() -> dict:
-    python_version = '.'.join([str(x) for x in sys.version_info[:3]])
-
-    system_info: dict[str, str | bool] = {
-        'apify_sdk_version': metadata.version('apify'),
-        'apify_client_version': metadata.version('apify-client'),
-        'crawlee_version': metadata.version('crawlee'),
-        'python_version': python_version,
-        'os': sys.platform,
-    }
-
-    if is_running_in_ipython():
-        system_info['is_running_in_ipython'] = True
-
-    return system_info
-
-
-def is_running_in_ipython() -> bool:
-    return getattr(builtins, '__IPYTHON__', False)

apify-2.0.2b7/src/apify/storages/__init__.py (removed)
@@ -1,3 +0,0 @@
-from crawlee.storages import Dataset, KeyValueStore, RequestQueue
-
-__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue']

{apify-2.0.2b7 → apify-2.1.0}/src/apify/__init__.py
@@ -13,8 +13,8 @@ __version__ = metadata.version('apify')
 
 __all__ = [
     'Actor',
-    'Event',
     'Configuration',
+    'Event',
     'ProxyConfiguration',
     'ProxyInfo',
     'Request',

{apify-2.0.2b7 → apify-2.1.0}/src/apify/scrapy/__init__.py
@@ -3,9 +3,9 @@ from apify.scrapy.scheduler import ApifyScheduler
 from apify.scrapy.utils import get_basic_auth_header, get_running_event_loop_id
 
 __all__ = [
-    'to_apify_request',
-    'to_scrapy_request',
     'ApifyScheduler',
     'get_basic_auth_header',
     'get_running_event_loop_id',
+    'to_apify_request',
+    'to_scrapy_request',
 ]