apify 1.5.2b4__tar.gz → 1.5.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (46)
  1. {apify-1.5.2b4 → apify-1.5.3}/PKG-INFO +3 -3
  2. {apify-1.5.2b4 → apify-1.5.3}/pyproject.toml +5 -5
  3. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_crypto.py +2 -3
  4. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/file_storage_utils.py +1 -2
  5. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/memory_storage_client.py +7 -8
  6. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/base_resource_client.py +1 -1
  7. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/base_resource_collection_client.py +3 -3
  8. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/dataset.py +6 -7
  9. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/dataset_collection.py +2 -2
  10. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/key_value_store.py +7 -8
  11. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/key_value_store_collection.py +2 -2
  12. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/request_queue.py +7 -8
  13. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/request_queue_collection.py +2 -2
  14. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_utils.py +1 -2
  15. {apify-1.5.2b4 → apify-1.5.3}/src/apify/actor.py +9 -9
  16. {apify-1.5.2b4 → apify-1.5.3}/src/apify/config.py +1 -1
  17. {apify-1.5.2b4 → apify-1.5.3}/src/apify/event_manager.py +3 -4
  18. {apify-1.5.2b4 → apify-1.5.3}/src/apify/log.py +1 -2
  19. {apify-1.5.2b4 → apify-1.5.3}/src/apify/proxy_configuration.py +3 -5
  20. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/apify_proxy.py +3 -3
  21. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/apify_retry.py +3 -3
  22. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/pipelines/actor_dataset_push.py +1 -1
  23. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/scheduler.py +4 -4
  24. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/utils.py +52 -4
  25. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/base_storage.py +4 -4
  26. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/dataset.py +7 -7
  27. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/key_value_store.py +5 -5
  28. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/request_queue.py +8 -8
  29. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/storage_client_manager.py +2 -2
  30. {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/PKG-INFO +3 -3
  31. {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/requires.txt +2 -2
  32. {apify-1.5.2b4 → apify-1.5.3}/LICENSE +0 -0
  33. {apify-1.5.2b4 → apify-1.5.3}/README.md +0 -0
  34. {apify-1.5.2b4 → apify-1.5.3}/setup.cfg +0 -0
  35. {apify-1.5.2b4 → apify-1.5.3}/src/apify/__init__.py +0 -0
  36. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/__init__.py +0 -0
  37. {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/__init__.py +0 -0
  38. {apify-1.5.2b4 → apify-1.5.3}/src/apify/consts.py +0 -0
  39. {apify-1.5.2b4 → apify-1.5.3}/src/apify/py.typed +0 -0
  40. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/__init__.py +0 -0
  41. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/__init__.py +0 -0
  42. {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/pipelines/__init__.py +0 -0
  43. {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/__init__.py +0 -0
  44. {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/SOURCES.txt +0 -0
  45. {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/dependency_links.txt +0 -0
  46. {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.2b4
3
+ Version: 1.5.3
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -24,8 +24,8 @@ Classifier: Topic :: Software Development :: Libraries
24
24
  Requires-Python: >=3.8
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
- Requires-Dist: apify-client~=1.6.0
28
- Requires-Dist: apify-shared~=1.1.0
27
+ Requires-Dist: apify-client~=1.6.2
28
+ Requires-Dist: apify-shared~=1.1.1
29
29
  Requires-Dist: aiofiles>=22.1.0
30
30
  Requires-Dist: aioshutil>=1.0
31
31
  Requires-Dist: colorama>=0.4.6
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "apify"
3
- version = "1.5.2b4"
3
+ version = "1.5.3"
4
4
  description = "Apify SDK for Python"
5
5
  readme = "README.md"
6
6
  license = { text = "Apache Software License" }
@@ -26,8 +26,8 @@ requires-python = ">=3.8"
26
26
  # compatibility with a wide range of external packages. This decision was discussed in detail in the following PR:
27
27
  # https://github.com/apify/apify-sdk-python/pull/154
28
28
  dependencies = [
29
- "apify-client ~= 1.6.0",
30
- "apify-shared ~= 1.1.0",
29
+ "apify-client ~= 1.6.2",
30
+ "apify-shared ~= 1.1.1",
31
31
  "aiofiles >= 22.1.0",
32
32
  "aioshutil >= 1.0",
33
33
  "colorama >= 0.4.6",
@@ -111,7 +111,6 @@ ignore = [
111
111
  "S303", # Use of insecure MD2, MD4, MD5, or SHA1 hash function
112
112
  "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
113
113
  "TD002", # Missing author in TODO; try: `# TODO(<author_name>): ...` or `# TODO @<author_name>: ...
114
- "TID252", # Relative imports from parent modules are bannedRuff
115
114
  "TRY003", # Avoid specifying long messages outside the exception class
116
115
 
117
116
  # TODO: Remove this once the following issue is fixed
@@ -139,6 +138,7 @@ indent-style = "space"
139
138
  "PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
140
139
  "S101", # Use of assert detected
141
140
  "T20", # flake8-print
141
+ "TID252", # Relative imports from parent modules are banned
142
142
  "TRY301", # Abstract `raise` to an inner function
143
143
  ]
144
144
 
@@ -147,7 +147,7 @@ docstring-quotes = "double"
147
147
  inline-quotes = "single"
148
148
 
149
149
  [tool.ruff.lint.isort]
150
- known-first-party = ["apify", "apify_client", "apify_shared"]
150
+ known-local-folder = ["apify"]
151
151
 
152
152
  [tool.ruff.lint.pydocstyle]
153
153
  convention = "google"
@@ -4,14 +4,13 @@ import base64
4
4
  import secrets
5
5
  from typing import Any
6
6
 
7
+ from apify_shared.utils import ignore_docs
7
8
  from cryptography.exceptions import InvalidTag as InvalidTagException
8
9
  from cryptography.hazmat.primitives import hashes, serialization
9
10
  from cryptography.hazmat.primitives.asymmetric import padding, rsa
10
11
  from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
11
12
 
12
- from apify_shared.utils import ignore_docs
13
-
14
- from .consts import ENCRYPTED_INPUT_VALUE_REGEXP
13
+ from apify.consts import ENCRYPTED_INPUT_VALUE_REGEXP
15
14
 
16
15
  ENCRYPTION_KEY_LENGTH = 32
17
16
  ENCRYPTION_IV_LENGTH = 16
@@ -4,10 +4,9 @@ import os
4
4
 
5
5
  import aiofiles
6
6
  from aiofiles.os import makedirs
7
-
8
7
  from apify_shared.utils import json_dumps
9
8
 
10
- from .._utils import force_remove
9
+ from apify._utils import force_remove
11
10
 
12
11
 
13
12
  async def update_metadata(*, data: dict, entity_directory: str, write_metadata: bool) -> None:
@@ -8,17 +8,16 @@ from pathlib import Path
8
8
  import aioshutil
9
9
  from aiofiles import ospath
10
10
  from aiofiles.os import rename, scandir
11
-
12
11
  from apify_shared.consts import ApifyEnvVars
13
12
  from apify_shared.utils import ignore_docs
14
13
 
15
- from .._utils import maybe_parse_bool
16
- from .resource_clients.dataset import DatasetClient
17
- from .resource_clients.dataset_collection import DatasetCollectionClient
18
- from .resource_clients.key_value_store import KeyValueStoreClient
19
- from .resource_clients.key_value_store_collection import KeyValueStoreCollectionClient
20
- from .resource_clients.request_queue import RequestQueueClient
21
- from .resource_clients.request_queue_collection import RequestQueueCollectionClient
14
+ from apify._memory_storage.resource_clients.dataset import DatasetClient
15
+ from apify._memory_storage.resource_clients.dataset_collection import DatasetCollectionClient
16
+ from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
17
+ from apify._memory_storage.resource_clients.key_value_store_collection import KeyValueStoreCollectionClient
18
+ from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
19
+ from apify._memory_storage.resource_clients.request_queue_collection import RequestQueueCollectionClient
20
+ from apify._utils import maybe_parse_bool
22
21
 
23
22
  """
24
23
  Memory storage emulates data storages that are available on the Apify platform.
@@ -10,7 +10,7 @@ from apify_shared.utils import ignore_docs
10
10
  if TYPE_CHECKING:
11
11
  from typing_extensions import Self
12
12
 
13
- from ..memory_storage_client import MemoryStorageClient
13
+ from apify._memory_storage.memory_storage_client import MemoryStorageClient
14
14
 
15
15
 
16
16
  @ignore_docs
@@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Generic, TypeVar, cast
7
7
  from apify_shared.models import ListPage
8
8
  from apify_shared.utils import ignore_docs
9
9
 
10
- from ..file_storage_utils import update_metadata
11
- from .base_resource_client import BaseResourceClient
10
+ from apify._memory_storage.file_storage_utils import update_metadata
11
+ from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
12
12
 
13
13
  if TYPE_CHECKING:
14
- from ..memory_storage_client import MemoryStorageClient
14
+ from apify._memory_storage.memory_storage_client import MemoryStorageClient
15
15
 
16
16
 
17
17
  ResourceClientType = TypeVar('ResourceClientType', bound=BaseResourceClient, contravariant=True) # noqa: PLC0105
@@ -7,20 +7,19 @@ from datetime import datetime, timezone
7
7
  from typing import TYPE_CHECKING, Any, AsyncIterator
8
8
 
9
9
  import aioshutil
10
-
11
10
  from apify_shared.models import ListPage
12
11
  from apify_shared.utils import ignore_docs
13
12
 
14
- from ..._crypto import crypto_random_object_id
15
- from ..._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage
16
- from ...consts import StorageTypes
17
- from ..file_storage_utils import _update_dataset_items, update_metadata
18
- from .base_resource_client import BaseResourceClient
13
+ from apify._crypto import crypto_random_object_id
14
+ from apify._memory_storage.file_storage_utils import _update_dataset_items, update_metadata
15
+ from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
16
+ from apify._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage
17
+ from apify.consts import StorageTypes
19
18
 
20
19
  if TYPE_CHECKING:
21
20
  from apify_shared.types import JSONSerializable
22
21
 
23
- from ..memory_storage_client import MemoryStorageClient
22
+ from apify._memory_storage.memory_storage_client import MemoryStorageClient
24
23
 
25
24
  # This is what API returns in the x-apify-pagination-limit
26
25
  # header when no limit query parameter is used.
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
4
4
 
5
5
  from apify_shared.utils import ignore_docs
6
6
 
7
- from .base_resource_collection_client import BaseResourceCollectionClient
8
- from .dataset import DatasetClient
7
+ from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
8
+ from apify._memory_storage.resource_clients.dataset import DatasetClient
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from apify_shared.models import ListPage
@@ -13,11 +13,12 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, TypedDict
13
13
  import aiofiles
14
14
  import aioshutil
15
15
  from aiofiles.os import makedirs
16
-
17
16
  from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps
18
17
 
19
- from ..._crypto import crypto_random_object_id
20
- from ..._utils import (
18
+ from apify._crypto import crypto_random_object_id
19
+ from apify._memory_storage.file_storage_utils import update_metadata
20
+ from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
21
+ from apify._utils import (
21
22
  force_remove,
22
23
  force_rename,
23
24
  guess_file_extension,
@@ -25,15 +26,13 @@ from ..._utils import (
25
26
  raise_on_duplicate_storage,
26
27
  raise_on_non_existing_storage,
27
28
  )
28
- from ...consts import DEFAULT_API_PARAM_LIMIT, StorageTypes
29
- from ...log import logger
30
- from ..file_storage_utils import update_metadata
31
- from .base_resource_client import BaseResourceClient
29
+ from apify.consts import DEFAULT_API_PARAM_LIMIT, StorageTypes
30
+ from apify.log import logger
32
31
 
33
32
  if TYPE_CHECKING:
34
33
  from typing_extensions import NotRequired
35
34
 
36
- from ..memory_storage_client import MemoryStorageClient
35
+ from apify._memory_storage.memory_storage_client import MemoryStorageClient
37
36
 
38
37
 
39
38
  class KeyValueStoreRecord(TypedDict):
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
4
4
 
5
5
  from apify_shared.utils import ignore_docs
6
6
 
7
- from .base_resource_collection_client import BaseResourceCollectionClient
8
- from .key_value_store import KeyValueStoreClient
7
+ from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
8
+ from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from apify_shared.models import ListPage
@@ -8,18 +8,17 @@ from decimal import Decimal
8
8
  from typing import TYPE_CHECKING
9
9
 
10
10
  import aioshutil
11
- from sortedcollections import ValueSortedDict
12
-
13
11
  from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps
12
+ from sortedcollections import ValueSortedDict
14
13
 
15
- from ..._crypto import crypto_random_object_id
16
- from ..._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage, unique_key_to_request_id
17
- from ...consts import StorageTypes
18
- from ..file_storage_utils import delete_request, update_metadata, update_request_queue_item
19
- from .base_resource_client import BaseResourceClient
14
+ from apify._crypto import crypto_random_object_id
15
+ from apify._memory_storage.file_storage_utils import delete_request, update_metadata, update_request_queue_item
16
+ from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
17
+ from apify._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage, unique_key_to_request_id
18
+ from apify.consts import StorageTypes
20
19
 
21
20
  if TYPE_CHECKING:
22
- from ..memory_storage_client import MemoryStorageClient
21
+ from apify._memory_storage.memory_storage_client import MemoryStorageClient
23
22
 
24
23
 
25
24
  @ignore_docs
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
4
4
 
5
5
  from apify_shared.utils import ignore_docs
6
6
 
7
- from .base_resource_collection_client import BaseResourceCollectionClient
8
- from .request_queue import RequestQueueClient
7
+ from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
8
+ from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from apify_shared.models import ListPage
@@ -35,7 +35,6 @@ import aioshutil
35
35
  import psutil
36
36
  from aiofiles import ospath
37
37
  from aiofiles.os import remove, rename
38
-
39
38
  from apify_shared.consts import (
40
39
  BOOL_ENV_VARS,
41
40
  BOOL_ENV_VARS_TYPE,
@@ -57,7 +56,7 @@ from apify_shared.utils import (
57
56
  maybe_extract_enum_member_value,
58
57
  )
59
58
 
60
- from .consts import REQUEST_ID_LENGTH, StorageTypes
59
+ from apify.consts import REQUEST_ID_LENGTH, StorageTypes
61
60
 
62
61
  T = TypeVar('T')
63
62
 
@@ -12,8 +12,8 @@ from apify_client import ApifyClientAsync
12
12
  from apify_shared.consts import ActorEnvVars, ActorEventTypes, ActorExitCodes, ApifyEnvVars, WebhookEventType
13
13
  from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
14
14
 
15
- from ._crypto import decrypt_input_secrets, load_private_key
16
- from ._utils import (
15
+ from apify._crypto import decrypt_input_secrets, load_private_key
16
+ from apify._utils import (
17
17
  dualproperty,
18
18
  fetch_and_parse_env_var,
19
19
  get_cpu_usage_percent,
@@ -23,18 +23,18 @@ from ._utils import (
23
23
  run_func_at_interval_async,
24
24
  wrap_internal,
25
25
  )
26
- from .config import Configuration
27
- from .consts import EVENT_LISTENERS_TIMEOUT_SECS
28
- from .event_manager import EventManager
29
- from .log import logger
30
- from .proxy_configuration import ProxyConfiguration
31
- from .storages import Dataset, KeyValueStore, RequestQueue, StorageClientManager
26
+ from apify.config import Configuration
27
+ from apify.consts import EVENT_LISTENERS_TIMEOUT_SECS
28
+ from apify.event_manager import EventManager
29
+ from apify.log import logger
30
+ from apify.proxy_configuration import ProxyConfiguration
31
+ from apify.storages import Dataset, KeyValueStore, RequestQueue, StorageClientManager
32
32
 
33
33
  if TYPE_CHECKING:
34
34
  import logging
35
35
  from types import TracebackType
36
36
 
37
- from ._memory_storage import MemoryStorageClient
37
+ from apify._memory_storage import MemoryStorageClient
38
38
 
39
39
  T = TypeVar('T')
40
40
  MainReturnType = TypeVar('MainReturnType')
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from apify_shared.consts import ActorEnvVars, ApifyEnvVars
4
4
 
5
- from ._utils import fetch_and_parse_env_var
5
+ from apify._utils import fetch_and_parse_env_var
6
6
 
7
7
 
8
8
  class Configuration:
@@ -8,16 +8,15 @@ from collections import defaultdict
8
8
  from typing import TYPE_CHECKING, Any, Callable, Coroutine, Union
9
9
 
10
10
  import websockets.client
11
- from pyee.asyncio import AsyncIOEventEmitter
12
-
13
11
  from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value, parse_date_fields
12
+ from pyee.asyncio import AsyncIOEventEmitter
14
13
 
15
- from .log import logger
14
+ from apify.log import logger
16
15
 
17
16
  if TYPE_CHECKING:
18
17
  from apify_shared.consts import ActorEventTypes
19
18
 
20
- from .config import Configuration
19
+ from apify.config import Configuration
21
20
 
22
21
  ListenerType = Union[Callable[[], None], Callable[[Any], None], Callable[[], Coroutine[Any, Any, None]], Callable[[Any], Coroutine[Any, Any, None]]]
23
22
 
@@ -6,9 +6,8 @@ import textwrap
6
6
  import traceback
7
7
  from typing import Any
8
8
 
9
- from colorama import Fore, Style, just_fix_windows_console
10
-
11
9
  from apify_shared.utils import ignore_docs
10
+ from colorama import Fore, Style, just_fix_windows_console
12
11
 
13
12
  just_fix_windows_console()
14
13
 
@@ -7,17 +7,15 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable, Pattern, TypedDict
7
7
  from urllib.parse import urljoin, urlparse
8
8
 
9
9
  import httpx
10
-
11
10
  from apify_shared.consts import ApifyEnvVars
12
11
  from apify_shared.utils import ignore_docs
13
12
 
14
- from .config import Configuration
15
- from .log import logger
13
+ from apify.config import Configuration
14
+ from apify.log import logger
16
15
 
17
16
  if TYPE_CHECKING:
18
- from typing_extensions import NotRequired
19
-
20
17
  from apify_client import ApifyClientAsync
18
+ from typing_extensions import NotRequired
21
19
 
22
20
  APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
23
21
  COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
@@ -12,9 +12,9 @@ except ImportError as exc:
12
12
  'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
13
13
  ) from exc
14
14
 
15
- from ...actor import Actor
16
- from ...proxy_configuration import ProxyConfiguration
17
- from ..utils import get_basic_auth_header
15
+ from apify.actor import Actor
16
+ from apify.proxy_configuration import ProxyConfiguration
17
+ from apify.scrapy.utils import get_basic_auth_header
18
18
 
19
19
 
20
20
  class ApifyHttpProxyMiddleware:
@@ -13,11 +13,11 @@ except ImportError as exc:
13
13
  'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
14
14
  ) from exc
15
15
 
16
- from ...actor import Actor
17
- from ..utils import nested_event_loop, open_queue_with_custom_client, to_apify_request
16
+ from apify.actor import Actor
17
+ from apify.scrapy.utils import nested_event_loop, open_queue_with_custom_client, to_apify_request
18
18
 
19
19
  if TYPE_CHECKING:
20
- from ...storages import RequestQueue
20
+ from apify.storages import RequestQueue
21
21
 
22
22
 
23
23
  class ApifyRetryMiddleware(RetryMiddleware):
@@ -9,7 +9,7 @@ except ImportError as exc:
9
9
  'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
10
10
  ) from exc
11
11
 
12
- from ...actor import Actor
12
+ from apify.actor import Actor
13
13
 
14
14
 
15
15
  class ActorDatasetPushPipeline:
@@ -12,10 +12,10 @@ except ImportError as exc:
12
12
  'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
13
13
  ) from exc
14
14
 
15
- from .._crypto import crypto_random_object_id
16
- from ..actor import Actor
17
- from ..storages import RequestQueue
18
- from .utils import nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request
15
+ from apify._crypto import crypto_random_object_id
16
+ from apify.actor import Actor
17
+ from apify.scrapy.utils import nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request
18
+ from apify.storages import RequestQueue
19
19
 
20
20
 
21
21
  class ApifyScheduler(BaseScheduler):
@@ -8,6 +8,8 @@ from urllib.parse import unquote
8
8
 
9
9
  try:
10
10
  from scrapy import Request, Spider
11
+ from scrapy.settings import Settings # noqa: TCH002
12
+ from scrapy.utils.project import get_project_settings
11
13
  from scrapy.utils.python import to_bytes
12
14
  from scrapy.utils.request import request_from_dict
13
15
  except ImportError as exc:
@@ -15,9 +17,9 @@ except ImportError as exc:
15
17
  'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
16
18
  ) from exc
17
19
 
18
- from .._crypto import crypto_random_object_id
19
- from ..actor import Actor
20
- from ..storages import RequestQueue, StorageClientManager
20
+ from apify._crypto import crypto_random_object_id
21
+ from apify.actor import Actor
22
+ from apify.storages import RequestQueue, StorageClientManager
21
23
 
22
24
  nested_event_loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()
23
25
 
@@ -47,6 +49,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
47
49
  scrapy_request: The Scrapy request to be converted.
48
50
  spider: The Scrapy spider that the request is associated with.
49
51
 
52
+ Raises:
53
+ TypeError: If the scrapy_request is not an instance of the scrapy.Request class.
54
+
50
55
  Returns:
51
56
  The converted Apify request.
52
57
  """
@@ -88,6 +93,10 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
88
93
  apify_request: The Apify request to be converted.
89
94
  spider: The Scrapy spider that the request is associated with.
90
95
 
96
+ Raises:
97
+ TypeError: If the apify_request is not a dictionary.
98
+ ValueError: If the apify_request does not contain the required keys.
99
+
91
100
  Returns:
92
101
  The converted Scrapy request.
93
102
  """
@@ -98,7 +107,7 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
98
107
  missing_keys = [key for key in required_keys if key not in apify_request]
99
108
 
100
109
  if missing_keys:
101
- raise ValueError(f"apify_request must contain {', '.join(map(repr, missing_keys))} key(s)")
110
+ raise ValueError(f'apify_request must contain {", ".join(map(repr, missing_keys))} key(s)')
102
111
 
103
112
  call_id = crypto_random_object_id(8)
104
113
  Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')
@@ -146,6 +155,45 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
146
155
  return scrapy_request
147
156
 
148
157
 
158
+ def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict | None = None) -> Settings:
159
+ """Integrates Apify configuration into a Scrapy project settings.
160
+
161
+ Note: The function directly modifies the passed `settings` object and also returns it.
162
+
163
+ Args:
164
+ settings: Scrapy project settings to be modified.
165
+ proxy_config: Proxy configuration to be stored in the settings.
166
+
167
+ Returns:
168
+ Scrapy project settings with custom configurations.
169
+ """
170
+ if settings is None:
171
+ settings = get_project_settings()
172
+
173
+ # Use ApifyScheduler as the scheduler
174
+ settings['SCHEDULER'] = 'apify.scrapy.scheduler.ApifyScheduler'
175
+
176
+ # Add the ActorDatasetPushPipeline into the item pipelines, assigning it the highest integer (1000),
177
+ # ensuring it is executed as the final step in the pipeline sequence
178
+ settings['ITEM_PIPELINES']['apify.scrapy.pipelines.ActorDatasetPushPipeline'] = 1000
179
+
180
+ # Disable the default RobotsTxtMiddleware, Apify's custom scheduler already handles robots.txt
181
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware'] = None
182
+
183
+ # Disable the default HttpProxyMiddleware and add ApifyHttpProxyMiddleware
184
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
185
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 950
186
+
187
+ # Disable the default RetryMiddleware and add ApifyRetryMiddleware with the highest integer (1000)
188
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.retry.RetryMiddleware'] = None
189
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyRetryMiddleware'] = 1000
190
+
191
+ # Store the proxy configuration
192
+ settings['APIFY_PROXY_SETTINGS'] = proxy_config
193
+
194
+ return settings
195
+
196
+
149
197
  async def open_queue_with_custom_client() -> RequestQueue:
150
198
  """Open a Request Queue with custom Apify Client.
151
199
 
@@ -6,10 +6,10 @@ from typing import TYPE_CHECKING, Generic, TypeVar, cast
6
6
 
7
7
  from apify_shared.utils import ignore_docs
8
8
 
9
- from .._memory_storage import MemoryStorageClient
10
- from .._memory_storage.resource_clients import BaseResourceClient, BaseResourceCollectionClient
11
- from ..config import Configuration
12
- from .storage_client_manager import StorageClientManager
9
+ from apify._memory_storage import MemoryStorageClient
10
+ from apify._memory_storage.resource_clients import BaseResourceClient, BaseResourceCollectionClient
11
+ from apify.config import Configuration
12
+ from apify.storages.storage_client_manager import StorageClientManager
13
13
 
14
14
  if TYPE_CHECKING:
15
15
  from apify_client import ApifyClientAsync
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, AsyncIterator, Iterable, Iterator
7
7
 
8
8
  from apify_shared.utils import ignore_docs, json_dumps
9
9
 
10
- from .._utils import wrap_internal
11
- from ..consts import MAX_PAYLOAD_SIZE_BYTES
12
- from .base_storage import BaseStorage
13
- from .key_value_store import KeyValueStore
10
+ from apify._utils import wrap_internal
11
+ from apify.consts import MAX_PAYLOAD_SIZE_BYTES
12
+ from apify.storages.base_storage import BaseStorage
13
+ from apify.storages.key_value_store import KeyValueStore
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from apify_client import ApifyClientAsync
@@ -18,9 +18,9 @@ if TYPE_CHECKING:
18
18
  from apify_shared.models import ListPage
19
19
  from apify_shared.types import JSONSerializable
20
20
 
21
- from .._memory_storage import MemoryStorageClient
22
- from .._memory_storage.resource_clients import DatasetClient, DatasetCollectionClient
23
- from ..config import Configuration
21
+ from apify._memory_storage import MemoryStorageClient
22
+ from apify._memory_storage.resource_clients import DatasetClient, DatasetCollectionClient
23
+ from apify.config import Configuration
24
24
 
25
25
  # 0.01%
26
26
  SAFETY_BUFFER_PERCENT = 0.01 / 100
@@ -5,15 +5,15 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, NamedTuple, TypedDict, Typ
5
5
  from apify_client.clients import KeyValueStoreClientAsync, KeyValueStoreCollectionClientAsync
6
6
  from apify_shared.utils import ignore_docs
7
7
 
8
- from .._utils import wrap_internal
9
- from .base_storage import BaseStorage
8
+ from apify._utils import wrap_internal
9
+ from apify.storages.base_storage import BaseStorage
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from apify_client import ApifyClientAsync
13
13
 
14
- from .._memory_storage import MemoryStorageClient
15
- from .._memory_storage.resource_clients import KeyValueStoreClient, KeyValueStoreCollectionClient
16
- from ..config import Configuration
14
+ from apify._memory_storage import MemoryStorageClient
15
+ from apify._memory_storage.resource_clients import KeyValueStoreClient, KeyValueStoreCollectionClient
16
+ from apify.config import Configuration
17
17
 
18
18
 
19
19
  T = TypeVar('T')
@@ -8,19 +8,19 @@ from typing import OrderedDict as OrderedDictType
8
8
 
9
9
  from apify_shared.utils import ignore_docs
10
10
 
11
- from .._crypto import crypto_random_object_id
12
- from .._utils import LRUCache, budget_ow, unique_key_to_request_id
13
- from ..consts import REQUEST_QUEUE_HEAD_MAX_LIMIT
14
- from ..log import logger
15
- from .base_storage import BaseStorage
11
+ from apify._crypto import crypto_random_object_id
12
+ from apify._utils import LRUCache, budget_ow, unique_key_to_request_id
13
+ from apify.consts import REQUEST_QUEUE_HEAD_MAX_LIMIT
14
+ from apify.log import logger
15
+ from apify.storages.base_storage import BaseStorage
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  from apify_client import ApifyClientAsync
19
19
  from apify_client.clients import RequestQueueClientAsync, RequestQueueCollectionClientAsync
20
20
 
21
- from .._memory_storage import MemoryStorageClient
22
- from .._memory_storage.resource_clients import RequestQueueClient, RequestQueueCollectionClient
23
- from ..config import Configuration
21
+ from apify._memory_storage import MemoryStorageClient
22
+ from apify._memory_storage.resource_clients import RequestQueueClient, RequestQueueCollectionClient
23
+ from apify.config import Configuration
24
24
 
25
25
 
26
26
  MAX_CACHED_REQUESTS = 1_000_000
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
4
4
 
5
5
  from apify_shared.utils import ignore_docs
6
6
 
7
- from .._memory_storage import MemoryStorageClient
8
- from ..config import Configuration
7
+ from apify._memory_storage import MemoryStorageClient
8
+ from apify.config import Configuration
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from apify_client import ApifyClientAsync
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.2b4
3
+ Version: 1.5.3
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -24,8 +24,8 @@ Classifier: Topic :: Software Development :: Libraries
24
24
  Requires-Python: >=3.8
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
- Requires-Dist: apify-client~=1.6.0
28
- Requires-Dist: apify-shared~=1.1.0
27
+ Requires-Dist: apify-client~=1.6.2
28
+ Requires-Dist: apify-shared~=1.1.1
29
29
  Requires-Dist: aiofiles>=22.1.0
30
30
  Requires-Dist: aioshutil>=1.0
31
31
  Requires-Dist: colorama>=0.4.6
@@ -1,5 +1,5 @@
1
- apify-client~=1.6.0
2
- apify-shared~=1.1.0
1
+ apify-client~=1.6.2
2
+ apify-shared~=1.1.1
3
3
  aiofiles>=22.1.0
4
4
  aioshutil>=1.0
5
5
  colorama>=0.4.6
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes