apify 1.5.2b4__tar.gz → 1.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-1.5.2b4 → apify-1.5.3}/PKG-INFO +3 -3
- {apify-1.5.2b4 → apify-1.5.3}/pyproject.toml +5 -5
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_crypto.py +2 -3
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/file_storage_utils.py +1 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/memory_storage_client.py +7 -8
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/base_resource_client.py +1 -1
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/base_resource_collection_client.py +3 -3
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/dataset.py +6 -7
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/dataset_collection.py +2 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/key_value_store.py +7 -8
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/key_value_store_collection.py +2 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/request_queue.py +7 -8
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/request_queue_collection.py +2 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_utils.py +1 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/actor.py +9 -9
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/config.py +1 -1
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/event_manager.py +3 -4
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/log.py +1 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/proxy_configuration.py +3 -5
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/apify_proxy.py +3 -3
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/apify_retry.py +3 -3
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/pipelines/actor_dataset_push.py +1 -1
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/scheduler.py +4 -4
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/utils.py +52 -4
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/base_storage.py +4 -4
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/dataset.py +7 -7
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/key_value_store.py +5 -5
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/request_queue.py +8 -8
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/storage_client_manager.py +2 -2
- {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/PKG-INFO +3 -3
- {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/requires.txt +2 -2
- {apify-1.5.2b4 → apify-1.5.3}/LICENSE +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/README.md +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/setup.cfg +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/consts.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/py.typed +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify/storages/__init__.py +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/SOURCES.txt +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/dependency_links.txt +0 -0
- {apify-1.5.2b4 → apify-1.5.3}/src/apify.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 1.5.2b4
|
|
3
|
+
Version: 1.5.3
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Author-email: "Apify Technologies s.r.o." <support@apify.com>
|
|
6
6
|
License: Apache Software License
|
|
@@ -24,8 +24,8 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
24
24
|
Requires-Python: >=3.8
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
|
-
Requires-Dist: apify-client~=1.6.
|
|
28
|
-
Requires-Dist: apify-shared~=1.1.
|
|
27
|
+
Requires-Dist: apify-client~=1.6.2
|
|
28
|
+
Requires-Dist: apify-shared~=1.1.1
|
|
29
29
|
Requires-Dist: aiofiles>=22.1.0
|
|
30
30
|
Requires-Dist: aioshutil>=1.0
|
|
31
31
|
Requires-Dist: colorama>=0.4.6
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "apify"
|
|
3
|
-
version = "1.5.2b4"
|
|
3
|
+
version = "1.5.3"
|
|
4
4
|
description = "Apify SDK for Python"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "Apache Software License" }
|
|
@@ -26,8 +26,8 @@ requires-python = ">=3.8"
|
|
|
26
26
|
# compatibility with a wide range of external packages. This decision was discussed in detail in the following PR:
|
|
27
27
|
# https://github.com/apify/apify-sdk-python/pull/154
|
|
28
28
|
dependencies = [
|
|
29
|
-
"apify-client ~= 1.6.
|
|
30
|
-
"apify-shared ~= 1.1.
|
|
29
|
+
"apify-client ~= 1.6.2",
|
|
30
|
+
"apify-shared ~= 1.1.1",
|
|
31
31
|
"aiofiles >= 22.1.0",
|
|
32
32
|
"aioshutil >= 1.0",
|
|
33
33
|
"colorama >= 0.4.6",
|
|
@@ -111,7 +111,6 @@ ignore = [
|
|
|
111
111
|
"S303", # Use of insecure MD2, MD4, MD5, or SHA1 hash function
|
|
112
112
|
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
|
|
113
113
|
"TD002", # Missing author in TODO; try: `# TODO(<author_name>): ...` or `# TODO @<author_name>: ...
|
|
114
|
-
"TID252", # Relative imports from parent modules are bannedRuff
|
|
115
114
|
"TRY003", # Avoid specifying long messages outside the exception class
|
|
116
115
|
|
|
117
116
|
# TODO: Remove this once the following issue is fixed
|
|
@@ -139,6 +138,7 @@ indent-style = "space"
|
|
|
139
138
|
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
|
|
140
139
|
"S101", # Use of assert detected
|
|
141
140
|
"T20", # flake8-print
|
|
141
|
+
"TID252", # Relative imports from parent modules are banned
|
|
142
142
|
"TRY301", # Abstract `raise` to an inner function
|
|
143
143
|
]
|
|
144
144
|
|
|
@@ -147,7 +147,7 @@ docstring-quotes = "double"
|
|
|
147
147
|
inline-quotes = "single"
|
|
148
148
|
|
|
149
149
|
[tool.ruff.lint.isort]
|
|
150
|
-
known-
|
|
150
|
+
known-local-folder = ["apify"]
|
|
151
151
|
|
|
152
152
|
[tool.ruff.lint.pydocstyle]
|
|
153
153
|
convention = "google"
|
|
@@ -4,14 +4,13 @@ import base64
|
|
|
4
4
|
import secrets
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
|
+
from apify_shared.utils import ignore_docs
|
|
7
8
|
from cryptography.exceptions import InvalidTag as InvalidTagException
|
|
8
9
|
from cryptography.hazmat.primitives import hashes, serialization
|
|
9
10
|
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
|
10
11
|
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
|
11
12
|
|
|
12
|
-
from
|
|
13
|
-
|
|
14
|
-
from .consts import ENCRYPTED_INPUT_VALUE_REGEXP
|
|
13
|
+
from apify.consts import ENCRYPTED_INPUT_VALUE_REGEXP
|
|
15
14
|
|
|
16
15
|
ENCRYPTION_KEY_LENGTH = 32
|
|
17
16
|
ENCRYPTION_IV_LENGTH = 16
|
|
@@ -4,10 +4,9 @@ import os
|
|
|
4
4
|
|
|
5
5
|
import aiofiles
|
|
6
6
|
from aiofiles.os import makedirs
|
|
7
|
-
|
|
8
7
|
from apify_shared.utils import json_dumps
|
|
9
8
|
|
|
10
|
-
from
|
|
9
|
+
from apify._utils import force_remove
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
async def update_metadata(*, data: dict, entity_directory: str, write_metadata: bool) -> None:
|
|
@@ -8,17 +8,16 @@ from pathlib import Path
|
|
|
8
8
|
import aioshutil
|
|
9
9
|
from aiofiles import ospath
|
|
10
10
|
from aiofiles.os import rename, scandir
|
|
11
|
-
|
|
12
11
|
from apify_shared.consts import ApifyEnvVars
|
|
13
12
|
from apify_shared.utils import ignore_docs
|
|
14
13
|
|
|
15
|
-
from
|
|
16
|
-
from .resource_clients.
|
|
17
|
-
from .resource_clients.
|
|
18
|
-
from .resource_clients.
|
|
19
|
-
from .resource_clients.
|
|
20
|
-
from .resource_clients.
|
|
21
|
-
from .
|
|
14
|
+
from apify._memory_storage.resource_clients.dataset import DatasetClient
|
|
15
|
+
from apify._memory_storage.resource_clients.dataset_collection import DatasetCollectionClient
|
|
16
|
+
from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
|
|
17
|
+
from apify._memory_storage.resource_clients.key_value_store_collection import KeyValueStoreCollectionClient
|
|
18
|
+
from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
|
|
19
|
+
from apify._memory_storage.resource_clients.request_queue_collection import RequestQueueCollectionClient
|
|
20
|
+
from apify._utils import maybe_parse_bool
|
|
22
21
|
|
|
23
22
|
"""
|
|
24
23
|
Memory storage emulates data storages that are available on the Apify platform.
|
{apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/base_resource_client.py
RENAMED
|
@@ -10,7 +10,7 @@ from apify_shared.utils import ignore_docs
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from typing_extensions import Self
|
|
12
12
|
|
|
13
|
-
from
|
|
13
|
+
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@ignore_docs
|
|
@@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Generic, TypeVar, cast
|
|
|
7
7
|
from apify_shared.models import ListPage
|
|
8
8
|
from apify_shared.utils import ignore_docs
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from .base_resource_client import BaseResourceClient
|
|
10
|
+
from apify._memory_storage.file_storage_utils import update_metadata
|
|
11
|
+
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
|
-
from
|
|
14
|
+
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
ResourceClientType = TypeVar('ResourceClientType', bound=BaseResourceClient, contravariant=True) # noqa: PLC0105
|
|
@@ -7,20 +7,19 @@ from datetime import datetime, timezone
|
|
|
7
7
|
from typing import TYPE_CHECKING, Any, AsyncIterator
|
|
8
8
|
|
|
9
9
|
import aioshutil
|
|
10
|
-
|
|
11
10
|
from apify_shared.models import ListPage
|
|
12
11
|
from apify_shared.utils import ignore_docs
|
|
13
12
|
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from .
|
|
13
|
+
from apify._crypto import crypto_random_object_id
|
|
14
|
+
from apify._memory_storage.file_storage_utils import _update_dataset_items, update_metadata
|
|
15
|
+
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
16
|
+
from apify._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage
|
|
17
|
+
from apify.consts import StorageTypes
|
|
19
18
|
|
|
20
19
|
if TYPE_CHECKING:
|
|
21
20
|
from apify_shared.types import JSONSerializable
|
|
22
21
|
|
|
23
|
-
from
|
|
22
|
+
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
24
23
|
|
|
25
24
|
# This is what API returns in the x-apify-pagination-limit
|
|
26
25
|
# header when no limit query parameter is used.
|
{apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/dataset_collection.py
RENAMED
|
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from apify_shared.utils import ignore_docs
|
|
6
6
|
|
|
7
|
-
from .base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
-
from .dataset import DatasetClient
|
|
7
|
+
from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
+
from apify._memory_storage.resource_clients.dataset import DatasetClient
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from apify_shared.models import ListPage
|
|
@@ -13,11 +13,12 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, TypedDict
|
|
|
13
13
|
import aiofiles
|
|
14
14
|
import aioshutil
|
|
15
15
|
from aiofiles.os import makedirs
|
|
16
|
-
|
|
17
16
|
from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps
|
|
18
17
|
|
|
19
|
-
from
|
|
20
|
-
from
|
|
18
|
+
from apify._crypto import crypto_random_object_id
|
|
19
|
+
from apify._memory_storage.file_storage_utils import update_metadata
|
|
20
|
+
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
21
|
+
from apify._utils import (
|
|
21
22
|
force_remove,
|
|
22
23
|
force_rename,
|
|
23
24
|
guess_file_extension,
|
|
@@ -25,15 +26,13 @@ from ..._utils import (
|
|
|
25
26
|
raise_on_duplicate_storage,
|
|
26
27
|
raise_on_non_existing_storage,
|
|
27
28
|
)
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from ..file_storage_utils import update_metadata
|
|
31
|
-
from .base_resource_client import BaseResourceClient
|
|
29
|
+
from apify.consts import DEFAULT_API_PARAM_LIMIT, StorageTypes
|
|
30
|
+
from apify.log import logger
|
|
32
31
|
|
|
33
32
|
if TYPE_CHECKING:
|
|
34
33
|
from typing_extensions import NotRequired
|
|
35
34
|
|
|
36
|
-
from
|
|
35
|
+
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
class KeyValueStoreRecord(TypedDict):
|
|
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from apify_shared.utils import ignore_docs
|
|
6
6
|
|
|
7
|
-
from .base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
-
from .key_value_store import KeyValueStoreClient
|
|
7
|
+
from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
+
from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from apify_shared.models import ListPage
|
|
@@ -8,18 +8,17 @@ from decimal import Decimal
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
10
|
import aioshutil
|
|
11
|
-
from sortedcollections import ValueSortedDict
|
|
12
|
-
|
|
13
11
|
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps
|
|
12
|
+
from sortedcollections import ValueSortedDict
|
|
14
13
|
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from .
|
|
14
|
+
from apify._crypto import crypto_random_object_id
|
|
15
|
+
from apify._memory_storage.file_storage_utils import delete_request, update_metadata, update_request_queue_item
|
|
16
|
+
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
17
|
+
from apify._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage, unique_key_to_request_id
|
|
18
|
+
from apify.consts import StorageTypes
|
|
20
19
|
|
|
21
20
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
21
|
+
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
@ignore_docs
|
{apify-1.5.2b4 → apify-1.5.3}/src/apify/_memory_storage/resource_clients/request_queue_collection.py
RENAMED
|
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from apify_shared.utils import ignore_docs
|
|
6
6
|
|
|
7
|
-
from .base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
-
from .request_queue import RequestQueueClient
|
|
7
|
+
from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
+
from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from apify_shared.models import ListPage
|
|
@@ -35,7 +35,6 @@ import aioshutil
|
|
|
35
35
|
import psutil
|
|
36
36
|
from aiofiles import ospath
|
|
37
37
|
from aiofiles.os import remove, rename
|
|
38
|
-
|
|
39
38
|
from apify_shared.consts import (
|
|
40
39
|
BOOL_ENV_VARS,
|
|
41
40
|
BOOL_ENV_VARS_TYPE,
|
|
@@ -57,7 +56,7 @@ from apify_shared.utils import (
|
|
|
57
56
|
maybe_extract_enum_member_value,
|
|
58
57
|
)
|
|
59
58
|
|
|
60
|
-
from .consts import REQUEST_ID_LENGTH, StorageTypes
|
|
59
|
+
from apify.consts import REQUEST_ID_LENGTH, StorageTypes
|
|
61
60
|
|
|
62
61
|
T = TypeVar('T')
|
|
63
62
|
|
|
@@ -12,8 +12,8 @@ from apify_client import ApifyClientAsync
|
|
|
12
12
|
from apify_shared.consts import ActorEnvVars, ActorEventTypes, ActorExitCodes, ApifyEnvVars, WebhookEventType
|
|
13
13
|
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
|
|
14
14
|
|
|
15
|
-
from ._crypto import decrypt_input_secrets, load_private_key
|
|
16
|
-
from ._utils import (
|
|
15
|
+
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
16
|
+
from apify._utils import (
|
|
17
17
|
dualproperty,
|
|
18
18
|
fetch_and_parse_env_var,
|
|
19
19
|
get_cpu_usage_percent,
|
|
@@ -23,18 +23,18 @@ from ._utils import (
|
|
|
23
23
|
run_func_at_interval_async,
|
|
24
24
|
wrap_internal,
|
|
25
25
|
)
|
|
26
|
-
from .config import Configuration
|
|
27
|
-
from .consts import EVENT_LISTENERS_TIMEOUT_SECS
|
|
28
|
-
from .event_manager import EventManager
|
|
29
|
-
from .log import logger
|
|
30
|
-
from .proxy_configuration import ProxyConfiguration
|
|
31
|
-
from .storages import Dataset, KeyValueStore, RequestQueue, StorageClientManager
|
|
26
|
+
from apify.config import Configuration
|
|
27
|
+
from apify.consts import EVENT_LISTENERS_TIMEOUT_SECS
|
|
28
|
+
from apify.event_manager import EventManager
|
|
29
|
+
from apify.log import logger
|
|
30
|
+
from apify.proxy_configuration import ProxyConfiguration
|
|
31
|
+
from apify.storages import Dataset, KeyValueStore, RequestQueue, StorageClientManager
|
|
32
32
|
|
|
33
33
|
if TYPE_CHECKING:
|
|
34
34
|
import logging
|
|
35
35
|
from types import TracebackType
|
|
36
36
|
|
|
37
|
-
from ._memory_storage import MemoryStorageClient
|
|
37
|
+
from apify._memory_storage import MemoryStorageClient
|
|
38
38
|
|
|
39
39
|
T = TypeVar('T')
|
|
40
40
|
MainReturnType = TypeVar('MainReturnType')
|
|
@@ -8,16 +8,15 @@ from collections import defaultdict
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Callable, Coroutine, Union
|
|
9
9
|
|
|
10
10
|
import websockets.client
|
|
11
|
-
from pyee.asyncio import AsyncIOEventEmitter
|
|
12
|
-
|
|
13
11
|
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value, parse_date_fields
|
|
12
|
+
from pyee.asyncio import AsyncIOEventEmitter
|
|
14
13
|
|
|
15
|
-
from .log import logger
|
|
14
|
+
from apify.log import logger
|
|
16
15
|
|
|
17
16
|
if TYPE_CHECKING:
|
|
18
17
|
from apify_shared.consts import ActorEventTypes
|
|
19
18
|
|
|
20
|
-
from .config import Configuration
|
|
19
|
+
from apify.config import Configuration
|
|
21
20
|
|
|
22
21
|
ListenerType = Union[Callable[[], None], Callable[[Any], None], Callable[[], Coroutine[Any, Any, None]], Callable[[Any], Coroutine[Any, Any, None]]]
|
|
23
22
|
|
|
@@ -6,9 +6,8 @@ import textwrap
|
|
|
6
6
|
import traceback
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from colorama import Fore, Style, just_fix_windows_console
|
|
10
|
-
|
|
11
9
|
from apify_shared.utils import ignore_docs
|
|
10
|
+
from colorama import Fore, Style, just_fix_windows_console
|
|
12
11
|
|
|
13
12
|
just_fix_windows_console()
|
|
14
13
|
|
|
@@ -7,17 +7,15 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable, Pattern, TypedDict
|
|
|
7
7
|
from urllib.parse import urljoin, urlparse
|
|
8
8
|
|
|
9
9
|
import httpx
|
|
10
|
-
|
|
11
10
|
from apify_shared.consts import ApifyEnvVars
|
|
12
11
|
from apify_shared.utils import ignore_docs
|
|
13
12
|
|
|
14
|
-
from .config import Configuration
|
|
15
|
-
from .log import logger
|
|
13
|
+
from apify.config import Configuration
|
|
14
|
+
from apify.log import logger
|
|
16
15
|
|
|
17
16
|
if TYPE_CHECKING:
|
|
18
|
-
from typing_extensions import NotRequired
|
|
19
|
-
|
|
20
17
|
from apify_client import ApifyClientAsync
|
|
18
|
+
from typing_extensions import NotRequired
|
|
21
19
|
|
|
22
20
|
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
|
|
23
21
|
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
|
|
@@ -12,9 +12,9 @@ except ImportError as exc:
|
|
|
12
12
|
'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
|
|
13
13
|
) from exc
|
|
14
14
|
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
15
|
+
from apify.actor import Actor
|
|
16
|
+
from apify.proxy_configuration import ProxyConfiguration
|
|
17
|
+
from apify.scrapy.utils import get_basic_auth_header
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class ApifyHttpProxyMiddleware:
|
|
@@ -13,11 +13,11 @@ except ImportError as exc:
|
|
|
13
13
|
'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
|
|
14
14
|
) from exc
|
|
15
15
|
|
|
16
|
-
from
|
|
17
|
-
from
|
|
16
|
+
from apify.actor import Actor
|
|
17
|
+
from apify.scrapy.utils import nested_event_loop, open_queue_with_custom_client, to_apify_request
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
|
-
from
|
|
20
|
+
from apify.storages import RequestQueue
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class ApifyRetryMiddleware(RetryMiddleware):
|
|
@@ -12,10 +12,10 @@ except ImportError as exc:
|
|
|
12
12
|
'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
|
|
13
13
|
) from exc
|
|
14
14
|
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from .
|
|
15
|
+
from apify._crypto import crypto_random_object_id
|
|
16
|
+
from apify.actor import Actor
|
|
17
|
+
from apify.scrapy.utils import nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request
|
|
18
|
+
from apify.storages import RequestQueue
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class ApifyScheduler(BaseScheduler):
|
|
@@ -8,6 +8,8 @@ from urllib.parse import unquote
|
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
from scrapy import Request, Spider
|
|
11
|
+
from scrapy.settings import Settings # noqa: TCH002
|
|
12
|
+
from scrapy.utils.project import get_project_settings
|
|
11
13
|
from scrapy.utils.python import to_bytes
|
|
12
14
|
from scrapy.utils.request import request_from_dict
|
|
13
15
|
except ImportError as exc:
|
|
@@ -15,9 +17,9 @@ except ImportError as exc:
|
|
|
15
17
|
'To use this module, you need to install the "scrapy" extra. Run "pip install apify[scrapy]".',
|
|
16
18
|
) from exc
|
|
17
19
|
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
20
|
+
from apify._crypto import crypto_random_object_id
|
|
21
|
+
from apify.actor import Actor
|
|
22
|
+
from apify.storages import RequestQueue, StorageClientManager
|
|
21
23
|
|
|
22
24
|
nested_event_loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()
|
|
23
25
|
|
|
@@ -47,6 +49,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
|
|
|
47
49
|
scrapy_request: The Scrapy request to be converted.
|
|
48
50
|
spider: The Scrapy spider that the request is associated with.
|
|
49
51
|
|
|
52
|
+
Raises:
|
|
53
|
+
TypeError: If the scrapy_request is not an instance of the scrapy.Request class.
|
|
54
|
+
|
|
50
55
|
Returns:
|
|
51
56
|
The converted Apify request.
|
|
52
57
|
"""
|
|
@@ -88,6 +93,10 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
|
|
|
88
93
|
apify_request: The Apify request to be converted.
|
|
89
94
|
spider: The Scrapy spider that the request is associated with.
|
|
90
95
|
|
|
96
|
+
Raises:
|
|
97
|
+
TypeError: If the apify_request is not a dictionary.
|
|
98
|
+
ValueError: If the apify_request does not contain the required keys.
|
|
99
|
+
|
|
91
100
|
Returns:
|
|
92
101
|
The converted Scrapy request.
|
|
93
102
|
"""
|
|
@@ -98,7 +107,7 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
|
|
|
98
107
|
missing_keys = [key for key in required_keys if key not in apify_request]
|
|
99
108
|
|
|
100
109
|
if missing_keys:
|
|
101
|
-
raise ValueError(f
|
|
110
|
+
raise ValueError(f'apify_request must contain {", ".join(map(repr, missing_keys))} key(s)')
|
|
102
111
|
|
|
103
112
|
call_id = crypto_random_object_id(8)
|
|
104
113
|
Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')
|
|
@@ -146,6 +155,45 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
|
|
|
146
155
|
return scrapy_request
|
|
147
156
|
|
|
148
157
|
|
|
158
|
+
def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict | None = None) -> Settings:
|
|
159
|
+
"""Integrates Apify configuration into a Scrapy project settings.
|
|
160
|
+
|
|
161
|
+
Note: The function directly modifies the passed `settings` object and also returns it.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
settings: Scrapy project settings to be modified.
|
|
165
|
+
proxy_config: Proxy configuration to be stored in the settings.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
Scrapy project settings with custom configurations.
|
|
169
|
+
"""
|
|
170
|
+
if settings is None:
|
|
171
|
+
settings = get_project_settings()
|
|
172
|
+
|
|
173
|
+
# Use ApifyScheduler as the scheduler
|
|
174
|
+
settings['SCHEDULER'] = 'apify.scrapy.scheduler.ApifyScheduler'
|
|
175
|
+
|
|
176
|
+
# Add the ActorDatasetPushPipeline into the item pipelines, assigning it the highest integer (1000),
|
|
177
|
+
# ensuring it is executed as the final step in the pipeline sequence
|
|
178
|
+
settings['ITEM_PIPELINES']['apify.scrapy.pipelines.ActorDatasetPushPipeline'] = 1000
|
|
179
|
+
|
|
180
|
+
# Disable the default RobotsTxtMiddleware, Apify's custom scheduler already handles robots.txt
|
|
181
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware'] = None
|
|
182
|
+
|
|
183
|
+
# Disable the default HttpProxyMiddleware and add ApifyHttpProxyMiddleware
|
|
184
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
|
|
185
|
+
settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 950
|
|
186
|
+
|
|
187
|
+
# Disable the default RetryMiddleware and add ApifyRetryMiddleware with the highest integer (1000)
|
|
188
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.retry.RetryMiddleware'] = None
|
|
189
|
+
settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyRetryMiddleware'] = 1000
|
|
190
|
+
|
|
191
|
+
# Store the proxy configuration
|
|
192
|
+
settings['APIFY_PROXY_SETTINGS'] = proxy_config
|
|
193
|
+
|
|
194
|
+
return settings
|
|
195
|
+
|
|
196
|
+
|
|
149
197
|
async def open_queue_with_custom_client() -> RequestQueue:
|
|
150
198
|
"""Open a Request Queue with custom Apify Client.
|
|
151
199
|
|
|
@@ -6,10 +6,10 @@ from typing import TYPE_CHECKING, Generic, TypeVar, cast
|
|
|
6
6
|
|
|
7
7
|
from apify_shared.utils import ignore_docs
|
|
8
8
|
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from .storage_client_manager import StorageClientManager
|
|
9
|
+
from apify._memory_storage import MemoryStorageClient
|
|
10
|
+
from apify._memory_storage.resource_clients import BaseResourceClient, BaseResourceCollectionClient
|
|
11
|
+
from apify.config import Configuration
|
|
12
|
+
from apify.storages.storage_client_manager import StorageClientManager
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from apify_client import ApifyClientAsync
|
|
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, AsyncIterator, Iterable, Iterator
|
|
|
7
7
|
|
|
8
8
|
from apify_shared.utils import ignore_docs, json_dumps
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from .base_storage import BaseStorage
|
|
13
|
-
from .key_value_store import KeyValueStore
|
|
10
|
+
from apify._utils import wrap_internal
|
|
11
|
+
from apify.consts import MAX_PAYLOAD_SIZE_BYTES
|
|
12
|
+
from apify.storages.base_storage import BaseStorage
|
|
13
|
+
from apify.storages.key_value_store import KeyValueStore
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from apify_client import ApifyClientAsync
|
|
@@ -18,9 +18,9 @@ if TYPE_CHECKING:
|
|
|
18
18
|
from apify_shared.models import ListPage
|
|
19
19
|
from apify_shared.types import JSONSerializable
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
21
|
+
from apify._memory_storage import MemoryStorageClient
|
|
22
|
+
from apify._memory_storage.resource_clients import DatasetClient, DatasetCollectionClient
|
|
23
|
+
from apify.config import Configuration
|
|
24
24
|
|
|
25
25
|
# 0.01%
|
|
26
26
|
SAFETY_BUFFER_PERCENT = 0.01 / 100
|
|
@@ -5,15 +5,15 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, NamedTuple, TypedDict, Typ
|
|
|
5
5
|
from apify_client.clients import KeyValueStoreClientAsync, KeyValueStoreCollectionClientAsync
|
|
6
6
|
from apify_shared.utils import ignore_docs
|
|
7
7
|
|
|
8
|
-
from
|
|
9
|
-
from .base_storage import BaseStorage
|
|
8
|
+
from apify._utils import wrap_internal
|
|
9
|
+
from apify.storages.base_storage import BaseStorage
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
from apify_client import ApifyClientAsync
|
|
13
13
|
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
14
|
+
from apify._memory_storage import MemoryStorageClient
|
|
15
|
+
from apify._memory_storage.resource_clients import KeyValueStoreClient, KeyValueStoreCollectionClient
|
|
16
|
+
from apify.config import Configuration
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
T = TypeVar('T')
|
|
@@ -8,19 +8,19 @@ from typing import OrderedDict as OrderedDictType
|
|
|
8
8
|
|
|
9
9
|
from apify_shared.utils import ignore_docs
|
|
10
10
|
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from .base_storage import BaseStorage
|
|
11
|
+
from apify._crypto import crypto_random_object_id
|
|
12
|
+
from apify._utils import LRUCache, budget_ow, unique_key_to_request_id
|
|
13
|
+
from apify.consts import REQUEST_QUEUE_HEAD_MAX_LIMIT
|
|
14
|
+
from apify.log import logger
|
|
15
|
+
from apify.storages.base_storage import BaseStorage
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
from apify_client import ApifyClientAsync
|
|
19
19
|
from apify_client.clients import RequestQueueClientAsync, RequestQueueCollectionClientAsync
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
21
|
+
from apify._memory_storage import MemoryStorageClient
|
|
22
|
+
from apify._memory_storage.resource_clients import RequestQueueClient, RequestQueueCollectionClient
|
|
23
|
+
from apify.config import Configuration
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
MAX_CACHED_REQUESTS = 1_000_000
|
|
@@ -4,8 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from apify_shared.utils import ignore_docs
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
7
|
+
from apify._memory_storage import MemoryStorageClient
|
|
8
|
+
from apify.config import Configuration
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from apify_client import ApifyClientAsync
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 1.5.2b4
|
|
3
|
+
Version: 1.5.3
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Author-email: "Apify Technologies s.r.o." <support@apify.com>
|
|
6
6
|
License: Apache Software License
|
|
@@ -24,8 +24,8 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
24
24
|
Requires-Python: >=3.8
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
|
-
Requires-Dist: apify-client~=1.6.
|
|
28
|
-
Requires-Dist: apify-shared~=1.1.
|
|
27
|
+
Requires-Dist: apify-client~=1.6.2
|
|
28
|
+
Requires-Dist: apify-shared~=1.1.1
|
|
29
29
|
Requires-Dist: aiofiles>=22.1.0
|
|
30
30
|
Requires-Dist: aioshutil>=1.0
|
|
31
31
|
Requires-Dist: colorama>=0.4.6
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|