apify 3.4.2b26__tar.gz → 3.4.2b28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apify-3.4.2b26 → apify-3.4.2b28}/CHANGELOG.md +2 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/PKG-INFO +1 -1
- {apify-3.4.2b26 → apify-3.4.2b28}/pyproject.toml +1 -1
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_actor.py +8 -6
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_charging.py +4 -1
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_proxy_configuration.py +3 -1
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_utils.py +1 -0
- apify-3.4.2b28/src/apify/errors.py +34 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/events/_apify_event_manager.py +4 -2
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/middlewares/apify_proxy.py +0 -7
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -4
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/requests.py +0 -12
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/scheduler.py +1 -8
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_alias_resolving.py +1 -2
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_file_system/_key_value_store_client.py +1 -1
- {apify-3.4.2b26 → apify-3.4.2b28}/.gitignore +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/CONTRIBUTING.md +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/LICENSE +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/README.md +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_configuration.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_consts.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_crypto.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/_webhook.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/events/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/events/_types.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/events/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/log.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/_serialization.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/scrapy/utils.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_api_client_creation.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_models.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_request_queue_single_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_utils.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_file_system/_dataset_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storages/__init__.py +0 -0
- {apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storages/py.typed +0 -0
|
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
|
|
|
8
8
|
### 🚀 Features
|
|
9
9
|
|
|
10
10
|
- Expose missing platform env vars via Actor.get_env() ([#984](https://github.com/apify/apify-sdk-python/pull/984)) ([f6e9b3b](https://github.com/apify/apify-sdk-python/commit/f6e9b3b328768aefb51257a258c4b6f03980fc72)) by [@vdusek](https://github.com/vdusek)
|
|
11
|
+
- Re-export apify-client errors from apify.errors ([#990](https://github.com/apify/apify-sdk-python/pull/990)) ([165a3f6](https://github.com/apify/apify-sdk-python/commit/165a3f698dac6ace7394fc6b9f15ab7bb3a48050)) by [@vdusek](https://github.com/vdusek), closes [#988](https://github.com/apify/apify-sdk-python/issues/988)
|
|
11
12
|
|
|
12
13
|
### 🐛 Bug Fixes
|
|
13
14
|
|
|
@@ -37,6 +38,7 @@ All notable changes to this project will be documented in this file.
|
|
|
37
38
|
- [**breaking**] Mark secondary arguments as keyword-only ([#917](https://github.com/apify/apify-sdk-python/pull/917)) ([eb94c99](https://github.com/apify/apify-sdk-python/commit/eb94c992ec4aba1cd7cf4dfd7a98731cb304651b)) by [@vdusek](https://github.com/vdusek), closes [#881](https://github.com/apify/apify-sdk-python/issues/881)
|
|
38
39
|
- [**breaking**] Adapt to apify-client v3 ([#719](https://github.com/apify/apify-sdk-python/pull/719)) ([10203bc](https://github.com/apify/apify-sdk-python/commit/10203bc51e67590c97938b37d81614376bc3d29a)) by [@vdusek](https://github.com/vdusek), closes [#697](https://github.com/apify/apify-sdk-python/issues/697), [#736](https://github.com/apify/apify-sdk-python/issues/736), [#770](https://github.com/apify/apify-sdk-python/issues/770), [#853](https://github.com/apify/apify-sdk-python/issues/853)
|
|
39
40
|
- [**breaking**] Remove wait_for_finish from Actor.start ([#962](https://github.com/apify/apify-sdk-python/pull/962)) ([0daca28](https://github.com/apify/apify-sdk-python/commit/0daca2818860cbcbd0778efa4d73622783ce63cf)) by [@vdusek](https://github.com/vdusek)
|
|
41
|
+
- [**breaking**] Polish logging across the codebase ([#1005](https://github.com/apify/apify-sdk-python/pull/1005)) ([f949c24](https://github.com/apify/apify-sdk-python/commit/f949c24529e424b9151c8600f8057d7518c9b6ff)) by [@vdusek](https://github.com/vdusek)
|
|
40
42
|
|
|
41
43
|
### ⚙️ Miscellaneous Tasks
|
|
42
44
|
|
|
@@ -1162,7 +1162,7 @@ class _ActorType:
|
|
|
1162
1162
|
custom_after_sleep: How long to sleep for after the metamorph, to wait for the container to be stopped.
|
|
1163
1163
|
"""
|
|
1164
1164
|
if not self.is_at_home():
|
|
1165
|
-
self.log.
|
|
1165
|
+
self.log.warning('Actor.metamorph() is only supported when running on the Apify platform.')
|
|
1166
1166
|
return
|
|
1167
1167
|
|
|
1168
1168
|
if custom_after_sleep is None:
|
|
@@ -1172,6 +1172,7 @@ class _ActorType:
|
|
|
1172
1172
|
if not self.configuration.actor_run_id:
|
|
1173
1173
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1174
1174
|
|
|
1175
|
+
self.log.debug(f'Metamorphing the Actor run into target Actor {target_actor_id!r}.')
|
|
1175
1176
|
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
|
|
1176
1177
|
target_actor_id=target_actor_id,
|
|
1177
1178
|
run_input=run_input,
|
|
@@ -1198,7 +1199,7 @@ class _ActorType:
|
|
|
1198
1199
|
custom_after_sleep: How long to sleep for after the reboot, to wait for the container to be stopped.
|
|
1199
1200
|
"""
|
|
1200
1201
|
if not self.is_at_home():
|
|
1201
|
-
self.log.
|
|
1202
|
+
self.log.warning('Actor.reboot() is only supported when running on the Apify platform.')
|
|
1202
1203
|
return
|
|
1203
1204
|
|
|
1204
1205
|
if self._is_rebooting:
|
|
@@ -1243,6 +1244,7 @@ class _ActorType:
|
|
|
1243
1244
|
except TimeoutError:
|
|
1244
1245
|
self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot')
|
|
1245
1246
|
|
|
1247
|
+
self.log.debug('Rebooting the Actor run.')
|
|
1246
1248
|
await self.apify_client.run(self.configuration.actor_run_id).reboot()
|
|
1247
1249
|
except BaseException:
|
|
1248
1250
|
# Reset the flag so that a failed or cancelled reboot can be retried.
|
|
@@ -1277,7 +1279,7 @@ class _ActorType:
|
|
|
1277
1279
|
)
|
|
1278
1280
|
|
|
1279
1281
|
if not self.is_at_home():
|
|
1280
|
-
self.log.
|
|
1282
|
+
self.log.warning('Actor.add_webhook() is only supported when running on the Apify platform.')
|
|
1281
1283
|
return
|
|
1282
1284
|
|
|
1283
1285
|
# If is_at_home() is True, config.actor_run_id is always set
|
|
@@ -1454,9 +1456,9 @@ class _ActorType:
|
|
|
1454
1456
|
return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0))
|
|
1455
1457
|
|
|
1456
1458
|
self.log.warning(
|
|
1457
|
-
'Using `inherit` argument is only possible when the Actor'
|
|
1458
|
-
'
|
|
1459
|
-
|
|
1459
|
+
'Using the `inherit` argument is only possible when the Actor is running on the Apify platform and '
|
|
1460
|
+
'the timeout for the Actor run is set.',
|
|
1461
|
+
extra={'is_at_home': self.is_at_home(), 'timeout_at': self.configuration.timeout_at},
|
|
1460
1462
|
)
|
|
1461
1463
|
return None
|
|
1462
1464
|
|
|
@@ -5,6 +5,7 @@ from contextvars import ContextVar
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import UTC, datetime
|
|
7
7
|
from decimal import Decimal
|
|
8
|
+
from logging import getLogger
|
|
8
9
|
from typing import TYPE_CHECKING, Literal, Protocol, TypedDict
|
|
9
10
|
|
|
10
11
|
from pydantic import ConfigDict
|
|
@@ -19,7 +20,6 @@ from apify_client._models import PricePerDatasetItemActorPricingInfo as ClientPr
|
|
|
19
20
|
from apify_client._models import PricingPerEvent as ClientPricingPerEvent
|
|
20
21
|
|
|
21
22
|
from apify._utils import ReentrantLock, docs_group, ensure_context
|
|
22
|
-
from apify.log import logger
|
|
23
23
|
from apify.storages import Dataset
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
@@ -29,6 +29,8 @@ if TYPE_CHECKING:
|
|
|
29
29
|
|
|
30
30
|
from apify._configuration import Configuration
|
|
31
31
|
|
|
32
|
+
logger = getLogger(__name__)
|
|
33
|
+
|
|
32
34
|
charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
|
|
33
35
|
"""Holds the current `ChargingManager` instance, if any.
|
|
34
36
|
|
|
@@ -450,6 +452,7 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
450
452
|
pass
|
|
451
453
|
elif event_name in self._pricing_info:
|
|
452
454
|
await self._client.run(self._actor_run_id).charge(event_name, count=charged_count)
|
|
455
|
+
logger.debug(f"Charged {charged_count} occurrence(s) of event '{event_name}'.")
|
|
453
456
|
elif event_name in self._tier_priced_events:
|
|
454
457
|
logger.warning(
|
|
455
458
|
f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API."
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
import re
|
|
5
5
|
from dataclasses import dataclass, field
|
|
6
|
+
from logging import getLogger
|
|
6
7
|
from re import Pattern
|
|
7
8
|
from typing import TYPE_CHECKING, Any
|
|
8
9
|
|
|
@@ -22,13 +23,14 @@ from crawlee.proxy_configuration import (
|
|
|
22
23
|
from apify._configuration import Configuration
|
|
23
24
|
from apify._consts import ApifyEnvVars
|
|
24
25
|
from apify._utils import docs_group
|
|
25
|
-
from apify.log import logger
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
28
|
from apify_client import ApifyClientAsync
|
|
29
29
|
|
|
30
30
|
from apify import Request
|
|
31
31
|
|
|
32
|
+
logger = getLogger(__name__)
|
|
33
|
+
|
|
32
34
|
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
|
|
33
35
|
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
|
|
34
36
|
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""`apify.errors` re-exports the Apify API client's error hierarchy.
|
|
2
|
+
|
|
3
|
+
Callers get a single import location for every error raised by an operation that talks to the Apify API. The SDK
|
|
4
|
+
raises these client exceptions as-is and does not wrap them in its own types. See
|
|
5
|
+
https://docs.apify.com/api/client/python for the full client error reference.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from apify_client.errors import (
|
|
11
|
+
ApifyApiError,
|
|
12
|
+
ApifyClientError,
|
|
13
|
+
ConflictError,
|
|
14
|
+
ForbiddenError,
|
|
15
|
+
InvalidRequestError,
|
|
16
|
+
InvalidResponseBodyError,
|
|
17
|
+
NotFoundError,
|
|
18
|
+
RateLimitError,
|
|
19
|
+
ServerError,
|
|
20
|
+
UnauthorizedError,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
'ApifyApiError',
|
|
25
|
+
'ApifyClientError',
|
|
26
|
+
'ConflictError',
|
|
27
|
+
'ForbiddenError',
|
|
28
|
+
'InvalidRequestError',
|
|
29
|
+
'InvalidResponseBodyError',
|
|
30
|
+
'NotFoundError',
|
|
31
|
+
'RateLimitError',
|
|
32
|
+
'ServerError',
|
|
33
|
+
'UnauthorizedError',
|
|
34
|
+
]
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import contextlib
|
|
5
5
|
import time
|
|
6
|
+
from logging import getLogger
|
|
6
7
|
from typing import TYPE_CHECKING, Annotated, Self
|
|
7
8
|
|
|
8
9
|
import websockets.asyncio.client
|
|
@@ -17,7 +18,6 @@ from crawlee.events._types import Event, EventPersistStateData
|
|
|
17
18
|
|
|
18
19
|
from apify._utils import docs_group
|
|
19
20
|
from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
|
|
20
|
-
from apify.log import logger
|
|
21
21
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
23
|
from collections.abc import Generator
|
|
@@ -28,6 +28,8 @@ if TYPE_CHECKING:
|
|
|
28
28
|
from apify._configuration import Configuration
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
logger = getLogger(__name__)
|
|
32
|
+
|
|
31
33
|
event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
|
|
32
34
|
Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
|
|
33
35
|
)
|
|
@@ -195,7 +197,7 @@ class ApifyEventManager(EventManager):
|
|
|
195
197
|
return
|
|
196
198
|
|
|
197
199
|
if isinstance(parsed_message, UnknownEvent):
|
|
198
|
-
logger.
|
|
200
|
+
logger.debug(
|
|
199
201
|
f'Unknown message received: event_name={parsed_message.name}, event_data={parsed_message.data}'
|
|
200
202
|
)
|
|
201
203
|
return
|
|
@@ -72,7 +72,6 @@ class ApifyHttpProxyMiddleware:
|
|
|
72
72
|
Raises:
|
|
73
73
|
ValueError: If username and password are not provided in the proxy URL.
|
|
74
74
|
"""
|
|
75
|
-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}')
|
|
76
75
|
url = await self._get_new_proxy_url()
|
|
77
76
|
|
|
78
77
|
if not (url.username and url.password):
|
|
@@ -82,8 +81,6 @@ class ApifyHttpProxyMiddleware:
|
|
|
82
81
|
basic_auth_header = get_basic_auth_header(url.username, url.password)
|
|
83
82
|
request.headers[b'Proxy-Authorization'] = basic_auth_header
|
|
84
83
|
|
|
85
|
-
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')
|
|
86
|
-
|
|
87
84
|
def process_exception(
|
|
88
85
|
self,
|
|
89
86
|
request: Request,
|
|
@@ -100,10 +97,6 @@ class ApifyHttpProxyMiddleware:
|
|
|
100
97
|
process_exception() methods of installed middleware, until no middleware is left and the default
|
|
101
98
|
exception handling kicks in.
|
|
102
99
|
"""
|
|
103
|
-
Actor.log.debug(
|
|
104
|
-
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}',
|
|
105
|
-
)
|
|
106
|
-
|
|
107
100
|
if isinstance(exception, TunnelError):
|
|
108
101
|
Actor.log.warning(
|
|
109
102
|
f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from logging import getLogger
|
|
4
3
|
from typing import TYPE_CHECKING
|
|
5
4
|
|
|
6
5
|
from itemadapter.adapter import ItemAdapter
|
|
@@ -10,8 +9,6 @@ from apify import Actor
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
11
10
|
from scrapy import Item
|
|
12
11
|
|
|
13
|
-
logger = getLogger(__name__)
|
|
14
|
-
|
|
15
12
|
|
|
16
13
|
class ActorDatasetPushPipeline:
|
|
17
14
|
"""A Scrapy pipeline for pushing items to an Actor's default dataset.
|
|
@@ -25,6 +22,5 @@ class ActorDatasetPushPipeline:
|
|
|
25
22
|
) -> Item:
|
|
26
23
|
"""Pushes the provided Scrapy item to the Actor's default dataset."""
|
|
27
24
|
item_dict = ItemAdapter(item).asdict()
|
|
28
|
-
logger.debug(f'Pushing item={item_dict} to the dataset.')
|
|
29
25
|
await Actor.push_data(item_dict)
|
|
30
26
|
return item
|
|
@@ -61,8 +61,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
|
|
|
61
61
|
logger.warning('Failed to convert to Apify request: Scrapy request must be a ScrapyRequest instance.')
|
|
62
62
|
return None
|
|
63
63
|
|
|
64
|
-
logger.debug(f'to_apify_request was called (scrapy_request={scrapy_request})...')
|
|
65
|
-
|
|
66
64
|
# Configuration to behave as similarly as possible to Scrapy's default RFPDupeFilter.
|
|
67
65
|
#
|
|
68
66
|
# The body is stored twice on purpose: as `payload` (used for the extended unique key) and inside the serialized
|
|
@@ -150,7 +148,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
|
|
|
150
148
|
# a second base64 layer would only add ~33% overhead on the enqueue path.
|
|
151
149
|
apify_request.user_data['scrapy_request'] = scrapy_request_json
|
|
152
150
|
|
|
153
|
-
logger.debug(f'scrapy_request was converted to the apify_request={apify_request}')
|
|
154
151
|
return apify_request
|
|
155
152
|
|
|
156
153
|
|
|
@@ -171,14 +168,10 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
|
|
|
171
168
|
if not isinstance(cast('Any', apify_request), ApifyRequest):
|
|
172
169
|
raise TypeError('apify_request must be an apify.Request instance')
|
|
173
170
|
|
|
174
|
-
logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...')
|
|
175
|
-
|
|
176
171
|
# If the apify_request comes from the Scrapy
|
|
177
172
|
if 'scrapy_request' in apify_request.user_data:
|
|
178
173
|
# Deserialize the Scrapy ScrapyRequest from the apify_request by parsing the stored JSON and reconstructing
|
|
179
174
|
# the Scrapy ScrapyRequest object from its dictionary representation.
|
|
180
|
-
logger.debug('Restoring the Scrapy ScrapyRequest from the apify_request...')
|
|
181
|
-
|
|
182
175
|
scrapy_request_json = apify_request.user_data['scrapy_request']
|
|
183
176
|
if not isinstance(scrapy_request_json, str):
|
|
184
177
|
raise TypeError('the stored scrapy_request must be a string')
|
|
@@ -194,8 +187,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
|
|
|
194
187
|
if not isinstance(scrapy_request, ScrapyRequest):
|
|
195
188
|
raise TypeError('scrapy_request must be an instance of the ScrapyRequest class')
|
|
196
189
|
|
|
197
|
-
logger.debug(f'Scrapy ScrapyRequest successfully reconstructed (scrapy_request={scrapy_request})...')
|
|
198
|
-
|
|
199
190
|
# Update the meta field with the meta field from the apify_request
|
|
200
191
|
meta = scrapy_request.meta or {}
|
|
201
192
|
meta.update({'apify_request_unique_key': apify_request.unique_key})
|
|
@@ -204,8 +195,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
|
|
|
204
195
|
|
|
205
196
|
# If the apify_request comes directly from the Scrapy, typically start URLs.
|
|
206
197
|
else:
|
|
207
|
-
logger.debug('Gonna create a new Scrapy ScrapyRequest (cannot be restored)')
|
|
208
|
-
|
|
209
198
|
scrapy_request = ScrapyRequest(
|
|
210
199
|
url=apify_request.url,
|
|
211
200
|
method=apify_request.method,
|
|
@@ -222,5 +211,4 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
|
|
|
222
211
|
if apify_request.user_data:
|
|
223
212
|
scrapy_request.meta['userData'] = apify_request.user_data
|
|
224
213
|
|
|
225
|
-
logger.debug(f'an apify_request was converted to the scrapy_request={scrapy_request}')
|
|
226
214
|
return scrapy_request
|
|
@@ -134,17 +134,14 @@ class ApifyScheduler(BaseScheduler):
|
|
|
134
134
|
Returns:
|
|
135
135
|
True if the request was successfully enqueued, False otherwise.
|
|
136
136
|
"""
|
|
137
|
-
logger.debug(f'ApifyScheduler.enqueue_request was called (scrapy_request={request})...')
|
|
138
|
-
|
|
139
137
|
if not isinstance(self.spider, Spider):
|
|
140
138
|
raise TypeError('self.spider must be an instance of the Spider class')
|
|
141
139
|
|
|
142
140
|
apify_request = to_apify_request(request, spider=self.spider)
|
|
143
141
|
if apify_request is None:
|
|
144
|
-
logger.
|
|
142
|
+
logger.warning(f'Request {request} could not be converted to Apify request; skipping it.')
|
|
145
143
|
return False
|
|
146
144
|
|
|
147
|
-
logger.debug(f'Converted to apify_request: {apify_request}')
|
|
148
145
|
if not isinstance(self._rq, RequestQueue):
|
|
149
146
|
raise TypeError('self._rq must be an instance of the RequestQueue class')
|
|
150
147
|
|
|
@@ -156,7 +153,6 @@ class ApifyScheduler(BaseScheduler):
|
|
|
156
153
|
logger.exception('Failed to enqueue the request to the request queue.')
|
|
157
154
|
raise
|
|
158
155
|
|
|
159
|
-
logger.debug(f'rq.add_request result: {result}')
|
|
160
156
|
return not bool(result.was_already_present)
|
|
161
157
|
|
|
162
158
|
def next_request(self) -> Request | None:
|
|
@@ -165,7 +161,6 @@ class ApifyScheduler(BaseScheduler):
|
|
|
165
161
|
Returns:
|
|
166
162
|
The next request, or None if there are no more requests.
|
|
167
163
|
"""
|
|
168
|
-
logger.debug('next_request called...')
|
|
169
164
|
if not isinstance(self._rq, RequestQueue):
|
|
170
165
|
raise TypeError('self._rq must be an instance of the RequestQueue class')
|
|
171
166
|
|
|
@@ -177,7 +172,6 @@ class ApifyScheduler(BaseScheduler):
|
|
|
177
172
|
logger.exception('Failed to fetch the next request from the request queue.')
|
|
178
173
|
raise
|
|
179
174
|
|
|
180
|
-
logger.debug(f'Fetched apify_request: {apify_request}')
|
|
181
175
|
if apify_request is None:
|
|
182
176
|
return None
|
|
183
177
|
|
|
@@ -206,5 +200,4 @@ class ApifyScheduler(BaseScheduler):
|
|
|
206
200
|
if scrapy_request is None:
|
|
207
201
|
return None
|
|
208
202
|
|
|
209
|
-
logger.debug(f'Converted to scrapy_request: {scrapy_request}')
|
|
210
203
|
return scrapy_request
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import logging
|
|
4
3
|
from asyncio import Lock
|
|
5
4
|
from functools import cached_property
|
|
6
5
|
from logging import getLogger
|
|
@@ -225,7 +224,7 @@ class AliasResolver:
|
|
|
225
224
|
alias_map[self._storage_key] = storage_id
|
|
226
225
|
|
|
227
226
|
if not self._configuration.is_at_home:
|
|
228
|
-
|
|
227
|
+
logger.debug(
|
|
229
228
|
'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
|
|
230
229
|
)
|
|
231
230
|
return
|
{apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_file_system/_key_value_store_client.py
RENAMED
|
@@ -122,7 +122,7 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
|
122
122
|
try:
|
|
123
123
|
content = await asyncio.to_thread(record_path.read_bytes)
|
|
124
124
|
except FileNotFoundError:
|
|
125
|
-
logger.warning(f'Input file
|
|
125
|
+
logger.warning(f'Input file disappeared from path: "{record_path}"')
|
|
126
126
|
return
|
|
127
127
|
|
|
128
128
|
# Figure out the metadata from the file content
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_key_value_store_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_request_queue_shared_client.py
RENAMED
|
File without changes
|
{apify-3.4.2b26 → apify-3.4.2b28}/src/apify/storage_clients/_apify/_request_queue_single_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|