apify 3.4.2b4__tar.gz → 3.4.2b5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apify-3.4.2b4 → apify-3.4.2b5}/CHANGELOG.md +5 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/PKG-INFO +1 -1
- {apify-3.4.2b4 → apify-3.4.2b5}/pyproject.toml +1 -1
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_charging.py +70 -24
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/_types.py +57 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_async_thread.py +14 -7
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/apify_proxy.py +2 -3
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/scheduler.py +8 -1
- {apify-3.4.2b4 → apify-3.4.2b5}/.gitignore +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/CONTRIBUTING.md +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/LICENSE +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/README.md +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_actor.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_configuration.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_consts.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_crypto.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_utils.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/_webhook.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/events/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/log.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/requests.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/scrapy/utils.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_alias_resolving.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_api_client_creation.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_models.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_single_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_utils.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_dataset_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storages/__init__.py +0 -0
- {apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storages/py.typed +0 -0
|
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
6
|
## 3.4.2 - **not yet released**
|
|
7
7
|
|
|
8
|
+
### 🐛 Bug Fixes
|
|
9
|
+
|
|
10
|
+
- **scrapy:** Correct proxy middleware exception log and import ([#953](https://github.com/apify/apify-sdk-python/pull/953)) ([5bd6eb9](https://github.com/apify/apify-sdk-python/commit/5bd6eb9843d90844cec083372e932413bceedec9)) by [@vdusek](https://github.com/vdusek)
|
|
11
|
+
- **scrapy:** Skip a request that fails to convert instead of crashing the run ([#952](https://github.com/apify/apify-sdk-python/pull/952)) ([db9444f](https://github.com/apify/apify-sdk-python/commit/db9444faeb0158c29aa394121cf733ff2e843f28)) by [@vdusek](https://github.com/vdusek)
|
|
12
|
+
|
|
8
13
|
### 🚜 Refactor
|
|
9
14
|
|
|
10
15
|
- [**breaking**] Remove deprecated APIs ([#918](https://github.com/apify/apify-sdk-python/pull/918)) ([3e5728d](https://github.com/apify/apify-sdk-python/commit/3e5728d94cb8fd879d5a76e33a03d55792d835d5)) by [@vdusek](https://github.com/vdusek), closes [#635](https://github.com/apify/apify-sdk-python/issues/635)
|
|
@@ -7,7 +7,7 @@ from datetime import UTC, datetime
|
|
|
7
7
|
from decimal import Decimal
|
|
8
8
|
from typing import TYPE_CHECKING, Annotated, Literal, Protocol, TypedDict
|
|
9
9
|
|
|
10
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
10
|
+
from pydantic import Field
|
|
11
11
|
|
|
12
12
|
import apify_client._models as _client_models
|
|
13
13
|
from apify_client._models import ActorChargeEvent as ClientActorChargeEvent
|
|
@@ -28,14 +28,17 @@ if TYPE_CHECKING:
|
|
|
28
28
|
|
|
29
29
|
from apify._configuration import Configuration
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
"""
|
|
31
|
+
charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
|
|
32
|
+
"""Holds the current `ChargingManager` instance, if any.
|
|
33
|
+
|
|
34
|
+
Allows PPE-aware dataset clients to access the charging manager without needing to pass it explicitly.
|
|
35
|
+
"""
|
|
33
36
|
|
|
34
37
|
DEFAULT_DATASET_ITEM_EVENT = 'apify-default-dataset-item'
|
|
38
|
+
"""Name of the synthetic event charged for each item pushed to the default dataset."""
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
|
|
40
|
+
PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE']
|
|
41
|
+
"""Pricing model for an Actor."""
|
|
39
42
|
|
|
40
43
|
_ensure_context = ensure_context('active')
|
|
41
44
|
|
|
@@ -49,48 +52,91 @@ _ensure_context = ensure_context('active')
|
|
|
49
52
|
# `apify-client` instance) flows through the same code paths without conversion.
|
|
50
53
|
|
|
51
54
|
|
|
52
|
-
class _RelaxedPricingMetadata(BaseModel):
|
|
53
|
-
"""Mixin relaxing the `CommonActorPricingInfo` metadata fields the platform env var omits."""
|
|
54
|
-
|
|
55
|
-
model_config = ConfigDict(populate_by_name=True, extra='allow')
|
|
56
|
-
|
|
57
|
-
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
58
|
-
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
59
|
-
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
60
|
-
|
|
61
|
-
|
|
62
55
|
@docs_group('Charging')
|
|
63
56
|
class ActorChargeEvent(ClientActorChargeEvent):
|
|
64
|
-
|
|
57
|
+
"""Definition of a single chargeable event in the pay-per-event pricing model."""
|
|
58
|
+
|
|
65
59
|
event_description: Annotated[str | None, Field(alias='eventDescription')] = None
|
|
60
|
+
"""Human-readable description of the event.
|
|
61
|
+
|
|
62
|
+
Required in apify-client but omitted from the env var, so it is relaxed to optional.
|
|
63
|
+
"""
|
|
66
64
|
|
|
67
65
|
|
|
68
66
|
@docs_group('Charging')
|
|
69
67
|
class PricingPerEvent(ClientPricingPerEvent):
|
|
68
|
+
"""Pay-per-event pricing details - the chargeable events and their prices."""
|
|
69
|
+
|
|
70
70
|
actor_charge_events: Annotated[dict[str, ActorChargeEvent] | None, Field(alias='actorChargeEvents')] = None
|
|
71
|
+
"""Mapping of event name to its charge definition."""
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
@docs_group('Charging')
|
|
74
|
-
class FreeActorPricingInfo(
|
|
75
|
-
|
|
75
|
+
class FreeActorPricingInfo(ClientFree):
|
|
76
|
+
"""Pricing info for an Actor offered free of charge."""
|
|
77
|
+
|
|
78
|
+
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
79
|
+
"""Apify's margin on the price, as a percentage."""
|
|
80
|
+
|
|
81
|
+
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
82
|
+
"""Timestamp when this pricing info was created."""
|
|
83
|
+
|
|
84
|
+
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
85
|
+
"""Timestamp when this pricing became effective."""
|
|
76
86
|
|
|
77
87
|
|
|
78
88
|
@docs_group('Charging')
|
|
79
|
-
class FlatPricePerMonthActorPricingInfo(
|
|
89
|
+
class FlatPricePerMonthActorPricingInfo(ClientFlatPricePerMonth):
|
|
90
|
+
"""Pricing info for an Actor billed at a flat monthly price."""
|
|
91
|
+
|
|
92
|
+
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
93
|
+
"""Apify's margin on the price, as a percentage."""
|
|
94
|
+
|
|
95
|
+
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
96
|
+
"""Timestamp when this pricing info was created."""
|
|
97
|
+
|
|
98
|
+
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
99
|
+
"""Timestamp when this pricing became effective."""
|
|
100
|
+
|
|
80
101
|
trial_minutes: Annotated[int | None, Field(alias='trialMinutes')] = None
|
|
102
|
+
"""Length of the free trial period, in minutes."""
|
|
103
|
+
|
|
81
104
|
price_per_unit_usd: Annotated[float | None, Field(alias='pricePerUnitUsd')] = None
|
|
105
|
+
"""Price per unit, in USD."""
|
|
82
106
|
|
|
83
107
|
|
|
84
108
|
@docs_group('Charging')
|
|
85
|
-
class PricePerDatasetItemActorPricingInfo(
|
|
109
|
+
class PricePerDatasetItemActorPricingInfo(ClientPricePerDatasetItem):
|
|
110
|
+
"""Pricing info for an Actor billed per dataset item produced."""
|
|
111
|
+
|
|
112
|
+
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
113
|
+
"""Apify's margin on the price, as a percentage."""
|
|
114
|
+
|
|
115
|
+
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
116
|
+
"""Timestamp when this pricing info was created."""
|
|
117
|
+
|
|
118
|
+
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
119
|
+
"""Timestamp when this pricing became effective."""
|
|
120
|
+
|
|
86
121
|
unit_name: Annotated[str | None, Field(alias='unitName')] = None
|
|
87
|
-
|
|
122
|
+
"""Name of the billed unit."""
|
|
88
123
|
|
|
89
124
|
|
|
90
125
|
@docs_group('Charging')
|
|
91
|
-
class PayPerEventActorPricingInfo(
|
|
92
|
-
|
|
126
|
+
class PayPerEventActorPricingInfo(ClientPayPerEvent):
|
|
127
|
+
"""Pricing info for an Actor billed per charged event."""
|
|
128
|
+
|
|
129
|
+
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
130
|
+
"""Apify's margin on the price, as a percentage."""
|
|
131
|
+
|
|
132
|
+
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
133
|
+
"""Timestamp when this pricing info was created."""
|
|
134
|
+
|
|
135
|
+
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
136
|
+
"""Timestamp when this pricing became effective."""
|
|
137
|
+
|
|
93
138
|
pricing_per_event: Annotated[PricingPerEvent, Field(alias='pricingPerEvent')]
|
|
139
|
+
"""The pay-per-event pricing details."""
|
|
94
140
|
|
|
95
141
|
|
|
96
142
|
ActorPricingInfoModel = ClientFree | ClientFlatPricePerMonth | ClientPricePerDatasetItem | ClientPayPerEvent
|
|
@@ -27,14 +27,31 @@ This is the Apify-specific subset of [`Event`][crawlee.events.Event] — for the
|
|
|
27
27
|
|
|
28
28
|
@docs_group('Event data')
|
|
29
29
|
class SystemInfoEventData(BaseModel):
|
|
30
|
+
"""Resource usage metrics carried by a `systemInfo` event."""
|
|
31
|
+
|
|
30
32
|
mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
|
|
33
|
+
"""Average memory usage over the measured interval, in bytes."""
|
|
34
|
+
|
|
31
35
|
mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
|
|
36
|
+
"""Current memory usage, in bytes."""
|
|
37
|
+
|
|
32
38
|
mem_max_bytes: Annotated[float, Field(alias='memMaxBytes')]
|
|
39
|
+
"""Peak memory usage observed so far, in bytes."""
|
|
40
|
+
|
|
33
41
|
cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')]
|
|
42
|
+
"""Average CPU usage over the measured interval, in percent."""
|
|
43
|
+
|
|
34
44
|
cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')]
|
|
45
|
+
"""Peak CPU usage observed so far, in percent."""
|
|
46
|
+
|
|
35
47
|
cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')]
|
|
48
|
+
"""Current CPU usage, in percent."""
|
|
49
|
+
|
|
36
50
|
is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
|
|
51
|
+
"""Whether the CPU is currently overloaded."""
|
|
52
|
+
|
|
37
53
|
created_at: Annotated[datetime, Field(alias='createdAt')]
|
|
54
|
+
"""Timestamp when the metrics were collected."""
|
|
38
55
|
|
|
39
56
|
def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
|
|
40
57
|
return EventSystemInfoData.model_validate(
|
|
@@ -54,36 +71,63 @@ class SystemInfoEventData(BaseModel):
|
|
|
54
71
|
|
|
55
72
|
@docs_group('Events')
|
|
56
73
|
class PersistStateEvent(BaseModel):
|
|
74
|
+
"""A `persistState` event instructing the Actor to persist its state."""
|
|
75
|
+
|
|
57
76
|
name: Literal[Event.PERSIST_STATE]
|
|
77
|
+
"""The event name."""
|
|
78
|
+
|
|
58
79
|
data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
|
|
80
|
+
"""The event payload."""
|
|
59
81
|
|
|
60
82
|
|
|
61
83
|
@docs_group('Events')
|
|
62
84
|
class SystemInfoEvent(BaseModel):
|
|
85
|
+
"""A `systemInfo` event carrying the Actor's resource usage metrics."""
|
|
86
|
+
|
|
63
87
|
name: Literal[Event.SYSTEM_INFO]
|
|
88
|
+
"""The event name."""
|
|
89
|
+
|
|
64
90
|
data: SystemInfoEventData
|
|
91
|
+
"""The event payload."""
|
|
65
92
|
|
|
66
93
|
|
|
67
94
|
@docs_group('Events')
|
|
68
95
|
class MigratingEvent(BaseModel):
|
|
96
|
+
"""A `migrating` event signalling the Actor is about to be migrated to another host."""
|
|
97
|
+
|
|
69
98
|
name: Literal[Event.MIGRATING]
|
|
99
|
+
"""The event name."""
|
|
100
|
+
|
|
70
101
|
data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
|
|
102
|
+
"""The event payload."""
|
|
71
103
|
|
|
72
104
|
|
|
73
105
|
@docs_group('Events')
|
|
74
106
|
class AbortingEvent(BaseModel):
|
|
107
|
+
"""An `aborting` event signalling the Actor run is being aborted."""
|
|
108
|
+
|
|
75
109
|
name: Literal[Event.ABORTING]
|
|
110
|
+
"""The event name."""
|
|
111
|
+
|
|
76
112
|
data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
|
|
113
|
+
"""The event payload."""
|
|
77
114
|
|
|
78
115
|
|
|
79
116
|
@docs_group('Events')
|
|
80
117
|
class ExitEvent(BaseModel):
|
|
118
|
+
"""An `exit` event signalling the Actor process is about to exit."""
|
|
119
|
+
|
|
81
120
|
name: Literal[Event.EXIT]
|
|
121
|
+
"""The event name."""
|
|
122
|
+
|
|
82
123
|
data: Annotated[EventExitData, Field(default_factory=EventExitData)]
|
|
124
|
+
"""The event payload."""
|
|
83
125
|
|
|
84
126
|
|
|
85
127
|
@docs_group('Events')
|
|
86
128
|
class EventWithoutData(BaseModel):
|
|
129
|
+
"""A framework-level event that carries no payload (e.g. browser and page lifecycle events)."""
|
|
130
|
+
|
|
87
131
|
name: Literal[
|
|
88
132
|
Event.SESSION_RETIRED,
|
|
89
133
|
Event.BROWSER_LAUNCHED,
|
|
@@ -92,19 +136,32 @@ class EventWithoutData(BaseModel):
|
|
|
92
136
|
Event.PAGE_CREATED,
|
|
93
137
|
Event.PAGE_CLOSED,
|
|
94
138
|
]
|
|
139
|
+
"""The event name."""
|
|
140
|
+
|
|
95
141
|
data: Any = None
|
|
142
|
+
"""The event payload, always empty for this event."""
|
|
96
143
|
|
|
97
144
|
|
|
98
145
|
@docs_group('Events')
|
|
99
146
|
class DeprecatedEvent(BaseModel):
|
|
147
|
+
"""A deprecated event kept for backward compatibility (e.g. `cpuInfo`)."""
|
|
148
|
+
|
|
100
149
|
name: Literal['cpuInfo']
|
|
150
|
+
"""The event name."""
|
|
151
|
+
|
|
101
152
|
data: Annotated[dict[str, Any], Field(default_factory=dict)]
|
|
153
|
+
"""The event payload."""
|
|
102
154
|
|
|
103
155
|
|
|
104
156
|
@docs_group('Events')
|
|
105
157
|
class UnknownEvent(BaseModel):
|
|
158
|
+
"""A fallback for any event whose name is not recognized by the SDK."""
|
|
159
|
+
|
|
106
160
|
name: str
|
|
161
|
+
"""The event name."""
|
|
162
|
+
|
|
107
163
|
data: Annotated[dict[str, Any], Field(default_factory=dict)]
|
|
164
|
+
"""The event payload."""
|
|
108
165
|
|
|
109
166
|
|
|
110
167
|
EventMessage = PersistStateEvent | SystemInfoEvent | MigratingEvent | AbortingEvent | ExitEvent | EventWithoutData
|
|
@@ -5,7 +5,7 @@ import threading
|
|
|
5
5
|
from concurrent import futures
|
|
6
6
|
from datetime import timedelta
|
|
7
7
|
from logging import getLogger
|
|
8
|
-
from typing import TYPE_CHECKING, Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from collections.abc import Coroutine
|
|
@@ -14,13 +14,16 @@ logger = getLogger(__name__)
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class AsyncThread:
|
|
17
|
-
"""
|
|
17
|
+
"""Run an asyncio event loop in a dedicated background thread.
|
|
18
18
|
|
|
19
|
-
This
|
|
20
|
-
|
|
19
|
+
This lets synchronous Scrapy callbacks drive asynchronous Apify and Crawlee coroutines. The
|
|
20
|
+
scheduler and the HTTP cache storage each own their own `AsyncThread`, so the request queue and
|
|
21
|
+
the key-value store never share an event loop; they only share the read-only global
|
|
22
|
+
`Configuration`. A single shared loop would also work but would couple their lifecycles.
|
|
21
23
|
"""
|
|
22
24
|
|
|
23
|
-
def __init__(self) -> None:
|
|
25
|
+
def __init__(self, default_timeout: timedelta = timedelta(seconds=60)) -> None:
|
|
26
|
+
self._default_timeout = default_timeout
|
|
24
27
|
self._eventloop = asyncio.new_event_loop()
|
|
25
28
|
|
|
26
29
|
# Start the event loop in a dedicated daemon thread.
|
|
@@ -33,7 +36,7 @@ class AsyncThread:
|
|
|
33
36
|
def run_coro(
|
|
34
37
|
self,
|
|
35
38
|
coro: Coroutine,
|
|
36
|
-
timeout: timedelta = timedelta(seconds=60),
|
|
39
|
+
timeout: timedelta | Literal['default'] = 'default',
|
|
37
40
|
) -> Any:
|
|
38
41
|
"""Run a coroutine on an event loop running in a separate thread.
|
|
39
42
|
|
|
@@ -42,7 +45,8 @@ class AsyncThread:
|
|
|
42
45
|
|
|
43
46
|
Args:
|
|
44
47
|
coro: The coroutine to run.
|
|
45
|
-
timeout: The maximum
|
|
48
|
+
timeout: The maximum time to wait for the coroutine to finish. Pass `'default'` to use the
|
|
49
|
+
`default_timeout` passed to the constructor.
|
|
46
50
|
|
|
47
51
|
Returns:
|
|
48
52
|
The result returned by the coroutine.
|
|
@@ -52,6 +56,9 @@ class AsyncThread:
|
|
|
52
56
|
TimeoutError: If the coroutine does not complete within the timeout.
|
|
53
57
|
Exception: Any exception raised during coroutine execution.
|
|
54
58
|
"""
|
|
59
|
+
if timeout == 'default':
|
|
60
|
+
timeout = self._default_timeout
|
|
61
|
+
|
|
55
62
|
if not self._eventloop.is_running():
|
|
56
63
|
raise RuntimeError(f'The coroutine {coro} cannot be executed because the event loop is not running.')
|
|
57
64
|
|
|
@@ -7,7 +7,7 @@ from scrapy.core.downloader.handlers.http11 import TunnelError
|
|
|
7
7
|
from scrapy.exceptions import NotConfigured
|
|
8
8
|
|
|
9
9
|
from apify import Actor, ProxyConfiguration
|
|
10
|
-
from apify.scrapy import get_basic_auth_header
|
|
10
|
+
from apify.scrapy.utils import get_basic_auth_header
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from scrapy import Request, Spider
|
|
@@ -30,7 +30,6 @@ class ApifyHttpProxyMiddleware:
|
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
32
|
proxy_settings: Dictionary containing proxy settings, provided by the Actor input.
|
|
33
|
-
auth_encoding: Encoding for basic authentication (default is 'latin-1').
|
|
34
33
|
"""
|
|
35
34
|
self._proxy_settings = proxy_settings
|
|
36
35
|
self._proxy_cfg_internal: ProxyConfiguration | None = None
|
|
@@ -111,7 +110,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
111
110
|
if isinstance(exception, TunnelError):
|
|
112
111
|
Actor.log.warning(
|
|
113
112
|
f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '
|
|
114
|
-
'reason="{exception}", skipping...'
|
|
113
|
+
f'reason="{exception}", skipping...'
|
|
115
114
|
)
|
|
116
115
|
|
|
117
116
|
async def _get_new_proxy_url(self) -> ParseResult:
|
|
@@ -170,6 +170,13 @@ class ApifyScheduler(BaseScheduler):
|
|
|
170
170
|
traceback.print_exc()
|
|
171
171
|
raise
|
|
172
172
|
|
|
173
|
-
|
|
173
|
+
# Reconstruct the Scrapy request. A malformed queue entry must not crash the whole run: it
|
|
174
|
+
# has already been marked handled above, so log it and skip it instead of propagating.
|
|
175
|
+
try:
|
|
176
|
+
scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
|
|
177
|
+
except Exception:
|
|
178
|
+
logger.exception(f'Failed to convert Apify request {apify_request} to a Scrapy request; skipping it.')
|
|
179
|
+
return None
|
|
180
|
+
|
|
174
181
|
logger.debug(f'Converted to scrapy_request: {scrapy_request}')
|
|
175
182
|
return scrapy_request
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_shared_client.py
RENAMED
|
File without changes
|
{apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_apify/_request_queue_single_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b4 → apify-3.4.2b5}/src/apify/storage_clients/_file_system/_key_value_store_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|