apify 3.4.2b6__tar.gz → 3.4.2b8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apify-3.4.2b6 → apify-3.4.2b8}/CHANGELOG.md +3 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/PKG-INFO +2 -2
- {apify-3.4.2b6 → apify-3.4.2b8}/pyproject.toml +2 -2
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_actor.py +9 -1
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_charging.py +33 -20
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_utils.py +4 -4
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/events/_types.py +28 -9
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/request_loaders/_apify_request_list.py +6 -3
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/_logging_config.py +24 -6
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_models.py +25 -11
- {apify-3.4.2b6 → apify-3.4.2b8}/.gitignore +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/CONTRIBUTING.md +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/LICENSE +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/README.md +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_configuration.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_consts.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_crypto.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/_webhook.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/events/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/events/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/log.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/_serialization.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/requests.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/scrapy/utils.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_alias_resolving.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_api_client_creation.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_dataset_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_request_queue_single_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_utils.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_file_system/_dataset_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storages/__init__.py +0 -0
- {apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storages/py.typed +0 -0
|
@@ -10,6 +10,9 @@ All notable changes to this project will be documented in this file.
|
|
|
10
10
|
- **scrapy:** Correct proxy middleware exception log and import ([#953](https://github.com/apify/apify-sdk-python/pull/953)) ([5bd6eb9](https://github.com/apify/apify-sdk-python/commit/5bd6eb9843d90844cec083372e932413bceedec9)) by [@vdusek](https://github.com/vdusek)
|
|
11
11
|
- **scrapy:** Skip a request that fails to convert instead of crashing the run ([#952](https://github.com/apify/apify-sdk-python/pull/952)) ([db9444f](https://github.com/apify/apify-sdk-python/commit/db9444faeb0158c29aa394121cf733ff2e843f28)) by [@vdusek](https://github.com/vdusek)
|
|
12
12
|
- **scrapy:** [**breaking**] Serialize requests and HTTP cache as JSON instead of pickle ([#951](https://github.com/apify/apify-sdk-python/pull/951)) ([a87e8d1](https://github.com/apify/apify-sdk-python/commit/a87e8d1597478b4f12fd5bb9b379f65f637d8e96)) by [@vdusek](https://github.com/vdusek)
|
|
13
|
+
- **scrapy:** Make logging configuration idempotent ([#954](https://github.com/apify/apify-sdk-python/pull/954)) ([2cc5602](https://github.com/apify/apify-sdk-python/commit/2cc5602b741b93c81f264d4e09e0d9bcfc7200f2)) by [@vdusek](https://github.com/vdusek)
|
|
14
|
+
- Bump typing-extensions floor to 4.4.0 ([#960](https://github.com/apify/apify-sdk-python/pull/960)) ([b7e7d9c](https://github.com/apify/apify-sdk-python/commit/b7e7d9c9a053992d4281a8a19897957d2bff67e1)) by [@vdusek](https://github.com/vdusek)
|
|
15
|
+
- Preserve decorated symbol types in docs_group and docs_name ([#964](https://github.com/apify/apify-sdk-python/pull/964)) ([6c359a7](https://github.com/apify/apify-sdk-python/commit/6c359a714b089b24c1be7fa9a07c9b9899cc5e11)) by [@vdusek](https://github.com/vdusek)
|
|
13
16
|
|
|
14
17
|
### 🚜 Refactor
|
|
15
18
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.4.2b6
|
|
3
|
+
Version: 3.4.2b8
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -233,7 +233,7 @@ Requires-Dist: impit>=0.8.0
|
|
|
233
233
|
Requires-Dist: lazy-object-proxy>=1.11.0
|
|
234
234
|
Requires-Dist: more-itertools>=10.2.0
|
|
235
235
|
Requires-Dist: pydantic>=2.11.0
|
|
236
|
-
Requires-Dist: typing-extensions>=4.
|
|
236
|
+
Requires-Dist: typing-extensions>=4.4.0
|
|
237
237
|
Requires-Dist: websockets>=14.0
|
|
238
238
|
Requires-Dist: yarl>=1.18.0
|
|
239
239
|
Provides-Extra: scrapy
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "3.4.2b6"
|
|
7
|
+
version = "3.4.2b8"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -42,7 +42,7 @@ dependencies = [
|
|
|
42
42
|
"lazy-object-proxy>=1.11.0",
|
|
43
43
|
"more_itertools>=10.2.0",
|
|
44
44
|
"pydantic>=2.11.0",
|
|
45
|
-
"typing-extensions>=4.
|
|
45
|
+
"typing-extensions>=4.4.0",
|
|
46
46
|
"websockets>=14.0",
|
|
47
47
|
"yarl>=1.18.0",
|
|
48
48
|
]
|
|
@@ -699,7 +699,15 @@ class _ActorType:
|
|
|
699
699
|
|
|
700
700
|
@_ensure_context
|
|
701
701
|
async def get_input(self) -> Any:
|
|
702
|
-
"""Get the Actor input value from the default key-value store associated with the current Actor run.
|
|
702
|
+
"""Get the Actor input value from the default key-value store associated with the current Actor run.
|
|
703
|
+
|
|
704
|
+
The input is the deserialized contents of the input record (the `INPUT` key by default), so it is typically
|
|
705
|
+
a `dict` keyed by the fields declared in the Actor's input schema. Any secret input fields are decrypted to
|
|
706
|
+
plaintext before being returned.
|
|
707
|
+
|
|
708
|
+
Returns:
|
|
709
|
+
The Actor input, usually a `dict` of input fields, or `None` if the Actor has no input.
|
|
710
|
+
"""
|
|
703
711
|
input_value = await self.get_value(self.configuration.input_key)
|
|
704
712
|
input_secrets_private_key = self.configuration.input_secrets_private_key_file
|
|
705
713
|
input_secrets_key_passphrase = self.configuration.input_secrets_private_key_passphrase
|
|
@@ -5,9 +5,10 @@ from contextvars import ContextVar
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import UTC, datetime
|
|
7
7
|
from decimal import Decimal
|
|
8
|
-
from typing import TYPE_CHECKING,
|
|
8
|
+
from typing import TYPE_CHECKING, Literal, Protocol, TypedDict
|
|
9
9
|
|
|
10
|
-
from pydantic import
|
|
10
|
+
from pydantic import ConfigDict
|
|
11
|
+
from pydantic.alias_generators import to_camel
|
|
11
12
|
|
|
12
13
|
import apify_client._models as _client_models
|
|
13
14
|
from apify_client._models import ActorChargeEvent as ClientActorChargeEvent
|
|
@@ -56,7 +57,9 @@ _ensure_context = ensure_context('active')
|
|
|
56
57
|
class ActorChargeEvent(ClientActorChargeEvent):
|
|
57
58
|
"""Definition of a single chargeable event in the pay-per-event pricing model."""
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
61
|
+
|
|
62
|
+
event_description: str | None = None
|
|
60
63
|
"""Human-readable description of the event.
|
|
61
64
|
|
|
62
65
|
Required in apify-client but omitted from the env var, so it is relaxed to optional.
|
|
@@ -67,7 +70,9 @@ class ActorChargeEvent(ClientActorChargeEvent):
|
|
|
67
70
|
class PricingPerEvent(ClientPricingPerEvent):
|
|
68
71
|
"""Pay-per-event pricing details - the chargeable events and their prices."""
|
|
69
72
|
|
|
70
|
-
|
|
73
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
74
|
+
|
|
75
|
+
actor_charge_events: dict[str, ActorChargeEvent] | None = None
|
|
71
76
|
"""Mapping of event name to its charge definition."""
|
|
72
77
|
|
|
73
78
|
|
|
@@ -75,13 +80,15 @@ class PricingPerEvent(ClientPricingPerEvent):
|
|
|
75
80
|
class FreeActorPricingInfo(ClientFree):
|
|
76
81
|
"""Pricing info for an Actor offered free of charge."""
|
|
77
82
|
|
|
78
|
-
|
|
83
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
84
|
+
|
|
85
|
+
apify_margin_percentage: float | None = None
|
|
79
86
|
"""Apify's margin on the price, as a percentage."""
|
|
80
87
|
|
|
81
|
-
created_at:
|
|
88
|
+
created_at: datetime | None = None
|
|
82
89
|
"""Timestamp when this pricing info was created."""
|
|
83
90
|
|
|
84
|
-
started_at:
|
|
91
|
+
started_at: datetime | None = None
|
|
85
92
|
"""Timestamp when this pricing became effective."""
|
|
86
93
|
|
|
87
94
|
|
|
@@ -89,19 +96,21 @@ class FreeActorPricingInfo(ClientFree):
|
|
|
89
96
|
class FlatPricePerMonthActorPricingInfo(ClientFlatPricePerMonth):
|
|
90
97
|
"""Pricing info for an Actor billed at a flat monthly price."""
|
|
91
98
|
|
|
92
|
-
|
|
99
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
100
|
+
|
|
101
|
+
apify_margin_percentage: float | None = None
|
|
93
102
|
"""Apify's margin on the price, as a percentage."""
|
|
94
103
|
|
|
95
|
-
created_at:
|
|
104
|
+
created_at: datetime | None = None
|
|
96
105
|
"""Timestamp when this pricing info was created."""
|
|
97
106
|
|
|
98
|
-
started_at:
|
|
107
|
+
started_at: datetime | None = None
|
|
99
108
|
"""Timestamp when this pricing became effective."""
|
|
100
109
|
|
|
101
|
-
trial_minutes:
|
|
110
|
+
trial_minutes: int | None = None
|
|
102
111
|
"""Length of the free trial period, in minutes."""
|
|
103
112
|
|
|
104
|
-
price_per_unit_usd:
|
|
113
|
+
price_per_unit_usd: float | None = None
|
|
105
114
|
"""Price per unit, in USD."""
|
|
106
115
|
|
|
107
116
|
|
|
@@ -109,16 +118,18 @@ class FlatPricePerMonthActorPricingInfo(ClientFlatPricePerMonth):
|
|
|
109
118
|
class PricePerDatasetItemActorPricingInfo(ClientPricePerDatasetItem):
|
|
110
119
|
"""Pricing info for an Actor billed per dataset item produced."""
|
|
111
120
|
|
|
112
|
-
|
|
121
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
122
|
+
|
|
123
|
+
apify_margin_percentage: float | None = None
|
|
113
124
|
"""Apify's margin on the price, as a percentage."""
|
|
114
125
|
|
|
115
|
-
created_at:
|
|
126
|
+
created_at: datetime | None = None
|
|
116
127
|
"""Timestamp when this pricing info was created."""
|
|
117
128
|
|
|
118
|
-
started_at:
|
|
129
|
+
started_at: datetime | None = None
|
|
119
130
|
"""Timestamp when this pricing became effective."""
|
|
120
131
|
|
|
121
|
-
unit_name:
|
|
132
|
+
unit_name: str | None = None
|
|
122
133
|
"""Name of the billed unit."""
|
|
123
134
|
|
|
124
135
|
|
|
@@ -126,16 +137,18 @@ class PricePerDatasetItemActorPricingInfo(ClientPricePerDatasetItem):
|
|
|
126
137
|
class PayPerEventActorPricingInfo(ClientPayPerEvent):
|
|
127
138
|
"""Pricing info for an Actor billed per charged event."""
|
|
128
139
|
|
|
129
|
-
|
|
140
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
141
|
+
|
|
142
|
+
apify_margin_percentage: float | None = None
|
|
130
143
|
"""Apify's margin on the price, as a percentage."""
|
|
131
144
|
|
|
132
|
-
created_at:
|
|
145
|
+
created_at: datetime | None = None
|
|
133
146
|
"""Timestamp when this pricing info was created."""
|
|
134
147
|
|
|
135
|
-
started_at:
|
|
148
|
+
started_at: datetime | None = None
|
|
136
149
|
"""Timestamp when this pricing became effective."""
|
|
137
150
|
|
|
138
|
-
pricing_per_event:
|
|
151
|
+
pricing_per_event: PricingPerEvent
|
|
139
152
|
"""The pay-per-event pricing details."""
|
|
140
153
|
|
|
141
154
|
|
|
@@ -85,7 +85,7 @@ GroupName = Literal[
|
|
|
85
85
|
]
|
|
86
86
|
|
|
87
87
|
|
|
88
|
-
def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
|
|
88
|
+
def docs_group(group_name: GroupName) -> Callable[[T], T]: # noqa: ARG001
|
|
89
89
|
"""Mark a symbol for rendering and grouping in documentation.
|
|
90
90
|
|
|
91
91
|
This decorator is used solely for documentation purposes and does not modify the behavior
|
|
@@ -98,13 +98,13 @@ def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
|
|
|
98
98
|
The original callable without modification.
|
|
99
99
|
"""
|
|
100
100
|
|
|
101
|
-
def wrapper(func:
|
|
101
|
+
def wrapper(func: T) -> T:
|
|
102
102
|
return func
|
|
103
103
|
|
|
104
104
|
return wrapper
|
|
105
105
|
|
|
106
106
|
|
|
107
|
-
def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
|
|
107
|
+
def docs_name(symbol_name: str) -> Callable[[T], T]: # noqa: ARG001
|
|
108
108
|
"""Rename a symbol for documentation rendering.
|
|
109
109
|
|
|
110
110
|
This decorator modifies only the displayed name of the symbol in the generated documentation
|
|
@@ -117,7 +117,7 @@ def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
|
|
|
117
117
|
The original callable without modification.
|
|
118
118
|
"""
|
|
119
119
|
|
|
120
|
-
def wrapper(func:
|
|
120
|
+
def wrapper(func: T) -> T:
|
|
121
121
|
return func
|
|
122
122
|
|
|
123
123
|
return wrapper
|
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from typing import Annotated, Any, Literal
|
|
5
5
|
|
|
6
|
-
from pydantic import BaseModel, Field
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
7
|
+
from pydantic.alias_generators import to_camel
|
|
7
8
|
|
|
8
9
|
from crawlee.events._types import (
|
|
9
10
|
Event,
|
|
@@ -29,28 +30,30 @@ This is the Apify-specific subset of [`Event`][crawlee.events.Event] — for the
|
|
|
29
30
|
class SystemInfoEventData(BaseModel):
|
|
30
31
|
"""Resource usage metrics carried by a `systemInfo` event."""
|
|
31
32
|
|
|
32
|
-
|
|
33
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
34
|
+
|
|
35
|
+
mem_avg_bytes: float
|
|
33
36
|
"""Average memory usage over the measured interval, in bytes."""
|
|
34
37
|
|
|
35
|
-
mem_current_bytes:
|
|
38
|
+
mem_current_bytes: float
|
|
36
39
|
"""Current memory usage, in bytes."""
|
|
37
40
|
|
|
38
|
-
mem_max_bytes:
|
|
41
|
+
mem_max_bytes: float
|
|
39
42
|
"""Peak memory usage observed so far, in bytes."""
|
|
40
43
|
|
|
41
|
-
cpu_avg_usage:
|
|
44
|
+
cpu_avg_usage: float
|
|
42
45
|
"""Average CPU usage over the measured interval, in percent."""
|
|
43
46
|
|
|
44
|
-
cpu_max_usage:
|
|
47
|
+
cpu_max_usage: float
|
|
45
48
|
"""Peak CPU usage observed so far, in percent."""
|
|
46
49
|
|
|
47
|
-
cpu_current_usage:
|
|
50
|
+
cpu_current_usage: float
|
|
48
51
|
"""Current CPU usage, in percent."""
|
|
49
52
|
|
|
50
|
-
is_cpu_overloaded:
|
|
53
|
+
is_cpu_overloaded: bool
|
|
51
54
|
"""Whether the CPU is currently overloaded."""
|
|
52
55
|
|
|
53
|
-
created_at:
|
|
56
|
+
created_at: datetime
|
|
54
57
|
"""Timestamp when the metrics were collected."""
|
|
55
58
|
|
|
56
59
|
def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
|
|
@@ -73,6 +76,8 @@ class SystemInfoEventData(BaseModel):
|
|
|
73
76
|
class PersistStateEvent(BaseModel):
|
|
74
77
|
"""A `persistState` event instructing the Actor to persist its state."""
|
|
75
78
|
|
|
79
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
80
|
+
|
|
76
81
|
name: Literal[Event.PERSIST_STATE]
|
|
77
82
|
"""The event name."""
|
|
78
83
|
|
|
@@ -84,6 +89,8 @@ class PersistStateEvent(BaseModel):
|
|
|
84
89
|
class SystemInfoEvent(BaseModel):
|
|
85
90
|
"""A `systemInfo` event carrying the Actor's resource usage metrics."""
|
|
86
91
|
|
|
92
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
93
|
+
|
|
87
94
|
name: Literal[Event.SYSTEM_INFO]
|
|
88
95
|
"""The event name."""
|
|
89
96
|
|
|
@@ -95,6 +102,8 @@ class SystemInfoEvent(BaseModel):
|
|
|
95
102
|
class MigratingEvent(BaseModel):
|
|
96
103
|
"""A `migrating` event signalling the Actor is about to be migrated to another host."""
|
|
97
104
|
|
|
105
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
106
|
+
|
|
98
107
|
name: Literal[Event.MIGRATING]
|
|
99
108
|
"""The event name."""
|
|
100
109
|
|
|
@@ -106,6 +115,8 @@ class MigratingEvent(BaseModel):
|
|
|
106
115
|
class AbortingEvent(BaseModel):
|
|
107
116
|
"""An `aborting` event signalling the Actor run is being aborted."""
|
|
108
117
|
|
|
118
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
119
|
+
|
|
109
120
|
name: Literal[Event.ABORTING]
|
|
110
121
|
"""The event name."""
|
|
111
122
|
|
|
@@ -117,6 +128,8 @@ class AbortingEvent(BaseModel):
|
|
|
117
128
|
class ExitEvent(BaseModel):
|
|
118
129
|
"""An `exit` event signalling the Actor process is about to exit."""
|
|
119
130
|
|
|
131
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
132
|
+
|
|
120
133
|
name: Literal[Event.EXIT]
|
|
121
134
|
"""The event name."""
|
|
122
135
|
|
|
@@ -128,6 +141,8 @@ class ExitEvent(BaseModel):
|
|
|
128
141
|
class EventWithoutData(BaseModel):
|
|
129
142
|
"""A framework-level event that carries no payload (e.g. browser and page lifecycle events)."""
|
|
130
143
|
|
|
144
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
145
|
+
|
|
131
146
|
name: Literal[
|
|
132
147
|
Event.SESSION_RETIRED,
|
|
133
148
|
Event.BROWSER_LAUNCHED,
|
|
@@ -146,6 +161,8 @@ class EventWithoutData(BaseModel):
|
|
|
146
161
|
class DeprecatedEvent(BaseModel):
|
|
147
162
|
"""A deprecated event kept for backward compatibility (e.g. `cpuInfo`)."""
|
|
148
163
|
|
|
164
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
165
|
+
|
|
149
166
|
name: Literal['cpuInfo']
|
|
150
167
|
"""The event name."""
|
|
151
168
|
|
|
@@ -157,6 +174,8 @@ class DeprecatedEvent(BaseModel):
|
|
|
157
174
|
class UnknownEvent(BaseModel):
|
|
158
175
|
"""A fallback for any event whose name is not recognized by the SDK."""
|
|
159
176
|
|
|
177
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
178
|
+
|
|
160
179
|
name: str
|
|
161
180
|
"""The event name."""
|
|
162
181
|
|
|
@@ -5,7 +5,8 @@ import re
|
|
|
5
5
|
from itertools import chain
|
|
6
6
|
from typing import Annotated, Any
|
|
7
7
|
|
|
8
|
-
from pydantic import BaseModel, Field, TypeAdapter
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter
|
|
9
|
+
from pydantic.alias_generators import to_camel
|
|
9
10
|
|
|
10
11
|
from crawlee._types import HttpMethod
|
|
11
12
|
from crawlee.http_clients import HttpClient, ImpitHttpClient
|
|
@@ -20,14 +21,16 @@ URL_NO_COMMAS_REGEX = re.compile(
|
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class _RequestDetails(BaseModel):
|
|
24
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
25
|
+
|
|
23
26
|
method: HttpMethod = 'GET'
|
|
24
27
|
payload: str = ''
|
|
25
28
|
headers: Annotated[dict[str, str], Field(default_factory=dict)]
|
|
26
|
-
user_data: Annotated[dict[str, str], Field(default_factory=dict
|
|
29
|
+
user_data: Annotated[dict[str, str], Field(default_factory=dict)]
|
|
27
30
|
|
|
28
31
|
|
|
29
32
|
class _RequestsFromUrlInput(_RequestDetails):
|
|
30
|
-
requests_from_url: str
|
|
33
|
+
requests_from_url: str
|
|
31
34
|
|
|
32
35
|
|
|
33
36
|
class _SimpleUrlInput(_RequestDetails):
|
|
@@ -13,6 +13,11 @@ _PRIMARY_LOGGERS = ['apify', 'apify_client', 'scrapy']
|
|
|
13
13
|
_SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted']
|
|
14
14
|
_ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS
|
|
15
15
|
|
|
16
|
+
# Mutable state shared with the Scrapy monkey-patch below. `initialize_logging` refreshes
|
|
17
|
+
# `level`/`handler` on each call; the patch (installed once) reads them so it always applies the
|
|
18
|
+
# latest configuration rather than values captured the first time it ran.
|
|
19
|
+
_state: dict[str, Any] = {'level': 'INFO', 'handler': None, 'patched': False}
|
|
20
|
+
|
|
16
21
|
|
|
17
22
|
def _configure_logger(name: str | None, logging_level: str, handler: logging.Handler) -> None:
|
|
18
23
|
"""Clear and reconfigure the logger."""
|
|
@@ -23,26 +28,39 @@ def _configure_logger(name: str | None, logging_level: str, handler: logging.Han
|
|
|
23
28
|
logger.propagate = False
|
|
24
29
|
|
|
25
30
|
|
|
31
|
+
def _configure_all_loggers() -> None:
|
|
32
|
+
"""Apply the Apify handler and level to the root logger and all defined loggers."""
|
|
33
|
+
handler = _state['handler']
|
|
34
|
+
if handler is None:
|
|
35
|
+
return
|
|
36
|
+
for logger_name in [None, *_ALL_LOGGERS]:
|
|
37
|
+
_configure_logger(logger_name, _state['level'], handler)
|
|
38
|
+
|
|
39
|
+
|
|
26
40
|
def initialize_logging() -> None:
|
|
27
41
|
"""Configure logging for Apify Actors and adjust Scrapy's logging settings."""
|
|
28
42
|
# Retrieve Scrapy project settings and determine the logging level.
|
|
29
43
|
settings = get_project_settings()
|
|
30
|
-
|
|
44
|
+
_state['level'] = settings.get('LOG_LEVEL', 'INFO') # Default to INFO.
|
|
31
45
|
|
|
32
46
|
# Create a custom handler with the Apify log formatter.
|
|
33
47
|
handler = logging.StreamHandler()
|
|
34
48
|
handler.setFormatter(ActorLogFormatter(include_logger_name=True))
|
|
49
|
+
_state['handler'] = handler
|
|
35
50
|
|
|
36
51
|
# Configure the root logger and all other defined loggers.
|
|
37
|
-
|
|
38
|
-
|
|
52
|
+
_configure_all_loggers()
|
|
53
|
+
|
|
54
|
+
# Monkey-patch Scrapy's logging to re-apply our settings whenever it reconfigures logging.
|
|
55
|
+
# Install the wrapper at most once, otherwise repeated calls would nest wrappers.
|
|
56
|
+
if _state['patched']:
|
|
57
|
+
return
|
|
39
58
|
|
|
40
|
-
# Monkey-patch Scrapy's logging configuration to re-apply our settings.
|
|
41
59
|
original_configure_logging = scrapy_logging.configure_logging
|
|
42
60
|
|
|
43
61
|
def new_configure_logging(*args: Any, **kwargs: Any) -> None:
|
|
44
62
|
original_configure_logging(*args, **kwargs)
|
|
45
|
-
|
|
46
|
-
_configure_logger(logger_name, logging_level, handler)
|
|
63
|
+
_configure_all_loggers()
|
|
47
64
|
|
|
48
65
|
scrapy_logging.configure_logging = new_configure_logging # ty: ignore[invalid-assignment]
|
|
66
|
+
_state['patched'] = True
|
|
@@ -3,7 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
4
|
from typing import TYPE_CHECKING, Annotated
|
|
5
5
|
|
|
6
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
7
|
+
from pydantic.alias_generators import to_camel
|
|
7
8
|
|
|
8
9
|
from apify_client._models import RequestQueueStats
|
|
9
10
|
from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata
|
|
@@ -22,7 +23,9 @@ class ApifyKeyValueStoreMetadata(KeyValueStoreMetadata):
|
|
|
22
23
|
Includes additional Apify-specific fields.
|
|
23
24
|
"""
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
27
|
+
|
|
28
|
+
url_signing_secret_key: str | None = None
|
|
26
29
|
"""The secret key used for signing URLs for secure access to key-value store records."""
|
|
27
30
|
|
|
28
31
|
|
|
@@ -34,24 +37,31 @@ class RequestQueueHead(BaseModel):
|
|
|
34
37
|
including metadata about the queue's state and lock information for the requests.
|
|
35
38
|
"""
|
|
36
39
|
|
|
37
|
-
model_config = ConfigDict(populate_by_name=True, extra='allow')
|
|
40
|
+
model_config = ConfigDict(populate_by_name=True, extra='allow', alias_generator=to_camel)
|
|
38
41
|
|
|
39
|
-
limit:
|
|
42
|
+
limit: int | None = None
|
|
40
43
|
"""The maximum number of requests that were requested from the queue."""
|
|
41
44
|
|
|
42
|
-
had_multiple_clients:
|
|
45
|
+
had_multiple_clients: bool = False
|
|
43
46
|
"""Indicates whether the queue has been accessed by multiple clients (consumers)."""
|
|
44
47
|
|
|
45
|
-
queue_modified_at:
|
|
48
|
+
queue_modified_at: datetime
|
|
46
49
|
"""The timestamp when the queue was last modified."""
|
|
47
50
|
|
|
48
|
-
lock_time: Annotated[
|
|
49
|
-
|
|
51
|
+
lock_time: Annotated[
|
|
52
|
+
timedelta | None,
|
|
53
|
+
Field(validation_alias=AliasChoices('lockSecs', 'lockTime'), serialization_alias='lockSecs'),
|
|
54
|
+
] = None
|
|
55
|
+
"""The duration for which the returned requests are locked and cannot be processed by other clients.
|
|
56
|
+
|
|
57
|
+
The platform's API names this field `lockSecs`, so it is serialized under that alias instead of the
|
|
58
|
+
`lockTime` that `to_camel` would derive from the field name.
|
|
59
|
+
"""
|
|
50
60
|
|
|
51
|
-
queue_has_locked_requests:
|
|
61
|
+
queue_has_locked_requests: bool | None = False
|
|
52
62
|
"""Indicates whether the queue contains any locked requests."""
|
|
53
63
|
|
|
54
|
-
items: Annotated[list[Request], Field(
|
|
64
|
+
items: Annotated[list[Request], Field(default_factory=list[Request])]
|
|
55
65
|
"""The list of request objects retrieved from the beginning of the queue."""
|
|
56
66
|
|
|
57
67
|
@classmethod
|
|
@@ -77,6 +87,8 @@ class CachedRequest(BaseModel):
|
|
|
77
87
|
Only internal structure.
|
|
78
88
|
"""
|
|
79
89
|
|
|
90
|
+
model_config = ConfigDict(populate_by_name=True, alias_generator=to_camel)
|
|
91
|
+
|
|
80
92
|
id: str
|
|
81
93
|
"""Id of the request."""
|
|
82
94
|
|
|
@@ -91,5 +103,7 @@ class CachedRequest(BaseModel):
|
|
|
91
103
|
|
|
92
104
|
|
|
93
105
|
class ApifyRequestQueueMetadata(RequestQueueMetadata):
|
|
94
|
-
|
|
106
|
+
model_config = ConfigDict(alias_generator=to_camel)
|
|
107
|
+
|
|
108
|
+
stats: Annotated[RequestQueueStats, Field(default_factory=RequestQueueStats)]
|
|
95
109
|
"""Additional statistics about the request queue."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_request_queue_shared_client.py
RENAMED
|
File without changes
|
{apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_apify/_request_queue_single_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-3.4.2b6 → apify-3.4.2b8}/src/apify/storage_clients/_file_system/_key_value_store_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|