apify 1.7.3b3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +19 -4
- apify/_actor.py +979 -0
- apify/_configuration.py +310 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +29 -27
- apify/_models.py +110 -0
- apify/_platform_event_manager.py +222 -0
- apify/_proxy_configuration.py +316 -0
- apify/_utils.py +0 -497
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +56 -0
- apify/apify_storage_client/_dataset_client.py +188 -0
- apify/apify_storage_client/_dataset_collection_client.py +50 -0
- apify/apify_storage_client/_key_value_store_client.py +98 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +50 -0
- apify/apify_storage_client/_request_queue_client.py +208 -0
- apify/apify_storage_client/_request_queue_collection_client.py +50 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +24 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +21 -21
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +1 -1
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +55 -54
- apify/scrapy/scheduler.py +19 -13
- apify/scrapy/utils.py +2 -31
- apify/storages/__init__.py +2 -10
- apify/storages/py.typed +0 -0
- apify-2.0.0.dist-info/METADATA +209 -0
- apify-2.0.0.dist-info/RECORD +37 -0
- {apify-1.7.3b3.dist-info → apify-2.0.0.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1357
- apify/config.py +0 -130
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.3b3.dist-info/METADATA +0 -150
- apify-1.7.3b3.dist-info/RECORD +0 -41
- apify-1.7.3b3.dist-info/top_level.txt +0 -1
- {apify-1.7.3b3.dist-info → apify-2.0.0.dist-info}/LICENSE +0 -0
apify/_configuration.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# ruff: noqa: TCH001 TCH002 TCH003 (so that pydantic annotations work)
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
from pydantic import AliasChoices, BeforeValidator, Field
|
|
8
|
+
|
|
9
|
+
from crawlee._utils.models import timedelta_ms
|
|
10
|
+
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Configuration(CrawleeConfiguration):
    """A class for specifying the configuration of an Actor.

    Can be used either globally via `Configuration.get_global_configuration()`,
    or it can be specific to each `Actor` instance on the `actor.config` property.

    Every field declares one or more input names through `alias` / `validation_alias`. Fields that list
    several names in `AliasChoices` accept any of them (for example both the `actor_*` and the `apify_*`
    spelling of the same setting).

    NOTE(review): the values are presumably loaded from environment variables by the crawlee base class -
    confirm against `crawlee.configuration.Configuration`.
    """

    actor_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_id',
                'apify_actor_id',
                'apify_act_id',
            ),
            description='ID of the Actor',
        ),
    ] = None

    actor_run_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_run_id',
                'apify_actor_run_id',
                'apify_act_run_id',
            ),
            description='ID of the Actor run',
        ),
    ] = None

    actor_build_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_build_id',
                'apify_actor_build_id',
            ),
            description='ID of the Actor build used in the run',
        ),
    ] = None

    actor_build_number: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_build_number',
                'apify_actor_build_number',
            ),
            description='Build number of the Actor build used in the run',
        ),
    ] = None

    actor_task_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_task_id',
                'apify_actor_task_id',
            ),
            description='ID of the Actor task. Empty if Actor is run outside of any task, e.g. directly using the API',
        ),
    ] = None

    actor_events_ws_url: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_events_websocket_url',
                'apify_actor_events_ws_url',
            ),
            description='Websocket URL where Actor may listen for events from Actor platform',
        ),
    ] = None

    api_base_url: Annotated[
        str,
        Field(
            alias='apify_api_base_url',
            description='Internal URL of the Apify API. May be used to interact with the platform programmatically',
        ),
    ] = 'https://api.apify.com'

    api_public_base_url: Annotated[
        str,
        Field(
            alias='apify_api_public_base_url',
            description='Public URL of the Apify API. May be used to link to REST API resources',
        ),
    ] = 'https://api.apify.com'

    dedicated_cpus: Annotated[
        float | None,
        Field(
            alias='apify_dedicated_cpus',
            description='Number of CPU cores reserved for the actor, based on allocated memory',
        ),
    ] = None

    disable_outdated_warning: Annotated[
        bool,
        Field(
            alias='apify_disable_outdated_warning',
            description='Controls the display of outdated SDK version warnings',
        ),
        # Any falsy raw input (e.g. an empty string) is turned into `False` before bool validation runs.
        BeforeValidator(lambda val: val or False),
    ] = False

    fact: Annotated[str | None, Field(alias='apify_fact')] = None

    input_key: Annotated[
        str,
        Field(
            validation_alias=AliasChoices(
                'actor_input_key',
                'apify_input_key',
                'crawlee_input_key',
            ),
            description='Key of the record in the default key-value store that holds the Actor input',
        ),
    ] = 'INPUT'

    input_secrets_private_key_file: Annotated[
        str | None,
        Field(
            alias='apify_input_secrets_private_key_file',
            description='Path to the secret key used to decrypt Secret inputs.',
        ),
    ] = None

    input_secrets_private_key_passphrase: Annotated[
        str | None,
        Field(
            alias='apify_input_secrets_private_key_passphrase',
            description='Passphrase for the input secret key',
        ),
    ] = None

    is_at_home: Annotated[
        bool,
        Field(
            alias='apify_is_at_home',
            description='True if the Actor is running on Apify servers',
        ),
    ] = False

    latest_sdk_version: Annotated[
        str | None,
        Field(
            alias='apify_sdk_latest_version',
            deprecated=True,
            description='Specifies the most recent release version of the Apify SDK for Javascript. Used for '
            'checking for updates.',
        ),
    ] = None

    log_format: Annotated[
        str | None,
        Field(alias='apify_log_format', deprecated=True),
    ] = None

    max_paid_dataset_items: Annotated[
        int | None,
        Field(
            alias='actor_max_paid_dataset_items',
            description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit',
        ),
        # Any falsy raw input (e.g. an empty string or 0) is turned into `None`, i.e. "no limit set".
        BeforeValidator(lambda val: val or None),
    ] = None

    meta_origin: Annotated[
        str | None,
        Field(
            alias='apify_meta_origin',
            description='Specifies how an Actor run was started',
        ),
    ] = None

    metamorph_after_sleep: Annotated[
        timedelta_ms,
        Field(
            alias='apify_metamorph_after_sleep_millis',
            description='How long the Actor needs to wait before exiting after triggering a metamorph',
        ),
    ] = timedelta(minutes=5)

    proxy_hostname: Annotated[
        str,
        Field(
            alias='apify_proxy_hostname',
            description='Hostname of the Apify proxy',
        ),
    ] = 'proxy.apify.com'

    proxy_password: Annotated[
        str | None,
        Field(
            alias='apify_proxy_password',
            description='Password to the Apify proxy',
        ),
    ] = None

    proxy_port: Annotated[
        int,
        Field(
            alias='apify_proxy_port',
            description='Port to communicate with the Apify proxy',
        ),
    ] = 8000

    proxy_status_url: Annotated[
        str,
        Field(
            alias='apify_proxy_status_url',
            description='URL for retrieving proxy status information',
        ),
    ] = 'http://proxy.apify.com'

    started_at: Annotated[
        datetime | None,
        Field(
            validation_alias=AliasChoices(
                'actor_started_at',
                'apify_started_at',
            ),
            description='Date when the Actor was started',
        ),
    ] = None

    timeout_at: Annotated[
        datetime | None,
        Field(
            validation_alias=AliasChoices(
                'actor_timeout_at',
                'apify_timeout_at',
            ),
            description='Date when the Actor will time out',
        ),
    ] = None

    standby_port: Annotated[
        int,
        Field(
            alias='actor_standby_port',
            description='TCP port for the Actor to start an HTTP server to receive messages in the Actor Standby mode',
        ),
    ] = 4322

    token: Annotated[
        str | None,
        Field(
            alias='apify_token',
            description='API token of the user who started the Actor',
        ),
    ] = None

    user_id: Annotated[
        str | None,
        Field(
            alias='apify_user_id',
            description='ID of the user who started the Actor. May differ from the Actor owner',
        ),
    ] = None

    web_server_port: Annotated[
        int,
        Field(
            validation_alias=AliasChoices(
                'actor_web_server_port',
                'apify_container_port',
            ),
            # The two literals are concatenated implicitly, so the first one must end with '. ' to keep
            # the sentences apart in the resulting description.
            description='TCP port for the Actor to start an HTTP server on. '
            'This server can be used to receive external messages or expose monitoring and control interfaces',
        ),
    ] = 4321

    web_server_url: Annotated[
        str,
        Field(
            validation_alias=AliasChoices(
                'actor_web_server_url',
                'apify_container_url',
            ),
            description='Unique public URL for accessing the Actor run web server from the outside world',
        ),
    ] = 'http://localhost:4321'

    workflow_key: Annotated[
        str | None,
        Field(
            alias='apify_workflow_key',
            description='Identifier used for grouping related runs and API calls together',
        ),
    ] = None
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# Monkey-patch the base class so that it works with the extended configuration: the base class attribute
# `get_global_configuration` is rebound to the one resolved on the `Configuration` subclass defined above.
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
|
apify/_consts.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import timedelta
|
|
5
|
+
|
|
6
|
+
# Five-second timeout for event listeners.
# NOTE(review): presumably the time listeners are given to finish - confirm at the use site.
EVENT_LISTENERS_TIMEOUT = timedelta(seconds=5)

# Pattern fragment for a base64-looking chunk: characters of the alphabet followed by up to three '='.
BASE64_REGEXP = '[-A-Za-z0-9+/]*={0,3}'

# Literal marker that an encrypted input value starts with.
ENCRYPTED_INPUT_VALUE_PREFIX = 'ENCRYPTED_VALUE'

# Whole-string pattern `<prefix>:<base64>:<base64>`; the two base64 chunks are capture groups 1 and 2.
ENCRYPTED_INPUT_VALUE_REGEXP = re.compile(
    '^{prefix}:({chunk}):({chunk})$'.format(
        prefix=ENCRYPTED_INPUT_VALUE_PREFIX,
        chunk=BASE64_REGEXP,
    )
)
|
apify/_crypto.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import base64
|
|
4
|
-
import secrets
|
|
5
4
|
from typing import Any
|
|
6
5
|
|
|
7
|
-
from apify_shared.utils import ignore_docs
|
|
8
6
|
from cryptography.exceptions import InvalidTag as InvalidTagException
|
|
9
7
|
from cryptography.hazmat.primitives import hashes, serialization
|
|
10
8
|
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
|
11
9
|
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
|
12
10
|
|
|
13
|
-
from
|
|
11
|
+
from apify_shared.utils import ignore_docs
|
|
12
|
+
from crawlee._utils.crypto import crypto_random_object_id
|
|
13
|
+
|
|
14
|
+
from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP
|
|
14
15
|
|
|
15
16
|
ENCRYPTION_KEY_LENGTH = 32
|
|
16
17
|
ENCRYPTION_IV_LENGTH = 16
|
|
@@ -25,11 +26,10 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
|
|
|
25
26
|
It returns the encrypted password and encrypted value in BASE64 format.
|
|
26
27
|
|
|
27
28
|
Args:
|
|
28
|
-
value
|
|
29
|
-
public_key
|
|
29
|
+
value: The value which should be encrypted.
|
|
30
|
+
public_key: Public key to use for encryption.
|
|
30
31
|
|
|
31
|
-
Returns:
|
|
32
|
-
disc: Encrypted password and value.
|
|
32
|
+
Returns: Encrypted password and value.
|
|
33
33
|
"""
|
|
34
34
|
key_bytes = crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
|
|
35
35
|
initialized_vector_bytes = crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
|
|
@@ -37,8 +37,15 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
|
|
|
37
37
|
|
|
38
38
|
password_bytes = key_bytes + initialized_vector_bytes
|
|
39
39
|
|
|
40
|
-
# NOTE: Auth Tag is appended to the end of the encrypted data, it has length of 16 bytes and ensures integrity
|
|
41
|
-
|
|
40
|
+
# NOTE: Auth Tag is appended to the end of the encrypted data, it has length of 16 bytes and ensures integrity
|
|
41
|
+
# of the data.
|
|
42
|
+
cipher = Cipher(
|
|
43
|
+
algorithms.AES(key_bytes),
|
|
44
|
+
modes.GCM(
|
|
45
|
+
initialized_vector_bytes,
|
|
46
|
+
min_tag_length=ENCRYPTION_AUTH_TAG_LENGTH,
|
|
47
|
+
),
|
|
48
|
+
)
|
|
42
49
|
encryptor = cipher.encryptor()
|
|
43
50
|
encrypted_value_bytes = encryptor.update(value_bytes) + encryptor.finalize()
|
|
44
51
|
encrypted_password_bytes = public_key.encrypt(
|
|
@@ -65,12 +72,11 @@ def private_decrypt(
|
|
|
65
72
|
"""Decrypts the given encrypted value using the private key and password.
|
|
66
73
|
|
|
67
74
|
Args:
|
|
68
|
-
encrypted_password
|
|
69
|
-
encrypted_value
|
|
70
|
-
private_key
|
|
75
|
+
encrypted_password: Password used to encrypt the value, itself encrypted with the public key and encoded as base64 string.
|
|
76
|
+
encrypted_value: Encrypted value to decrypt as base64 string.
|
|
77
|
+
private_key: Private key to use for decryption.
|
|
71
78
|
|
|
72
|
-
Returns:
|
|
73
|
-
str: Decrypted value.
|
|
79
|
+
Returns: Decrypted value.
|
|
74
80
|
"""
|
|
75
81
|
encrypted_password_bytes = base64.b64decode(encrypted_password.encode('utf-8'))
|
|
76
82
|
encrypted_value_bytes = base64.b64decode(encrypted_value.encode('utf-8'))
|
|
@@ -95,7 +101,9 @@ def private_decrypt(
|
|
|
95
101
|
initialization_vector_bytes = password_bytes[ENCRYPTION_KEY_LENGTH:]
|
|
96
102
|
|
|
97
103
|
try:
|
|
98
|
-
cipher = Cipher(
|
|
104
|
+
cipher = Cipher(
|
|
105
|
+
algorithms.AES(encryption_key_bytes), modes.GCM(initialization_vector_bytes, authentication_tag_bytes)
|
|
106
|
+
)
|
|
99
107
|
decryptor = cipher.decryptor()
|
|
100
108
|
decipher_bytes = decryptor.update(encrypted_data_bytes) + decryptor.finalize()
|
|
101
109
|
except InvalidTagException as exc:
|
|
@@ -125,27 +133,21 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
|
|
|
125
133
|
return public_key
|
|
126
134
|
|
|
127
135
|
|
|
128
|
-
def
|
|
129
|
-
"""Python reimplementation of cryptoRandomObjectId from `@apify/utilities`."""
|
|
130
|
-
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
|
|
131
|
-
return ''.join(secrets.choice(chars) for _ in range(length))
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input: Any) -> Any: # noqa: A002
|
|
136
|
+
def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> Any:
|
|
135
137
|
"""Decrypt input secrets."""
|
|
136
|
-
if not isinstance(
|
|
137
|
-
return
|
|
138
|
+
if not isinstance(input_data, dict):
|
|
139
|
+
return input_data
|
|
138
140
|
|
|
139
|
-
for key, value in
|
|
141
|
+
for key, value in input_data.items():
|
|
140
142
|
if isinstance(value, str):
|
|
141
143
|
match = ENCRYPTED_INPUT_VALUE_REGEXP.fullmatch(value)
|
|
142
144
|
if match:
|
|
143
145
|
encrypted_password = match.group(1)
|
|
144
146
|
encrypted_value = match.group(2)
|
|
145
|
-
|
|
147
|
+
input_data[key] = private_decrypt(
|
|
146
148
|
encrypted_password,
|
|
147
149
|
encrypted_value,
|
|
148
150
|
private_key=private_key,
|
|
149
151
|
)
|
|
150
152
|
|
|
151
|
-
return
|
|
153
|
+
return input_data
|
apify/_models.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# ruff: noqa: TCH001 TCH002 TCH003 (Pydantic)
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
|
|
8
|
+
|
|
9
|
+
from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType
|
|
10
|
+
from crawlee._utils.models import timedelta_ms
|
|
11
|
+
from crawlee._utils.urls import validate_http_url
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Webhook(BaseModel):
    """Definition of a webhook: which events trigger it, which URL it calls and an optional payload template."""

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    event_types: Annotated[
        list[WebhookEventType],
        Field(description='Event types that should trigger the webhook'),
    ]
    request_url: Annotated[
        str,
        Field(description='URL that the webhook should call'),
        # The raw value is passed through `validate_http_url` before the `str` validation.
        BeforeValidator(validate_http_url),
    ]
    payload_template: Annotated[
        str | None,
        Field(description='Template for the payload sent by the webhook'),
    ] = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ActorRunMeta(BaseModel):
    """Metadata of an Actor run; holds the run's origin."""

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    origin: Annotated[MetaOrigin, Field()]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ActorRunStats(BaseModel):
    """Statistics of an Actor run.

    Fields are parsed from the camelCase keys given as aliases. Only `input_body_len`, `restart_count`,
    `resurrect_count` and `compute_units` are required; the remaining values default to `None`.
    """

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` (accepting field names next to aliases) was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    input_body_len: Annotated[int, Field(alias='inputBodyLen')]
    restart_count: Annotated[int, Field(alias='restartCount')]
    resurrect_count: Annotated[int, Field(alias='resurrectCount')]
    mem_avg_bytes: Annotated[float | None, Field(alias='memAvgBytes')] = None
    mem_max_bytes: Annotated[int | None, Field(alias='memMaxBytes')] = None
    mem_current_bytes: Annotated[int | None, Field(alias='memCurrentBytes')] = None
    cpu_avg_usage: Annotated[float | None, Field(alias='cpuAvgUsage')] = None
    cpu_max_usage: Annotated[float | None, Field(alias='cpuMaxUsage')] = None
    cpu_current_usage: Annotated[float | None, Field(alias='cpuCurrentUsage')] = None
    net_rx_bytes: Annotated[int | None, Field(alias='netRxBytes')] = None
    net_tx_bytes: Annotated[int | None, Field(alias='netTxBytes')] = None
    # `durationMillis` goes through crawlee's `timedelta_ms` type, `runTimeSecs` through a plain `timedelta`.
    duration: Annotated[timedelta_ms | None, Field(alias='durationMillis')] = None
    run_time: Annotated[timedelta | None, Field(alias='runTimeSecs')] = None
    metamorph: Annotated[int | None, Field(alias='metamorph')] = None
    compute_units: Annotated[float, Field(alias='computeUnits')]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ActorRunOptions(BaseModel):
    """Options of an Actor run: build, timeout and memory/disk sizes (all required)."""

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` (accepting field names next to aliases) was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    build: str
    timeout: Annotated[timedelta, Field(alias='timeoutSecs')]
    memory_mbytes: Annotated[int, Field(alias='memoryMbytes')]
    disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class ActorRunUsage(BaseModel):
    """Per-resource usage figures of an Actor run.

    Every field is optional and is parsed from the upper-case key given as its alias.
    """

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` (accepting field names next to aliases) was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None
    dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None
    dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None
    key_value_store_reads: Annotated[float | None, Field(alias='KEY_VALUE_STORE_READS')] = None
    key_value_store_writes: Annotated[float | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None
    key_value_store_lists: Annotated[float | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None
    request_queue_reads: Annotated[float | None, Field(alias='REQUEST_QUEUE_READS')] = None
    request_queue_writes: Annotated[float | None, Field(alias='REQUEST_QUEUE_WRITES')] = None
    data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None
    data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None
    proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None
    proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class ActorRun(BaseModel):
    """Description of a single Actor run.

    Fields are parsed from the camelCase keys given as aliases. Nested `meta`, `stats`, `options` and the
    optional `usage` / `usage_usd` values are validated into their own models.
    """

    # Pydantic v2 reads model configuration from `model_config`; the dunder-style `__model_config__`
    # is not picked up, so `populate_by_name` (accepting field names next to aliases) was never in effect.
    model_config = ConfigDict(populate_by_name=True)

    id: Annotated[str, Field(alias='id')]
    act_id: Annotated[str, Field(alias='actId')]
    user_id: Annotated[str, Field(alias='userId')]
    actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None
    started_at: Annotated[datetime, Field(alias='startedAt')]
    finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None
    status: Annotated[ActorJobStatus, Field(alias='status')]
    status_message: Annotated[str | None, Field(alias='statusMessage')] = None
    is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None
    meta: Annotated[ActorRunMeta, Field(alias='meta')]
    stats: Annotated[ActorRunStats, Field(alias='stats')]
    options: Annotated[ActorRunOptions, Field(alias='options')]
    build_id: Annotated[str, Field(alias='buildId')]
    exit_code: Annotated[int | None, Field(alias='exitCode')] = None
    default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')]
    default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')]
    default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')]
    build_number: Annotated[str | None, Field(alias='buildNumber')] = None
    container_url: Annotated[str, Field(alias='containerUrl')]
    is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None
    git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None
    # `usage` and `usage_usd` share one model; `usage_total_usd` is a single float.
    usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None
    usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None
    usage_usd: Annotated[ActorRunUsage | None, Field(alias='usageUsd')] = None
|