apify 1.7.0b1__py3-none-any.whl → 2.2.0b14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +19 -4
- apify/_actor.py +1030 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.0b14.dist-info/METADATA +211 -0
- apify-2.2.0b14.dist-info/RECORD +38 -0
- {apify-1.7.0b1.dist-info → apify-2.2.0b14.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.0b1.dist-info/METADATA +0 -149
- apify-1.7.0b1.dist-info/RECORD +0 -41
- apify-1.7.0b1.dist-info/top_level.txt +0 -1
- {apify-1.7.0b1.dist-info → apify-2.2.0b14.dist-info}/LICENSE +0 -0
apify/_configuration.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from logging import getLogger
|
|
5
|
+
from typing import Annotated, Any
|
|
6
|
+
|
|
7
|
+
from pydantic import AliasChoices, BeforeValidator, Field
|
|
8
|
+
from typing_extensions import deprecated
|
|
9
|
+
|
|
10
|
+
from crawlee._utils.models import timedelta_ms
|
|
11
|
+
from crawlee._utils.urls import validate_http_url
|
|
12
|
+
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
13
|
+
|
|
14
|
+
from apify._utils import docs_group
|
|
15
|
+
|
|
16
|
+
logger = getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _transform_to_list(value: Any) -> list[str] | None:
|
|
20
|
+
if value is None:
|
|
21
|
+
return None
|
|
22
|
+
if not value:
|
|
23
|
+
return []
|
|
24
|
+
return value if isinstance(value, list) else str(value).split(',')
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@docs_group('Classes')
class Configuration(CrawleeConfiguration):
    """A class for specifying the configuration of an Actor.

    Can be used either globally via `Configuration.get_global_configuration()`,
    or it can be specific to each `Actor` instance on the `actor.config` property.

    Every option is declared as an annotated field with a default value. Most fields accept one or more
    aliases (`alias` / `validation_alias`) on input.

    NOTE(review): the aliases presumably correspond to (case-insensitive) environment variable names handled
    by `CrawleeConfiguration` - confirm against the base class.
    """

    actor_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_id',
                'apify_actor_id',
                'apify_act_id',
            ),
            description='ID of the Actor',
        ),
    ] = None

    actor_full_name: Annotated[
        str | None,
        Field(
            description='Full name of the Actor',
        ),
    ] = None

    actor_run_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_run_id',
                'apify_actor_run_id',
                'apify_act_run_id',
            ),
            description='ID of the Actor run',
        ),
    ] = None

    actor_build_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_build_id',
                'apify_actor_build_id',
            ),
            description='ID of the Actor build used in the run',
        ),
    ] = None

    actor_build_number: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_build_number',
                'apify_actor_build_number',
            ),
            description='Build number of the Actor build used in the run',
        ),
    ] = None

    # The raw value is normalized by `_transform_to_list` before validation, so a comma-separated string
    # is accepted in addition to a list.
    actor_build_tags: Annotated[
        list[str] | None,
        Field(
            description='Build tags of the Actor build used in the run',
        ),
        BeforeValidator(_transform_to_list),
    ] = None

    actor_task_id: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_task_id',
                'apify_actor_task_id',
            ),
            description='ID of the Actor task. Empty if Actor is run outside of any task, e.g. directly using the API',
        ),
    ] = None

    actor_events_ws_url: Annotated[
        str | None,
        Field(
            validation_alias=AliasChoices(
                'actor_events_websocket_url',
                'apify_actor_events_ws_url',
            ),
            description='Websocket URL where Actor may listen for events from Actor platform',
        ),
    ] = None

    api_base_url: Annotated[
        str,
        Field(
            alias='apify_api_base_url',
            description='Internal URL of the Apify API. May be used to interact with the platform programmatically',
        ),
    ] = 'https://api.apify.com'

    api_public_base_url: Annotated[
        str,
        Field(
            alias='apify_api_public_base_url',
            description='Public URL of the Apify API. May be used to link to REST API resources',
        ),
    ] = 'https://api.apify.com'

    dedicated_cpus: Annotated[
        float | None,
        Field(
            alias='apify_dedicated_cpus',
            description='Number of CPU cores reserved for the actor, based on allocated memory',
        ),
    ] = None

    # Any falsy raw value (e.g. an empty string) is coerced to `False` before validation.
    disable_outdated_warning: Annotated[
        bool,
        Field(
            alias='apify_disable_outdated_warning',
            description='Controls the display of outdated SDK version warnings',
        ),
        BeforeValidator(lambda val: val or False),
    ] = False

    fact: Annotated[str | None, Field(alias='apify_fact')] = None

    input_key: Annotated[
        str,
        Field(
            validation_alias=AliasChoices(
                'actor_input_key',
                'apify_input_key',
                'crawlee_input_key',
            ),
            description='Key of the record in the default key-value store that holds the Actor input',
        ),
    ] = 'INPUT'

    input_secrets_private_key_file: Annotated[
        str | None,
        Field(
            alias='apify_input_secrets_private_key_file',
            description='Path to the secret key used to decrypt Secret inputs.',
        ),
    ] = None

    input_secrets_private_key_passphrase: Annotated[
        str | None,
        Field(
            alias='apify_input_secrets_private_key_passphrase',
            description='Passphrase for the input secret key',
        ),
    ] = None

    is_at_home: Annotated[
        bool,
        Field(
            alias='apify_is_at_home',
            description='True if the Actor is running on Apify servers',
        ),
    ] = False

    latest_sdk_version: Annotated[
        str | None,
        Field(
            alias='apify_sdk_latest_version',
            description='Specifies the most recent release version of the Apify SDK for Javascript. Used for '
            'checking for updates.',
        ),
        deprecated('SDK version checking is not supported for the Python SDK'),
    ] = None

    log_format: Annotated[
        str | None,
        Field(alias='apify_log_format'),
        deprecated('Adjust the log format in code instead'),
    ] = None

    # Any falsy raw value (empty string, 0) is treated as "no limit" and becomes `None`.
    max_paid_dataset_items: Annotated[
        int | None,
        Field(
            alias='actor_max_paid_dataset_items',
            description='For paid-per-result Actors, the user-set limit on returned results. Do not exceed this limit',
        ),
        BeforeValidator(lambda val: val or None),
    ] = None

    # Any falsy raw value (empty string, 0) is treated as "no limit" and becomes `None`.
    max_total_charge_usd: Annotated[
        float | None,
        Field(
            alias='actor_max_total_charge_usd',
            description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit',
        ),
        BeforeValidator(lambda val: val or None),
    ] = None

    meta_origin: Annotated[
        str | None,
        Field(
            alias='apify_meta_origin',
            description='Specifies how an Actor run was started',
        ),
    ] = None

    metamorph_after_sleep: Annotated[
        timedelta_ms,
        Field(
            alias='apify_metamorph_after_sleep_millis',
            description='How long the Actor needs to wait before exiting after triggering a metamorph',
        ),
    ] = timedelta(minutes=5)

    proxy_hostname: Annotated[
        str,
        Field(
            alias='apify_proxy_hostname',
            description='Hostname of the Apify proxy',
        ),
    ] = 'proxy.apify.com'

    proxy_password: Annotated[
        str | None,
        Field(
            alias='apify_proxy_password',
            description='Password to the Apify proxy',
        ),
    ] = None

    proxy_port: Annotated[
        int,
        Field(
            alias='apify_proxy_port',
            description='Port to communicate with the Apify proxy',
        ),
    ] = 8000

    proxy_status_url: Annotated[
        str,
        Field(
            alias='apify_proxy_status_url',
            description='URL for retrieving proxy status information',
        ),
    ] = 'http://proxy.apify.com'

    started_at: Annotated[
        datetime | None,
        Field(
            validation_alias=AliasChoices(
                'actor_started_at',
                'apify_started_at',
            ),
            description='Date when the Actor was started',
        ),
    ] = None

    timeout_at: Annotated[
        datetime | None,
        Field(
            validation_alias=AliasChoices(
                'actor_timeout_at',
                'apify_timeout_at',
            ),
            description='Date when the Actor will time out',
        ),
        BeforeValidator(lambda val: val if val != '' else None),  # We should accept empty environment variables as well
    ] = None

    standby_port: Annotated[
        int,
        Field(
            alias='actor_standby_port',
            description='TCP port for the Actor to start an HTTP server to receive messages in the Actor Standby mode',
        ),
        deprecated('Use `web_server_port` instead'),
    ] = 4321

    standby_url: Annotated[
        str,
        BeforeValidator(validate_http_url),
        Field(
            alias='actor_standby_url',
            description='URL for accessing web servers of Actor runs in Standby mode',
        ),
    ] = 'http://localhost'

    token: Annotated[
        str | None,
        Field(
            alias='apify_token',
            description='API token of the user who started the Actor',
        ),
    ] = None

    user_id: Annotated[
        str | None,
        Field(
            alias='apify_user_id',
            description='ID of the user who started the Actor. May differ from the Actor owner',
        ),
    ] = None

    web_server_port: Annotated[
        int,
        Field(
            validation_alias=AliasChoices(
                'actor_web_server_port',
                'apify_container_port',
            ),
            # The two literals are concatenated implicitly, so the first one must end with a separator.
            description='TCP port for the Actor to start an HTTP server on. '
            'This server can be used to receive external messages or expose monitoring and control interfaces',
        ),
    ] = 4321

    web_server_url: Annotated[
        str,
        Field(
            validation_alias=AliasChoices(
                'actor_web_server_url',
                'apify_container_url',
            ),
            description='Unique public URL for accessing a specific Actor run web server from the outside world',
        ),
    ] = 'http://localhost:4321'

    workflow_key: Annotated[
        str | None,
        Field(
            alias='apify_workflow_key',
            description='Identifier used for grouping related runs and API calls together',
        ),
    ] = None

    @classmethod
    def get_global_configuration(cls) -> Configuration:
        """Retrieve the global instance of the configuration.

        Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()`
        instead.

        Returns:
            A configuration object created by calling `cls()` with no arguments.
        """
        # NOTE(review): this constructs a new instance on every call rather than returning a cached one -
        # confirm that this is the intended meaning of "global instance".
        return cls()
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
# Monkey-patch the base class so that `CrawleeConfiguration.get_global_configuration()` returns the extended `Configuration`
|
|
370
|
+
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign]
|
apify/_consts.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import timedelta
|
|
5
|
+
|
|
6
|
+
# Timeout constant for event listeners (5 seconds).
# NOTE(review): presumably the maximum wait for listeners to finish - confirm at the call site.
EVENT_LISTENERS_TIMEOUT = timedelta(seconds=5)

# Character class for a base64 payload followed by up to three `=` padding characters.
BASE64_REGEXP = '[-A-Za-z0-9+/]*={0,3}'

# Literal marker that starts every encrypted input value.
ENCRYPTED_INPUT_VALUE_PREFIX = 'ENCRYPTED_VALUE'

# Matches `<prefix>:<base64>:<base64>`; the two base64 parts are captured as groups 1 and 2.
ENCRYPTED_INPUT_VALUE_REGEXP = re.compile(
    '^{prefix}:({payload}):({payload})$'.format(
        prefix=ENCRYPTED_INPUT_VALUE_PREFIX,
        payload=BASE64_REGEXP,
    )
)
|
apify/_crypto.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import base64
|
|
4
|
-
import secrets
|
|
5
4
|
from typing import Any
|
|
6
5
|
|
|
7
|
-
from apify_shared.utils import ignore_docs
|
|
8
6
|
from cryptography.exceptions import InvalidTag as InvalidTagException
|
|
9
7
|
from cryptography.hazmat.primitives import hashes, serialization
|
|
10
8
|
from cryptography.hazmat.primitives.asymmetric import padding, rsa
|
|
11
9
|
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
|
12
10
|
|
|
13
|
-
from
|
|
11
|
+
from apify_shared.utils import ignore_docs
|
|
12
|
+
from crawlee._utils.crypto import crypto_random_object_id
|
|
13
|
+
|
|
14
|
+
from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP
|
|
14
15
|
|
|
15
16
|
ENCRYPTION_KEY_LENGTH = 32
|
|
16
17
|
ENCRYPTION_IV_LENGTH = 16
|
|
@@ -25,11 +26,10 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
|
|
|
25
26
|
It returns the encrypted password and encrypted value in BASE64 format.
|
|
26
27
|
|
|
27
28
|
Args:
|
|
28
|
-
value
|
|
29
|
-
public_key
|
|
29
|
+
value: The value which should be encrypted.
|
|
30
|
+
public_key: Public key to use for encryption.
|
|
30
31
|
|
|
31
|
-
Returns:
|
|
32
|
-
disc: Encrypted password and value.
|
|
32
|
+
Returns: Encrypted password and value.
|
|
33
33
|
"""
|
|
34
34
|
key_bytes = crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
|
|
35
35
|
initialized_vector_bytes = crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
|
|
@@ -37,8 +37,15 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
|
|
|
37
37
|
|
|
38
38
|
password_bytes = key_bytes + initialized_vector_bytes
|
|
39
39
|
|
|
40
|
-
# NOTE: Auth Tag is appended to the end of the encrypted data, it has length of 16 bytes and ensures integrity
|
|
41
|
-
|
|
40
|
+
# NOTE: Auth Tag is appended to the end of the encrypted data, it has length of 16 bytes and ensures integrity
|
|
41
|
+
# of the data.
|
|
42
|
+
cipher = Cipher(
|
|
43
|
+
algorithms.AES(key_bytes),
|
|
44
|
+
modes.GCM(
|
|
45
|
+
initialized_vector_bytes,
|
|
46
|
+
min_tag_length=ENCRYPTION_AUTH_TAG_LENGTH,
|
|
47
|
+
),
|
|
48
|
+
)
|
|
42
49
|
encryptor = cipher.encryptor()
|
|
43
50
|
encrypted_value_bytes = encryptor.update(value_bytes) + encryptor.finalize()
|
|
44
51
|
encrypted_password_bytes = public_key.encrypt(
|
|
@@ -65,12 +72,11 @@ def private_decrypt(
|
|
|
65
72
|
"""Decrypts the given encrypted value using the private key and password.
|
|
66
73
|
|
|
67
74
|
Args:
|
|
68
|
-
encrypted_password
|
|
69
|
-
encrypted_value
|
|
70
|
-
private_key
|
|
75
|
+
encrypted_password: Password used to encrypt the private key encoded as base64 string.
|
|
76
|
+
encrypted_value: Encrypted value to decrypt as base64 string.
|
|
77
|
+
private_key: Private key to use for decryption.
|
|
71
78
|
|
|
72
|
-
Returns:
|
|
73
|
-
str: Decrypted value.
|
|
79
|
+
Returns: Decrypted value.
|
|
74
80
|
"""
|
|
75
81
|
encrypted_password_bytes = base64.b64decode(encrypted_password.encode('utf-8'))
|
|
76
82
|
encrypted_value_bytes = base64.b64decode(encrypted_value.encode('utf-8'))
|
|
@@ -95,7 +101,9 @@ def private_decrypt(
|
|
|
95
101
|
initialization_vector_bytes = password_bytes[ENCRYPTION_KEY_LENGTH:]
|
|
96
102
|
|
|
97
103
|
try:
|
|
98
|
-
cipher = Cipher(
|
|
104
|
+
cipher = Cipher(
|
|
105
|
+
algorithms.AES(encryption_key_bytes), modes.GCM(initialization_vector_bytes, authentication_tag_bytes)
|
|
106
|
+
)
|
|
99
107
|
decryptor = cipher.decryptor()
|
|
100
108
|
decipher_bytes = decryptor.update(encrypted_data_bytes) + decryptor.finalize()
|
|
101
109
|
except InvalidTagException as exc:
|
|
@@ -106,6 +114,7 @@ def private_decrypt(
|
|
|
106
114
|
return decipher_bytes.decode('utf-8')
|
|
107
115
|
|
|
108
116
|
|
|
117
|
+
@ignore_docs
|
|
109
118
|
def load_private_key(private_key_file_base64: str, private_key_password: str) -> rsa.RSAPrivateKey:
|
|
110
119
|
private_key = serialization.load_pem_private_key(
|
|
111
120
|
base64.b64decode(private_key_file_base64.encode('utf-8')),
|
|
@@ -125,27 +134,22 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
|
|
|
125
134
|
return public_key
|
|
126
135
|
|
|
127
136
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
|
|
131
|
-
return ''.join(secrets.choice(chars) for _ in range(length))
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input: Any) -> Any: # noqa: A002
|
|
137
|
+
@ignore_docs
def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> Any:
    """Decrypt encrypted string values found at the top level of the input.

    Args:
        private_key: Private key passed on to `private_decrypt`.
        input_data: The input to process. Anything other than a dict is returned untouched.

    Returns:
        The same `input_data` object. For a dict, every string value that fully matches
        `ENCRYPTED_INPUT_VALUE_REGEXP` has been replaced in place by its decrypted form.
    """
    if not isinstance(input_data, dict):
        return input_data

    for field_name, field_value in input_data.items():
        if not isinstance(field_value, str):
            continue

        found = ENCRYPTED_INPUT_VALUE_REGEXP.fullmatch(field_value)
        if found is None:
            continue

        # Group 1 carries the encrypted password, group 2 the encrypted value.
        password_part, value_part = found.group(1, 2)
        # Only existing keys are reassigned, so the dict size stays constant during iteration.
        input_data[field_name] = private_decrypt(
            password_part,
            value_part,
            private_key=private_key,
        )

    return input_data
|
apify/_models.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field
|
|
7
|
+
|
|
8
|
+
from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType
|
|
9
|
+
from crawlee._utils.models import timedelta_ms
|
|
10
|
+
from crawlee._utils.urls import validate_http_url
|
|
11
|
+
|
|
12
|
+
from apify._utils import docs_group
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@docs_group('Data structures')
class Webhook(BaseModel):
    """Definition of a webhook: the triggering event types, the target URL and an optional payload template."""

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored.
    model_config = ConfigDict(populate_by_name=True)

    event_types: Annotated[
        list[WebhookEventType],
        Field(description='Event types that should trigger the webhook'),
    ]
    # The raw value is passed through `validate_http_url` before validation.
    request_url: Annotated[
        str,
        Field(description='URL that the webhook should call'),
        BeforeValidator(validate_http_url),
    ]
    payload_template: Annotated[
        str | None,
        Field(description='Template for the payload sent by the webhook'),
    ] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@docs_group('Data structures')
class ActorRunMeta(BaseModel):
    """Metadata of an Actor run; holds the run's `origin`."""

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored.
    model_config = ConfigDict(populate_by_name=True)

    origin: Annotated[MetaOrigin, Field()]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@docs_group('Data structures')
class ActorRunStats(BaseModel):
    """Statistics of an Actor run.

    Each field is populated from its camelCase alias; because `populate_by_name` is enabled, the Python
    field name is accepted as well.
    """

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored,
    # which would make population by field name fail for the aliased fields below.
    model_config = ConfigDict(populate_by_name=True)

    input_body_len: Annotated[int, Field(alias='inputBodyLen')]
    restart_count: Annotated[int, Field(alias='restartCount')]
    resurrect_count: Annotated[int, Field(alias='resurrectCount')]
    mem_avg_bytes: Annotated[float | None, Field(alias='memAvgBytes')] = None
    mem_max_bytes: Annotated[int | None, Field(alias='memMaxBytes')] = None
    mem_current_bytes: Annotated[int | None, Field(alias='memCurrentBytes')] = None
    cpu_avg_usage: Annotated[float | None, Field(alias='cpuAvgUsage')] = None
    cpu_max_usage: Annotated[float | None, Field(alias='cpuMaxUsage')] = None
    cpu_current_usage: Annotated[float | None, Field(alias='cpuCurrentUsage')] = None
    net_rx_bytes: Annotated[int | None, Field(alias='netRxBytes')] = None
    net_tx_bytes: Annotated[int | None, Field(alias='netTxBytes')] = None
    # NOTE(review): `timedelta_ms` presumably interprets the raw number as milliseconds - confirm in crawlee.
    duration: Annotated[timedelta_ms | None, Field(alias='durationMillis')] = None
    run_time: Annotated[timedelta | None, Field(alias='runTimeSecs')] = None
    metamorph: Annotated[int | None, Field(alias='metamorph')] = None
    compute_units: Annotated[float, Field(alias='computeUnits')]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@docs_group('Data structures')
class ActorRunOptions(BaseModel):
    """Options of an Actor run: build, timeout, and memory and disk sizes.

    Aliased fields are populated from their camelCase alias; because `populate_by_name` is enabled, the
    Python field name is accepted as well.
    """

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored,
    # which would make population by field name fail for the aliased fields below.
    model_config = ConfigDict(populate_by_name=True)

    build: str
    timeout: Annotated[timedelta, Field(alias='timeoutSecs')]
    memory_mbytes: Annotated[int, Field(alias='memoryMbytes')]
    disk_mbytes: Annotated[int, Field(alias='diskMbytes')]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@docs_group('Data structures')
class ActorRunUsage(BaseModel):
    """Per-resource usage figures of an Actor run.

    Each field is optional and populated from its UPPER_SNAKE_CASE alias; because `populate_by_name` is
    enabled, the Python field name is accepted as well.
    """

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored,
    # which would make population by field name fail for the aliased fields below.
    model_config = ConfigDict(populate_by_name=True)

    actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None
    dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None
    dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None
    key_value_store_reads: Annotated[float | None, Field(alias='KEY_VALUE_STORE_READS')] = None
    key_value_store_writes: Annotated[float | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None
    key_value_store_lists: Annotated[float | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None
    request_queue_reads: Annotated[float | None, Field(alias='REQUEST_QUEUE_READS')] = None
    request_queue_writes: Annotated[float | None, Field(alias='REQUEST_QUEUE_WRITES')] = None
    data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None
    data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None
    proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None
    proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@docs_group('Data structures')
class ActorRun(BaseModel):
    """Description of a single Actor run, including its status, metadata, statistics, options and usage.

    Each field is populated from its camelCase alias; because `populate_by_name` is enabled, the Python
    field name is accepted as well.
    """

    # Pydantic reads its configuration from `model_config`; a `__model_config__` attribute is ignored,
    # which would make population by field name fail for the aliased fields below.
    model_config = ConfigDict(populate_by_name=True)

    id: Annotated[str, Field(alias='id')]
    act_id: Annotated[str, Field(alias='actId')]
    user_id: Annotated[str, Field(alias='userId')]
    actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None
    started_at: Annotated[datetime, Field(alias='startedAt')]
    finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None
    status: Annotated[ActorJobStatus, Field(alias='status')]
    status_message: Annotated[str | None, Field(alias='statusMessage')] = None
    is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None
    meta: Annotated[ActorRunMeta, Field(alias='meta')]
    stats: Annotated[ActorRunStats, Field(alias='stats')]
    options: Annotated[ActorRunOptions, Field(alias='options')]
    build_id: Annotated[str, Field(alias='buildId')]
    exit_code: Annotated[int | None, Field(alias='exitCode')] = None
    default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')]
    default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')]
    default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')]
    build_number: Annotated[str | None, Field(alias='buildNumber')] = None
    container_url: Annotated[str, Field(alias='containerUrl')]
    is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None
    git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None
    # `usage` and `usage_usd` share the `ActorRunUsage` shape.
    # NOTE(review): presumably raw amounts vs. their USD cost - confirm against the API documentation.
    usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None
    usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None
    usage_usd: Annotated[ActorRunUsage | None, Field(alias='usageUsd')] = None
|