apify-2.5.0b3-py3-none-any.whl → apify-2.5.0b8-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +1 -1
- apify/_configuration.py +15 -2
- apify/_platform_event_manager.py +3 -3
- apify/_utils.py +16 -6
- apify/scrapy/_async_thread.py +1 -1
- apify/storages/_request_list.py +9 -4
- {apify-2.5.0b3.dist-info → apify-2.5.0b8.dist-info}/METADATA +1 -1
- {apify-2.5.0b3.dist-info → apify-2.5.0b8.dist-info}/RECORD +10 -10
- {apify-2.5.0b3.dist-info → apify-2.5.0b8.dist-info}/WHEEL +0 -0
- {apify-2.5.0b3.dist-info → apify-2.5.0b8.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -1138,7 +1138,7 @@ class _ActorType:
|
|
|
1138
1138
|
return proxy_configuration
|
|
1139
1139
|
|
|
1140
1140
|
def _get_default_exit_process(self) -> bool:
|
|
1141
|
-
"""
|
|
1141
|
+
"""Return False for IPython, Pytest, and Scrapy environments, True otherwise."""
|
|
1142
1142
|
if is_running_in_ipython():
|
|
1143
1143
|
self.log.debug('Running in IPython, setting default `exit_process` to False.')
|
|
1144
1144
|
return False
|
apify/_configuration.py
CHANGED
|
@@ -5,8 +5,8 @@ from decimal import Decimal
|
|
|
5
5
|
from logging import getLogger
|
|
6
6
|
from typing import Annotated, Any
|
|
7
7
|
|
|
8
|
-
from pydantic import AliasChoices, BeforeValidator, Field
|
|
9
|
-
from typing_extensions import deprecated
|
|
8
|
+
from pydantic import AliasChoices, BeforeValidator, Field, model_validator
|
|
9
|
+
from typing_extensions import Self, deprecated
|
|
10
10
|
|
|
11
11
|
from crawlee._utils.models import timedelta_ms
|
|
12
12
|
from crawlee._utils.urls import validate_http_url
|
|
@@ -365,6 +365,19 @@ class Configuration(CrawleeConfiguration):
|
|
|
365
365
|
),
|
|
366
366
|
] = None
|
|
367
367
|
|
|
368
|
+
@model_validator(mode='after')
|
|
369
|
+
def disable_browser_sandbox_on_platform(self) -> Self:
|
|
370
|
+
"""Disable the browser sandbox mode when running on the Apify platform.
|
|
371
|
+
|
|
372
|
+
Running in environment where `is_at_home` is True does not benefit from browser sandbox as it is already running
|
|
373
|
+
in a container. It can be on the contrary undesired as the process in the container might be running as root and
|
|
374
|
+
this will crash chromium that was started with browser sandbox mode.
|
|
375
|
+
"""
|
|
376
|
+
if self.is_at_home and not self.disable_browser_sandbox:
|
|
377
|
+
self.disable_browser_sandbox = True
|
|
378
|
+
logger.warning('Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True.')
|
|
379
|
+
return self
|
|
380
|
+
|
|
368
381
|
@classmethod
|
|
369
382
|
def get_global_configuration(cls) -> Configuration:
|
|
370
383
|
"""Retrieve the global instance of the configuration.
|
apify/_platform_event_manager.py
CHANGED
|
@@ -48,11 +48,11 @@ class SystemInfoEventData(BaseModel):
|
|
|
48
48
|
is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
|
|
49
49
|
created_at: Annotated[datetime, Field(alias='createdAt')]
|
|
50
50
|
|
|
51
|
-
def to_crawlee_format(self) -> EventSystemInfoData:
|
|
51
|
+
def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
|
|
52
52
|
return EventSystemInfoData.model_validate(
|
|
53
53
|
{
|
|
54
54
|
'cpu_info': {
|
|
55
|
-
'used_ratio': self.cpu_current_usage / 100,
|
|
55
|
+
'used_ratio': (self.cpu_current_usage / 100) / dedicated_cpus,
|
|
56
56
|
'created_at': self.created_at,
|
|
57
57
|
},
|
|
58
58
|
'memory_info': {
|
|
@@ -218,7 +218,7 @@ class PlatformEventManager(EventManager):
|
|
|
218
218
|
event=parsed_message.name,
|
|
219
219
|
event_data=parsed_message.data
|
|
220
220
|
if not isinstance(parsed_message.data, SystemInfoEventData)
|
|
221
|
-
else parsed_message.data.to_crawlee_format(),
|
|
221
|
+
else parsed_message.data.to_crawlee_format(self._config.dedicated_cpus or 1),
|
|
222
222
|
)
|
|
223
223
|
|
|
224
224
|
if parsed_message.name == Event.MIGRATING:
|
apify/_utils.py
CHANGED
|
@@ -31,10 +31,16 @@ GroupName = Literal['Classes', 'Abstract classes', 'Interfaces', 'Data structure
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
|
|
34
|
-
"""
|
|
34
|
+
"""Mark a symbol for rendering and grouping in documentation.
|
|
35
35
|
|
|
36
|
-
This decorator is used
|
|
36
|
+
This decorator is used solely for documentation purposes and does not modify the behavior
|
|
37
37
|
of the decorated callable.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
group_name: The documentation group to which the symbol belongs.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
The original callable without modification.
|
|
38
44
|
"""
|
|
39
45
|
|
|
40
46
|
def wrapper(func: Callable) -> Callable:
|
|
@@ -44,12 +50,16 @@ def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
|
|
|
44
50
|
|
|
45
51
|
|
|
46
52
|
def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
|
|
47
|
-
"""
|
|
53
|
+
"""Rename a symbol for documentation rendering.
|
|
48
54
|
|
|
49
|
-
This
|
|
55
|
+
This decorator modifies only the displayed name of the symbol in the generated documentation
|
|
56
|
+
and does not affect its runtime behavior.
|
|
50
57
|
|
|
51
|
-
|
|
52
|
-
|
|
58
|
+
Args:
|
|
59
|
+
symbol_name: The name to be used in the documentation.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The original callable without modification.
|
|
53
63
|
"""
|
|
54
64
|
|
|
55
65
|
def wrapper(func: Callable) -> Callable:
|
apify/scrapy/_async_thread.py
CHANGED
|
@@ -113,7 +113,7 @@ class AsyncThread:
|
|
|
113
113
|
await asyncio.gather(*tasks, return_exceptions=True)
|
|
114
114
|
|
|
115
115
|
def _force_exit_event_loop(self) -> None:
|
|
116
|
-
"""
|
|
116
|
+
"""Shut down the event loop and its thread forcefully."""
|
|
117
117
|
try:
|
|
118
118
|
logger.info('Forced shutdown of the event loop and its thread...')
|
|
119
119
|
self._eventloop.call_soon_threadsafe(self._eventloop.stop)
|
apify/storages/_request_list.py
CHANGED
|
@@ -51,7 +51,7 @@ class RequestList(CrawleeRequestList):
|
|
|
51
51
|
request_list_sources_input: list[dict[str, Any]] | None = None,
|
|
52
52
|
http_client: HttpClient | None = None,
|
|
53
53
|
) -> RequestList:
|
|
54
|
-
"""
|
|
54
|
+
"""Initialize a new instance from request list source input.
|
|
55
55
|
|
|
56
56
|
Args:
|
|
57
57
|
name: Name of the returned RequestList.
|
|
@@ -108,9 +108,10 @@ class RequestList(CrawleeRequestList):
|
|
|
108
108
|
|
|
109
109
|
@staticmethod
|
|
110
110
|
async def _fetch_requests_from_url(
|
|
111
|
-
remote_url_requests_inputs: list[_RequestsFromUrlInput],
|
|
111
|
+
remote_url_requests_inputs: list[_RequestsFromUrlInput],
|
|
112
|
+
http_client: HttpClient,
|
|
112
113
|
) -> list[Request]:
|
|
113
|
-
"""
|
|
114
|
+
"""Create list of requests from url.
|
|
114
115
|
|
|
115
116
|
Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
|
|
116
117
|
callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from
|
|
@@ -119,7 +120,11 @@ class RequestList(CrawleeRequestList):
|
|
|
119
120
|
created_requests: list[Request] = []
|
|
120
121
|
|
|
121
122
|
def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
|
|
122
|
-
"""
|
|
123
|
+
"""Extract links from response body and use them to create `Request` objects.
|
|
124
|
+
|
|
125
|
+
Use the regular expression to find all matching links in the response body, then create `Request`
|
|
126
|
+
objects from these links and the provided input attributes.
|
|
127
|
+
"""
|
|
123
128
|
matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
|
|
124
129
|
created_requests.extend(
|
|
125
130
|
[
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
|
|
2
|
-
apify/_actor.py,sha256=
|
|
2
|
+
apify/_actor.py,sha256=k6N1-A9ziheA4z9XuNx9ddJ0EG3g-KGU_Dsy--hiPng,49704
|
|
3
3
|
apify/_charging.py,sha256=m7hJIQde4M7vS4g_4hsNRP5xHNXjYQ8MyqOEGeNb7VY,12267
|
|
4
|
-
apify/_configuration.py,sha256=
|
|
4
|
+
apify/_configuration.py,sha256=AVztnlaBkHxBs0VkLIUhFHWwvlgHY-koMNUc0aqw9ZI,11908
|
|
5
5
|
apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
|
|
6
6
|
apify/_crypto.py,sha256=8BgeQC0ZhYP5KdmLxxLQAW87Gq-Z4HlREbYGXr46w0U,6607
|
|
7
7
|
apify/_models.py,sha256=-Y0rljBJWxMMCp8iDCTG4UV3bEvNZzp-kx2SYbPfeIY,7919
|
|
8
|
-
apify/_platform_event_manager.py,sha256=
|
|
8
|
+
apify/_platform_event_manager.py,sha256=igi9dRTfB7t0mRBM1bCfzMh7RBbr5adrJ0iRymUQ8S8,7990
|
|
9
9
|
apify/_proxy_configuration.py,sha256=c-O6_PZ9pUD-i4J0RFEKTtfyJPP2rTRJJA1TH8NVsV8,13189
|
|
10
|
-
apify/_utils.py,sha256=
|
|
10
|
+
apify/_utils.py,sha256=7YL7VhmNND0XARsXtPQWWQCyK4825n-ZVB8n9cgWm0A,1838
|
|
11
11
|
apify/log.py,sha256=j-E4t-WeA93bc1NCQRG8sTntehQCiiN8ia-MdQe3_Ts,1291
|
|
12
12
|
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
|
|
@@ -21,7 +21,7 @@ apify/apify_storage_client/_request_queue_collection_client.py,sha256=MTLM2cG0tx
|
|
|
21
21
|
apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
|
|
23
23
|
apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
|
|
24
|
-
apify/scrapy/_async_thread.py,sha256=
|
|
24
|
+
apify/scrapy/_async_thread.py,sha256=8xif_fWce7vaMLuDc-XuDzZlHbCI-NY61YXdP2P27QY,4753
|
|
25
25
|
apify/scrapy/_logging_config.py,sha256=hFq90fNtZyjjJA7w2k-mtuEC8xCFiBMTalbwPDcaig4,2022
|
|
26
26
|
apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
apify/scrapy/requests.py,sha256=vZEU1IwNotCkqZ-b-LfM15iAyr1LnZ_fF8oWyMFVVFI,6553
|
|
@@ -36,9 +36,9 @@ apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1cc
|
|
|
36
36
|
apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
|
|
37
37
|
apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
apify/storages/__init__.py,sha256=FW-z6ubuPnHGM-Wp15T8mR5q6lnpDGrCW-IkgZd5L30,177
|
|
39
|
-
apify/storages/_request_list.py,sha256=
|
|
39
|
+
apify/storages/_request_list.py,sha256=FCC4X2MX2V8vLZBCUi5Q1qg9w62y9UkF4ptOqyPVhG8,6052
|
|
40
40
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
-
apify-2.5.
|
|
42
|
-
apify-2.5.
|
|
43
|
-
apify-2.5.
|
|
44
|
-
apify-2.5.
|
|
41
|
+
apify-2.5.0b8.dist-info/METADATA,sha256=MS5o8bVrRaTSqX5pDbOcj9AidYS0uTdMYdfZJKkqNHs,21558
|
|
42
|
+
apify-2.5.0b8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
43
|
+
apify-2.5.0b8.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
44
|
+
apify-2.5.0b8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|