apify 2.5.0b3__py3-none-any.whl → 2.5.0b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

apify/_actor.py CHANGED
@@ -1138,7 +1138,7 @@ class _ActorType:
1138
1138
  return proxy_configuration
1139
1139
 
1140
1140
  def _get_default_exit_process(self) -> bool:
1141
- """Returns False for IPython, Pytest, and Scrapy environments, True otherwise."""
1141
+ """Return False for IPython, Pytest, and Scrapy environments, True otherwise."""
1142
1142
  if is_running_in_ipython():
1143
1143
  self.log.debug('Running in IPython, setting default `exit_process` to False.')
1144
1144
  return False
apify/_configuration.py CHANGED
@@ -5,8 +5,8 @@ from decimal import Decimal
5
5
  from logging import getLogger
6
6
  from typing import Annotated, Any
7
7
 
8
- from pydantic import AliasChoices, BeforeValidator, Field
9
- from typing_extensions import deprecated
8
+ from pydantic import AliasChoices, BeforeValidator, Field, model_validator
9
+ from typing_extensions import Self, deprecated
10
10
 
11
11
  from crawlee._utils.models import timedelta_ms
12
12
  from crawlee._utils.urls import validate_http_url
@@ -365,6 +365,19 @@ class Configuration(CrawleeConfiguration):
365
365
  ),
366
366
  ] = None
367
367
 
368
+ @model_validator(mode='after')
369
+ def disable_browser_sandbox_on_platform(self) -> Self:
370
+ """Disable the browser sandbox mode when running on the Apify platform.
371
+
372
+ Running in environment where `is_at_home` is True does not benefit from browser sandbox as it is already running
373
+ in a container. It can be on the contrary undesired as the process in the container might be running as root and
374
+ this will crash chromium that was started with browser sandbox mode.
375
+ """
376
+ if self.is_at_home and not self.disable_browser_sandbox:
377
+ self.disable_browser_sandbox = True
378
+ logger.warning('Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True.')
379
+ return self
380
+
368
381
  @classmethod
369
382
  def get_global_configuration(cls) -> Configuration:
370
383
  """Retrieve the global instance of the configuration.
apify/_platform_event_manager.py CHANGED
@@ -48,11 +48,11 @@ class SystemInfoEventData(BaseModel):
48
48
  is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
49
49
  created_at: Annotated[datetime, Field(alias='createdAt')]
50
50
 
51
- def to_crawlee_format(self) -> EventSystemInfoData:
51
+ def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
52
52
  return EventSystemInfoData.model_validate(
53
53
  {
54
54
  'cpu_info': {
55
- 'used_ratio': self.cpu_current_usage / 100,
55
+ 'used_ratio': (self.cpu_current_usage / 100) / dedicated_cpus,
56
56
  'created_at': self.created_at,
57
57
  },
58
58
  'memory_info': {
@@ -218,7 +218,7 @@ class PlatformEventManager(EventManager):
218
218
  event=parsed_message.name,
219
219
  event_data=parsed_message.data
220
220
  if not isinstance(parsed_message.data, SystemInfoEventData)
221
- else parsed_message.data.to_crawlee_format(),
221
+ else parsed_message.data.to_crawlee_format(self._config.dedicated_cpus or 1),
222
222
  )
223
223
 
224
224
  if parsed_message.name == Event.MIGRATING:
apify/_utils.py CHANGED
@@ -31,10 +31,16 @@ GroupName = Literal['Classes', 'Abstract classes', 'Interfaces', 'Data structure
31
31
 
32
32
 
33
33
  def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
34
- """Decorator to mark symbols for rendering and grouping in documentation.
34
+ """Mark a symbol for rendering and grouping in documentation.
35
35
 
36
- This decorator is used purely for documentation purposes and does not alter the behavior
36
+ This decorator is used solely for documentation purposes and does not modify the behavior
37
37
  of the decorated callable.
38
+
39
+ Args:
40
+ group_name: The documentation group to which the symbol belongs.
41
+
42
+ Returns:
43
+ The original callable without modification.
38
44
  """
39
45
 
40
46
  def wrapper(func: Callable) -> Callable:
@@ -44,12 +50,16 @@ def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
44
50
 
45
51
 
46
52
  def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
47
- """Decorator for renaming symbols in documentation.
53
+ """Rename a symbol for documentation rendering.
48
54
 
49
- This changes the rendered name of the symbol only in the rendered web documentation.
55
+ This decorator modifies only the displayed name of the symbol in the generated documentation
56
+ and does not affect its runtime behavior.
50
57
 
51
- This decorator is used purely for documentation purposes and does not alter the behavior
52
- of the decorated callable.
58
+ Args:
59
+ symbol_name: The name to be used in the documentation.
60
+
61
+ Returns:
62
+ The original callable without modification.
53
63
  """
54
64
 
55
65
  def wrapper(func: Callable) -> Callable:
apify/scrapy/_async_thread.py CHANGED
@@ -113,7 +113,7 @@ class AsyncThread:
113
113
  await asyncio.gather(*tasks, return_exceptions=True)
114
114
 
115
115
  def _force_exit_event_loop(self) -> None:
116
- """Forcefully shut down the event loop and its thread."""
116
+ """Shut down the event loop and its thread forcefully."""
117
117
  try:
118
118
  logger.info('Forced shutdown of the event loop and its thread...')
119
119
  self._eventloop.call_soon_threadsafe(self._eventloop.stop)
apify/storages/_request_list.py CHANGED
@@ -51,7 +51,7 @@ class RequestList(CrawleeRequestList):
51
51
  request_list_sources_input: list[dict[str, Any]] | None = None,
52
52
  http_client: HttpClient | None = None,
53
53
  ) -> RequestList:
54
- """Creates RequestList from Actor input requestListSources.
54
+ """Initialize a new instance from request list source input.
55
55
 
56
56
  Args:
57
57
  name: Name of the returned RequestList.
@@ -108,9 +108,10 @@ class RequestList(CrawleeRequestList):
108
108
 
109
109
  @staticmethod
110
110
  async def _fetch_requests_from_url(
111
- remote_url_requests_inputs: list[_RequestsFromUrlInput], http_client: HttpClient
111
+ remote_url_requests_inputs: list[_RequestsFromUrlInput],
112
+ http_client: HttpClient,
112
113
  ) -> list[Request]:
113
- """Crete list of requests from url.
114
+ """Create list of requests from url.
114
115
 
115
116
  Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
116
117
  callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from
@@ -119,7 +120,11 @@ class RequestList(CrawleeRequestList):
119
120
  created_requests: list[Request] = []
120
121
 
121
122
  def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
122
- """Callback to scrape response body with regexp and create Requests from matches."""
123
+ """Extract links from response body and use them to create `Request` objects.
124
+
125
+ Use the regular expression to find all matching links in the response body, then create `Request`
126
+ objects from these links and the provided input attributes.
127
+ """
123
128
  matches = re.finditer(URL_NO_COMMAS_REGEX, task.result().read().decode('utf-8'))
124
129
  created_requests.extend(
125
130
  [
apify-2.5.0b3.dist-info/METADATA → apify-2.5.0b5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apify
3
- Version: 2.5.0b3
3
+ Version: 2.5.0b5
4
4
  Summary: Apify SDK for Python
5
5
  Project-URL: Homepage, https://docs.apify.com/sdk/python/
6
6
  Project-URL: Apify homepage, https://apify.com
apify-2.5.0b3.dist-info/RECORD → apify-2.5.0b5.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
1
1
  apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
2
- apify/_actor.py,sha256=Avo0KdWzeuZFiapNyQWwAfWWeUObrdOKbD97wCc07fc,49705
2
+ apify/_actor.py,sha256=k6N1-A9ziheA4z9XuNx9ddJ0EG3g-KGU_Dsy--hiPng,49704
3
3
  apify/_charging.py,sha256=m7hJIQde4M7vS4g_4hsNRP5xHNXjYQ8MyqOEGeNb7VY,12267
4
- apify/_configuration.py,sha256=yidcWHsu-IJ2mmLmXStKq_HHcdfQxZq7koYjlZfRnQ8,11128
4
+ apify/_configuration.py,sha256=AVztnlaBkHxBs0VkLIUhFHWwvlgHY-koMNUc0aqw9ZI,11908
5
5
  apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
6
6
  apify/_crypto.py,sha256=8BgeQC0ZhYP5KdmLxxLQAW87Gq-Z4HlREbYGXr46w0U,6607
7
7
  apify/_models.py,sha256=-Y0rljBJWxMMCp8iDCTG4UV3bEvNZzp-kx2SYbPfeIY,7919
8
- apify/_platform_event_manager.py,sha256=k1e5ruSJdcMKr6j-_XIF-gfhrgyMzdSenYW0QoJROu8,7916
8
+ apify/_platform_event_manager.py,sha256=igi9dRTfB7t0mRBM1bCfzMh7RBbr5adrJ0iRymUQ8S8,7990
9
9
  apify/_proxy_configuration.py,sha256=c-O6_PZ9pUD-i4J0RFEKTtfyJPP2rTRJJA1TH8NVsV8,13189
10
- apify/_utils.py,sha256=92byxeXTpDFwhBq7ZS-obeXKtKWvVzCZMV0Drg3EjhQ,1634
10
+ apify/_utils.py,sha256=7YL7VhmNND0XARsXtPQWWQCyK4825n-ZVB8n9cgWm0A,1838
11
11
  apify/log.py,sha256=j-E4t-WeA93bc1NCQRG8sTntehQCiiN8ia-MdQe3_Ts,1291
12
12
  apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
@@ -21,7 +21,7 @@ apify/apify_storage_client/_request_queue_collection_client.py,sha256=MTLM2cG0tx
21
21
  apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
23
23
  apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
24
- apify/scrapy/_async_thread.py,sha256=AfeH9ZkSRZXxL11wzwrroDNsTzq4tAvURlinUZBtYMA,4753
24
+ apify/scrapy/_async_thread.py,sha256=8xif_fWce7vaMLuDc-XuDzZlHbCI-NY61YXdP2P27QY,4753
25
25
  apify/scrapy/_logging_config.py,sha256=hFq90fNtZyjjJA7w2k-mtuEC8xCFiBMTalbwPDcaig4,2022
26
26
  apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  apify/scrapy/requests.py,sha256=vZEU1IwNotCkqZ-b-LfM15iAyr1LnZ_fF8oWyMFVVFI,6553
@@ -36,9 +36,9 @@ apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1cc
36
36
  apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
37
37
  apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  apify/storages/__init__.py,sha256=FW-z6ubuPnHGM-Wp15T8mR5q6lnpDGrCW-IkgZd5L30,177
39
- apify/storages/_request_list.py,sha256=7WpcdWvT3QxEBthynBpTVCSNDLXq6UbpQQmfUVyJ1jE,5849
39
+ apify/storages/_request_list.py,sha256=FCC4X2MX2V8vLZBCUi5Q1qg9w62y9UkF4ptOqyPVhG8,6052
40
40
  apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- apify-2.5.0b3.dist-info/METADATA,sha256=VIpuiQymimAfQg1h2-cI8fvpHHHlcEDjo_tqTrjR4zg,21558
42
- apify-2.5.0b3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
43
- apify-2.5.0b3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
44
- apify-2.5.0b3.dist-info/RECORD,,
41
+ apify-2.5.0b5.dist-info/METADATA,sha256=EMfgdCeHcfi2Yj8_Itn3gCNz2hup0-Y9Vz1gZsMT78U,21558
42
+ apify-2.5.0b5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
43
+ apify-2.5.0b5.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
44
+ apify-2.5.0b5.dist-info/RECORD,,