apify-1.5.2b3-py3-none-any.whl → apify-1.5.2b4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify has been flagged as potentially problematic; review the version diff below for details.

@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
4
3
  from urllib.parse import ParseResult, urlparse
5
4
 
6
5
  try:
6
+ from scrapy import Request, Spider # noqa: TCH002
7
7
  from scrapy.core.downloader.handlers.http11 import TunnelError
8
+ from scrapy.crawler import Crawler # noqa: TCH002
8
9
  from scrapy.exceptions import NotConfigured
9
10
  except ImportError as exc:
10
11
  raise ImportError(
@@ -15,10 +16,6 @@ from ...actor import Actor
15
16
  from ...proxy_configuration import ProxyConfiguration
16
17
  from ..utils import get_basic_auth_header
17
18
 
18
- if TYPE_CHECKING:
19
- from scrapy import Request, Spider
20
- from scrapy.crawler import Crawler
21
-
22
19
 
23
20
  class ApifyHttpProxyMiddleware:
24
21
  """Apify HTTP proxy middleware for Scrapy.
@@ -4,7 +4,9 @@ import traceback
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  try:
7
+ from scrapy import Spider # noqa: TCH002
7
8
  from scrapy.downloadermiddlewares.retry import RetryMiddleware
9
+ from scrapy.http import Request, Response # noqa: TCH002
8
10
  from scrapy.utils.response import response_status_message
9
11
  except ImportError as exc:
10
12
  raise ImportError(
@@ -15,9 +17,6 @@ from ...actor import Actor
15
17
  from ..utils import nested_event_loop, open_queue_with_custom_client, to_apify_request
16
18
 
17
19
  if TYPE_CHECKING:
18
- from scrapy import Spider
19
- from scrapy.http import Request, Response
20
-
21
20
  from ...storages import RequestQueue
22
21
 
23
22
 
apify/scrapy/scheduler.py CHANGED
@@ -55,7 +55,9 @@ class ApifyScheduler(BaseScheduler):
55
55
  Returns:
56
56
  True if the scheduler has any pending requests, False otherwise.
57
57
  """
58
- assert isinstance(self._rq, RequestQueue) # noqa: S101
58
+ if not isinstance(self._rq, RequestQueue):
59
+ raise TypeError('self._rq must be an instance of the RequestQueue class')
60
+
59
61
  try:
60
62
  is_finished = nested_event_loop.run_until_complete(self._rq.is_finished())
61
63
  except BaseException:
@@ -76,10 +78,14 @@ class ApifyScheduler(BaseScheduler):
76
78
  call_id = crypto_random_object_id(8)
77
79
  Actor.log.debug(f'[{call_id}]: ApifyScheduler.enqueue_request was called (scrapy_request={request})...')
78
80
 
79
- assert isinstance(self.spider, Spider) # noqa: S101
81
+ if not isinstance(self.spider, Spider):
82
+ raise TypeError('self.spider must be an instance of the Spider class')
83
+
80
84
  apify_request = to_apify_request(request, spider=self.spider)
81
85
  Actor.log.debug(f'[{call_id}]: scrapy_request was transformed to apify_request (apify_request={apify_request})')
82
- assert isinstance(self._rq, RequestQueue) # noqa: S101
86
+
87
+ if not isinstance(self._rq, RequestQueue):
88
+ raise TypeError('self._rq must be an instance of the RequestQueue class')
83
89
 
84
90
  try:
85
91
  result = nested_event_loop.run_until_complete(self._rq.add_request(apify_request))
@@ -98,7 +104,9 @@ class ApifyScheduler(BaseScheduler):
98
104
  """
99
105
  call_id = crypto_random_object_id(8)
100
106
  Actor.log.debug(f'[{call_id}]: ApifyScheduler.next_request was called...')
101
- assert isinstance(self._rq, RequestQueue) # noqa: S101
107
+
108
+ if not isinstance(self._rq, RequestQueue):
109
+ raise TypeError('self._rq must be an instance of the RequestQueue class')
102
110
 
103
111
  try:
104
112
  apify_request = nested_event_loop.run_until_complete(self._rq.fetch_next_request())
@@ -111,7 +119,9 @@ class ApifyScheduler(BaseScheduler):
111
119
  if apify_request is None:
112
120
  return None
113
121
 
114
- assert isinstance(self.spider, Spider) # noqa: S101
122
+ if not isinstance(self.spider, Spider):
123
+ raise TypeError('self.spider must be an instance of the Spider class')
124
+
115
125
  scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
116
126
  Actor.log.debug(
117
127
  f'[{call_id}]: apify_request was transformed to the scrapy_request which is gonna be returned (scrapy_request={scrapy_request})',
apify/scrapy/utils.py CHANGED
@@ -6,10 +6,9 @@ import pickle
6
6
  from base64 import b64encode
7
7
  from urllib.parse import unquote
8
8
 
9
- from scrapy.utils.python import to_bytes
10
-
11
9
  try:
12
10
  from scrapy import Request, Spider
11
+ from scrapy.utils.python import to_bytes
13
12
  from scrapy.utils.request import request_from_dict
14
13
  except ImportError as exc:
15
14
  raise ImportError(
@@ -51,7 +50,8 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
51
50
  Returns:
52
51
  The converted Apify request.
53
52
  """
54
- assert isinstance(scrapy_request, Request) # noqa: S101
53
+ if not isinstance(scrapy_request, Request):
54
+ raise TypeError('scrapy_request must be an instance of the scrapy.Request class')
55
55
 
56
56
  call_id = crypto_random_object_id(8)
57
57
  Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...')
@@ -91,11 +91,14 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
91
91
  Returns:
92
92
  The converted Scrapy request.
93
93
  """
94
- assert isinstance(apify_request, dict) # noqa: S101
95
- assert 'url' in apify_request # noqa: S101
96
- assert 'method' in apify_request # noqa: S101
97
- assert 'id' in apify_request # noqa: S101
98
- assert 'uniqueKey' in apify_request # noqa: S101
94
+ if not isinstance(apify_request, dict):
95
+ raise TypeError('apify_request must be a dictionary')
96
+
97
+ required_keys = ['url', 'method', 'id', 'uniqueKey']
98
+ missing_keys = [key for key in required_keys if key not in apify_request]
99
+
100
+ if missing_keys:
101
+ raise ValueError(f"apify_request must contain {', '.join(map(repr, missing_keys))} key(s)")
99
102
 
100
103
  call_id = crypto_random_object_id(8)
101
104
  Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')
@@ -106,14 +109,19 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
106
109
  # - This process involves decoding the base64-encoded request data and reconstructing
107
110
  # the Scrapy Request object from its dictionary representation.
108
111
  Actor.log.debug(f'[{call_id}]: Restoring the Scrapy Request from the apify_request...')
112
+
109
113
  scrapy_request_dict_encoded = apify_request['userData']['scrapy_request']
110
- assert isinstance(scrapy_request_dict_encoded, str) # noqa: S101
114
+ if not isinstance(scrapy_request_dict_encoded, str):
115
+ raise TypeError('scrapy_request_dict_encoded must be a string')
111
116
 
112
117
  scrapy_request_dict = pickle.loads(codecs.decode(scrapy_request_dict_encoded.encode(), 'base64'))
113
- assert isinstance(scrapy_request_dict, dict) # noqa: S101
118
+ if not isinstance(scrapy_request_dict, dict):
119
+ raise TypeError('scrapy_request_dict must be a dictionary')
114
120
 
115
121
  scrapy_request = request_from_dict(scrapy_request_dict, spider=spider)
116
- assert isinstance(scrapy_request, Request) # noqa: S101
122
+ if not isinstance(scrapy_request, Request):
123
+ raise TypeError('scrapy_request must be an instance of the Request class')
124
+
117
125
  Actor.log.debug(f'[{call_id}]: Scrapy Request successfully reconstructed (scrapy_request={scrapy_request})...')
118
126
 
119
127
  # Update the meta field with the meta field from the apify_request
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.2b3
3
+ Version: 1.5.2b4
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -49,7 +49,7 @@ Requires-Dist: pytest-only ~=2.0.0 ; extra == 'dev'
49
49
  Requires-Dist: pytest-timeout ~=2.2.0 ; extra == 'dev'
50
50
  Requires-Dist: pytest-xdist ~=3.3.1 ; extra == 'dev'
51
51
  Requires-Dist: respx ~=0.20.1 ; extra == 'dev'
52
- Requires-Dist: ruff ~=0.1.6 ; extra == 'dev'
52
+ Requires-Dist: ruff ~=0.1.13 ; extra == 'dev'
53
53
  Requires-Dist: twine ~=4.0.2 ; extra == 'dev'
54
54
  Requires-Dist: types-aiofiles ~=23.2.0.0 ; extra == 'dev'
55
55
  Requires-Dist: types-colorama ~=0.4.15.12 ; extra == 'dev'
@@ -21,11 +21,11 @@ apify/_memory_storage/resource_clients/key_value_store_collection.py,sha256=Oy1j
21
21
  apify/_memory_storage/resource_clients/request_queue.py,sha256=7LS_jrBBJvylFZedZHrgwMPyCsLz8X9-mAvvhOaYzXI,19614
22
22
  apify/_memory_storage/resource_clients/request_queue_collection.py,sha256=ydnYy2zu9wp_A58fr4LnENjvOVNEWgJ8vyG5jjHzXIA,1637
23
23
  apify/scrapy/__init__.py,sha256=tCnqsdzcCx0Rpx13r1THeEJ6SzEgjmyrmHVKdCgfEfo,174
24
- apify/scrapy/scheduler.py,sha256=XUo6_7ZqtoFsnLadG1985AZVfjMNZYOBMPB_UVE0zwc,4562
25
- apify/scrapy/utils.py,sha256=DcspnpCY7LVPGpFPrZzzn9nRB4p5gWgK-UOb1ChCwMA,6777
24
+ apify/scrapy/scheduler.py,sha256=Ogl789PrvY0jAAaK3hgTxjliNQ9Dx0cisE9YM8vm52Y,4926
25
+ apify/scrapy/utils.py,sha256=2qphTfTVHu-wG-1Ibrp0mDG18ONaosZcX0kQlf_nkxY,7162
26
26
  apify/scrapy/middlewares/__init__.py,sha256=zzosV8BD8SZQIrVKsSaGFGV9rHinNLKm5GPL3ZNxSZQ,96
27
- apify/scrapy/middlewares/apify_proxy.py,sha256=f_913T1mqhgb9ca_s3fGy8WETlJGDU1IMHV8OCATGFM,5922
28
- apify/scrapy/middlewares/apify_retry.py,sha256=RrUMrXgk9FTydBG99VbD7m1nDtWccMsO_Kf-rNivunI,4559
27
+ apify/scrapy/middlewares/apify_proxy.py,sha256=FFNGFq7danSNKl722nZ7zC-VKoMLRk6bVtlFL4cksvg,5902
28
+ apify/scrapy/middlewares/apify_retry.py,sha256=HaQcYxoOFm_CMgTWkbL5HPoEhpMPFKkHn11_j-4JvwE,4590
29
29
  apify/scrapy/pipelines/__init__.py,sha256=KBUE3maOWrLfJRmWSsyW6YxxZY4lCGP2GZyMm9Z56VY,57
30
30
  apify/scrapy/pipelines/actor_dataset_push.py,sha256=qXtSFIUhIvqC8KGDdft4N_FmSA2qyfZfsjd9uSYTZo4,956
31
31
  apify/storages/__init__.py,sha256=rBdwhyZxUMG6m_7uAb4sl5eg_dxiLvYVas5aRcZ6PIE,268
@@ -34,8 +34,8 @@ apify/storages/dataset.py,sha256=x_rte5nVOMPdg3ui_KHpH71UdUT2gcN10bGnrLXB6xk,232
34
34
  apify/storages/key_value_store.py,sha256=HKSF6odZTMWgFF6usS9l9xcCGLyRitRq59LAKbmyYAY,10730
35
35
  apify/storages/request_queue.py,sha256=b0Qh2d1BWDtdbf_adAVS68fVkdcR2gtL4KyfxAp1oMY,26915
36
36
  apify/storages/storage_client_manager.py,sha256=QAGbu47pwFkHa-AFfolNW3W5hvR7zNz2yxK9Sv0wQbA,2457
37
- apify-1.5.2b3.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
- apify-1.5.2b3.dist-info/METADATA,sha256=0nW0vff906ZLlKhqPhXDIgz8vaK7VFIegy4sSYdcUoo,6235
39
- apify-1.5.2b3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
- apify-1.5.2b3.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
- apify-1.5.2b3.dist-info/RECORD,,
37
+ apify-1.5.2b4.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
+ apify-1.5.2b4.dist-info/METADATA,sha256=fKJa_aTbHjUaRjVe8mWQfv3B9gguNMuOnHzMU8u3DhQ,6236
39
+ apify-1.5.2b4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
+ apify-1.5.2b4.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
+ apify-1.5.2b4.dist-info/RECORD,,