apify 1.5.2b2__py3-none-any.whl → 1.5.2b3__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
apify/scrapy/middlewares/apify_proxy.py CHANGED
@@ -1,11 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import TYPE_CHECKING
3
4
  from urllib.parse import ParseResult, urlparse
4
5
 
5
6
  try:
6
- from scrapy import Request, Spider # noqa: TCH002
7
7
  from scrapy.core.downloader.handlers.http11 import TunnelError
8
- from scrapy.crawler import Crawler # noqa: TCH002
9
8
  from scrapy.exceptions import NotConfigured
10
9
  except ImportError as exc:
11
10
  raise ImportError(
@@ -16,6 +15,10 @@ from ...actor import Actor
16
15
  from ...proxy_configuration import ProxyConfiguration
17
16
  from ..utils import get_basic_auth_header
18
17
 
18
+ if TYPE_CHECKING:
19
+ from scrapy import Request, Spider
20
+ from scrapy.crawler import Crawler
21
+
19
22
 
20
23
  class ApifyHttpProxyMiddleware:
21
24
  """Apify HTTP proxy middleware for Scrapy.
apify/scrapy/middlewares/apify_retry.py CHANGED
@@ -4,9 +4,7 @@ import traceback
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  try:
7
- from scrapy import Spider # noqa: TCH002
8
7
  from scrapy.downloadermiddlewares.retry import RetryMiddleware
9
- from scrapy.http import Request, Response # noqa: TCH002
10
8
  from scrapy.utils.response import response_status_message
11
9
  except ImportError as exc:
12
10
  raise ImportError(
@@ -17,6 +15,9 @@ from ...actor import Actor
17
15
  from ..utils import nested_event_loop, open_queue_with_custom_client, to_apify_request
18
16
 
19
17
  if TYPE_CHECKING:
18
+ from scrapy import Spider
19
+ from scrapy.http import Request, Response
20
+
20
21
  from ...storages import RequestQueue
21
22
 
22
23
 
apify/scrapy/scheduler.py CHANGED
@@ -55,9 +55,7 @@ class ApifyScheduler(BaseScheduler):
55
55
  Returns:
56
56
  True if the scheduler has any pending requests, False otherwise.
57
57
  """
58
- if not isinstance(self._rq, RequestQueue):
59
- raise TypeError('self._rq must be an instance of the RequestQueue class')
60
-
58
+ assert isinstance(self._rq, RequestQueue) # noqa: S101
61
59
  try:
62
60
  is_finished = nested_event_loop.run_until_complete(self._rq.is_finished())
63
61
  except BaseException:
@@ -78,14 +76,10 @@ class ApifyScheduler(BaseScheduler):
78
76
  call_id = crypto_random_object_id(8)
79
77
  Actor.log.debug(f'[{call_id}]: ApifyScheduler.enqueue_request was called (scrapy_request={request})...')
80
78
 
81
- if not isinstance(self.spider, Spider):
82
- raise TypeError('self.spider must be an instance of the Spider class')
83
-
79
+ assert isinstance(self.spider, Spider) # noqa: S101
84
80
  apify_request = to_apify_request(request, spider=self.spider)
85
81
  Actor.log.debug(f'[{call_id}]: scrapy_request was transformed to apify_request (apify_request={apify_request})')
86
-
87
- if not isinstance(self._rq, RequestQueue):
88
- raise TypeError('self._rq must be an instance of the RequestQueue class')
82
+ assert isinstance(self._rq, RequestQueue) # noqa: S101
89
83
 
90
84
  try:
91
85
  result = nested_event_loop.run_until_complete(self._rq.add_request(apify_request))
@@ -104,9 +98,7 @@ class ApifyScheduler(BaseScheduler):
104
98
  """
105
99
  call_id = crypto_random_object_id(8)
106
100
  Actor.log.debug(f'[{call_id}]: ApifyScheduler.next_request was called...')
107
-
108
- if not isinstance(self._rq, RequestQueue):
109
- raise TypeError('self._rq must be an instance of the RequestQueue class')
101
+ assert isinstance(self._rq, RequestQueue) # noqa: S101
110
102
 
111
103
  try:
112
104
  apify_request = nested_event_loop.run_until_complete(self._rq.fetch_next_request())
@@ -119,9 +111,7 @@ class ApifyScheduler(BaseScheduler):
119
111
  if apify_request is None:
120
112
  return None
121
113
 
122
- if not isinstance(self.spider, Spider):
123
- raise TypeError('self.spider must be an instance of the Spider class')
124
-
114
+ assert isinstance(self.spider, Spider) # noqa: S101
125
115
  scrapy_request = to_scrapy_request(apify_request, spider=self.spider)
126
116
  Actor.log.debug(
127
117
  f'[{call_id}]: apify_request was transformed to the scrapy_request which is gonna be returned (scrapy_request={scrapy_request})',
apify/scrapy/utils.py CHANGED
@@ -6,9 +6,10 @@ import pickle
6
6
  from base64 import b64encode
7
7
  from urllib.parse import unquote
8
8
 
9
+ from scrapy.utils.python import to_bytes
10
+
9
11
  try:
10
12
  from scrapy import Request, Spider
11
- from scrapy.utils.python import to_bytes
12
13
  from scrapy.utils.request import request_from_dict
13
14
  except ImportError as exc:
14
15
  raise ImportError(
@@ -50,8 +51,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
50
51
  Returns:
51
52
  The converted Apify request.
52
53
  """
53
- if not isinstance(scrapy_request, Request):
54
- raise TypeError('scrapy_request must be an instance of the scrapy.Request class')
54
+ assert isinstance(scrapy_request, Request) # noqa: S101
55
55
 
56
56
  call_id = crypto_random_object_id(8)
57
57
  Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...')
@@ -91,14 +91,11 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
91
91
  Returns:
92
92
  The converted Scrapy request.
93
93
  """
94
- if not isinstance(apify_request, dict):
95
- raise TypeError('apify_request must be a dictionary')
96
-
97
- required_keys = ['url', 'method', 'id', 'uniqueKey']
98
- missing_keys = [key for key in required_keys if key not in apify_request]
99
-
100
- if missing_keys:
101
- raise ValueError(f"apify_request must contain {', '.join(map(repr, missing_keys))} key(s)")
94
+ assert isinstance(apify_request, dict) # noqa: S101
95
+ assert 'url' in apify_request # noqa: S101
96
+ assert 'method' in apify_request # noqa: S101
97
+ assert 'id' in apify_request # noqa: S101
98
+ assert 'uniqueKey' in apify_request # noqa: S101
102
99
 
103
100
  call_id = crypto_random_object_id(8)
104
101
  Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')
@@ -109,19 +106,14 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
109
106
  # - This process involves decoding the base64-encoded request data and reconstructing
110
107
  # the Scrapy Request object from its dictionary representation.
111
108
  Actor.log.debug(f'[{call_id}]: Restoring the Scrapy Request from the apify_request...')
112
-
113
109
  scrapy_request_dict_encoded = apify_request['userData']['scrapy_request']
114
- if not isinstance(scrapy_request_dict_encoded, str):
115
- raise TypeError('scrapy_request_dict_encoded must be a string')
110
+ assert isinstance(scrapy_request_dict_encoded, str) # noqa: S101
116
111
 
117
112
  scrapy_request_dict = pickle.loads(codecs.decode(scrapy_request_dict_encoded.encode(), 'base64'))
118
- if not isinstance(scrapy_request_dict, dict):
119
- raise TypeError('scrapy_request_dict must be a dictionary')
113
+ assert isinstance(scrapy_request_dict, dict) # noqa: S101
120
114
 
121
115
  scrapy_request = request_from_dict(scrapy_request_dict, spider=spider)
122
- if not isinstance(scrapy_request, Request):
123
- raise TypeError('scrapy_request must be an instance of the Request class')
124
-
116
+ assert isinstance(scrapy_request, Request) # noqa: S101
125
117
  Actor.log.debug(f'[{call_id}]: Scrapy Request successfully reconstructed (scrapy_request={scrapy_request})...')
126
118
 
127
119
  # Update the meta field with the meta field from the apify_request
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.2b2
3
+ Version: 1.5.2b3
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -21,11 +21,11 @@ apify/_memory_storage/resource_clients/key_value_store_collection.py,sha256=Oy1j
21
21
  apify/_memory_storage/resource_clients/request_queue.py,sha256=7LS_jrBBJvylFZedZHrgwMPyCsLz8X9-mAvvhOaYzXI,19614
22
22
  apify/_memory_storage/resource_clients/request_queue_collection.py,sha256=ydnYy2zu9wp_A58fr4LnENjvOVNEWgJ8vyG5jjHzXIA,1637
23
23
  apify/scrapy/__init__.py,sha256=tCnqsdzcCx0Rpx13r1THeEJ6SzEgjmyrmHVKdCgfEfo,174
24
- apify/scrapy/scheduler.py,sha256=Ogl789PrvY0jAAaK3hgTxjliNQ9Dx0cisE9YM8vm52Y,4926
25
- apify/scrapy/utils.py,sha256=2qphTfTVHu-wG-1Ibrp0mDG18ONaosZcX0kQlf_nkxY,7162
24
+ apify/scrapy/scheduler.py,sha256=XUo6_7ZqtoFsnLadG1985AZVfjMNZYOBMPB_UVE0zwc,4562
25
+ apify/scrapy/utils.py,sha256=DcspnpCY7LVPGpFPrZzzn9nRB4p5gWgK-UOb1ChCwMA,6777
26
26
  apify/scrapy/middlewares/__init__.py,sha256=zzosV8BD8SZQIrVKsSaGFGV9rHinNLKm5GPL3ZNxSZQ,96
27
- apify/scrapy/middlewares/apify_proxy.py,sha256=FFNGFq7danSNKl722nZ7zC-VKoMLRk6bVtlFL4cksvg,5902
28
- apify/scrapy/middlewares/apify_retry.py,sha256=HaQcYxoOFm_CMgTWkbL5HPoEhpMPFKkHn11_j-4JvwE,4590
27
+ apify/scrapy/middlewares/apify_proxy.py,sha256=f_913T1mqhgb9ca_s3fGy8WETlJGDU1IMHV8OCATGFM,5922
28
+ apify/scrapy/middlewares/apify_retry.py,sha256=RrUMrXgk9FTydBG99VbD7m1nDtWccMsO_Kf-rNivunI,4559
29
29
  apify/scrapy/pipelines/__init__.py,sha256=KBUE3maOWrLfJRmWSsyW6YxxZY4lCGP2GZyMm9Z56VY,57
30
30
  apify/scrapy/pipelines/actor_dataset_push.py,sha256=qXtSFIUhIvqC8KGDdft4N_FmSA2qyfZfsjd9uSYTZo4,956
31
31
  apify/storages/__init__.py,sha256=rBdwhyZxUMG6m_7uAb4sl5eg_dxiLvYVas5aRcZ6PIE,268
@@ -34,8 +34,8 @@ apify/storages/dataset.py,sha256=x_rte5nVOMPdg3ui_KHpH71UdUT2gcN10bGnrLXB6xk,232
34
34
  apify/storages/key_value_store.py,sha256=HKSF6odZTMWgFF6usS9l9xcCGLyRitRq59LAKbmyYAY,10730
35
35
  apify/storages/request_queue.py,sha256=b0Qh2d1BWDtdbf_adAVS68fVkdcR2gtL4KyfxAp1oMY,26915
36
36
  apify/storages/storage_client_manager.py,sha256=QAGbu47pwFkHa-AFfolNW3W5hvR7zNz2yxK9Sv0wQbA,2457
37
- apify-1.5.2b2.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
- apify-1.5.2b2.dist-info/METADATA,sha256=Cw3UknTKsrRzu5DUsxVUUIbIRTyBhzt0IpDcs49UwPk,6235
39
- apify-1.5.2b2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
- apify-1.5.2b2.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
- apify-1.5.2b2.dist-info/RECORD,,
37
+ apify-1.5.2b3.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
+ apify-1.5.2b3.dist-info/METADATA,sha256=0nW0vff906ZLlKhqPhXDIgz8vaK7VFIegy4sSYdcUoo,6235
39
+ apify-1.5.2b3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
+ apify-1.5.2b3.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
+ apify-1.5.2b3.dist-info/RECORD,,