apify 1.5.2b5__py3-none-any.whl → 1.5.3b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

apify/scrapy/utils.py CHANGED
@@ -8,6 +8,8 @@ from urllib.parse import unquote
8
8
 
9
9
  try:
10
10
  from scrapy import Request, Spider
11
+ from scrapy.settings import Settings # noqa: TCH002
12
+ from scrapy.utils.project import get_project_settings
11
13
  from scrapy.utils.python import to_bytes
12
14
  from scrapy.utils.request import request_from_dict
13
15
  except ImportError as exc:
@@ -153,6 +155,45 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
153
155
  return scrapy_request
154
156
 
155
157
 
158
+ def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict | None = None) -> Settings:
159
+ """Integrates Apify configuration into the Scrapy project settings.
160
+
161
+ Note: The function directly modifies the passed `settings` object and also returns it.
162
+
163
+ Args:
164
+ settings: Scrapy project settings to be modified.
165
+ proxy_config: Proxy configuration to be stored in the settings.
166
+
167
+ Returns:
168
+ Scrapy project settings with custom configurations.
169
+ """
170
+ if settings is None:
171
+ settings = get_project_settings()
172
+
173
+ # Use ApifyScheduler as the scheduler
174
+ settings['SCHEDULER'] = 'apify.scrapy.scheduler.ApifyScheduler'
175
+
176
+ # Add the ActorDatasetPushPipeline into the item pipelines, assigning it the highest integer (1000),
177
+ # ensuring it is executed as the final step in the pipeline sequence
178
+ settings['ITEM_PIPELINES']['apify.scrapy.pipelines.ActorDatasetPushPipeline'] = 1000
179
+
180
+ # Disable the default RobotsTxtMiddleware, because Apify's custom scheduler already handles robots.txt
181
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware'] = None
182
+
183
+ # Disable the default HttpProxyMiddleware and add ApifyHttpProxyMiddleware
184
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
185
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 950
186
+
187
+ # Disable the default RetryMiddleware and add ApifyRetryMiddleware with the highest integer (1000)
188
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.retry.RetryMiddleware'] = None
189
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyRetryMiddleware'] = 1000
190
+
191
+ # Store the proxy configuration
192
+ settings['APIFY_PROXY_SETTINGS'] = proxy_config
193
+
194
+ return settings
195
+
196
+
156
197
  async def open_queue_with_custom_client() -> RequestQueue:
157
198
  """Open a Request Queue with custom Apify Client.
158
199
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.2b5
3
+ Version: 1.5.3b2
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -22,7 +22,7 @@ apify/_memory_storage/resource_clients/request_queue.py,sha256=6skV2fGpDxrDtn43o
22
22
  apify/_memory_storage/resource_clients/request_queue_collection.py,sha256=qW7kPLKRl5mukIGCT8iNCsXT82BUYSwCV_KH7MLj528,1713
23
23
  apify/scrapy/__init__.py,sha256=tCnqsdzcCx0Rpx13r1THeEJ6SzEgjmyrmHVKdCgfEfo,174
24
24
  apify/scrapy/scheduler.py,sha256=-6onJc3BPJbOfCs9yypzrp9N0hXdPVre7i7thHlZJ4k,4950
25
- apify/scrapy/utils.py,sha256=cv-lgnzuP8zDrGNlMiGYXVdPXbmEnrNpEA8h3My_2ec,7427
25
+ apify/scrapy/utils.py,sha256=ixFoLbihzLZjjUuMtT8pqGItqVj-eOHrv7y1UEjELZ0,9387
26
26
  apify/scrapy/middlewares/__init__.py,sha256=zzosV8BD8SZQIrVKsSaGFGV9rHinNLKm5GPL3ZNxSZQ,96
27
27
  apify/scrapy/middlewares/apify_proxy.py,sha256=pDNx31y7llSv3ZM1SSujEYZSWQTfNfAqKvsNph5zbbU,5919
28
28
  apify/scrapy/middlewares/apify_retry.py,sha256=6VeWoYW8c_WgxFNE5aoZ4OwlFZwMIdOXF-q-UOSO3QE,4607
@@ -34,8 +34,8 @@ apify/storages/dataset.py,sha256=gfMlJ6dSXDdjCykkStZCyp4u8xKCIZqflGS9-jLzK74,233
34
34
  apify/storages/key_value_store.py,sha256=BUGYPI4MuRlOJ_aPzZr8bq7ay_K04qAZ7yKW7C8ItV4,10760
35
35
  apify/storages/request_queue.py,sha256=PP7KVZAQTR8-BPT54f-nk8Rbgkc5-bkDGFf4u_XQJLs,26957
36
36
  apify/storages/storage_client_manager.py,sha256=fvXg3PRojATNamEN29BBZyZZ5GWN_s0r29A59aiL-wQ,2465
37
- apify-1.5.2b5.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
- apify-1.5.2b5.dist-info/METADATA,sha256=mzFxqVGympnEbRPS4bMlnBmYNgwV6n4sHk-5I9AZmaE,6236
39
- apify-1.5.2b5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
- apify-1.5.2b5.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
- apify-1.5.2b5.dist-info/RECORD,,
37
+ apify-1.5.3b2.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
38
+ apify-1.5.3b2.dist-info/METADATA,sha256=EQzJ1o7R60UE5ThurCkoelSeRUPb-VfZrvCcQeEslmc,6236
39
+ apify-1.5.3b2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
40
+ apify-1.5.3b2.dist-info/top_level.txt,sha256=2oFNsHggn5m_rCaaP7xijQg_-Va2ByOSYuvKgACsS5w,6
41
+ apify-1.5.3b2.dist-info/RECORD,,