apify 1.5.3b1__tar.gz → 1.5.3b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (46)
  1. {apify-1.5.3b1 → apify-1.5.3b2}/PKG-INFO +1 -1
  2. {apify-1.5.3b1 → apify-1.5.3b2}/pyproject.toml +1 -1
  3. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/utils.py +41 -0
  4. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/PKG-INFO +1 -1
  5. {apify-1.5.3b1 → apify-1.5.3b2}/LICENSE +0 -0
  6. {apify-1.5.3b1 → apify-1.5.3b2}/README.md +0 -0
  7. {apify-1.5.3b1 → apify-1.5.3b2}/setup.cfg +0 -0
  8. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/__init__.py +0 -0
  9. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_crypto.py +0 -0
  10. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/__init__.py +0 -0
  11. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/file_storage_utils.py +0 -0
  12. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/memory_storage_client.py +0 -0
  13. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/__init__.py +0 -0
  14. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/base_resource_client.py +0 -0
  15. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -0
  16. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/dataset.py +0 -0
  17. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/dataset_collection.py +0 -0
  18. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/key_value_store.py +0 -0
  19. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -0
  20. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/request_queue.py +0 -0
  21. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/request_queue_collection.py +0 -0
  22. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_utils.py +0 -0
  23. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/actor.py +0 -0
  24. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/config.py +0 -0
  25. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/consts.py +0 -0
  26. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/event_manager.py +0 -0
  27. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/log.py +0 -0
  28. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/proxy_configuration.py +0 -0
  29. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/py.typed +0 -0
  30. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/__init__.py +0 -0
  31. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
  32. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
  33. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/apify_retry.py +0 -0
  34. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
  35. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
  36. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/scheduler.py +0 -0
  37. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/__init__.py +0 -0
  38. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/base_storage.py +0 -0
  39. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/dataset.py +0 -0
  40. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/key_value_store.py +0 -0
  41. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/request_queue.py +0 -0
  42. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/storage_client_manager.py +0 -0
  43. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/SOURCES.txt +0 -0
  44. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/dependency_links.txt +0 -0
  45. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/requires.txt +0 -0
  46. {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.3b1
3
+ Version: 1.5.3b2
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "apify"
3
- version = "1.5.3b1"
3
+ version = "1.5.3b2"
4
4
  description = "Apify SDK for Python"
5
5
  readme = "README.md"
6
6
  license = { text = "Apache Software License" }
@@ -8,6 +8,8 @@ from urllib.parse import unquote
8
8
 
9
9
  try:
10
10
  from scrapy import Request, Spider
11
+ from scrapy.settings import Settings # noqa: TCH002
12
+ from scrapy.utils.project import get_project_settings
11
13
  from scrapy.utils.python import to_bytes
12
14
  from scrapy.utils.request import request_from_dict
13
15
  except ImportError as exc:
@@ -153,6 +155,45 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
153
155
  return scrapy_request
154
156
 
155
157
 
158
+ def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict | None = None) -> Settings:
159
+ """Integrates Apify configuration into a Scrapy project settings.
160
+
161
+ Note: The function directly modifies the passed `settings` object and also returns it.
162
+
163
+ Args:
164
+ settings: Scrapy project settings to be modified.
165
+ proxy_config: Proxy configuration to be stored in the settings.
166
+
167
+ Returns:
168
+ Scrapy project settings with custom configurations.
169
+ """
170
+ if settings is None:
171
+ settings = get_project_settings()
172
+
173
+ # Use ApifyScheduler as the scheduler
174
+ settings['SCHEDULER'] = 'apify.scrapy.scheduler.ApifyScheduler'
175
+
176
+ # Add the ActorDatasetPushPipeline into the item pipelines, assigning it the highest integer (1000),
177
+ # ensuring it is executed as the final step in the pipeline sequence
178
+ settings['ITEM_PIPELINES']['apify.scrapy.pipelines.ActorDatasetPushPipeline'] = 1000
179
+
180
+ # Disable the default RobotsTxtMiddleware, Apify's custom scheduler already handles robots.txt
181
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware'] = None
182
+
183
+ # Disable the default HttpProxyMiddleware and add ApifyHttpProxyMiddleware
184
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
185
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 950
186
+
187
+ # Disable the default RetryMiddleware and add ApifyRetryMiddleware with the highest integer (1000)
188
+ settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.retry.RetryMiddleware'] = None
189
+ settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyRetryMiddleware'] = 1000
190
+
191
+ # Store the proxy configuration
192
+ settings['APIFY_PROXY_SETTINGS'] = proxy_config
193
+
194
+ return settings
195
+
196
+
156
197
  async def open_queue_with_custom_client() -> RequestQueue:
157
198
  """Open a Request Queue with custom Apify Client.
158
199
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apify
3
- Version: 1.5.3b1
3
+ Version: 1.5.3b2
4
4
  Summary: Apify SDK for Python
5
5
  Author-email: "Apify Technologies s.r.o." <support@apify.com>
6
6
  License: Apache Software License
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes