apify 1.5.3b1__tar.gz → 1.5.3b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-1.5.3b1 → apify-1.5.3b2}/PKG-INFO +1 -1
- {apify-1.5.3b1 → apify-1.5.3b2}/pyproject.toml +1 -1
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/utils.py +41 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/PKG-INFO +1 -1
- {apify-1.5.3b1 → apify-1.5.3b2}/LICENSE +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/README.md +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/setup.cfg +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_crypto.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/file_storage_utils.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/memory_storage_client.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/base_resource_client.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/dataset.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/dataset_collection.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/key_value_store.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/request_queue.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/request_queue_collection.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_utils.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/actor.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/config.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/consts.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/event_manager.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/log.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/proxy_configuration.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/py.typed +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/middlewares/apify_retry.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/scrapy/scheduler.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/__init__.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/base_storage.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/dataset.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/key_value_store.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/request_queue.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify/storages/storage_client_manager.py +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/SOURCES.txt +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/dependency_links.txt +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/requires.txt +0 -0
- {apify-1.5.3b1 → apify-1.5.3b2}/src/apify.egg-info/top_level.txt +0 -0
|
@@ -8,6 +8,8 @@ from urllib.parse import unquote
|
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
from scrapy import Request, Spider
|
|
11
|
+
from scrapy.settings import Settings # noqa: TCH002
|
|
12
|
+
from scrapy.utils.project import get_project_settings
|
|
11
13
|
from scrapy.utils.python import to_bytes
|
|
12
14
|
from scrapy.utils.request import request_from_dict
|
|
13
15
|
except ImportError as exc:
|
|
@@ -153,6 +155,45 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
|
|
|
153
155
|
return scrapy_request
|
|
154
156
|
|
|
155
157
|
|
|
158
|
+
def apply_apify_settings(*, settings: Settings | None = None, proxy_config: dict | None = None) -> Settings:
|
|
159
|
+
"""Integrates Apify configuration into a Scrapy project settings.
|
|
160
|
+
|
|
161
|
+
Note: The function directly modifies the passed `settings` object and also returns it.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
settings: Scrapy project settings to be modified.
|
|
165
|
+
proxy_config: Proxy configuration to be stored in the settings.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
Scrapy project settings with custom configurations.
|
|
169
|
+
"""
|
|
170
|
+
if settings is None:
|
|
171
|
+
settings = get_project_settings()
|
|
172
|
+
|
|
173
|
+
# Use ApifyScheduler as the scheduler
|
|
174
|
+
settings['SCHEDULER'] = 'apify.scrapy.scheduler.ApifyScheduler'
|
|
175
|
+
|
|
176
|
+
# Add the ActorDatasetPushPipeline into the item pipelines, assigning it the highest integer (1000),
|
|
177
|
+
# ensuring it is executed as the final step in the pipeline sequence
|
|
178
|
+
settings['ITEM_PIPELINES']['apify.scrapy.pipelines.ActorDatasetPushPipeline'] = 1000
|
|
179
|
+
|
|
180
|
+
# Disable the default RobotsTxtMiddleware; Apify's custom scheduler already handles robots.txt
|
|
181
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware'] = None
|
|
182
|
+
|
|
183
|
+
# Disable the default HttpProxyMiddleware and add ApifyHttpProxyMiddleware
|
|
184
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware'] = None
|
|
185
|
+
settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyHttpProxyMiddleware'] = 950
|
|
186
|
+
|
|
187
|
+
# Disable the default RetryMiddleware and add ApifyRetryMiddleware with the highest integer (1000)
|
|
188
|
+
settings['DOWNLOADER_MIDDLEWARES']['scrapy.downloadermiddlewares.retry.RetryMiddleware'] = None
|
|
189
|
+
settings['DOWNLOADER_MIDDLEWARES']['apify.scrapy.middlewares.ApifyRetryMiddleware'] = 1000
|
|
190
|
+
|
|
191
|
+
# Store the proxy configuration
|
|
192
|
+
settings['APIFY_PROXY_SETTINGS'] = proxy_config
|
|
193
|
+
|
|
194
|
+
return settings
|
|
195
|
+
|
|
196
|
+
|
|
156
197
|
async def open_queue_with_custom_client() -> RequestQueue:
|
|
157
198
|
"""Open a Request Queue with custom Apify Client.
|
|
158
199
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/base_resource_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/dataset_collection.py
RENAMED
|
File without changes
|
{apify-1.5.3b1 → apify-1.5.3b2}/src/apify/_memory_storage/resource_clients/key_value_store.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|