apify 2.0.1b6__tar.gz → 2.0.1b8__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {apify-2.0.1b6 → apify-2.0.1b8}/PKG-INFO +3 -2
- {apify-2.0.1b6 → apify-2.0.1b8}/pyproject.toml +3 -2
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_configuration.py +17 -4
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/requests.py +4 -9
- {apify-2.0.1b6 → apify-2.0.1b8}/LICENSE +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/README.md +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_actor.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_consts.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_crypto.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_models.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_platform_event_manager.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_proxy_configuration.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_utils.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_dataset_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/apify_storage_client/py.typed +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/log.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/py.typed +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/py.typed +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/scheduler.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/utils.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/storages/__init__.py +0 -0
- {apify-2.0.1b6 → apify-2.0.1b8}/src/apify/storages/py.typed +0 -0
{apify-2.0.1b6 → apify-2.0.1b8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apify
-Version: 2.0.1b6
+Version: 2.0.1b8
 Summary: Apify SDK for Python
 License: Apache-2.0
 Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
@@ -16,11 +16,12 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries
 Provides-Extra: scrapy
 Requires-Dist: apify-client (>=1.8.1)
 Requires-Dist: apify-shared (>=1.1.2)
-Requires-Dist: crawlee (>=0.3.
+Requires-Dist: crawlee (>=0.3.8)
 Requires-Dist: cryptography (>=42.0.0)
 Requires-Dist: httpx (>=0.27.0)
 Requires-Dist: lazy-object-proxy (>=1.10.0)
{apify-2.0.1b6 → apify-2.0.1b8}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "apify"
-version = "2.0.1b6"
+version = "2.0.1b8"
 description = "Apify SDK for Python"
 authors = ["Apify Technologies s.r.o. <support@apify.com>"]
 license = "Apache-2.0"
@@ -48,7 +48,7 @@ keywords = [
 python = "^3.9"
 apify-client = ">=1.8.1"
 apify-shared = ">=1.1.2"
-crawlee = ">=0.3.
+crawlee = ">=0.3.8"
 cryptography = ">=42.0.0"
 httpx = ">=0.27.0"
 lazy-object-proxy = ">=1.10.0"
@@ -162,6 +162,7 @@ max-branches = 18

 [tool.pytest.ini_options]
 addopts = "-ra"
+asyncio_default_fixture_loop_scope = "function"
 asyncio_mode = "auto"
 timeout = 1200
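Aside from the version and crawlee bumps, the functional change in pyproject.toml is the new `asyncio_default_fixture_loop_scope = "function"` entry, which pins pytest-asyncio's default event-loop scope for async fixtures (newer pytest-asyncio releases warn when it is left unset). A minimal illustrative test file, not taken from the package, showing what this setting means together with the existing `asyncio_mode = "auto"`:

```python
# Illustrative sketch only, assuming pytest-asyncio >= 0.24 (where this option exists).
# With asyncio_mode = "auto", plain `async def` tests are collected without markers; with
# asyncio_default_fixture_loop_scope = "function", async fixtures run on the per-test loop.
import asyncio

import pytest


@pytest.fixture
async def fixture_loop_id() -> int:
    # Record which event loop the fixture was evaluated on.
    return id(asyncio.get_running_loop())


async def test_fixture_and_test_share_a_loop(fixture_loop_id: int) -> None:
    # Fixture and test loop scopes both default to "function", so they should see the same loop.
    assert fixture_loop_id == id(asyncio.get_running_loop())
```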
{apify-2.0.1b6 → apify-2.0.1b8}/src/apify/_configuration.py

@@ -5,8 +5,10 @@ from datetime import datetime, timedelta
 from typing import Annotated

 from pydantic import AliasChoices, BeforeValidator, Field
+from typing_extensions import deprecated

 from crawlee._utils.models import timedelta_ms
+from crawlee._utils.urls import validate_http_url
 from crawlee.configuration import Configuration as CrawleeConfiguration
@@ -160,15 +162,16 @@ class Configuration(CrawleeConfiguration):
         str | None,
         Field(
             alias='apify_sdk_latest_version',
-            deprecated=True,
             description='Specifies the most recent release version of the Apify SDK for Javascript. Used for '
             'checking for updates.',
         ),
+        deprecated('SDK version checking is not supported for the Python SDK'),
     ] = None

     log_format: Annotated[
         str | None,
-        Field(alias='apify_log_format'
+        Field(alias='apify_log_format'),
+        deprecated('Adjust the log format in code instead'),
     ] = None

     max_paid_dataset_items: Annotated[
@@ -256,7 +259,17 @@ class Configuration(CrawleeConfiguration):
             alias='actor_standby_port',
             description='TCP port for the Actor to start an HTTP server to receive messages in the Actor Standby mode',
         ),
-    ] = 4321
+        deprecated('Use `web_server_port` instead'),
+    ] = 4321
+
+    standby_url: Annotated[
+        str,
+        BeforeValidator(validate_http_url),
+        Field(
+            alias='actor_standby_url',
+            description='URL for accessing web servers of Actor runs in Standby mode',
+        ),
+    ] = 'http://localhost'

     token: Annotated[
         str | None,
@@ -293,7 +306,7 @@ class Configuration(CrawleeConfiguration):
                 'actor_web_server_url',
                 'apify_container_url',
             ),
-            description='Unique public URL for accessing
+            description='Unique public URL for accessing a specific Actor run web server from the outside world',
         ),
     ] = 'http://localhost:4321'
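Two things change in `Configuration`: deprecation markers move from `Field(deprecated=True)` to `typing_extensions.deprecated(...)` entries inside `Annotated[...]` (pydantic 2.7+ turns these into a `DeprecationWarning` on attribute access), and a new `standby_url` field is added, validated as an HTTP URL and fed from the `ACTOR_STANDBY_URL` environment variable via its alias. A hedged usage sketch follows; it assumes `Configuration` is exported from the package root, that the field behind `alias='actor_standby_port'` is named `standby_port` (the name itself is outside the hunk), and that no overriding environment variables are set:

```python
# Sketch only; field names and defaults come from the diff above, everything else is assumed.
import warnings

from apify import Configuration

config = Configuration()  # values are read from ACTOR_* / APIFY_* env vars when present

print(config.standby_url)  # 'http://localhost' unless ACTOR_STANDBY_URL overrides it

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    _ = config.standby_port  # deprecated: should emit DeprecationWarning('Use `web_server_port` instead')
    print([w.category.__name__ for w in caught])
```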
{apify-2.0.1b6 → apify-2.0.1b8}/src/apify/scrapy/requests.py

@@ -16,6 +16,7 @@ except ImportError as exc:
     ) from exc

 from crawlee import Request as CrawleeRequest
+from crawlee._types import HttpHeaders
 from crawlee._utils.crypto import crypto_random_object_id
 from crawlee._utils.requests import compute_unique_key, unique_key_to_request_id
@@ -77,9 +78,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
         id=request_id,
     )

-    # Convert Scrapy's headers to a
+    # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers =
+        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
     else:
         Actor.log.warning(
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
@@ -164,13 +165,7 @@ def to_scrapy_request(apify_request: CrawleeRequest, spider: Spider) -> Request:

     # Add optional 'headers' field
     if apify_request.headers:
-        if isinstance(apify_request.headers, dict):
-            scrapy_request.headers = Headers(apify_request.headers)
-        else:
-            Actor.log.warning(
-                'apify_request[headers] is not an instance of the dict class, '
-                f'apify_request[headers] = {apify_request.headers}',
-            )
+        scrapy_request.headers |= Headers(apify_request.headers)

     # Add optional 'userData' field
     if apify_request.user_data:
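The conversion helpers now go through crawlee's `HttpHeaders` in both directions: Scrapy's byte-oriented `Headers` are decoded with `to_unicode_dict()` on the way in, and merged back with `|=` (rather than replaced, with the old dict-type warning dropped) on the way out. A standalone sketch of that round trip, using only the objects visible in the diff; the surrounding request objects are omitted:

```python
# Round-trip sketch of the header handling shown above; assumes scrapy and crawlee are installed.
from crawlee._types import HttpHeaders
from scrapy.http.headers import Headers

scrapy_headers = Headers({'Accept': 'application/json', 'X-Custom': 'value'})

# Scrapy -> Apify: Headers stores bytes internally, so decode to plain strings first.
apify_headers = HttpHeaders(scrapy_headers.to_unicode_dict())

# Apify -> Scrapy: merge into the existing headers instead of overwriting them.
scrapy_headers |= Headers(apify_headers)
print(scrapy_headers.to_unicode_dict())
```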