apify 2.1.0__tar.gz → 2.1.0b2__tar.gz
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- {apify-2.1.0 → apify-2.1.0b2}/PKG-INFO +3 -3
- {apify-2.1.0 → apify-2.1.0b2}/pyproject.toml +17 -18
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_configuration.py +1 -1
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_proxy_configuration.py +1 -1
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/requests.py +5 -7
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/__init__.py +1 -1
- {apify-2.1.0 → apify-2.1.0b2}/LICENSE +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/README.md +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/__init__.py +1 -1
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_actor.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_consts.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_crypto.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_models.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_platform_event_manager.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/_utils.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/__init__.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_dataset_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/py.typed +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/log.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/py.typed +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/__init__.py +2 -2
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/py.typed +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/scheduler.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/utils.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/_request_list.py +0 -0
- {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/py.typed +0 -0
{apify-2.1.0 → apify-2.1.0b2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apify
-Version: 2.1.0
+Version: 2.1.0b2
 Summary: Apify SDK for Python
 License: Apache-2.0
 Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping

@@ -21,9 +21,9 @@ Classifier: Topic :: Software Development :: Libraries
 Provides-Extra: scrapy
 Requires-Dist: apify-client (>=1.8.1)
 Requires-Dist: apify-shared (>=1.1.2)
-Requires-Dist: crawlee (>=0.
+Requires-Dist: crawlee (>=0.3.9)
 Requires-Dist: cryptography (>=42.0.0)
-Requires-Dist: httpx (>=0.27.0,<0.28.0)
+Requires-Dist: httpx (>=0.27.0)
 Requires-Dist: lazy-object-proxy (>=1.10.0)
 Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
 Requires-Dist: typing-extensions (>=4.1.0)
{apify-2.1.0 → apify-2.1.0b2}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "apify"
-version = "2.1.0"
+version = "2.1.0b2"
 description = "Apify SDK for Python"
 authors = ["Apify Technologies s.r.o. <support@apify.com>"]
 license = "Apache-2.0"
@@ -41,15 +41,16 @@ keywords = [
 "Issue Tracker" = "https://github.com/apify/apify-sdk-python/issues"
 "Repository" = "https://github.com/apify/apify-sdk-python"
 
+# We use inclusive ordered comparison clauses for external packages intentionally in order to enhance SDK's
+# compatibility with external packages. This decision was discussed in detail in the following PR:
+# https://github.com/apify/apify-sdk-python/pull/154.
 [tool.poetry.dependencies]
 python = "^3.9"
 apify-client = ">=1.8.1"
 apify-shared = ">=1.1.2"
-crawlee = "
+crawlee = ">=0.3.9"
 cryptography = ">=42.0.0"
-
-# https://github.com/apify/apify-sdk-python/issues/348
-httpx = "~0.27.0"
+httpx = ">=0.27.0"
 lazy-object-proxy = ">=1.10.0"
 scrapy = { version = ">=2.11.0", optional = true }
 typing-extensions = ">=4.1.0"
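The effect of relaxing the tilde pin to an inclusive ordered comparison is easy to demonstrate with the packaging library, which is what pip itself uses to evaluate version specifiers. A minimal sketch (the >=0.27.0,<0.28.0 range is how Poetry expands ~0.27.0):

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    tilde = SpecifierSet('>=0.27.0,<0.28.0')  # what the old `httpx = "~0.27.0"` pin expands to
    open_ended = SpecifierSet('>=0.27.0')     # the inclusive ordered comparison used instead

    print(Version('0.28.1') in tilde)       # False: the tilde pin rejects the next minor release
    print(Version('0.28.1') in open_ended)  # True: the open lower bound accepts it

This is the compatibility trade-off the comment above refers to: the open bound lets downstream projects resolve newer httpx releases alongside the SDK.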
@@ -64,13 +65,13 @@ pre-commit = "~4.0.0"
 pydoc-markdown = "~4.8.0"
 pytest = "~8.3.0"
 pytest-asyncio = "~0.24.0"
-pytest-cov = "~
+pytest-cov = "~5.0.0"
 pytest-only = "~2.1.0"
 pytest-timeout = "~2.3.0"
 pytest-xdist = "~3.6.0"
 respx = "~0.21.0"
-ruff = "~0.
-setuptools = "~75.
+ruff = "~0.7.0"
+setuptools = "~75.0.0" # setuptools are used by pytest but not explicitly required
 
 [tool.poetry.extras]
 scrapy = ["scrapy"]
@@ -81,6 +82,8 @@ line-length = 120
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
+    "ANN101", # Missing type annotation for `self` in method
+    "ANN102", # Missing type annotation for `{name}` in classmethod
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {filename}
     "ASYNC109", # Async function definition with a `timeout` parameter
     "BLE001", # Do not catch blind exception
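For reference, a small sketch of what the two newly ignored rules flag, as ruff documents them (the class here is purely illustrative):

    class ApiClient:
        def fetch(self) -> str:  # ANN101 would flag the un-annotated `self`
            return 'ok'

        @classmethod
        def create(cls) -> 'ApiClient':  # ANN102 would flag the un-annotated `cls`
            return cls()

Ignoring both is a common choice, since annotating `self` and `cls` adds little information.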
@@ -93,6 +96,7 @@ ignore = [
     "G004", # Logging statement uses f-string
     "ISC001", # This rule may cause conflicts when used with the formatter
     "FIX", # flake8-fixme
+    "PGH003", # Use specific rule codes when ignoring type issues
     "PLR0911", # Too many return statements
     "PLR0913", # Too many arguments in function definition
     "PLR0915", # Too many statements
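PGH003 forbids blanket type-ignore comments, so adding it to the ignore list permits the bare form. A minimal illustration (the function is hypothetical):

    def load_settings():  # no return annotation, so type checkers infer Any
        return {}

    # The bare ignore below is what PGH003 would flag:
    settings = load_settings()  # type: ignore
    # The rule asks for a specific error code instead, e.g.:
    # settings = load_settings()  # type: ignore[no-any-return]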
@@ -138,10 +142,7 @@ docstring-quotes = "double"
 inline-quotes = "single"
 
 [tool.ruff.lint.flake8-type-checking]
-runtime-evaluated-base-classes = [
-    "pydantic.BaseModel",
-    "crawlee.configuration.Configuration",
-]
+runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration"]
 
 [tool.ruff.lint.flake8-builtins]
 builtins-ignorelist = ["id"]
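The collapsed one-liner keeps the same semantics: classes deriving from the listed bases evaluate their annotations at runtime, so ruff's flake8-type-checking rules must not move the imports those annotations use into `if TYPE_CHECKING:` blocks. A small illustration, assuming pydantic is installed (the model is hypothetical):

    from __future__ import annotations

    from datetime import timedelta  # used only in the annotation below
    from pydantic import BaseModel

    class RetryPolicy(BaseModel):
        # pydantic resolves this annotation at runtime to build the field, so the
        # timedelta import must stay a real import; listing pydantic.BaseModel in
        # runtime-evaluated-base-classes tells ruff not to relocate it.
        backoff: timedelta = timedelta(seconds=1)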
@@ -182,17 +183,15 @@ exclude = []
 module = ['scrapy', 'scrapy.*', 'lazy_object_proxy']
 ignore_missing_imports = true
 
-[tool.basedpyright]
-pythonVersion = "3.9"
-typeCheckingMode = "standard"
-include = ["src", "tests"]
-
 [tool.coverage.report]
 exclude_lines = [
     "pragma: no cover",
     "if TYPE_CHECKING:",
-    "assert_never()"
+    "assert_never()"
 ]
 
+[tool.basedpyright]
+typeCheckingMode = "standard"
+
 [tool.ipdb]
 context = 7
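The `assert_never()` entry in exclude_lines exists because exhaustiveness guards are unreachable by design, so counting them as uncovered lines would only add noise. A short sketch, assuming typing-extensions (already a dependency above) provides assert_never:

    from typing import Literal

    from typing_extensions import assert_never

    def describe(kind: Literal['dataset', 'queue']) -> str:
        if kind == 'dataset':
            return 'a dataset'
        if kind == 'queue':
            return 'a request queue'
        assert_never(kind)  # unreachable by construction; matched by exclude_lines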
{apify-2.1.0 → apify-2.1.0b2}/src/apify/_configuration.py

@@ -323,4 +323,4 @@ class Configuration(CrawleeConfiguration):
 
 
 # Monkey-patch the base class so that it works with the extended configuration
-CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
+CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
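Only the formatting of this line changed; the patch itself assigns the subclass's classmethod onto the base class so that code holding only the base type receives the extended configuration. A self-contained sketch of the pattern with stand-in names (not the SDK's real classes):

    class BaseConfiguration:
        @classmethod
        def get_global_configuration(cls) -> 'BaseConfiguration':
            return BaseConfiguration()

    class ExtendedConfiguration(BaseConfiguration):
        @classmethod
        def get_global_configuration(cls) -> 'ExtendedConfiguration':
            return ExtendedConfiguration()

    # Mirror of the monkey-patch above: callers that only know the base class
    # now receive the extended configuration.
    BaseConfiguration.get_global_configuration = ExtendedConfiguration.get_global_configuration  # type: ignore[method-assign]

    assert isinstance(BaseConfiguration.get_global_configuration(), ExtendedConfiguration)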
{apify-2.1.0 → apify-2.1.0b2}/src/apify/_proxy_configuration.py

@@ -280,7 +280,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
             return
 
         status = None
-        async with httpx.AsyncClient(
+        async with httpx.AsyncClient(proxies=proxy_info.url, timeout=10) as client:
             for _ in range(2):
                 try:
                     response = await client.get(proxy_status_url)
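The collapsed line shows the full client construction: the proxy URL and a 10-second timeout are passed to httpx.AsyncClient, and the status endpoint is tried twice. A rough sketch of the same pattern, assuming an httpx version below 0.28 (where the `proxies` argument still exists); the URLs and function name are placeholders:

    from __future__ import annotations

    import httpx

    async def check_proxy(proxy_url: str, status_url: str) -> int | None:
        async with httpx.AsyncClient(proxies=proxy_url, timeout=10) as client:
            for _ in range(2):  # one retry, mirroring the loop in the diff
                try:
                    response = await client.get(status_url)
                except httpx.TransportError:
                    continue
                return response.status_code
        return None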
{apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/requests.py

@@ -42,10 +42,8 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     Returns:
         The converted Apify request if the conversion was successful, otherwise None.
     """
-    if not isinstance(scrapy_request, Request):
-        Actor.log.warning(
-            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
-        )
+    if not isinstance(cast(Any, scrapy_request), Request):
+        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
         return None
 
     call_id = crypto_random_object_id(8)

@@ -55,7 +53,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     if _is_request_produced_by_middleware(scrapy_request):
         unique_key = compute_unique_key(
             url=scrapy_request.url,
-            method=scrapy_request.method,
+            method=scrapy_request.method,
             payload=scrapy_request.body,
             use_extended_unique_key=True,
         )

@@ -82,9 +80,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
 
     # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers = HttpHeaders(
+        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
     else:
-        Actor.log.warning(
+        Actor.log.warning(
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
         )
 
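The header conversion is now a single expression; `to_unicode_dict()` is Scrapy's own helper that turns its bytes-based Headers into a plain str-to-str mapping that HttpHeaders can consume. A quick illustration, assuming Scrapy is installed:

    from scrapy.http.headers import Headers

    headers = Headers({'Content-Type': 'application/json'})
    print(headers.to_unicode_dict())  # a case-insensitive str-to-str mapping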
{apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/__init__.py

@@ -3,9 +3,9 @@ from apify.scrapy.scheduler import ApifyScheduler
 from apify.scrapy.utils import get_basic_auth_header, get_running_event_loop_id
 
 __all__ = [
+    'to_apify_request',
+    'to_scrapy_request',
     'ApifyScheduler',
     'get_basic_auth_header',
     'get_running_event_loop_id',
-    'to_apify_request',
-    'to_scrapy_request',
 ]
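The reordering keeps both converters exported from the subpackage root. A usage sketch, assuming the apify[scrapy] extra is installed and a Spider instance is available from the surrounding crawl:

    from scrapy import Request, Spider

    from apify.scrapy import to_apify_request

    def convert(spider: Spider):
        scrapy_request = Request(url='https://example.com')
        # Returns the converted request on success, or None if conversion fails.
        return to_apify_request(scrapy_request, spider=spider)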
All other files were renamed from apify-2.1.0 to apify-2.1.0b2 without content changes.