apify 2.0.2__tar.gz → 2.0.2b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- {apify-2.0.2 → apify-2.0.2b1}/PKG-INFO +3 -3
- {apify-2.0.2 → apify-2.0.2b1}/pyproject.toml +14 -8
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_platform_event_manager.py +1 -1
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_dataset_client.py +2 -3
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/log.py +1 -1
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/middlewares/apify_proxy.py +4 -4
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/pipelines/actor_dataset_push.py +1 -1
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/scheduler.py +5 -5
- {apify-2.0.2 → apify-2.0.2b1}/LICENSE +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/README.md +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_actor.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_configuration.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_consts.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_crypto.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_models.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_proxy_configuration.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/_utils.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/py.typed +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/py.typed +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/py.typed +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/requests.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/scrapy/utils.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/storages/__init__.py +0 -0
- {apify-2.0.2 → apify-2.0.2b1}/src/apify/storages/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.2b1
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
|
|
@@ -21,13 +21,13 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
21
21
|
Provides-Extra: scrapy
|
|
22
22
|
Requires-Dist: apify-client (>=1.8.1)
|
|
23
23
|
Requires-Dist: apify-shared (>=1.1.2)
|
|
24
|
-
Requires-Dist: crawlee (>=0.3.
|
|
24
|
+
Requires-Dist: crawlee (>=0.3.8)
|
|
25
25
|
Requires-Dist: cryptography (>=42.0.0)
|
|
26
26
|
Requires-Dist: httpx (>=0.27.0)
|
|
27
27
|
Requires-Dist: lazy-object-proxy (>=1.10.0)
|
|
28
28
|
Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
|
|
29
29
|
Requires-Dist: typing-extensions (>=4.1.0)
|
|
30
|
-
Requires-Dist: websockets (>=10.0
|
|
30
|
+
Requires-Dist: websockets (>=10.0)
|
|
31
31
|
Project-URL: Apify Homepage, https://apify.com
|
|
32
32
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
33
33
|
Project-URL: Documentation, https://docs.apify.com/sdk/python/
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "2.0.2"
|
|
7
|
+
version = "2.0.2b1"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = ["Apify Technologies s.r.o. <support@apify.com>"]
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -48,20 +48,20 @@ keywords = [
|
|
|
48
48
|
python = "^3.9"
|
|
49
49
|
apify-client = ">=1.8.1"
|
|
50
50
|
apify-shared = ">=1.1.2"
|
|
51
|
-
crawlee = ">=0.3.
|
|
51
|
+
crawlee = ">=0.3.8"
|
|
52
52
|
cryptography = ">=42.0.0"
|
|
53
53
|
httpx = ">=0.27.0"
|
|
54
54
|
lazy-object-proxy = ">=1.10.0"
|
|
55
55
|
scrapy = { version = ">=2.11.0", optional = true }
|
|
56
56
|
typing-extensions = ">=4.1.0"
|
|
57
|
-
websockets = ">=10.0
|
|
57
|
+
websockets = ">=10.0"
|
|
58
58
|
|
|
59
59
|
[tool.poetry.group.dev.dependencies]
|
|
60
60
|
build = "~1.2.0"
|
|
61
61
|
filelock = "~3.16.0"
|
|
62
62
|
griffe = "~1.5.0"
|
|
63
|
-
mypy = "~1.
|
|
64
|
-
pre-commit = "~
|
|
63
|
+
mypy = "~1.11.0"
|
|
64
|
+
pre-commit = "~3.8.0"
|
|
65
65
|
pydoc-markdown = "~4.8.0"
|
|
66
66
|
pytest = "~8.3.0"
|
|
67
67
|
pytest-asyncio = "~0.24.0"
|
|
@@ -70,8 +70,8 @@ pytest-only = "~2.1.0"
|
|
|
70
70
|
pytest-timeout = "~2.3.0"
|
|
71
71
|
pytest-xdist = "~3.6.0"
|
|
72
72
|
respx = "~0.21.0"
|
|
73
|
-
ruff = "~0.
|
|
74
|
-
setuptools = "~
|
|
73
|
+
ruff = "~0.6.0"
|
|
74
|
+
setuptools = "~74.0.0" # setuptools are used by pytest but not explicitly required
|
|
75
75
|
|
|
76
76
|
[tool.poetry.extras]
|
|
77
77
|
scrapy = ["scrapy"]
|
|
@@ -121,6 +121,12 @@ indent-style = "space"
|
|
|
121
121
|
"**/__init__.py" = [
|
|
122
122
|
"F401", # Unused imports
|
|
123
123
|
]
|
|
124
|
+
"**/{scripts}/*" = [
|
|
125
|
+
"D", # Everything from the pydocstyle
|
|
126
|
+
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
|
|
127
|
+
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
|
|
128
|
+
"T20", # flake8-print
|
|
129
|
+
]
|
|
124
130
|
"**/{tests}/*" = [
|
|
125
131
|
"D", # Everything from the pydocstyle
|
|
126
132
|
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
|
|
@@ -163,7 +169,7 @@ timeout = 1200
|
|
|
163
169
|
[tool.mypy]
|
|
164
170
|
python_version = "3.9"
|
|
165
171
|
plugins = ["pydantic.mypy"]
|
|
166
|
-
files = ["src", "tests"]
|
|
172
|
+
files = ["scripts", "src", "tests"]
|
|
167
173
|
check_untyped_defs = true
|
|
168
174
|
disallow_incomplete_defs = true
|
|
169
175
|
disallow_untyped_calls = true
|
|
@@ -91,7 +91,7 @@ class DatasetClient(BaseDatasetClient):
|
|
|
91
91
|
skip_empty: bool = False,
|
|
92
92
|
skip_hidden: bool = False,
|
|
93
93
|
) -> AsyncIterator[dict]:
|
|
94
|
-
|
|
94
|
+
return self._client.iterate_items(
|
|
95
95
|
offset=offset,
|
|
96
96
|
limit=limit,
|
|
97
97
|
clean=clean,
|
|
@@ -101,8 +101,7 @@ class DatasetClient(BaseDatasetClient):
|
|
|
101
101
|
unwind=unwind,
|
|
102
102
|
skip_empty=skip_empty,
|
|
103
103
|
skip_hidden=skip_hidden,
|
|
104
|
-
)
|
|
105
|
-
yield item
|
|
104
|
+
)
|
|
106
105
|
|
|
107
106
|
@override
|
|
108
107
|
async def get_items_as_bytes(
|
|
@@ -27,7 +27,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
27
27
|
proxy_settings = {'useApifyProxy': true, 'apifyProxyGroups': []}
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
|
-
def __init__(self, proxy_settings: dict) -> None:
|
|
30
|
+
def __init__(self: ApifyHttpProxyMiddleware, proxy_settings: dict) -> None:
|
|
31
31
|
"""Create a new instance.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
@@ -66,7 +66,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
66
66
|
|
|
67
67
|
return cls(proxy_settings)
|
|
68
68
|
|
|
69
|
-
async def process_request(self, request: Request, spider: Spider) -> None:
|
|
69
|
+
async def process_request(self: ApifyHttpProxyMiddleware, request: Request, spider: Spider) -> None:
|
|
70
70
|
"""Process a Scrapy request by assigning a new proxy.
|
|
71
71
|
|
|
72
72
|
Args:
|
|
@@ -89,7 +89,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
89
89
|
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')
|
|
90
90
|
|
|
91
91
|
def process_exception(
|
|
92
|
-
self,
|
|
92
|
+
self: ApifyHttpProxyMiddleware,
|
|
93
93
|
request: Request,
|
|
94
94
|
exception: Exception,
|
|
95
95
|
spider: Spider,
|
|
@@ -116,7 +116,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
116
116
|
'reason="{exception}", skipping...'
|
|
117
117
|
)
|
|
118
118
|
|
|
119
|
-
async def _get_new_proxy_url(self) -> ParseResult:
|
|
119
|
+
async def _get_new_proxy_url(self: ApifyHttpProxyMiddleware) -> ParseResult:
|
|
120
120
|
"""Get a new proxy URL.
|
|
121
121
|
|
|
122
122
|
Raises:
|
|
@@ -29,7 +29,7 @@ class ApifyScheduler(BaseScheduler):
|
|
|
29
29
|
This scheduler requires the asyncio Twisted reactor to be installed.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
-
def __init__(self) -> None:
|
|
32
|
+
def __init__(self: ApifyScheduler) -> None:
|
|
33
33
|
"""Create a new instance."""
|
|
34
34
|
if not is_asyncio_reactor_installed():
|
|
35
35
|
raise ValueError(
|
|
@@ -40,7 +40,7 @@ class ApifyScheduler(BaseScheduler):
|
|
|
40
40
|
self._rq: RequestQueue | None = None
|
|
41
41
|
self.spider: Spider | None = None
|
|
42
42
|
|
|
43
|
-
def open(self, spider: Spider) -> None: # this has to be named "open"
|
|
43
|
+
def open(self: ApifyScheduler, spider: Spider) -> None: # this has to be named "open"
|
|
44
44
|
"""Open the scheduler.
|
|
45
45
|
|
|
46
46
|
Args:
|
|
@@ -58,7 +58,7 @@ class ApifyScheduler(BaseScheduler):
|
|
|
58
58
|
traceback.print_exc()
|
|
59
59
|
raise
|
|
60
60
|
|
|
61
|
-
def has_pending_requests(self) -> bool:
|
|
61
|
+
def has_pending_requests(self: ApifyScheduler) -> bool:
|
|
62
62
|
"""Check if the scheduler has any pending requests.
|
|
63
63
|
|
|
64
64
|
Returns:
|
|
@@ -75,7 +75,7 @@ class ApifyScheduler(BaseScheduler):
|
|
|
75
75
|
|
|
76
76
|
return not is_finished
|
|
77
77
|
|
|
78
|
-
def enqueue_request(self, request: Request) -> bool:
|
|
78
|
+
def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
|
|
79
79
|
"""Add a request to the scheduler.
|
|
80
80
|
|
|
81
81
|
This could be called from either from a spider or a downloader middleware (e.g. redirect, retry, ...).
|
|
@@ -111,7 +111,7 @@ class ApifyScheduler(BaseScheduler):
|
|
|
111
111
|
Actor.log.debug(f'[{call_id}]: rq.add_request.result={result}...')
|
|
112
112
|
return bool(result.was_already_present)
|
|
113
113
|
|
|
114
|
-
def next_request(self) -> Request | None:
|
|
114
|
+
def next_request(self: ApifyScheduler) -> Request | None:
|
|
115
115
|
"""Fetch the next request from the scheduler.
|
|
116
116
|
|
|
117
117
|
Returns:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_key_value_store_collection_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{apify-2.0.2 → apify-2.0.2b1}/src/apify/apify_storage_client/_request_queue_collection_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|