apify 2.1.0__tar.gz → 2.1.0b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {apify-2.1.0 → apify-2.1.0b2}/PKG-INFO +3 -3
  2. {apify-2.1.0 → apify-2.1.0b2}/pyproject.toml +17 -18
  3. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_configuration.py +1 -1
  4. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_proxy_configuration.py +1 -1
  5. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/requests.py +5 -7
  6. {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/__init__.py +1 -1
  7. {apify-2.1.0 → apify-2.1.0b2}/LICENSE +0 -0
  8. {apify-2.1.0 → apify-2.1.0b2}/README.md +0 -0
  9. {apify-2.1.0 → apify-2.1.0b2}/src/apify/__init__.py +1 -1
  10. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_actor.py +0 -0
  11. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_consts.py +0 -0
  12. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_crypto.py +0 -0
  13. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_models.py +0 -0
  14. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_platform_event_manager.py +0 -0
  15. {apify-2.1.0 → apify-2.1.0b2}/src/apify/_utils.py +0 -0
  16. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/__init__.py +0 -0
  17. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_apify_storage_client.py +0 -0
  18. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_dataset_client.py +0 -0
  19. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_dataset_collection_client.py +0 -0
  20. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_key_value_store_client.py +0 -0
  21. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_key_value_store_collection_client.py +0 -0
  22. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_request_queue_client.py +0 -0
  23. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/_request_queue_collection_client.py +0 -0
  24. {apify-2.1.0 → apify-2.1.0b2}/src/apify/apify_storage_client/py.typed +0 -0
  25. {apify-2.1.0 → apify-2.1.0b2}/src/apify/log.py +0 -0
  26. {apify-2.1.0 → apify-2.1.0b2}/src/apify/py.typed +0 -0
  27. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/__init__.py +2 -2
  28. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
  29. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
  30. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/middlewares/py.typed +0 -0
  31. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
  32. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
  33. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/pipelines/py.typed +0 -0
  34. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/py.typed +0 -0
  35. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/scheduler.py +0 -0
  36. {apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/utils.py +0 -0
  37. {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/_request_list.py +0 -0
  38. {apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/py.typed +0 -0
{apify-2.1.0 → apify-2.1.0b2}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apify
-Version: 2.1.0
+Version: 2.1.0b2
 Summary: Apify SDK for Python
 License: Apache-2.0
 Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
@@ -21,9 +21,9 @@ Classifier: Topic :: Software Development :: Libraries
 Provides-Extra: scrapy
 Requires-Dist: apify-client (>=1.8.1)
 Requires-Dist: apify-shared (>=1.1.2)
-Requires-Dist: crawlee (>=0.4.0,<0.5.0)
+Requires-Dist: crawlee (>=0.3.9)
 Requires-Dist: cryptography (>=42.0.0)
-Requires-Dist: httpx (>=0.27.0,<0.28.0)
+Requires-Dist: httpx (>=0.27.0)
 Requires-Dist: lazy-object-proxy (>=1.10.0)
 Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
 Requires-Dist: typing-extensions (>=4.1.0)
```
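Note the direction of this diff: `2.1.0b2` is a pre-release that PEP 440 sorts before the final `2.1.0`, so the right-hand side here is the older beta. A quick check, assuming the third-party `packaging` library is available:

```python
from packaging.version import Version

# PEP 440 orders pre-releases below the final release: 2.1.0b2 < 2.1.0.
print(Version('2.1.0b2') < Version('2.1.0'))  # True
```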
{apify-2.1.0 → apify-2.1.0b2}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "apify"
-version = "2.1.0"
+version = "2.1.0b2"
 description = "Apify SDK for Python"
 authors = ["Apify Technologies s.r.o. <support@apify.com>"]
 license = "Apache-2.0"
@@ -41,15 +41,16 @@ keywords = [
 "Issue Tracker" = "https://github.com/apify/apify-sdk-python/issues"
 "Repository" = "https://github.com/apify/apify-sdk-python"

+# We use inclusive ordered comparison clauses for external packages intentionally in order to enhance SDK's
+# compatibility with external packages. This decision was discussed in detail in the following PR:
+# https://github.com/apify/apify-sdk-python/pull/154.
 [tool.poetry.dependencies]
 python = "^3.9"
 apify-client = ">=1.8.1"
 apify-shared = ">=1.1.2"
-crawlee = "~0.4.0"
+crawlee = ">=0.3.9"
 cryptography = ">=42.0.0"
-# TODO: relax the upper bound once the issue is resolved:
-# https://github.com/apify/apify-sdk-python/issues/348
-httpx = "~0.27.0"
+httpx = ">=0.27.0"
 lazy-object-proxy = ">=1.10.0"
 scrapy = { version = ">=2.11.0", optional = true }
 typing-extensions = ">=4.1.0"
@@ -64,13 +65,13 @@ pre-commit = "~4.0.0"
 pydoc-markdown = "~4.8.0"
 pytest = "~8.3.0"
 pytest-asyncio = "~0.24.0"
-pytest-cov = "~6.0.0"
+pytest-cov = "~5.0.0"
 pytest-only = "~2.1.0"
 pytest-timeout = "~2.3.0"
 pytest-xdist = "~3.6.0"
 respx = "~0.21.0"
-ruff = "~0.8.0"
-setuptools = "~75.6.0" # setuptools are used by pytest but not explicitly required
+ruff = "~0.7.0"
+setuptools = "~75.0.0" # setuptools are used by pytest but not explicitly required

 [tool.poetry.extras]
 scrapy = ["scrapy"]
@@ -81,6 +82,8 @@ line-length = 120
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
+    "ANN101", # Missing type annotation for `self` in method
+    "ANN102", # Missing type annotation for `{name}` in classmethod
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {filename}
     "ASYNC109", # Async function definition with a `timeout` parameter
     "BLE001", # Do not catch blind exception
@@ -93,6 +96,7 @@ ignore = [
     "G004", # Logging statement uses f-string
     "ISC001", # This rule may cause conflicts when used with the formatter
     "FIX", # flake8-fixme
+    "PGH003", # Use specific rule codes when ignoring type issues
     "PLR0911", # Too many return statements
     "PLR0913", # Too many arguments in function definition
     "PLR0915", # Too many statements
@@ -138,10 +142,7 @@ docstring-quotes = "double"
 inline-quotes = "single"

 [tool.ruff.lint.flake8-type-checking]
-runtime-evaluated-base-classes = [
-    "pydantic.BaseModel",
-    "crawlee.configuration.Configuration",
-]
+runtime-evaluated-base-classes = ["pydantic.BaseModel", "crawlee.configuration.Configuration"]

 [tool.ruff.lint.flake8-builtins]
 builtins-ignorelist = ["id"]
@@ -182,17 +183,15 @@ exclude = []
 module = ['scrapy', 'scrapy.*', 'lazy_object_proxy']
 ignore_missing_imports = true

-[tool.basedpyright]
-pythonVersion = "3.9"
-typeCheckingMode = "standard"
-include = ["src", "tests"]
-
 [tool.coverage.report]
 exclude_lines = [
     "pragma: no cover",
     "if TYPE_CHECKING:",
-    "assert_never()",
+    "assert_never()"
 ]

+[tool.basedpyright]
+typeCheckingMode = "standard"
+
 [tool.ipdb]
 context = 7
```
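In the dependencies hunk above, the final release pins `crawlee` and `httpx` with tilde constraints, while the beta used open lower bounds (the added comment's "inclusive ordered comparison clauses"). A minimal sketch of the practical difference, assuming the third-party `packaging` library; Poetry's `~X.Y.Z` corresponds to the PEP 440 range `>=X.Y.Z,<X.(Y+1).0`:

```python
from packaging.specifiers import SpecifierSet

# Poetry's `httpx = "~0.27.0"` pins to the 0.27.x patch series:
pinned = SpecifierSet('>=0.27.0,<0.28.0')
print('0.27.2' in pinned)   # True
print('0.28.0' in pinned)   # False

# The relaxed `httpx = ">=0.27.0"` accepts any later release as well:
relaxed = SpecifierSet('>=0.27.0')
print('0.28.0' in relaxed)  # True
```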
{apify-2.1.0 → apify-2.1.0b2}/src/apify/_configuration.py

```diff
@@ -323,4 +323,4 @@ class Configuration(CrawleeConfiguration):


 # Monkey-patch the base class so that it works with the extended configuration
-CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore[method-assign]
+CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore
```
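The only change in this file is the `type: ignore` comment on a monkey-patch that reassigns a base-class classmethod: 2.1.0 names mypy's specific `method-assign` error code, in line with Ruff's PGH003 rule that the beta still kept on its ignore list. A self-contained sketch of the pattern itself, using illustrative class names rather than the SDK's:

```python
class BaseConfiguration:
    @classmethod
    def get_global_configuration(cls) -> 'BaseConfiguration':
        return BaseConfiguration()


class ExtendedConfiguration(BaseConfiguration):
    @classmethod
    def get_global_configuration(cls) -> 'ExtendedConfiguration':
        return ExtendedConfiguration()


# Reassigning the base-class attribute routes legacy callers that only know
# about BaseConfiguration to the extended subclass. Type checkers flag such
# reassignments, hence the `type: ignore[method-assign]` in the diff above.
BaseConfiguration.get_global_configuration = ExtendedConfiguration.get_global_configuration  # type: ignore[method-assign]

assert isinstance(BaseConfiguration.get_global_configuration(), ExtendedConfiguration)
```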
{apify-2.1.0 → apify-2.1.0b2}/src/apify/_proxy_configuration.py

```diff
@@ -280,7 +280,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
            return

        status = None
-       async with httpx.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
+       async with httpx.AsyncClient(proxies=proxy_info.url, timeout=10) as client:
            for _ in range(2):
                try:
                    response = await client.get(proxy_status_url)
```
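Here 2.1.0 passes the proxy URL via `proxy=` where the beta used `proxies=`. To the best of my knowledge, httpx introduced the singular `proxy=` keyword in 0.26, deprecated the plural form at the same time, and removed `proxies=` in 0.28, so only `proxy=` survives newer httpx releases. A hedged sketch with hypothetical endpoints:

```python
import asyncio

import httpx


async def check_proxy(proxy_url: str, status_url: str) -> int:
    # httpx 0.27 accepts both spellings, but `proxies=` has been deprecated
    # since 0.26 and was removed in 0.28, so `proxy=` is the
    # forward-compatible choice under an open ">=0.27.0" constraint.
    async with httpx.AsyncClient(proxy=proxy_url, timeout=10) as client:
        response = await client.get(status_url)
        return response.status_code


# Hypothetical endpoints, for illustration only:
# status = asyncio.run(check_proxy('http://user:pass@proxy.example.com:8000',
#                                  'https://example.com/status'))
```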
{apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/requests.py

```diff
@@ -42,10 +42,8 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     Returns:
         The converted Apify request if the conversion was successful, otherwise None.
     """
-    if not isinstance(scrapy_request, Request):
-        Actor.log.warning(  # type: ignore[unreachable]
-            'Failed to convert to Apify request: Scrapy request must be a Request instance.'
-        )
+    if not isinstance(cast(Any, scrapy_request), Request):
+        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
         return None

     call_id = crypto_random_object_id(8)
@@ -55,7 +53,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest
     if _is_request_produced_by_middleware(scrapy_request):
         unique_key = compute_unique_key(
             url=scrapy_request.url,
-            method=scrapy_request.method,  # type: ignore[arg-type] # str vs literal
+            method=scrapy_request.method,
             payload=scrapy_request.body,
             use_extended_unique_key=True,
         )
@@ -82,9 +80,9 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> CrawleeRequest

     # Convert Scrapy's headers to a HttpHeaders and store them in the apify_request
     if isinstance(scrapy_request.headers, Headers):
-        apify_request.headers = HttpHeaders(dict(scrapy_request.headers.to_unicode_dict()))
+        apify_request.headers = HttpHeaders(scrapy_request.headers.to_unicode_dict())
     else:
-        Actor.log.warning(  # type: ignore[unreachable]
+        Actor.log.warning(
             f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {scrapy_request.headers}'
         )
```
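The first hunk swaps a `# type: ignore[unreachable]` for `cast(Any, ...)`: because the parameter is annotated as `Request`, type checkers narrow the negative `isinstance` branch to unreachable, yet the runtime guard still matters for untyped callers. A minimal sketch of the pattern with a stand-in class (not Scrapy's actual `Request`):

```python
from typing import Any, Optional, cast


class Request:
    """Stand-in for scrapy.Request, for illustration only."""


def to_request(maybe_request: Request) -> Optional[Request]:
    # The annotation says `Request`, so a checker narrows the negative
    # branch to "unreachable". Casting to `Any` before the isinstance call
    # keeps the runtime guard without a `type: ignore[unreachable]`.
    if not isinstance(cast(Any, maybe_request), Request):
        return None
    return maybe_request


print(to_request(Request()) is not None)  # True
print(to_request('nope') is None)  # True at runtime (deliberately bad call)
```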
{apify-2.1.0 → apify-2.1.0b2}/src/apify/storages/__init__.py

```diff
@@ -2,4 +2,4 @@ from crawlee.storages import Dataset, KeyValueStore, RequestQueue

 from ._request_list import RequestList

-__all__ = ['Dataset', 'KeyValueStore', 'RequestList', 'RequestQueue']
+__all__ = ['Dataset', 'KeyValueStore', 'RequestQueue', 'RequestList']
```
{apify-2.1.0 → apify-2.1.0b2}/src/apify/__init__.py

```diff
@@ -13,8 +13,8 @@ __version__ = metadata.version('apify')

 __all__ = [
     'Actor',
-    'Configuration',
     'Event',
+    'Configuration',
     'ProxyConfiguration',
     'ProxyInfo',
     'Request',
```
{apify-2.1.0 → apify-2.1.0b2}/src/apify/scrapy/__init__.py

```diff
@@ -3,9 +3,9 @@ from apify.scrapy.scheduler import ApifyScheduler
 from apify.scrapy.utils import get_basic_auth_header, get_running_event_loop_id

 __all__ = [
+    'to_apify_request',
+    'to_scrapy_request',
     'ApifyScheduler',
     'get_basic_auth_header',
     'get_running_event_loop_id',
-    'to_apify_request',
-    'to_scrapy_request',
 ]
```