datamarket 0.7.108__tar.gz → 0.7.109__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (35)
  1. {datamarket-0.7.108 → datamarket-0.7.109}/PKG-INFO +3 -2
  2. {datamarket-0.7.108 → datamarket-0.7.109}/pyproject.toml +2 -2
  3. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/proxy.py +28 -18
  4. {datamarket-0.7.108 → datamarket-0.7.109}/LICENSE +0 -0
  5. {datamarket-0.7.108 → datamarket-0.7.109}/README.md +0 -0
  6. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/exceptions/__init__.py +0 -0
  8. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/exceptions/main.py +0 -0
  9. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/__init__.py +0 -0
  10. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/alchemy.py +0 -0
  11. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/aws.py +0 -0
  12. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/azure.py +0 -0
  13. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/drive.py +0 -0
  14. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/ftp.py +0 -0
  15. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/nominatim.py +0 -0
  16. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/peerdb.py +0 -0
  17. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/tinybird.py +0 -0
  18. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/params/__init__.py +0 -0
  19. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/params/nominatim.py +0 -0
  20. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/__init__.py +0 -0
  21. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/airflow.py +0 -0
  22. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/alchemy.py +0 -0
  23. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/main.py +0 -0
  24. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/nominatim.py +0 -0
  25. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/__init__.py +0 -0
  26. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/async_api.py +0 -0
  27. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/sync_api.py +0 -0
  28. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/selenium.py +0 -0
  29. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/soda.py +0 -0
  30. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/__init__.py +0 -0
  31. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/normalization.py +0 -0
  32. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/obfuscation.py +0 -0
  33. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/standardization.py +0 -0
  34. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/typer.py +0 -0
  35. {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/types.py +0 -0
@@ -1,16 +1,17 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.108
3
+ Version: 0.7.109
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
7
7
  Author-email: techsupport@datamarket.es
8
- Requires-Python: >=3.12,<3.13
8
+ Requires-Python: >=3.12,<4.0
9
9
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
11
  Classifier: Operating System :: OS Independent
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
14
15
  Provides-Extra: aws
15
16
  Provides-Extra: azure-storage-blob
16
17
  Provides-Extra: boto3
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.7.108"
3
+ version = "0.7.109"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -15,7 +15,7 @@ classifiers = [
15
15
  ]
16
16
 
17
17
  [tool.poetry.dependencies]
18
- python = ">=3.12,<3.13"
18
+ python = "^3.12"
19
19
  typer = "~0.15.0"
20
20
  SQLAlchemy = "^2.0.0"
21
21
  psycopg2-binary = "^2.0.0"
@@ -5,6 +5,7 @@ import logging
5
5
  import random
6
6
  import time
7
7
  from datetime import timedelta
8
+ from functools import partial
8
9
 
9
10
  import requests
10
11
  import tenacity
@@ -246,28 +247,36 @@ class ProxyInterface:
246
247
  cooldown_seconds=30,
247
248
  proxy_rotation_interval=PROXY_ROTATION_INTERVAL,
248
249
  ):
249
- """Get a working proxy entry, performing health checks as needed."""
250
+ """Get a working proxy entry, performing health checks as needed.
251
+
252
+ - Fails fast if there are no entries.
253
+ - Optionally retries for up to `proxy_rotation_interval`,
254
+ refreshing the traversal queue before each attempt.
255
+ """
256
+
257
+ if not self.entries:
258
+ raise NoWorkingProxiesError("No proxies available")
259
+
250
260
  pool = self._build_pool(use_auth)
251
261
  self._refresh_traversal_queue(pool, randomize)
252
262
 
253
- def _find_working_entry():
254
- if not self.entries:
255
- raise NoWorkingProxiesError("No proxies available")
256
- return self._find_working_entry_once(check_timeout, cooldown_seconds)
257
-
258
- # Handle both timedelta and numeric seconds for backward compatibility
259
- if proxy_rotation_interval:
260
- retrying = tenacity.Retrying(
261
- wait=tenacity.wait_fixed(cooldown_seconds),
262
- stop=tenacity.stop_after_delay(proxy_rotation_interval),
263
- before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
264
- reraise=True,
265
- )
266
- entry = retrying(_find_working_entry)
267
- else:
268
- entry = _find_working_entry()
263
+ find_once = partial(self._find_working_entry_once, check_timeout, cooldown_seconds)
264
+
265
+ if not proxy_rotation_interval:
266
+ return find_once()
269
267
 
270
- return entry
268
+ def before_sleep(retry_state):
269
+ tenacity.before_sleep_log(logger, logging.INFO)(retry_state)
270
+ self._refresh_traversal_queue(pool, randomize)
271
+
272
+ retrying = tenacity.Retrying(
273
+ wait=tenacity.wait_fixed(cooldown_seconds),
274
+ stop=tenacity.stop_after_delay(proxy_rotation_interval),
275
+ before_sleep=before_sleep,
276
+ retry=tenacity.retry_if_exception_type(NoWorkingProxiesError),
277
+ reraise=True,
278
+ )
279
+ return retrying(find_once)
271
280
 
272
281
  def _get_round_robin_candidates(self, pool):
273
282
  """Get candidates in round-robin order starting from current_index."""
@@ -329,6 +338,7 @@ class ProxyInterface:
329
338
  self._traversal_queue.remove(idx)
330
339
  return entry
331
340
 
341
+ self._traversal_queue = []
332
342
  raise NoWorkingProxiesError("No working proxies available")
333
343
 
334
344
  def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
File without changes
File without changes