datamarket 0.7.108__py3-none-any.whl → 0.7.109__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -5,6 +5,7 @@ import logging
5
5
  import random
6
6
  import time
7
7
  from datetime import timedelta
8
+ from functools import partial
8
9
 
9
10
  import requests
10
11
  import tenacity
@@ -246,28 +247,36 @@ class ProxyInterface:
246
247
  cooldown_seconds=30,
247
248
  proxy_rotation_interval=PROXY_ROTATION_INTERVAL,
248
249
  ):
249
- """Get a working proxy entry, performing health checks as needed."""
250
+ """Get a working proxy entry, performing health checks as needed.
251
+
252
+ - Fails fast if there are no entries.
253
+ - Optionally retries for up to `proxy_rotation_interval`,
254
+ refreshing the traversal queue before each attempt.
255
+ """
256
+
257
+ if not self.entries:
258
+ raise NoWorkingProxiesError("No proxies available")
259
+
250
260
  pool = self._build_pool(use_auth)
251
261
  self._refresh_traversal_queue(pool, randomize)
252
262
 
253
- def _find_working_entry():
254
- if not self.entries:
255
- raise NoWorkingProxiesError("No proxies available")
256
- return self._find_working_entry_once(check_timeout, cooldown_seconds)
257
-
258
- # Handle both timedelta and numeric seconds for backward compatibility
259
- if proxy_rotation_interval:
260
- retrying = tenacity.Retrying(
261
- wait=tenacity.wait_fixed(cooldown_seconds),
262
- stop=tenacity.stop_after_delay(proxy_rotation_interval),
263
- before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
264
- reraise=True,
265
- )
266
- entry = retrying(_find_working_entry)
267
- else:
268
- entry = _find_working_entry()
263
+ find_once = partial(self._find_working_entry_once, check_timeout, cooldown_seconds)
264
+
265
+ if not proxy_rotation_interval:
266
+ return find_once()
269
267
 
270
- return entry
268
+ def before_sleep(retry_state):
269
+ tenacity.before_sleep_log(logger, logging.INFO)(retry_state)
270
+ self._refresh_traversal_queue(pool, randomize)
271
+
272
+ retrying = tenacity.Retrying(
273
+ wait=tenacity.wait_fixed(cooldown_seconds),
274
+ stop=tenacity.stop_after_delay(proxy_rotation_interval),
275
+ before_sleep=before_sleep,
276
+ retry=tenacity.retry_if_exception_type(NoWorkingProxiesError),
277
+ reraise=True,
278
+ )
279
+ return retrying(find_once)
271
280
 
272
281
  def _get_round_robin_candidates(self, pool):
273
282
  """Get candidates in round-robin order starting from current_index."""
@@ -329,6 +338,7 @@ class ProxyInterface:
329
338
  self._traversal_queue.remove(idx)
330
339
  return entry
331
340
 
341
+ self._traversal_queue = []
332
342
  raise NoWorkingProxiesError("No working proxies available")
333
343
 
334
344
  def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
@@ -1,16 +1,17 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.108
3
+ Version: 0.7.109
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
7
7
  Author-email: techsupport@datamarket.es
8
- Requires-Python: >=3.12,<3.13
8
+ Requires-Python: >=3.12,<4.0
9
9
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
11
  Classifier: Operating System :: OS Independent
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
14
15
  Provides-Extra: aws
15
16
  Provides-Extra: azure-storage-blob
16
17
  Provides-Extra: boto3
@@ -9,7 +9,7 @@ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjH
9
9
  datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
10
10
  datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
11
11
  datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
12
- datamarket/interfaces/proxy.py,sha256=T5imj-f2-ZIy7TM8UhkkUWAAdlEbJG95KHohoYgpZEo,14903
12
+ datamarket/interfaces/proxy.py,sha256=64r5os8yIRpG_f4HxdGXLIRguzVEErG-GRAF8vcH58Y,15171
13
13
  datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
14
14
  datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
@@ -29,7 +29,7 @@ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnm
29
29
  datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
30
30
  datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
31
31
  datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
32
- datamarket-0.7.108.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
- datamarket-0.7.108.dist-info/METADATA,sha256=nFR_qzIVAlSc3L_0P2D1fWN7GdQTTINVZ_AkCDmymgc,7382
34
- datamarket-0.7.108.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
35
- datamarket-0.7.108.dist-info/RECORD,,
32
+ datamarket-0.7.109.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
+ datamarket-0.7.109.dist-info/METADATA,sha256=Q01pb6jVLuJFXAYqT_2qNgUKBUlbu89RI_Fj4aIL_I4,7432
34
+ datamarket-0.7.109.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
35
+ datamarket-0.7.109.dist-info/RECORD,,