datamarket 0.7.107__py3-none-any.whl → 0.7.109__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -1,7 +1,11 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
1
4
  import logging
2
5
  import random
3
6
  import time
4
7
  from datetime import timedelta
8
+ from functools import partial
5
9
 
6
10
  import requests
7
11
  import tenacity
@@ -10,12 +14,18 @@ from stem.control import Controller
10
14
 
11
15
  from datamarket.exceptions import EnsureNewIPTimeoutError, NoWorkingProxiesError
12
16
 
17
+ ########################################################################################################################
18
+ # SETUP
19
+
13
20
  logger = logging.getLogger(__name__)
14
21
  logging.getLogger("stem").setLevel(logging.WARNING)
15
22
 
16
23
  PROXY_ROTATION_INTERVAL = timedelta(minutes=10)
17
24
  PROXY_ROTATION_TIMEOUT_SECONDS = int(PROXY_ROTATION_INTERVAL.total_seconds())
18
25
 
26
+ ########################################################################################################################
27
+ # CLASSES
28
+
19
29
 
20
30
  class ProxyInterface:
21
31
  """
@@ -237,26 +247,36 @@ class ProxyInterface:
237
247
  cooldown_seconds=30,
238
248
  proxy_rotation_interval=PROXY_ROTATION_INTERVAL,
239
249
  ):
240
- """Get a working proxy entry, performing health checks as needed."""
250
+ """Get a working proxy entry, performing health checks as needed.
251
+
252
+ - Fails fast if there are no entries.
253
+ - Optionally retries for up to `proxy_rotation_interval`,
254
+ refreshing the traversal queue before each attempt.
255
+ """
256
+
257
+ if not self.entries:
258
+ raise NoWorkingProxiesError("No proxies available")
259
+
241
260
  pool = self._build_pool(use_auth)
242
261
  self._refresh_traversal_queue(pool, randomize)
243
262
 
244
- def _find_working_entry():
245
- if not self.entries:
246
- raise NoWorkingProxiesError("No proxies available")
247
- return self._find_working_entry_once(check_timeout, cooldown_seconds)
248
-
249
- # Handle both timedelta and numeric seconds for backward compatibility
250
- if proxy_rotation_interval:
251
- retrying = tenacity.Retrying(
252
- stop=tenacity.stop_after_delay(proxy_rotation_interval),
253
- reraise=True,
254
- )
255
- entry = retrying(_find_working_entry)
256
- else:
257
- entry = _find_working_entry()
263
+ find_once = partial(self._find_working_entry_once, check_timeout, cooldown_seconds)
264
+
265
+ if not proxy_rotation_interval:
266
+ return find_once()
258
267
 
259
- return entry
268
+ def before_sleep(retry_state):
269
+ tenacity.before_sleep_log(logger, logging.INFO)(retry_state)
270
+ self._refresh_traversal_queue(pool, randomize)
271
+
272
+ retrying = tenacity.Retrying(
273
+ wait=tenacity.wait_fixed(cooldown_seconds),
274
+ stop=tenacity.stop_after_delay(proxy_rotation_interval),
275
+ before_sleep=before_sleep,
276
+ retry=tenacity.retry_if_exception_type(NoWorkingProxiesError),
277
+ reraise=True,
278
+ )
279
+ return retrying(find_once)
260
280
 
261
281
  def _get_round_robin_candidates(self, pool):
262
282
  """Get candidates in round-robin order starting from current_index."""
@@ -318,9 +338,11 @@ class ProxyInterface:
318
338
  self._traversal_queue.remove(idx)
319
339
  return entry
320
340
 
341
+ self._traversal_queue = []
321
342
  raise NoWorkingProxiesError("No working proxies available")
322
343
 
323
344
  def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
345
+ logger.info("Refreshing proxy IP...")
324
346
  start = time.time()
325
347
  while time.time() - start < timeout:
326
348
  host, port, user, pwd = entry
@@ -1,16 +1,17 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.107
3
+ Version: 0.7.109
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
7
7
  Author-email: techsupport@datamarket.es
8
- Requires-Python: >=3.12,<3.13
8
+ Requires-Python: >=3.12,<4.0
9
9
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
11
  Classifier: Operating System :: OS Independent
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
14
15
  Provides-Extra: aws
15
16
  Provides-Extra: azure-storage-blob
16
17
  Provides-Extra: boto3
@@ -9,7 +9,7 @@ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjH
9
9
  datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
10
10
  datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
11
11
  datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
12
- datamarket/interfaces/proxy.py,sha256=Hxwmkii3wP3oplg4yMhr_NF6ru8tmz7z8jHld4SfPRw,14325
12
+ datamarket/interfaces/proxy.py,sha256=64r5os8yIRpG_f4HxdGXLIRguzVEErG-GRAF8vcH58Y,15171
13
13
  datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
14
14
  datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
@@ -29,7 +29,7 @@ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnm
29
29
  datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
30
30
  datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
31
31
  datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
32
- datamarket-0.7.107.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
- datamarket-0.7.107.dist-info/METADATA,sha256=F0zxym6rN2EWf3jh_1ilKdrIXMqdd_D4py1MzuYzuLA,7382
34
- datamarket-0.7.107.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
35
- datamarket-0.7.107.dist-info/RECORD,,
32
+ datamarket-0.7.109.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
+ datamarket-0.7.109.dist-info/METADATA,sha256=Q01pb6jVLuJFXAYqT_2qNgUKBUlbu89RI_Fj4aIL_I4,7432
34
+ datamarket-0.7.109.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
35
+ datamarket-0.7.109.dist-info/RECORD,,