datamarket 0.7.107__py3-none-any.whl → 0.7.109__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/proxy.py +38 -16
- {datamarket-0.7.107.dist-info → datamarket-0.7.109.dist-info}/METADATA +3 -2
- {datamarket-0.7.107.dist-info → datamarket-0.7.109.dist-info}/RECORD +5 -5
- {datamarket-0.7.107.dist-info → datamarket-0.7.109.dist-info}/LICENSE +0 -0
- {datamarket-0.7.107.dist-info → datamarket-0.7.109.dist-info}/WHEEL +0 -0
datamarket/interfaces/proxy.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# IMPORTS
|
|
3
|
+
|
|
1
4
|
import logging
|
|
2
5
|
import random
|
|
3
6
|
import time
|
|
4
7
|
from datetime import timedelta
|
|
8
|
+
from functools import partial
|
|
5
9
|
|
|
6
10
|
import requests
|
|
7
11
|
import tenacity
|
|
@@ -10,12 +14,18 @@ from stem.control import Controller
|
|
|
10
14
|
|
|
11
15
|
from datamarket.exceptions import EnsureNewIPTimeoutError, NoWorkingProxiesError
|
|
12
16
|
|
|
17
|
+
########################################################################################################################
|
|
18
|
+
# SETUP
|
|
19
|
+
|
|
13
20
|
logger = logging.getLogger(__name__)
|
|
14
21
|
logging.getLogger("stem").setLevel(logging.WARNING)
|
|
15
22
|
|
|
16
23
|
PROXY_ROTATION_INTERVAL = timedelta(minutes=10)
|
|
17
24
|
PROXY_ROTATION_TIMEOUT_SECONDS = int(PROXY_ROTATION_INTERVAL.total_seconds())
|
|
18
25
|
|
|
26
|
+
########################################################################################################################
|
|
27
|
+
# CLASSES
|
|
28
|
+
|
|
19
29
|
|
|
20
30
|
class ProxyInterface:
|
|
21
31
|
"""
|
|
@@ -237,26 +247,36 @@ class ProxyInterface:
|
|
|
237
247
|
cooldown_seconds=30,
|
|
238
248
|
proxy_rotation_interval=PROXY_ROTATION_INTERVAL,
|
|
239
249
|
):
|
|
240
|
-
"""Get a working proxy entry, performing health checks as needed.
|
|
250
|
+
"""Get a working proxy entry, performing health checks as needed.
|
|
251
|
+
|
|
252
|
+
- Fails fast if there are no entries.
|
|
253
|
+
- Optionally retries for up to `proxy_rotation_interval`,
|
|
254
|
+
refreshing the traversal queue before each attempt.
|
|
255
|
+
"""
|
|
256
|
+
|
|
257
|
+
if not self.entries:
|
|
258
|
+
raise NoWorkingProxiesError("No proxies available")
|
|
259
|
+
|
|
241
260
|
pool = self._build_pool(use_auth)
|
|
242
261
|
self._refresh_traversal_queue(pool, randomize)
|
|
243
262
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
return
|
|
248
|
-
|
|
249
|
-
# Handle both timedelta and numeric seconds for backward compatibility
|
|
250
|
-
if proxy_rotation_interval:
|
|
251
|
-
retrying = tenacity.Retrying(
|
|
252
|
-
stop=tenacity.stop_after_delay(proxy_rotation_interval),
|
|
253
|
-
reraise=True,
|
|
254
|
-
)
|
|
255
|
-
entry = retrying(_find_working_entry)
|
|
256
|
-
else:
|
|
257
|
-
entry = _find_working_entry()
|
|
263
|
+
find_once = partial(self._find_working_entry_once, check_timeout, cooldown_seconds)
|
|
264
|
+
|
|
265
|
+
if not proxy_rotation_interval:
|
|
266
|
+
return find_once()
|
|
258
267
|
|
|
259
|
-
|
|
268
|
+
def before_sleep(retry_state):
|
|
269
|
+
tenacity.before_sleep_log(logger, logging.INFO)(retry_state)
|
|
270
|
+
self._refresh_traversal_queue(pool, randomize)
|
|
271
|
+
|
|
272
|
+
retrying = tenacity.Retrying(
|
|
273
|
+
wait=tenacity.wait_fixed(cooldown_seconds),
|
|
274
|
+
stop=tenacity.stop_after_delay(proxy_rotation_interval),
|
|
275
|
+
before_sleep=before_sleep,
|
|
276
|
+
retry=tenacity.retry_if_exception_type(NoWorkingProxiesError),
|
|
277
|
+
reraise=True,
|
|
278
|
+
)
|
|
279
|
+
return retrying(find_once)
|
|
260
280
|
|
|
261
281
|
def _get_round_robin_candidates(self, pool):
|
|
262
282
|
"""Get candidates in round-robin order starting from current_index."""
|
|
@@ -318,9 +338,11 @@ class ProxyInterface:
|
|
|
318
338
|
self._traversal_queue.remove(idx)
|
|
319
339
|
return entry
|
|
320
340
|
|
|
341
|
+
self._traversal_queue = []
|
|
321
342
|
raise NoWorkingProxiesError("No working proxies available")
|
|
322
343
|
|
|
323
344
|
def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
|
|
345
|
+
logger.info("Refreshing proxy IP...")
|
|
324
346
|
start = time.time()
|
|
325
347
|
while time.time() - start < timeout:
|
|
326
348
|
host, port, user, pwd = entry
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.7.107
|
|
3
|
+
Version: 0.7.109
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
7
7
|
Author-email: techsupport@datamarket.es
|
|
8
|
-
Requires-Python: >=3.12,<
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
10
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
15
|
Provides-Extra: aws
|
|
15
16
|
Provides-Extra: azure-storage-blob
|
|
16
17
|
Provides-Extra: boto3
|
|
@@ -9,7 +9,7 @@ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjH
|
|
|
9
9
|
datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
|
|
10
10
|
datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
|
|
11
11
|
datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
|
|
12
|
-
datamarket/interfaces/proxy.py,sha256=
|
|
12
|
+
datamarket/interfaces/proxy.py,sha256=64r5os8yIRpG_f4HxdGXLIRguzVEErG-GRAF8vcH58Y,15171
|
|
13
13
|
datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
|
|
14
14
|
datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
|
|
@@ -29,7 +29,7 @@ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnm
|
|
|
29
29
|
datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
|
|
30
30
|
datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
|
|
31
31
|
datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
|
|
32
|
-
datamarket-0.7.107.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
33
|
-
datamarket-0.7.
|
|
34
|
-
datamarket-0.7.107.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
35
|
-
datamarket-0.7.107.dist-info/RECORD,,
|
|
32
|
+
datamarket-0.7.109.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
33
|
+
datamarket-0.7.109.dist-info/METADATA,sha256=Q01pb6jVLuJFXAYqT_2qNgUKBUlbu89RI_Fj4aIL_I4,7432
|
|
34
|
+
datamarket-0.7.109.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
35
|
+
datamarket-0.7.109.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|