datamarket 0.7.108__tar.gz → 0.7.109__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.7.108 → datamarket-0.7.109}/PKG-INFO +3 -2
- {datamarket-0.7.108 → datamarket-0.7.109}/pyproject.toml +2 -2
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/proxy.py +28 -18
- {datamarket-0.7.108 → datamarket-0.7.109}/LICENSE +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/README.md +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/exceptions/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/exceptions/main.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/azure.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/nominatim.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/async_api.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/playwright/sync_api.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/__init__.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/normalization.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/obfuscation.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/strings/standardization.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/typer.py +0 -0
- {datamarket-0.7.108 → datamarket-0.7.109}/src/datamarket/utils/types.py +0 -0
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.7.108
|
|
3
|
+
Version: 0.7.109
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
7
7
|
Author-email: techsupport@datamarket.es
|
|
8
|
-
Requires-Python: >=3.12,<3.13
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
10
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
15
|
Provides-Extra: aws
|
|
15
16
|
Provides-Extra: azure-storage-blob
|
|
16
17
|
Provides-Extra: boto3
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "datamarket"
|
|
3
|
-
version = "0.7.108"
|
|
3
|
+
version = "0.7.109"
|
|
4
4
|
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
5
|
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
6
|
license = "GPL-3.0-or-later"
|
|
@@ -15,7 +15,7 @@ classifiers = [
|
|
|
15
15
|
]
|
|
16
16
|
|
|
17
17
|
[tool.poetry.dependencies]
|
|
18
|
-
python = "
|
|
18
|
+
python = "^3.12"
|
|
19
19
|
typer = "~0.15.0"
|
|
20
20
|
SQLAlchemy = "^2.0.0"
|
|
21
21
|
psycopg2-binary = "^2.0.0"
|
|
@@ -5,6 +5,7 @@ import logging
|
|
|
5
5
|
import random
|
|
6
6
|
import time
|
|
7
7
|
from datetime import timedelta
|
|
8
|
+
from functools import partial
|
|
8
9
|
|
|
9
10
|
import requests
|
|
10
11
|
import tenacity
|
|
@@ -246,28 +247,36 @@ class ProxyInterface:
|
|
|
246
247
|
cooldown_seconds=30,
|
|
247
248
|
proxy_rotation_interval=PROXY_ROTATION_INTERVAL,
|
|
248
249
|
):
|
|
249
|
-
"""Get a working proxy entry, performing health checks as needed."""
|
|
250
|
+
"""Get a working proxy entry, performing health checks as needed.
|
|
251
|
+
|
|
252
|
+
- Fails fast if there are no entries.
|
|
253
|
+
- Optionally retries for up to `proxy_rotation_interval`,
|
|
254
|
+
refreshing the traversal queue before each attempt.
|
|
255
|
+
"""
|
|
256
|
+
|
|
257
|
+
if not self.entries:
|
|
258
|
+
raise NoWorkingProxiesError("No proxies available")
|
|
259
|
+
|
|
250
260
|
pool = self._build_pool(use_auth)
|
|
251
261
|
self._refresh_traversal_queue(pool, randomize)
|
|
252
262
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
return
|
|
257
|
-
|
|
258
|
-
# Handle both timedelta and numeric seconds for backward compatibility
|
|
259
|
-
if proxy_rotation_interval:
|
|
260
|
-
retrying = tenacity.Retrying(
|
|
261
|
-
wait=tenacity.wait_fixed(cooldown_seconds),
|
|
262
|
-
stop=tenacity.stop_after_delay(proxy_rotation_interval),
|
|
263
|
-
before_sleep=tenacity.before_sleep_log(logger, logging.INFO),
|
|
264
|
-
reraise=True,
|
|
265
|
-
)
|
|
266
|
-
entry = retrying(_find_working_entry)
|
|
267
|
-
else:
|
|
268
|
-
entry = _find_working_entry()
|
|
263
|
+
find_once = partial(self._find_working_entry_once, check_timeout, cooldown_seconds)
|
|
264
|
+
|
|
265
|
+
if not proxy_rotation_interval:
|
|
266
|
+
return find_once()
|
|
269
267
|
|
|
270
|
-
|
|
268
|
+
def before_sleep(retry_state):
|
|
269
|
+
tenacity.before_sleep_log(logger, logging.INFO)(retry_state)
|
|
270
|
+
self._refresh_traversal_queue(pool, randomize)
|
|
271
|
+
|
|
272
|
+
retrying = tenacity.Retrying(
|
|
273
|
+
wait=tenacity.wait_fixed(cooldown_seconds),
|
|
274
|
+
stop=tenacity.stop_after_delay(proxy_rotation_interval),
|
|
275
|
+
before_sleep=before_sleep,
|
|
276
|
+
retry=tenacity.retry_if_exception_type(NoWorkingProxiesError),
|
|
277
|
+
reraise=True,
|
|
278
|
+
)
|
|
279
|
+
return retrying(find_once)
|
|
271
280
|
|
|
272
281
|
def _get_round_robin_candidates(self, pool):
|
|
273
282
|
"""Get candidates in round-robin order starting from current_index."""
|
|
@@ -329,6 +338,7 @@ class ProxyInterface:
|
|
|
329
338
|
self._traversal_queue.remove(idx)
|
|
330
339
|
return entry
|
|
331
340
|
|
|
341
|
+
self._traversal_queue = []
|
|
332
342
|
raise NoWorkingProxiesError("No working proxies available")
|
|
333
343
|
|
|
334
344
|
def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|