datamarket 0.7.104__tar.gz → 0.7.105__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (35)
  1. {datamarket-0.7.104 → datamarket-0.7.105}/PKG-INFO +1 -1
  2. {datamarket-0.7.104 → datamarket-0.7.105}/pyproject.toml +1 -1
  3. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/proxy.py +17 -9
  4. {datamarket-0.7.104 → datamarket-0.7.105}/LICENSE +0 -0
  5. {datamarket-0.7.104 → datamarket-0.7.105}/README.md +0 -0
  6. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/exceptions/__init__.py +0 -0
  8. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/exceptions/main.py +0 -0
  9. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/__init__.py +0 -0
  10. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/alchemy.py +0 -0
  11. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/aws.py +0 -0
  12. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/azure.py +0 -0
  13. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/drive.py +0 -0
  14. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/ftp.py +0 -0
  15. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/nominatim.py +0 -0
  16. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/peerdb.py +0 -0
  17. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/interfaces/tinybird.py +0 -0
  18. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/params/__init__.py +0 -0
  19. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/params/nominatim.py +0 -0
  20. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/__init__.py +0 -0
  21. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/airflow.py +0 -0
  22. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/alchemy.py +0 -0
  23. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/main.py +0 -0
  24. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/nominatim.py +0 -0
  25. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/playwright/__init__.py +0 -0
  26. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/playwright/async_api.py +0 -0
  27. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/playwright/sync_api.py +0 -0
  28. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/selenium.py +0 -0
  29. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/soda.py +0 -0
  30. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/strings/__init__.py +0 -0
  31. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/strings/normalization.py +0 -0
  32. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/strings/obfuscation.py +0 -0
  33. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/strings/standardization.py +0 -0
  34. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/typer.py +0 -0
  35. {datamarket-0.7.104 → datamarket-0.7.105}/src/datamarket/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.104
3
+ Version: 0.7.105
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.7.104"
3
+ version = "0.7.105"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -179,13 +179,13 @@ class ProxyInterface:
179
179
  return
180
180
 
181
181
  try:
182
- logger.info(f"Current IP: {self.check_current_ip()}")
182
+ logger.debug(f"Current IP: {self.check_current_ip()}")
183
183
  with Controller.from_port(port=9051) as controller:
184
184
  controller.authenticate(password=self.tor_password)
185
185
  controller.signal(Signal.NEWNYM)
186
186
 
187
187
  time.sleep(5)
188
- logger.info(f"New IP: {self.check_current_ip()}")
188
+ logger.debug(f"New IP: {self.check_current_ip()}")
189
189
  except Exception as ex:
190
190
  logger.error("Failed to renew Tor IP")
191
191
  logger.error(ex)
@@ -231,6 +231,11 @@ class ProxyInterface:
231
231
  pool = self._build_pool(use_auth)
232
232
  candidates = self._get_candidates(pool, randomize)
233
233
 
234
+ # Capture baseline before any health checks that might set it
235
+ baseline_before = None
236
+ if len(candidates) == 1:
237
+ baseline_before = self._health.get(candidates[0], {}).get("last_ip")
238
+
234
239
  def _find_working_entry():
235
240
  if not self.entries:
236
241
  raise NoWorkingProxiesError("No proxies available")
@@ -246,13 +251,16 @@ class ProxyInterface:
246
251
  entry = _find_working_entry()
247
252
 
248
253
  if ensure_new_ip and len(pool) == 1:
249
- logger.info(f"ensure_new_ip=True and single proxy, waiting for IP change: {entry[0]}:{entry[1]}")
254
+ logger.debug(f"ensure_new_ip=True and single proxy, handling IP change check: {entry[0]}:{entry[1]}")
250
255
  baseline = self._health.get(entry, {}).get("last_ip")
251
- if not baseline:
252
- if not self.is_entry_alive(entry, timeout=check_timeout):
253
- raise NoWorkingProxiesError("Proxy became unavailable during ensure_new_ip")
254
- baseline = self._health.get(entry, {}).get("last_ip")
255
- entry = self._wait_for_new_ip(entry, baseline, ensure_new_ip_timeout, ensure_new_ip_interval, check_timeout)
256
+ if not baseline_before:
257
+ # First time seeing this proxy: it was already checked in _find_working_entry, so return immediately
258
+ logger.debug("No baseline IP found; returning first-working proxy without waiting for IP change")
259
+ else:
260
+ # There is a baseline: wait for the IP to change
261
+ entry = self._wait_for_new_ip(
262
+ entry, baseline, ensure_new_ip_timeout, ensure_new_ip_interval, check_timeout
263
+ )
256
264
 
257
265
  return entry
258
266
 
@@ -327,7 +335,7 @@ class ProxyInterface:
327
335
 
328
336
  if current_ip and current_ip != baseline:
329
337
  self.mark_entry_status(entry, True, last_ip=current_ip)
330
- logger.info(f"IP changed from {baseline} to {current_ip}")
338
+ logger.debug(f"IP changed from {baseline} to {current_ip}")
331
339
  return entry
332
340
 
333
341
  time.sleep(interval)
File without changes
File without changes