datamarket 0.7.105__py3-none-any.whl → 0.7.106__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/proxy.py +74 -22
- {datamarket-0.7.105.dist-info → datamarket-0.7.106.dist-info}/METADATA +1 -1
- {datamarket-0.7.105.dist-info → datamarket-0.7.106.dist-info}/RECORD +5 -5
- {datamarket-0.7.105.dist-info → datamarket-0.7.106.dist-info}/LICENSE +0 -0
- {datamarket-0.7.105.dist-info → datamarket-0.7.106.dist-info}/WHEEL +0 -0
datamarket/interfaces/proxy.py
CHANGED
|
@@ -22,7 +22,7 @@ class ProxyInterface:
|
|
|
22
22
|
|
|
23
23
|
def __init__(self, config):
|
|
24
24
|
self._load_from_config(config)
|
|
25
|
-
self.current_index =
|
|
25
|
+
self.current_index = -2 # -2 means no selection made yet, -1 means Tor selected
|
|
26
26
|
self._health = {} # {entry: {"ok": bool, "last_checked": time.time(), "last_error": str}}
|
|
27
27
|
|
|
28
28
|
def _load_from_config(self, cfg):
|
|
@@ -92,6 +92,7 @@ class ProxyInterface:
|
|
|
92
92
|
"""
|
|
93
93
|
# Tor handling (skip health check for tor)
|
|
94
94
|
if use_tor:
|
|
95
|
+
self.current_index = -1 # Indicate Tor is selected
|
|
95
96
|
if raw:
|
|
96
97
|
return ("127.0.0.1", "9050", None, None)
|
|
97
98
|
return {"socks5": self.get_proxy_url("127.0.0.1", 9050, schema="socks5")}
|
|
@@ -135,15 +136,19 @@ class ProxyInterface:
|
|
|
135
136
|
if not pool:
|
|
136
137
|
pool = self.entries
|
|
137
138
|
|
|
138
|
-
#
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
# Start from the next index after current_index
|
|
140
|
+
start_idx = (self.current_index + 1) % len(self.entries) if self.current_index >= 0 else 0
|
|
141
|
+
|
|
142
|
+
# Find next in pool starting from start_idx
|
|
143
|
+
for i in range(len(self.entries)):
|
|
144
|
+
idx = (start_idx + i) % len(self.entries)
|
|
142
145
|
entry = self.entries[idx]
|
|
143
146
|
if entry in pool:
|
|
147
|
+
self.current_index = idx # Update to selected index
|
|
144
148
|
return entry
|
|
145
149
|
|
|
146
150
|
# Fallback to first entry
|
|
151
|
+
self.current_index = 0
|
|
147
152
|
return self.entries[0]
|
|
148
153
|
|
|
149
154
|
def get_random(self, use_auth=False):
|
|
@@ -156,10 +161,10 @@ class ProxyInterface:
|
|
|
156
161
|
pool = self.entries
|
|
157
162
|
|
|
158
163
|
entry = random.choice(pool) # noqa: S311
|
|
159
|
-
# Update index to
|
|
164
|
+
# Update index to selected entry
|
|
160
165
|
try:
|
|
161
166
|
pos = self.entries.index(entry)
|
|
162
|
-
self.current_index =
|
|
167
|
+
self.current_index = pos
|
|
163
168
|
except ValueError:
|
|
164
169
|
pass
|
|
165
170
|
|
|
@@ -190,6 +195,52 @@ class ProxyInterface:
|
|
|
190
195
|
logger.error("Failed to renew Tor IP")
|
|
191
196
|
logger.error(ex)
|
|
192
197
|
|
|
198
|
+
def wait_for_new_ip(self, timeout=600, interval=5, check_timeout=5):
|
|
199
|
+
"""
|
|
200
|
+
Wait for the IP address of the currently selected proxy to change.
|
|
201
|
+
|
|
202
|
+
:param timeout: Max seconds to wait for IP change
|
|
203
|
+
:param interval: Seconds between IP checks
|
|
204
|
+
:param check_timeout: Timeout for individual IP check requests
|
|
205
|
+
:return: The selected entry (unchanged)
|
|
206
|
+
:raises RuntimeError: If no proxy is available or baseline cannot be determined
|
|
207
|
+
:raises EnsureNewIPTimeoutError: If IP doesn't change within timeout
|
|
208
|
+
"""
|
|
209
|
+
# Use currently selected proxy
|
|
210
|
+
if self.current_index == -1:
|
|
211
|
+
# Tor is selected
|
|
212
|
+
entry = ("127.0.0.1", "9050", None, None)
|
|
213
|
+
elif self.current_index >= 0 and self.current_index < len(self.entries):
|
|
214
|
+
# current_index points to the selected entry
|
|
215
|
+
entry = self.entries[self.current_index]
|
|
216
|
+
else:
|
|
217
|
+
# No valid selection, select one
|
|
218
|
+
logger.debug("No proxy currently selected, selecting one for IP waiting")
|
|
219
|
+
self.get_proxies(raw=True)
|
|
220
|
+
if self.current_index == -1:
|
|
221
|
+
entry = ("127.0.0.1", "9050", None, None)
|
|
222
|
+
elif self.current_index >= 0 and self.current_index < len(self.entries):
|
|
223
|
+
entry = self.entries[self.current_index]
|
|
224
|
+
else:
|
|
225
|
+
raise RuntimeError("Could not select a proxy for IP waiting")
|
|
226
|
+
|
|
227
|
+
# Auto-detect baseline IP
|
|
228
|
+
host, port, user, pwd = entry
|
|
229
|
+
proxies_map = {
|
|
230
|
+
"http": self.get_proxy_url(host, port, user, pwd, "http"),
|
|
231
|
+
"https": self.get_proxy_url(host, port, user, pwd, "http"),
|
|
232
|
+
}
|
|
233
|
+
try:
|
|
234
|
+
resp = requests.get(self.CHECK_IP_URL, proxies=proxies_map, timeout=check_timeout)
|
|
235
|
+
baseline = resp.json().get("YourFuckingIPAddress")
|
|
236
|
+
if not baseline:
|
|
237
|
+
raise RuntimeError(f"Could not determine baseline IP for entry {host}:{port}")
|
|
238
|
+
logger.debug(f"Auto-detected baseline IP: {baseline}")
|
|
239
|
+
except Exception as ex:
|
|
240
|
+
raise RuntimeError(f"Could not determine baseline IP for entry {host}:{port}: {ex}") from ex
|
|
241
|
+
|
|
242
|
+
return self._wait_for_new_ip(entry, baseline, timeout, interval, check_timeout)
|
|
243
|
+
|
|
193
244
|
def mark_entry_status(self, entry, ok, error=None, last_ip=None):
|
|
194
245
|
"""Update health cache for an entry."""
|
|
195
246
|
self._health[entry] = {
|
|
@@ -234,7 +285,9 @@ class ProxyInterface:
|
|
|
234
285
|
# Capture baseline before any health checks that might set it
|
|
235
286
|
baseline_before = None
|
|
236
287
|
if len(candidates) == 1:
|
|
237
|
-
|
|
288
|
+
idx = candidates[0]
|
|
289
|
+
entry = self.entries[idx]
|
|
290
|
+
baseline_before = self._health.get(entry, {}).get("last_ip")
|
|
238
291
|
|
|
239
292
|
def _find_working_entry():
|
|
240
293
|
if not self.entries:
|
|
@@ -251,13 +304,13 @@ class ProxyInterface:
|
|
|
251
304
|
entry = _find_working_entry()
|
|
252
305
|
|
|
253
306
|
if ensure_new_ip and len(pool) == 1:
|
|
254
|
-
logger.debug(f"ensure_new_ip=True and single proxy, handling IP change check: {entry[0]}:{entry[1]}")
|
|
255
307
|
baseline = self._health.get(entry, {}).get("last_ip")
|
|
256
308
|
if not baseline_before:
|
|
257
309
|
# First time seeing this proxy: it was already checked in _find_working_entry, so return immediately
|
|
258
310
|
logger.debug("No baseline IP found; returning first-working proxy without waiting for IP change")
|
|
259
311
|
else:
|
|
260
312
|
# There is a baseline: wait for the IP to change
|
|
313
|
+
logger.info(f"ensure_new_ip=True and single proxy, waiting for IP change: {entry[0]}:{entry[1]}")
|
|
261
314
|
entry = self._wait_for_new_ip(
|
|
262
315
|
entry, baseline, ensure_new_ip_timeout, ensure_new_ip_interval, check_timeout
|
|
263
316
|
)
|
|
@@ -267,19 +320,12 @@ class ProxyInterface:
|
|
|
267
320
|
def _get_round_robin_candidates(self, pool):
|
|
268
321
|
"""Get candidates in round-robin order starting from current_index."""
|
|
269
322
|
candidates = []
|
|
270
|
-
start_idx = self.current_index
|
|
323
|
+
start_idx = (self.current_index + 1) % len(self.entries) if self.current_index >= 0 else 0
|
|
271
324
|
for i in range(len(self.entries)):
|
|
272
325
|
idx = (start_idx + i) % len(self.entries)
|
|
273
326
|
entry = self.entries[idx]
|
|
274
327
|
if entry in pool:
|
|
275
|
-
candidates.append(
|
|
276
|
-
# Update current_index for next call
|
|
277
|
-
if candidates:
|
|
278
|
-
try:
|
|
279
|
-
pos = self.entries.index(candidates[0])
|
|
280
|
-
self.current_index = (pos + 1) % len(self.entries)
|
|
281
|
-
except ValueError:
|
|
282
|
-
pass
|
|
328
|
+
candidates.append(idx)
|
|
283
329
|
return candidates
|
|
284
330
|
|
|
285
331
|
def _build_pool(self, use_auth):
|
|
@@ -290,12 +336,14 @@ class ProxyInterface:
|
|
|
290
336
|
|
|
291
337
|
def _get_candidates(self, pool, randomize):
|
|
292
338
|
if randomize:
|
|
293
|
-
|
|
339
|
+
candidates = [idx for idx, entry in enumerate(self.entries) if entry in pool]
|
|
340
|
+
return random.sample(candidates, k=len(candidates))
|
|
294
341
|
else:
|
|
295
342
|
return self._get_round_robin_candidates(pool)
|
|
296
343
|
|
|
297
344
|
def _find_working_entry_once(self, candidates, check_timeout, cooldown_seconds):
|
|
298
|
-
for
|
|
345
|
+
for idx in candidates:
|
|
346
|
+
entry = self.entries[idx]
|
|
299
347
|
health = self._health.get(entry, {})
|
|
300
348
|
last_checked = health.get("last_checked", 0)
|
|
301
349
|
ok = health.get("ok", False)
|
|
@@ -303,18 +351,22 @@ class ProxyInterface:
|
|
|
303
351
|
|
|
304
352
|
if ok and (now - last_checked) < cooldown_seconds:
|
|
305
353
|
logger.debug(f"Using cached working proxy: {entry[0]}:{entry[1]}")
|
|
354
|
+
self.current_index = idx
|
|
306
355
|
return entry
|
|
307
356
|
elif not ok and (now - last_checked) < cooldown_seconds:
|
|
308
357
|
continue
|
|
309
358
|
else:
|
|
310
359
|
logger.debug(f"Checking proxy health: {entry[0]}:{entry[1]}")
|
|
311
360
|
if self.is_entry_alive(entry, timeout=check_timeout):
|
|
361
|
+
self.current_index = idx
|
|
312
362
|
return entry
|
|
313
363
|
|
|
314
364
|
logger.warning("No cached working proxies, forcing fresh checks")
|
|
315
|
-
for
|
|
365
|
+
for idx in candidates:
|
|
366
|
+
entry = self.entries[idx]
|
|
316
367
|
logger.debug(f"Force-checking proxy: {entry[0]}:{entry[1]}")
|
|
317
368
|
if self.is_entry_alive(entry, timeout=check_timeout):
|
|
369
|
+
self.current_index = idx
|
|
318
370
|
return entry
|
|
319
371
|
|
|
320
372
|
raise NoWorkingProxiesError("No working proxies available")
|
|
@@ -335,7 +387,7 @@ class ProxyInterface:
|
|
|
335
387
|
|
|
336
388
|
if current_ip and current_ip != baseline:
|
|
337
389
|
self.mark_entry_status(entry, True, last_ip=current_ip)
|
|
338
|
-
logger.
|
|
390
|
+
logger.info(f"IP changed from {baseline} to {current_ip}")
|
|
339
391
|
return entry
|
|
340
392
|
|
|
341
393
|
time.sleep(interval)
|
|
@@ -9,7 +9,7 @@ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjH
|
|
|
9
9
|
datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
|
|
10
10
|
datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
|
|
11
11
|
datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
|
|
12
|
-
datamarket/interfaces/proxy.py,sha256=
|
|
12
|
+
datamarket/interfaces/proxy.py,sha256=e-bbmtjyjkh4ZAuk6o0nm-UcynH4qmDZnzDQzbOasb8,16063
|
|
13
13
|
datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
|
|
14
14
|
datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
|
|
@@ -29,7 +29,7 @@ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnm
|
|
|
29
29
|
datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
|
|
30
30
|
datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
|
|
31
31
|
datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
|
|
32
|
-
datamarket-0.7.
|
|
33
|
-
datamarket-0.7.
|
|
34
|
-
datamarket-0.7.
|
|
35
|
-
datamarket-0.7.
|
|
32
|
+
datamarket-0.7.106.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
33
|
+
datamarket-0.7.106.dist-info/METADATA,sha256=A76E203rT92P2sSWe_FpjyL4I3vqirVPWHOFAXYlZug,7382
|
|
34
|
+
datamarket-0.7.106.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
35
|
+
datamarket-0.7.106.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|