datamarket 0.7.104__py3-none-any.whl → 0.7.106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/proxy.py +87 -27
- {datamarket-0.7.104.dist-info → datamarket-0.7.106.dist-info}/METADATA +1 -1
- {datamarket-0.7.104.dist-info → datamarket-0.7.106.dist-info}/RECORD +5 -5
- {datamarket-0.7.104.dist-info → datamarket-0.7.106.dist-info}/LICENSE +0 -0
- {datamarket-0.7.104.dist-info → datamarket-0.7.106.dist-info}/WHEEL +0 -0
datamarket/interfaces/proxy.py
CHANGED
|
@@ -22,7 +22,7 @@ class ProxyInterface:
|
|
|
22
22
|
|
|
23
23
|
def __init__(self, config):
|
|
24
24
|
self._load_from_config(config)
|
|
25
|
-
self.current_index =
|
|
25
|
+
self.current_index = -2 # -2 means no selection made yet, -1 means Tor selected
|
|
26
26
|
self._health = {} # {entry: {"ok": bool, "last_checked": time.time(), "last_error": str}}
|
|
27
27
|
|
|
28
28
|
def _load_from_config(self, cfg):
|
|
@@ -92,6 +92,7 @@ class ProxyInterface:
|
|
|
92
92
|
"""
|
|
93
93
|
# Tor handling (skip health check for tor)
|
|
94
94
|
if use_tor:
|
|
95
|
+
self.current_index = -1 # Indicate Tor is selected
|
|
95
96
|
if raw:
|
|
96
97
|
return ("127.0.0.1", "9050", None, None)
|
|
97
98
|
return {"socks5": self.get_proxy_url("127.0.0.1", 9050, schema="socks5")}
|
|
@@ -135,15 +136,19 @@ class ProxyInterface:
|
|
|
135
136
|
if not pool:
|
|
136
137
|
pool = self.entries
|
|
137
138
|
|
|
138
|
-
#
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
# Start from the next index after current_index
|
|
140
|
+
start_idx = (self.current_index + 1) % len(self.entries) if self.current_index >= 0 else 0
|
|
141
|
+
|
|
142
|
+
# Find next in pool starting from start_idx
|
|
143
|
+
for i in range(len(self.entries)):
|
|
144
|
+
idx = (start_idx + i) % len(self.entries)
|
|
142
145
|
entry = self.entries[idx]
|
|
143
146
|
if entry in pool:
|
|
147
|
+
self.current_index = idx # Update to selected index
|
|
144
148
|
return entry
|
|
145
149
|
|
|
146
150
|
# Fallback to first entry
|
|
151
|
+
self.current_index = 0
|
|
147
152
|
return self.entries[0]
|
|
148
153
|
|
|
149
154
|
def get_random(self, use_auth=False):
|
|
@@ -156,10 +161,10 @@ class ProxyInterface:
|
|
|
156
161
|
pool = self.entries
|
|
157
162
|
|
|
158
163
|
entry = random.choice(pool) # noqa: S311
|
|
159
|
-
# Update index to
|
|
164
|
+
# Update index to selected entry
|
|
160
165
|
try:
|
|
161
166
|
pos = self.entries.index(entry)
|
|
162
|
-
self.current_index =
|
|
167
|
+
self.current_index = pos
|
|
163
168
|
except ValueError:
|
|
164
169
|
pass
|
|
165
170
|
|
|
@@ -179,17 +184,63 @@ class ProxyInterface:
|
|
|
179
184
|
return
|
|
180
185
|
|
|
181
186
|
try:
|
|
182
|
-
logger.
|
|
187
|
+
logger.debug(f"Current IP: {self.check_current_ip()}")
|
|
183
188
|
with Controller.from_port(port=9051) as controller:
|
|
184
189
|
controller.authenticate(password=self.tor_password)
|
|
185
190
|
controller.signal(Signal.NEWNYM)
|
|
186
191
|
|
|
187
192
|
time.sleep(5)
|
|
188
|
-
logger.
|
|
193
|
+
logger.debug(f"New IP: {self.check_current_ip()}")
|
|
189
194
|
except Exception as ex:
|
|
190
195
|
logger.error("Failed to renew Tor IP")
|
|
191
196
|
logger.error(ex)
|
|
192
197
|
|
|
198
|
+
def wait_for_new_ip(self, timeout=600, interval=5, check_timeout=5):
|
|
199
|
+
"""
|
|
200
|
+
Wait for the IP address of the currently selected proxy to change.
|
|
201
|
+
|
|
202
|
+
:param timeout: Max seconds to wait for IP change
|
|
203
|
+
:param interval: Seconds between IP checks
|
|
204
|
+
:param check_timeout: Timeout for individual IP check requests
|
|
205
|
+
:return: The selected entry (unchanged)
|
|
206
|
+
:raises RuntimeError: If no proxy is available or baseline cannot be determined
|
|
207
|
+
:raises EnsureNewIPTimeoutError: If IP doesn't change within timeout
|
|
208
|
+
"""
|
|
209
|
+
# Use currently selected proxy
|
|
210
|
+
if self.current_index == -1:
|
|
211
|
+
# Tor is selected
|
|
212
|
+
entry = ("127.0.0.1", "9050", None, None)
|
|
213
|
+
elif self.current_index >= 0 and self.current_index < len(self.entries):
|
|
214
|
+
# current_index points to the selected entry
|
|
215
|
+
entry = self.entries[self.current_index]
|
|
216
|
+
else:
|
|
217
|
+
# No valid selection, select one
|
|
218
|
+
logger.debug("No proxy currently selected, selecting one for IP waiting")
|
|
219
|
+
self.get_proxies(raw=True)
|
|
220
|
+
if self.current_index == -1:
|
|
221
|
+
entry = ("127.0.0.1", "9050", None, None)
|
|
222
|
+
elif self.current_index >= 0 and self.current_index < len(self.entries):
|
|
223
|
+
entry = self.entries[self.current_index]
|
|
224
|
+
else:
|
|
225
|
+
raise RuntimeError("Could not select a proxy for IP waiting")
|
|
226
|
+
|
|
227
|
+
# Auto-detect baseline IP
|
|
228
|
+
host, port, user, pwd = entry
|
|
229
|
+
proxies_map = {
|
|
230
|
+
"http": self.get_proxy_url(host, port, user, pwd, "http"),
|
|
231
|
+
"https": self.get_proxy_url(host, port, user, pwd, "http"),
|
|
232
|
+
}
|
|
233
|
+
try:
|
|
234
|
+
resp = requests.get(self.CHECK_IP_URL, proxies=proxies_map, timeout=check_timeout)
|
|
235
|
+
baseline = resp.json().get("YourFuckingIPAddress")
|
|
236
|
+
if not baseline:
|
|
237
|
+
raise RuntimeError(f"Could not determine baseline IP for entry {host}:{port}")
|
|
238
|
+
logger.debug(f"Auto-detected baseline IP: {baseline}")
|
|
239
|
+
except Exception as ex:
|
|
240
|
+
raise RuntimeError(f"Could not determine baseline IP for entry {host}:{port}: {ex}") from ex
|
|
241
|
+
|
|
242
|
+
return self._wait_for_new_ip(entry, baseline, timeout, interval, check_timeout)
|
|
243
|
+
|
|
193
244
|
def mark_entry_status(self, entry, ok, error=None, last_ip=None):
|
|
194
245
|
"""Update health cache for an entry."""
|
|
195
246
|
self._health[entry] = {
|
|
@@ -231,6 +282,13 @@ class ProxyInterface:
|
|
|
231
282
|
pool = self._build_pool(use_auth)
|
|
232
283
|
candidates = self._get_candidates(pool, randomize)
|
|
233
284
|
|
|
285
|
+
# Capture baseline before any health checks that might set it
|
|
286
|
+
baseline_before = None
|
|
287
|
+
if len(candidates) == 1:
|
|
288
|
+
idx = candidates[0]
|
|
289
|
+
entry = self.entries[idx]
|
|
290
|
+
baseline_before = self._health.get(entry, {}).get("last_ip")
|
|
291
|
+
|
|
234
292
|
def _find_working_entry():
|
|
235
293
|
if not self.entries:
|
|
236
294
|
raise NoWorkingProxiesError("No proxies available")
|
|
@@ -246,32 +304,28 @@ class ProxyInterface:
|
|
|
246
304
|
entry = _find_working_entry()
|
|
247
305
|
|
|
248
306
|
if ensure_new_ip and len(pool) == 1:
|
|
249
|
-
logger.info(f"ensure_new_ip=True and single proxy, waiting for IP change: {entry[0]}:{entry[1]}")
|
|
250
307
|
baseline = self._health.get(entry, {}).get("last_ip")
|
|
251
|
-
if not
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
308
|
+
if not baseline_before:
|
|
309
|
+
# First time seeing this proxy: it was already checked in _find_working_entry, so return immediately
|
|
310
|
+
logger.debug("No baseline IP found; returning first-working proxy without waiting for IP change")
|
|
311
|
+
else:
|
|
312
|
+
# There is a baseline: wait for the IP to change
|
|
313
|
+
logger.info(f"ensure_new_ip=True and single proxy, waiting for IP change: {entry[0]}:{entry[1]}")
|
|
314
|
+
entry = self._wait_for_new_ip(
|
|
315
|
+
entry, baseline, ensure_new_ip_timeout, ensure_new_ip_interval, check_timeout
|
|
316
|
+
)
|
|
256
317
|
|
|
257
318
|
return entry
|
|
258
319
|
|
|
259
320
|
def _get_round_robin_candidates(self, pool):
|
|
260
321
|
"""Get candidates in round-robin order starting from current_index."""
|
|
261
322
|
candidates = []
|
|
262
|
-
start_idx = self.current_index
|
|
323
|
+
start_idx = (self.current_index + 1) % len(self.entries) if self.current_index >= 0 else 0
|
|
263
324
|
for i in range(len(self.entries)):
|
|
264
325
|
idx = (start_idx + i) % len(self.entries)
|
|
265
326
|
entry = self.entries[idx]
|
|
266
327
|
if entry in pool:
|
|
267
|
-
candidates.append(
|
|
268
|
-
# Update current_index for next call
|
|
269
|
-
if candidates:
|
|
270
|
-
try:
|
|
271
|
-
pos = self.entries.index(candidates[0])
|
|
272
|
-
self.current_index = (pos + 1) % len(self.entries)
|
|
273
|
-
except ValueError:
|
|
274
|
-
pass
|
|
328
|
+
candidates.append(idx)
|
|
275
329
|
return candidates
|
|
276
330
|
|
|
277
331
|
def _build_pool(self, use_auth):
|
|
@@ -282,12 +336,14 @@ class ProxyInterface:
|
|
|
282
336
|
|
|
283
337
|
def _get_candidates(self, pool, randomize):
|
|
284
338
|
if randomize:
|
|
285
|
-
|
|
339
|
+
candidates = [idx for idx, entry in enumerate(self.entries) if entry in pool]
|
|
340
|
+
return random.sample(candidates, k=len(candidates))
|
|
286
341
|
else:
|
|
287
342
|
return self._get_round_robin_candidates(pool)
|
|
288
343
|
|
|
289
344
|
def _find_working_entry_once(self, candidates, check_timeout, cooldown_seconds):
|
|
290
|
-
for
|
|
345
|
+
for idx in candidates:
|
|
346
|
+
entry = self.entries[idx]
|
|
291
347
|
health = self._health.get(entry, {})
|
|
292
348
|
last_checked = health.get("last_checked", 0)
|
|
293
349
|
ok = health.get("ok", False)
|
|
@@ -295,18 +351,22 @@ class ProxyInterface:
|
|
|
295
351
|
|
|
296
352
|
if ok and (now - last_checked) < cooldown_seconds:
|
|
297
353
|
logger.debug(f"Using cached working proxy: {entry[0]}:{entry[1]}")
|
|
354
|
+
self.current_index = idx
|
|
298
355
|
return entry
|
|
299
356
|
elif not ok and (now - last_checked) < cooldown_seconds:
|
|
300
357
|
continue
|
|
301
358
|
else:
|
|
302
359
|
logger.debug(f"Checking proxy health: {entry[0]}:{entry[1]}")
|
|
303
360
|
if self.is_entry_alive(entry, timeout=check_timeout):
|
|
361
|
+
self.current_index = idx
|
|
304
362
|
return entry
|
|
305
363
|
|
|
306
364
|
logger.warning("No cached working proxies, forcing fresh checks")
|
|
307
|
-
for
|
|
365
|
+
for idx in candidates:
|
|
366
|
+
entry = self.entries[idx]
|
|
308
367
|
logger.debug(f"Force-checking proxy: {entry[0]}:{entry[1]}")
|
|
309
368
|
if self.is_entry_alive(entry, timeout=check_timeout):
|
|
369
|
+
self.current_index = idx
|
|
310
370
|
return entry
|
|
311
371
|
|
|
312
372
|
raise NoWorkingProxiesError("No working proxies available")
|
|
@@ -9,7 +9,7 @@ datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjH
|
|
|
9
9
|
datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
|
|
10
10
|
datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
|
|
11
11
|
datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
|
|
12
|
-
datamarket/interfaces/proxy.py,sha256=
|
|
12
|
+
datamarket/interfaces/proxy.py,sha256=e-bbmtjyjkh4ZAuk6o0nm-UcynH4qmDZnzDQzbOasb8,16063
|
|
13
13
|
datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
|
|
14
14
|
datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
|
|
@@ -29,7 +29,7 @@ datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnm
|
|
|
29
29
|
datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
|
|
30
30
|
datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
|
|
31
31
|
datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
|
|
32
|
-
datamarket-0.7.
|
|
33
|
-
datamarket-0.7.
|
|
34
|
-
datamarket-0.7.
|
|
35
|
-
datamarket-0.7.
|
|
32
|
+
datamarket-0.7.106.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
33
|
+
datamarket-0.7.106.dist-info/METADATA,sha256=A76E203rT92P2sSWe_FpjyL4I3vqirVPWHOFAXYlZug,7382
|
|
34
|
+
datamarket-0.7.106.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
35
|
+
datamarket-0.7.106.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|