datamarket 0.7.103__py3-none-any.whl → 0.7.104__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of datamarket might be problematic.

@@ -1 +1 @@
-from .main import * # noqa: F403
+from .main import *  # noqa: F403
@@ -21,9 +21,11 @@ class BadRequestError(Exception):
     def __init__(self, message="Bad request!"):
         self.message = message
         super().__init__(self.message)
-
+
+
 class ManagedHTTPError(Exception):
     """Signal that this HTTP status was handled and should not be retried."""
+
     def __init__(self, response: requests.Response, *, url: str | None = None, message: str | None = None):
         self.response = response
         self.request = getattr(response, "request", None)
@@ -31,3 +33,15 @@ class ManagedHTTPError(Exception):
         self.url = url or (self.request.url if self.request is not None else None)
         self.message = message
         super().__init__(message or f"HTTP {self.status_code} for {self.url}")
+
+
+class NoWorkingProxiesError(Exception):
+    def __init__(self, message="No working proxies available"):
+        self.message = message
+        super().__init__(self.message)
+
+
+class EnsureNewIPTimeoutError(Exception):
+    def __init__(self, message="Timed out waiting for new IP"):
+        self.message = message
+        super().__init__(self.message)
@@ -3,6 +3,7 @@

 import logging
 from collections.abc import MutableMapping
+from enum import Enum, auto
 from typing import Any, Iterator, List, Optional, Type, TypeVar, Union
 from urllib.parse import quote_plus

@@ -12,7 +13,6 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.declarative import DeclarativeMeta
 from sqlalchemy.orm import Session, sessionmaker
 from sqlalchemy.sql.expression import ClauseElement
-from enum import Enum, auto

 ########################################################################################################################
 # CLASSES
@@ -198,29 +198,28 @@ class AlchemyInterface:
     @staticmethod
     def _log_integrity_error(ex: IntegrityError, alchemy_obj, action="insert"):
         """
-        Compact, readable IntegrityError logger using SQLSTATE codes.
-        Consult https://www.postgresql.org/docs/current/errcodes-appendix.html for details.
+        Compact, readable IntegrityError logger using SQLSTATE codes.
+        Consult https://www.postgresql.org/docs/current/errcodes-appendix.html for details.
         """

         PG_ERROR_LABELS = {
-            "23000": "Integrity constraint violation",
-            "23001": "Restrict violation",
-            "23502": "NOT NULL violation",
-            "23503": "Foreign key violation",
-            "23505": "Unique violation",
-            "23514": "Check constraint violation",
-            "23P01": "Exclusion constraint violation",
+            "23000": "Integrity constraint violation",
+            "23001": "Restrict violation",
+            "23502": "NOT NULL violation",
+            "23503": "Foreign key violation",
+            "23505": "Unique violation",
+            "23514": "Check constraint violation",
+            "23P01": "Exclusion constraint violation",
         }
         code = getattr(ex.orig, "pgcode", None)
         label = PG_ERROR_LABELS.get(code, "Integrity error (unspecified)")

         # Log one clean message with trace + the raw DB message separately
-        if code == "23505": # A simple info log for unique violations
+        if code == "23505":  # A simple info log for unique violations
             logger.info(f"{label} trying to {action} {alchemy_obj}")
         else:
             logger.error(f"{label} trying to {action} {alchemy_obj}\nPostgreSQL message: {ex.orig}")

-
     def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> bool:
         if self.session is None:
             raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
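
Aside from the re-indented label table, the behavioral detail worth noting in this hunk is the log-level split on SQLSTATE: unique violations (23505) are routine in upsert-style pipelines and log at INFO, while every other integrity code logs at ERROR together with the raw PostgreSQL message. A tiny illustrative sketch of that classification (not code from the package):

    def log_level_for(pgcode):
        # Mirrors _log_integrity_error: duplicate keys are expected noise,
        # everything else (NOT NULL, FK, CHECK, ...) is a real problem.
        return "INFO" if pgcode == "23505" else "ERROR"

    assert log_level_for("23505") == "INFO"   # unique violation
    assert log_level_for("23503") == "ERROR"  # foreign key violation
    assert log_level_for(None) == "ERROR"     # unknown codes fall back to ERROR
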
@@ -3,8 +3,9 @@

 import io
 import logging
+from typing import Any, Dict, List, Optional
+
 import boto3
-from typing import Optional, List, Dict, Any

 ########################################################################################################################
 # CLASSES
@@ -29,17 +29,15 @@ class AzureBlobInterface:
                     {
                         "profile": profile_name,
                         "container_name": container_name,
-                        "session": BlobServiceClient.from_connection_string(
-                            connection_string
-                        ).get_container_client(container_name),
+                        "session": BlobServiceClient.from_connection_string(connection_string).get_container_client(
+                            container_name
+                        ),
                     }
                 )

         if not self.profiles:
             logger.warning("No Azure profiles found in config file")
-        self.current_profile: Optional[Dict[str, Any]] = (
-            self.profiles[0] if self.profiles else None
-        )
+        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None

     def switch_profile(self, profile_name: str) -> None:
         for profile in self.profiles:
@@ -109,14 +107,10 @@ class AzureBlobInterface:
         if blob_client.exists():
             properties = blob_client.get_blob_properties()
             if properties.size > 100:  # Check if size is greater than 100 bytes
-                logger.debug(
-                    f"Blob '{remote_path}' exists and is not empty (size: {properties.size})."
-                )
+                logger.debug(f"Blob '{remote_path}' exists and is not empty (size: {properties.size}).")
                 return True
             else:
-                logger.debug(
-                    f"Blob '{remote_path}' exists but size ({properties.size}) is not > 100 bytes."
-                )
+                logger.debug(f"Blob '{remote_path}' exists but size ({properties.size}) is not > 100 bytes.")
                 return False
         else:
             logger.debug(f"Blob '{remote_path}' does not exist.")
@@ -20,22 +20,14 @@ class FTPInterface:
             if section.startswith("ftp:"):
                 profile_name = section.split(":", 1)[1]
                 ftps = self.config[section]["ftps"].lower() == "true"
-                ftp_conn = (
-                    FTP_TLS(self.config[section]["server"])
-                    if ftps
-                    else FTP(self.config[section]["server"])
-                )  # noqa: S321
-                ftp_conn.login(
-                    self.config[section]["username"], self.config[section]["password"]
-                )
+                ftp_conn = FTP_TLS(self.config[section]["server"]) if ftps else FTP(self.config[section]["server"])  # noqa: S321
+                ftp_conn.login(self.config[section]["username"], self.config[section]["password"])
                 self.profiles.append({"profile": profile_name, "session": ftp_conn})

         if not self.profiles:
             logger.warning("no ftp section in config")

-        self.current_profile: Optional[Dict[str, Any]] = (
-            self.profiles[0] if self.profiles else None
-        )
+        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
         self.ftp = self.current_profile["session"] if self.current_profile else None

     def switch_profile(self, profile_name: str) -> None:
@@ -11,8 +11,8 @@ from geopy.distance import geodesic
 from jellyfish import jaro_winkler_similarity

 from ..params.nominatim import CITY_TO_PROVINCE, POSTCODES
-from ..utils.strings import normalize
 from ..utils.nominatim import standardize_admin_division
+from ..utils.strings import normalize

 ########################################################################################################################
 # PARAMETERS
@@ -335,16 +335,14 @@ class Nominatim:
             selected_province_from_postcode,
             selected_state,
         )
-
+
         # Standardize
         final_result["province"] = standardize_admin_division(
-            name=final_result["province"],
-            level="province",
-            country_code=final_result["country_code"])
+            name=final_result["province"], level="province", country_code=final_result["country_code"]
+        )
         final_result["state"] = standardize_admin_division(
-            name=final_result["state"],
-            level="state",
-            country_code=final_result["country_code"])
+            name=final_result["state"], level="state", country_code=final_result["country_code"]
+        )
         return final_result

@@ -358,4 +356,4 @@ class NominatimInterface(Nominatim):

             super().__init__(self.nominatim_endpoint, self.geonames_endpoint)
         else:
-            logger.warning("no osm section in config")
+            logger.warning("no osm section in config")
@@ -1,10 +1,14 @@
 import logging
-import time
 import random
+import time
+
 import requests
+import tenacity
 from stem import Signal
 from stem.control import Controller

+from datamarket.exceptions import EnsureNewIPTimeoutError, NoWorkingProxiesError
+
 logger = logging.getLogger(__name__)
 logging.getLogger("stem").setLevel(logging.WARNING)

@@ -18,7 +22,7 @@ class ProxyInterface:

     def __init__(self, config):
         self._load_from_config(config)
-        self.current_index = random.randrange(len(self.entries)) if self.entries else 0
+        self.current_index = random.randrange(len(self.entries)) if self.entries else 0  # noqa: S311
         self._health = {}  # {entry: {"ok": bool, "last_checked": time.time(), "last_error": str}}

     def _load_from_config(self, cfg):
@@ -66,6 +70,10 @@
         health_check=True,
         check_timeout=5,
         cooldown_seconds=600,
+        ensure_new_ip=False,
+        ensure_new_ip_timeout=600,
+        ensure_new_ip_interval=5,
+        max_retry_seconds=600,
     ):
         """
         Return parsed proxy URLs or raw entry tuple for a working proxy.
@@ -77,6 +85,10 @@
         :param health_check: perform health checks to ensure proxy is working if True
         :param check_timeout: timeout in seconds for health check requests
         :param cooldown_seconds: how long to cache health status before re-checking
+        :param ensure_new_ip: if True and only one proxy available, wait until IP changes before returning
+        :param ensure_new_ip_timeout: max seconds to wait for IP change when ensure_new_ip=True
+        :param ensure_new_ip_interval: seconds between IP checks when ensure_new_ip=True
+        :param max_retry_seconds: max seconds to retry finding working proxies (0 to disable)
         """
         # Tor handling (skip health check for tor)
         if use_tor:
@@ -87,7 +99,14 @@
         # Get a working entry (with health checks if enabled)
         if health_check:
             host, port, user, password = self._get_working_entry(
-                use_auth=use_auth, randomize=randomize, check_timeout=check_timeout, cooldown_seconds=cooldown_seconds
+                use_auth=use_auth,
+                randomize=randomize,
+                check_timeout=check_timeout,
+                cooldown_seconds=cooldown_seconds,
+                ensure_new_ip=ensure_new_ip,
+                ensure_new_ip_timeout=ensure_new_ip_timeout,
+                ensure_new_ip_interval=ensure_new_ip_interval,
+                max_retry_seconds=max_retry_seconds,
             )
         else:
             # Legacy behavior: no health check
@@ -110,7 +129,7 @@
     def get_next(self, use_auth=False):
         # Round-robin selection, optionally filtering out authenticated proxies
         if not self.entries:
-            raise RuntimeError("No proxies available")
+            raise NoWorkingProxiesError("No proxies available")

         pool = self.entries if use_auth else [e for e in self.entries if not e[2] and not e[3]]
         if not pool:
@@ -130,13 +149,13 @@
     def get_random(self, use_auth=False):
         # Random selection, optionally filtering out authenticated proxies
         if not self.entries:
-            raise RuntimeError("No proxies available")
+            raise NoWorkingProxiesError("No proxies available")

         pool = self.entries if use_auth else [e for e in self.entries if not e[2] and not e[3]]
         if not pool:
             pool = self.entries

-        entry = random.choice(pool)
+        entry = random.choice(pool)  # noqa: S311
         # Update index to after selected entry for round-robin continuity
         try:
             pos = self.entries.index(entry)
@@ -146,9 +165,10 @@

         return entry

-    def check_current_ip(self):
+    def check_current_ip(self, proxies=None):
         try:
-            resp = requests.get(self.CHECK_IP_URL, proxies={"http": self.proxies["http"]})
+            proxies_arg = proxies or {"http": self.proxies["http"]}
+            resp = requests.get(self.CHECK_IP_URL, proxies=proxies_arg, timeout=30)
             return resp.json().get("YourFuckingIPAddress")
         except Exception as ex:
             logger.error(ex)
@@ -170,12 +190,13 @@
             logger.error("Failed to renew Tor IP")
             logger.error(ex)

-    def mark_entry_status(self, entry, ok, error=None):
+    def mark_entry_status(self, entry, ok, error=None, last_ip=None):
         """Update health cache for an entry."""
         self._health[entry] = {
             "ok": ok,
             "last_checked": time.time(),
             "last_error": error,
+            "last_ip": last_ip,
         }

     def is_entry_alive(self, entry, timeout=5):
@@ -188,25 +209,84 @@
             }
             resp = requests.get(self.CHECK_IP_URL, proxies=proxies, timeout=timeout)
             ok = resp.status_code == 200
-            self.mark_entry_status(entry, ok)
+            last_ip = resp.json().get("YourFuckingIPAddress") if ok else None
+            self.mark_entry_status(entry, ok, last_ip=last_ip)
             return ok
         except Exception as ex:
             self.mark_entry_status(entry, False, str(ex))
             return False

-    def _get_working_entry(self, use_auth=False, randomize=False, check_timeout=5, cooldown_seconds=60):
+    def _get_working_entry(
+        self,
+        use_auth=False,
+        randomize=False,
+        check_timeout=5,
+        cooldown_seconds=60,
+        ensure_new_ip=False,
+        ensure_new_ip_timeout=600,
+        ensure_new_ip_interval=5,
+        max_retry_seconds=600,
+    ):
         """Get a working proxy entry, performing health checks as needed."""
-        if not self.entries:
-            raise RuntimeError("No proxies available")
+        pool = self._build_pool(use_auth)
+        candidates = self._get_candidates(pool, randomize)
+
+        def _find_working_entry():
+            if not self.entries:
+                raise NoWorkingProxiesError("No proxies available")
+            return self._find_working_entry_once(candidates, check_timeout, cooldown_seconds)
+
+        if max_retry_seconds > 0:
+            retrying = tenacity.Retrying(
+                stop=tenacity.stop_after_delay(max_retry_seconds),
+                reraise=True,
+            )
+            entry = retrying(_find_working_entry)
+        else:
+            entry = _find_working_entry()
+
+        if ensure_new_ip and len(pool) == 1:
+            logger.info(f"ensure_new_ip=True and single proxy, waiting for IP change: {entry[0]}:{entry[1]}")
+            baseline = self._health.get(entry, {}).get("last_ip")
+            if not baseline:
+                if not self.is_entry_alive(entry, timeout=check_timeout):
+                    raise NoWorkingProxiesError("Proxy became unavailable during ensure_new_ip")
+                baseline = self._health.get(entry, {}).get("last_ip")
+            entry = self._wait_for_new_ip(entry, baseline, ensure_new_ip_timeout, ensure_new_ip_interval, check_timeout)
+
+        return entry
+
+    def _get_round_robin_candidates(self, pool):
+        """Get candidates in round-robin order starting from current_index."""
+        candidates = []
+        start_idx = self.current_index
+        for i in range(len(self.entries)):
+            idx = (start_idx + i) % len(self.entries)
+            entry = self.entries[idx]
+            if entry in pool:
+                candidates.append(entry)
+        # Update current_index for next call
+        if candidates:
+            try:
+                pos = self.entries.index(candidates[0])
+                self.current_index = (pos + 1) % len(self.entries)
+            except ValueError:
+                pass
+        return candidates

-        # Build candidate list respecting use_auth and randomize/round-robin
+    def _build_pool(self, use_auth):
         pool = self.entries if use_auth else [e for e in self.entries if not e[2] and not e[3]]
         if not pool:
             pool = self.entries
+        return pool

-        candidates = pool[:] if randomize else self._get_round_robin_candidates(pool)
+    def _get_candidates(self, pool, randomize):
+        if randomize:
+            return pool[:]
+        else:
+            return self._get_round_robin_candidates(pool)

-        # First pass: check cache and health
+    def _find_working_entry_once(self, candidates, check_timeout, cooldown_seconds):
         for entry in candidates:
             health = self._health.get(entry, {})
             last_checked = health.get("last_checked", 0)
@@ -214,42 +294,42 @@
             now = time.time()

             if ok and (now - last_checked) < cooldown_seconds:
-                # Cached as working and recent
                 logger.debug(f"Using cached working proxy: {entry[0]}:{entry[1]}")
                 return entry
             elif not ok and (now - last_checked) < cooldown_seconds:
-                # Cached as failed and recent, skip
                 continue
             else:
-                # Not cached or expired, check now
                 logger.debug(f"Checking proxy health: {entry[0]}:{entry[1]}")
                 if self.is_entry_alive(entry, timeout=check_timeout):
                     return entry

-        # Second pass: force fresh check for all candidates (in case cache skipped everything)
         logger.warning("No cached working proxies, forcing fresh checks")
         for entry in candidates:
             logger.debug(f"Force-checking proxy: {entry[0]}:{entry[1]}")
             if self.is_entry_alive(entry, timeout=check_timeout):
                 return entry

-        # No working proxies found
-        raise RuntimeError("No working proxies available")
+        raise NoWorkingProxiesError("No working proxies available")

-    def _get_round_robin_candidates(self, pool):
-        """Get candidates in round-robin order starting from current_index."""
-        candidates = []
-        start_idx = self.current_index
-        for i in range(len(self.entries)):
-            idx = (start_idx + i) % len(self.entries)
-            entry = self.entries[idx]
-            if entry in pool:
-                candidates.append(entry)
-        # Update current_index for next call
-        if candidates:
+    def _wait_for_new_ip(self, entry, baseline, timeout, interval, check_timeout):
+        start = time.time()
+        while time.time() - start < timeout:
+            host, port, user, pwd = entry
+            proxies_map = {
+                "http": self.get_proxy_url(host, port, user, pwd, "http"),
+                "https": self.get_proxy_url(host, port, user, pwd, "http"),
+            }
             try:
-                pos = self.entries.index(candidates[0])
-                self.current_index = (pos + 1) % len(self.entries)
-            except ValueError:
-                pass
-        return candidates
+                resp = requests.get(self.CHECK_IP_URL, proxies=proxies_map, timeout=check_timeout)
+                current_ip = resp.json().get("YourFuckingIPAddress")
+            except Exception:
+                current_ip = None
+
+            if current_ip and current_ip != baseline:
+                self.mark_entry_status(entry, True, last_ip=current_ip)
+                logger.info(f"IP changed from {baseline} to {current_ip}")
+                return entry
+
+            time.sleep(interval)
+
+        raise EnsureNewIPTimeoutError(f"Timed out waiting for new IP after {timeout}s")
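
Taken together: max_retry_seconds wraps the candidate scan in a tenacity stop_after_delay window, and ensure_new_ip (which only applies to single-proxy pools) polls the IP-check endpoint every ensure_new_ip_interval seconds until the observed address differs from the cached last_ip, raising EnsureNewIPTimeoutError once ensure_new_ip_timeout elapses. A hedged usage sketch; the config object and the chosen values are assumptions, not taken from this diff:

    from datamarket.interfaces.proxy import ProxyInterface

    pi = ProxyInterface(config)  # config: your existing proxy configuration

    # Retry dead proxies for up to 2 minutes; then, with a one-entry pool,
    # block until the provider rotates the exit IP (polled every 5 s,
    # giving up after 10 minutes).
    proxies = pi.get_proxies(
        health_check=True,
        max_retry_seconds=120,
        ensure_new_ip=True,
        ensure_new_ip_interval=5,
        ensure_new_ip_timeout=600,
    )
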
@@ -43,9 +43,7 @@ class TinybirdInterface:
         }

     def __prepare_json_row(self, obj_dict):
-        return json.dumps(
-            self.__dict_lists_to_string(obj_dict), default=self.__converter
-        )
+        return json.dumps(self.__dict_lists_to_string(obj_dict), default=self.__converter)

     @staticmethod
     def __handle_api_response(json_response):
@@ -53,13 +51,9 @@
         quarantined_rows = json_response["quarantined_rows"]

         if quarantined_rows > 0:
-            logger.error(
-                f"wrong insertion of {quarantined_rows} records to Tinybird API..."
-            )
+            logger.error(f"wrong insertion of {quarantined_rows} records to Tinybird API...")
         else:
-            logger.info(
-                f"successfully inserted {successful_rows} records to Tinybird API!"
-            )
+            logger.info(f"successfully inserted {successful_rows} records to Tinybird API!")

         return successful_rows, quarantined_rows

@@ -72,9 +66,7 @@
         return self.__insert_data_to_endpoint(self.__prepare_json_row(obj_dict))

     def insert_batch_to_api(self, batch):
-        return self.__insert_data_to_endpoint(
-            "\n".join([self.__prepare_json_row(x) for x in batch])
-        )
+        return self.__insert_data_to_endpoint("\n".join([self.__prepare_json_row(x) for x in batch]))

     def insert_pandas_df_to_api(self, df):
         return self.__insert_data_to_endpoint(df.to_json(orient="records", lines=True))
@@ -1,6 +1,7 @@
-from unidecode import unidecode
 import re

+from unidecode import unidecode
+
 CITY_TO_PROVINCE = {"Madrid": "Madrid"}

 POSTCODES = {
@@ -396,31 +397,27 @@ _NORMALIZED_PROVINCE_CACHE = {}
 for country, provinces in PROVINCE_TO_POSTCODE.items():
     # Get the original keys (e.g., "A Coruña", "Álava")
     original_keys = list(provinces.keys())
-
+
     # Create the normalized list (e.g., "a coruna", "alava")
     normalized_choices = [unidecode(p).lower() for p in original_keys]
-
+
     _NORMALIZED_PROVINCE_CACHE[country] = {
-        "choices": normalized_choices, # The list for rapidfuzz to search in
-        "keys": original_keys # The list to find the name by index
+        "choices": normalized_choices,  # The list for rapidfuzz to search in
+        "keys": original_keys,  # The list to find the name by index
     }

 # Source: https://github.com/ariankoochak/regex-patterns-of-all-countries
 COUNTRY_PARSING_RULES = {
     "es": {
         "zip_validate_pattern": re.compile(r"^\d{5}$"),
-
-        "zip_search_pattern": re.compile(r"\b\d{5}\b"),
-
-        "phone_validate_pattern": re.compile(r"^(\+?34)?[67]\d{8}$")
+        "zip_search_pattern": re.compile(r"\b\d{5}\b"),
+        "phone_validate_pattern": re.compile(r"^(\+?34)?[67]\d{8}$"),
     },
     "pt": {
         "zip_validate_pattern": re.compile(r"^\d{4}[- ]{0,1}\d{3}$|^\d{4}$"),
-
         "zip_search_pattern": re.compile(r"\b\d{4}[- ]?\d{3}\b|\b\d{4}\b"),
-
-        "phone_validate_pattern": re.compile(r"^(\+?351)?9[1236]\d{7}$")
-    }
+        "phone_validate_pattern": re.compile(r"^(\+?351)?9[1236]\d{7}$"),
+    },
 }

 # Cutoff score for rapidfuzz in the name standardization function
@@ -3,20 +3,23 @@

 import re
 import unicodedata
+
 import inflection

 ########################################################################################################################
 # FUNCTIONS

+
 def process_task_name(task_id):
-    task_id = ''.join(
-        f"_{unicodedata.name(c)}_" if not c.isalnum() else c for c in task_id
-        if c.isalnum() or (unicodedata.category(c) not in ('Cc', 'Cf', 'Cs', 'Co', 'Cn'))
+    task_id = "".join(
+        f"_{unicodedata.name(c)}_" if not c.isalnum() else c
+        for c in task_id
+        if c.isalnum() or (unicodedata.category(c) not in ("Cc", "Cf", "Cs", "Co", "Cn"))
     )
-    task_id = inflection.parameterize(task_id, separator='_')
+    task_id = inflection.parameterize(task_id, separator="_")
     task_id = task_id.lower()
-    task_id = task_id.strip('_')
-    task_id = re.sub(r'_+', '_', task_id)
+    task_id = task_id.strip("_")
+    task_id = re.sub(r"_+", "_", task_id)
     if task_id[0].isdigit():
-        task_id = 'task_' + task_id
+        task_id = "task_" + task_id
     return task_id
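
The rewrite above is formatting-only (double quotes, one generator clause per line), so behavior is unchanged: non-alphanumeric characters become their Unicode names, inflection.parameterize slugifies with underscores, the result is lowercased and de-duplicated, and a leading digit gets a task_ prefix. An indicative example; exact output depends on the unicodedata and inflection versions, so treat it as approximate:

    print(process_task_name("My Task!"))
    # expected something like: "my_space_task_exclamation_mark"
    print(process_task_name("1 task"))
    # leading digit, so expected: "task_1_space_task"
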
@@ -8,6 +8,7 @@ from sqlalchemy.ext.declarative import declarative_base

 Base = declarative_base()

+
 class View(Base):
     __abstract__ = True
     is_view = True
@@ -19,4 +20,4 @@ class View(Base):
         """
         conn.execute(f"""
             CREATE OR REPLACE VIEW {cls.__tablename__} AS {query}
-        """)
+        """)
datamarket/utils/main.py CHANGED
@@ -3,19 +3,19 @@

 import asyncio
 import configparser
-from datetime import timedelta
 import logging
 import random
 import re
 import shlex
 import subprocess
 import time
+from datetime import timedelta
 from typing import Sequence
-from babel.numbers import parse_decimal

-from bs4 import BeautifulSoup
 import pendulum
 import requests
+from babel.numbers import parse_decimal
+from bs4 import BeautifulSoup
 from requests.exceptions import ProxyError
 from tenacity import (
     before_sleep_log,
@@ -29,7 +29,7 @@ from tenacity import (
 )

 from datamarket.exceptions.main import ManagedHTTPError

-from ..exceptions import RedirectionDetectedError, NotFoundError, BadRequestError
+from ..exceptions import BadRequestError, NotFoundError, RedirectionDetectedError
 from ..interfaces.proxy import ProxyInterface

 ########################################################################################################################
@@ -2,16 +2,18 @@
 # IMPORTS

 from typing import Literal, Optional
+
 from rapidfuzz import fuzz, process
 from unidecode import unidecode
+
 from ..params.nominatim import (
+    _NORMALIZED_PROVINCE_CACHE,
+    COUNTRY_PARSING_RULES,
     POSTCODE_TO_STATES,
     PROVINCE_TO_POSTCODE,
     PROVINCES,
     STANDARD_THRESHOLD,
     STATES,
-    _NORMALIZED_PROVINCE_CACHE,
-    COUNTRY_PARSING_RULES
 )
 from .strings import normalize

@@ -32,9 +34,7 @@ def standardize_admin_division(
         return None

     country_code = country_code.lower()
-    mapping = (
-        STATES.get(country_code) if level == "state" else PROVINCES.get(country_code)
-    )
+    mapping = STATES.get(country_code) if level == "state" else PROVINCES.get(country_code)

     if not mapping:  # If country is not standardized, return raw name
         return name
@@ -103,7 +103,7 @@ def _province_postcode_match(
         normalized_address,
         cache["choices"],  # <-- Uses pre-computed list
         scorer=fuzz.partial_ratio,
-        score_cutoff=100
+        score_cutoff=100,
     )

     if not result:
@@ -119,11 +119,8 @@
     province_map = PROVINCE_TO_POSTCODE.get(country_code, {})
     postcode_prefix = province_map[original_province]

-    return (
-        postcode_prefix + zip_code[1:]
-        if len(zip_code) == 4
-        else zip_code
-    )
+    return postcode_prefix + zip_code[1:] if len(zip_code) == 4 else zip_code
+

 def _parse_es_zip_code(
     zip_code: str,
@@ -131,16 +128,16 @@ def _parse_es_zip_code(
     opt_address: str | None,
 ) -> str:
     """parse spain zip code"""
-
+
     # Get the validation regex from params
-    validate_regex = COUNTRY_PARSING_RULES['es']['zip_validate_pattern']
-
+    validate_regex = COUNTRY_PARSING_RULES["es"]["zip_validate_pattern"]
+
     if validate_regex.match(zip_code):
         return zip_code
     else:
         # Use search regex from params
-        pattern = COUNTRY_PARSING_RULES['es']['zip_search_pattern']
-
+        pattern = COUNTRY_PARSING_RULES["es"]["zip_search_pattern"]
+
         match = pattern.search(address)
         if match:
             return match.group()
@@ -148,7 +145,7 @@ def _parse_es_zip_code(
             match = pattern.search(opt_address)
             if match:
                 return match.group()
-
+
     province_match = _province_postcode_match(address, zip_code, country_code="es")
     return province_match or zip_code

@@ -161,18 +158,18 @@
     """parse portugal zip code"""

     # Get the validation regex from params
-    validate_regex = COUNTRY_PARSING_RULES['pt']['zip_validate_pattern']
+    validate_regex = COUNTRY_PARSING_RULES["pt"]["zip_validate_pattern"]

     if validate_regex.match(zip_code):
         return zip_code
     else:
         # Use search regex from params
-        pattern = COUNTRY_PARSING_RULES['pt']['zip_search_pattern']
-
+        pattern = COUNTRY_PARSING_RULES["pt"]["zip_search_pattern"]
+
         match = pattern.search(address)
         if match is None and opt_address:
             match = pattern.search(opt_address)
-
+
         return match.group() if match else zip_code

@@ -13,8 +13,12 @@ from camoufox.async_api import AsyncCamoufox as Camoufox
 from playwright.async_api import (
     Browser,
     BrowserContext,
-    Error as PlaywrightError,
     Page,
+)
+from playwright.async_api import (
+    Error as PlaywrightError,
+)
+from playwright.async_api import (
     TimeoutError as PlaywrightTimeoutError,
 )
 from tenacity import (
@@ -27,7 +31,6 @@ from tenacity import (
 )

 from datamarket.interfaces.proxy import ProxyInterface

-
 ########################################################################################################################
 # SETUP LOGGER
@@ -12,8 +12,12 @@ from camoufox import Camoufox
 from playwright.sync_api import (
     Browser,
     BrowserContext,
-    Error as PlaywrightError,
     Page,
+)
+from playwright.sync_api import (
+    Error as PlaywrightError,
+)
+from playwright.sync_api import (
     TimeoutError as PlaywrightTimeoutError,
 )
 from tenacity import (
@@ -23,6 +27,7 @@ from tenacity import (
     stop_after_delay,
     wait_exponential,
 )
+
 from datamarket.interfaces.proxy import ProxyInterface

 ########################################################################################################################
@@ -171,4 +176,4 @@ class PlaywrightCrawler:
         if not self.page:
             logger.info("Browser context not found, initializing now...")
             self.init_context()
-        return self._goto_with_retry(url)
+        return self._goto_with_retry(url)
@@ -17,9 +17,7 @@ logger = logging.getLogger(__name__)


 def get_chromedriver_version():
-    return int(
-        run_bash_command("/usr/bin/google-chrome --version").split(" ")[2].split(".")[0]
-    )
+    return int(run_bash_command("/usr/bin/google-chrome --version").split(" ")[2].split(".")[0])


 def get_driver(chrome_options=None, **kwargs):
@@ -38,23 +36,19 @@ def get_driver(chrome_options=None, **kwargs):

 def wait(driver, css_selector, timeout=30):
     logger.info(f"waiting for {css_selector}...")
-    return WebDriverWait(driver, timeout).until(
-        EC.visibility_of_element_located(("css selector", css_selector))
-    )
+    return WebDriverWait(driver, timeout).until(EC.visibility_of_element_located(("css selector", css_selector)))


 def wait_and_click(driver, css_selector, timeout=30):
     logger.info(f"clicking on {css_selector}...")
-    WebDriverWait(driver, timeout).until(
-        EC.element_to_be_clickable(("css selector", css_selector))
-    ).click()
+    WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(("css selector", css_selector))).click()


 def wait_and_fill(driver, css_selector, text_to_fill, timeout=30):
     logger.info(f"sending text to {css_selector}...")
-    WebDriverWait(driver, timeout).until(
-        EC.presence_of_element_located(("css selector", css_selector))
-    ).send_keys(text_to_fill)
+    WebDriverWait(driver, timeout).until(EC.presence_of_element_located(("css selector", css_selector))).send_keys(
+        text_to_fill
+    )


 def scroll(driver, css_selector):
@@ -1,9 +1,9 @@
 ########################################################################################################################
 # IMPORTS
-import re
 import unicodedata
 from enum import Enum, auto
 from typing import Any, Optional, Set, Union
+
 import numpy as np
 from inflection import camelize, parameterize, titleize, underscore
 from string_utils import prettify, strip_html
@@ -36,9 +36,7 @@ class NamingConvention(Enum):
 # FUNCTIONS


-def get_unidecoded_text(
-    input_text: str, allowed_chars: Set[str], apply_lowercase: bool = False
-) -> str:
+def get_unidecoded_text(input_text: str, allowed_chars: Set[str], apply_lowercase: bool = False) -> str:
     """
     Processes a string by unidecoding characters, optionally lowercasing them,
     while preserving a specified set of allowed characters.
@@ -65,9 +63,7 @@ def get_unidecoded_text(
     return "".join(chars_list)


-def transliterate_symbols(
-    s: str, allowed_symbols_set: Optional[Set[str]] = None
-) -> str:
+def transliterate_symbols(s: str, allowed_symbols_set: Optional[Set[str]] = None) -> str:
     """
     Translates Unicode symbols (category S*) in the input string to their lowercase Unicode names,
     with spaces replaced by underscores. Other characters, or characters in allowed_symbols_set, remain unchanged.
@@ -182,9 +178,7 @@ def normalize(

     for c in intermediate_text:
         cat = unicodedata.category(c)
-        if (
-            c in _allowed_symbols_set or c.isalnum()
-        ):  # Allowed symbols are part of tokens
+        if c in _allowed_symbols_set or c.isalnum():  # Allowed symbols are part of tokens
             current_token_chars.append(c)
         elif mode is NormalizationMode.FULL and cat.startswith("S"):
             # Transliterate S* category symbols not in allowed_symbols
@@ -220,4 +214,4 @@
     if naming is NamingConvention.PASCAL:
         return camelize(underscored)

-    return underscored
+    return underscored
@@ -3,11 +3,13 @@

 import re
 from typing import Literal
+
 from ...params.nominatim import COUNTRY_PARSING_RULES

 ########################################################################################################################
 # FUNCTIONS

+
 def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str | None:
     """Clean and standardize phone number from a certain country_code

@@ -27,12 +29,12 @@ def parse_phone_number(number: str, country_code: Literal["es", "pt"]) -> str |
         pattern = COUNTRY_PARSING_RULES[country_code]["phone_validate_pattern"]

         # Validate and extract in one step
-        if len(clean_number) >= 9: # Check if the cleaned number has at least 9 digits
+        if len(clean_number) >= 9:  # Check if the cleaned number has at least 9 digits
             match = pattern.match(clean_number)

             # Return the captured group (the 9-digit number)
             return match.group(0)[-9:] if match else None
         else:
-            return None # Or handle the case where the number is too short
+            return None  # Or handle the case where the number is too short
     else:
         raise ValueError(f"Country code ({country_code}) is not currently supported")
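
For reference, the phone_validate_pattern entries this function consults live in COUNTRY_PARSING_RULES (see the params/nominatim.py hunk above). An illustration of the expected behavior, inferred from the regexes rather than from package documentation:

    parse_phone_number("+34 612 345 678", "es")  # expected "612345678": optional +34, then [67] plus 8 digits
    parse_phone_number("912 345 678", "es")      # expected None: starts with 9, fails the [67]\d{8} pattern
    parse_phone_number("123", "pt")              # expected None: fewer than 9 digits after cleaning
    parse_phone_number("612345678", "fr")        # raises ValueError: country code not supported
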
datamarket/utils/typer.py CHANGED
@@ -9,6 +9,7 @@ from typing_extensions import Annotated
 ########################################################################################################################
 # TYPES

+
 class Dict(dict):
     def __init__(self, value: str):
         super().__init__(json.loads(value))
@@ -25,4 +26,4 @@


 DictArg = Annotated[Dict, typer.Argument(parser=parse_json_dict)]
-DictOpt = Annotated[Dict, typer.Option(parser=parse_json_dict)]
+DictOpt = Annotated[Dict, typer.Option(parser=parse_json_dict)]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamarket
-Version: 0.7.103
+Version: 0.7.104
 Summary: Utilities that integrate advanced scraping knowledge into just one library.
 License: GPL-3.0-or-later
 Author: DataMarket
@@ -0,0 +1,35 @@
+datamarket/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamarket/exceptions/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
+datamarket/exceptions/main.py,sha256=sJDsspp3e0gwK2GmVnIR9NEW6lEFVcsq8ONpfJ0NESE,1553
+datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamarket/interfaces/alchemy.py,sha256=2EZp7fn7-o8yL918dnqIYZ-gO7FUXGH8d8TzQFa7XRI,15769
+datamarket/interfaces/aws.py,sha256=4HEN_VfQuEEvDnksRYlcMBUdKbgJXWBkLnymKpyRtrs,4781
+datamarket/interfaces/azure.py,sha256=GXmdwG2MvWNGGyHhAxOhKmQw7bU7RyA5iffdPC2qzmk,4821
+datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjHk,4840
+datamarket/interfaces/ftp.py,sha256=LH3Oz19k_xUNhzDXcrq5Ofb4c3uiph5pWUqpgiaDvHI,2671
+datamarket/interfaces/nominatim.py,sha256=xizT94tVum7QPppfDgI5sEhx1mAXT-SM3JyPl8CDxxU,15148
+datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
+datamarket/interfaces/proxy.py,sha256=vI_CGkTAnCz27gF_a-8zEON3a5vxvzmWAwOO2jeRJXk,13065
+datamarket/interfaces/tinybird.py,sha256=cNG-kAPTdQn2inlNX9LPf-VVdtnLud947ApLVO40Now,2594
+datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamarket/params/nominatim.py,sha256=S9TEB4FxmffvFyK9KffWl20TfXzWX69IAdbEehKar1I,11920
+datamarket/utils/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
+datamarket/utils/airflow.py,sha256=Tc8vFB85NGJn0vgEkvT_yGMbn_NmW0OAJa9fy1qKocQ,804
+datamarket/utils/alchemy.py,sha256=B-6cdMiEStzD4JKhi7Xpk7pVs7eUcdT_fHqpfm2ToNc,637
+datamarket/utils/main.py,sha256=M9W_j4zkDxtAOa-4XxYxjDtFZ618tV6QAbHngnO6ViQ,8001
+datamarket/utils/nominatim.py,sha256=HUJfR86lw68PzaLfhZOCIT5YlopDvRSbwEY2JCf0WyI,5704
+datamarket/utils/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamarket/utils/playwright/async_api.py,sha256=i6dxeCBJm1fWKeCotlSEuwmXIUyJD80lHVC8wgdjIAE,5855
+datamarket/utils/playwright/sync_api.py,sha256=lK0kjK8ERPMUHMvHr6jmLk0FEFPtdzZ2NTdOEYPh600,6508
+datamarket/utils/selenium.py,sha256=Fc2BJzTH7_xIqjBP9LbZODF69RSH4fF8LhD5WuGdlZ0,2457
+datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
+datamarket/utils/strings/__init__.py,sha256=b6TYOT9v7y9ID-lDyZk4E8BH2uIPbsF2ZSLGjCQ1MCQ,43
+datamarket/utils/strings/normalization.py,sha256=tlZHq8h9AtcANkaJ2AOrR6UD5yKShn1cLldfFfFQgTA,8990
+datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnmH3FPlgUUjM,5246
+datamarket/utils/strings/standardization.py,sha256=j_NbT-O1XnxDvDhct8panfkrfAC8R5OX6XM5fYBZ4RU,1496
+datamarket/utils/typer.py,sha256=geWuwMwGQjBQhxo27hX0vEAeRl1j1TS0u2oFVfpAs5I,816
+datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
+datamarket-0.7.104.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+datamarket-0.7.104.dist-info/METADATA,sha256=K_hWvJnTd-1UesiRW-vKhl6eoNBKa3lR6u0k_bnDaHc,7382
+datamarket-0.7.104.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datamarket-0.7.104.dist-info/RECORD,,
@@ -1,35 +0,0 @@
-datamarket/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamarket/exceptions/__init__.py,sha256=-Vu-RZNKjW6fYCLqbUJTkKNuHeA8Yi_gyR50oZNaA_8,33
-datamarket/exceptions/main.py,sha256=SuP-ZKZIxJYdnOpNb63Y7BpYGRhLl-4JIyTEqgUoWV4,1205
-datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamarket/interfaces/alchemy.py,sha256=i2lKLLLy3-jpbzV3-jxfRCXTy7jRoTsNU3063pmSonk,15749
-datamarket/interfaces/aws.py,sha256=co5JkC3iFIp-0FqdYX4eKy3_m71LhZKuJoW6kXwEImc,4780
-datamarket/interfaces/azure.py,sha256=PnPlo95skYiq63qYa4QDvEnVYi2JblPmMSfbTsmXhFs,4937
-datamarket/interfaces/drive.py,sha256=3nhx3THr2SHNWKYwme9F2nPpvsqyEMFIxz0whF2FjHk,4840
-datamarket/interfaces/ftp.py,sha256=K219-PP21EhQo1A1LkvRLahlrw2-pf4svBN0LogZaJE,2813
-datamarket/interfaces/nominatim.py,sha256=TjS9O2U446XuPUzfP65NwDSG-RDNqmYb6-NKikM-34w,15187
-datamarket/interfaces/peerdb.py,sha256=sO451wEGNb_0DDwchZ6eBVYKltqHM5XKau-WsfspXzA,23640
-datamarket/interfaces/proxy.py,sha256=YNPNDFd2xTF-P5MITRHxGCLiXD8Fal4HK0yN8KhuYgI,9738
-datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
-datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamarket/params/nominatim.py,sha256=RnmYXGoJQCijOsuCavCYcxw98WvOd_vOMK4KaraI0RU,11967
-datamarket/utils/__init__.py,sha256=FHLh-Qp9XpM4LkAocppCf_llW2CWVVghGorkqxqt1wk,34
-datamarket/utils/airflow.py,sha256=al0vc0YUikNu3Oy51VSn52I7pMU40akFBOl_UlHa2E4,795
-datamarket/utils/alchemy.py,sha256=SRq6kgh1aANXVShBPgAuglmNhZssPWwWEY503gKSia8,635
-datamarket/utils/main.py,sha256=OORsHggUqa2lKj5AG5LTPzEvXfAtx3ry4rSaAwkuS38,8001
-datamarket/utils/nominatim.py,sha256=IxexKY2KOlDhiKtzsqQfoVUjJXPxJl7tn3iHUaQKg08,5795
-datamarket/utils/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamarket/utils/playwright/async_api.py,sha256=UbA2D4ScBtYeMfrRjly4RO-s8wXIub9c05J1eoOCpsQ,5782
-datamarket/utils/playwright/sync_api.py,sha256=eXaZsd7xgWSYJtZv6EAstjSbS2bl9OYlkwMBfqqTbFY,6434
-datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
-datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
-datamarket/utils/strings/__init__.py,sha256=b6TYOT9v7y9ID-lDyZk4E8BH2uIPbsF2ZSLGjCQ1MCQ,43
-datamarket/utils/strings/normalization.py,sha256=UBluU6ABY6aCpnd02F7L7HcivVSisRJ9IUXdj9D1MyE,9050
-datamarket/utils/strings/obfuscation.py,sha256=Jo-x3f2Cb75983smmpcdPqUlBrLCTyrnmH3FPlgUUjM,5246
-datamarket/utils/strings/standardization.py,sha256=c8CAG6HI3AfK0hB3A3IGwsbnQebZ6R3PrA5PELHRXM0,1492
-datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
-datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
-datamarket-0.7.103.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-datamarket-0.7.103.dist-info/METADATA,sha256=XAS_V3qRLGddVGC6sRhxavgwa4EdzONa1B-YWnvcMK0,7382
-datamarket-0.7.103.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datamarket-0.7.103.dist-info/RECORD,,