datamarket 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/peerdb.py +8 -34
- datamarket/utils/main.py +9 -1
- {datamarket-0.8.6.dist-info → datamarket-0.8.8.dist-info}/METADATA +1 -1
- {datamarket-0.8.6.dist-info → datamarket-0.8.8.dist-info}/RECORD +6 -6
- {datamarket-0.8.6.dist-info → datamarket-0.8.8.dist-info}/LICENSE +0 -0
- {datamarket-0.8.6.dist-info → datamarket-0.8.8.dist-info}/WHEEL +0 -0
datamarket/interfaces/peerdb.py
CHANGED
|
@@ -216,52 +216,26 @@ class TransientS3:
|
|
|
216
216
|
self.config = config["peerdb-s3"]
|
|
217
217
|
self.bucket_name = self.config["bucket"]
|
|
218
218
|
self.session = boto3.Session(profile_name=self.config["profile"])
|
|
219
|
-
self.s3_client = self.session.client("s3")
|
|
219
|
+
self.s3_resource = self.session.resource("s3")
|
|
220
220
|
self.credentials = self.session.get_credentials()
|
|
221
221
|
self.access_key = self.credentials.access_key
|
|
222
222
|
self.secret_key = self.credentials.secret_key
|
|
223
223
|
self.region_name = self.session.region_name
|
|
224
|
-
self.endpoint_url = self.s3_client.meta.endpoint_url
|
|
224
|
+
self.endpoint_url = self.s3_resource.meta.endpoint_url
|
|
225
225
|
else:
|
|
226
226
|
logger.warning("no peerdb-s3 section in config")
|
|
227
227
|
|
|
228
228
|
def delete_paths_with_schema(self, schema_name):
|
|
229
229
|
logger.info(f"Deleting paths containing '{schema_name}' from S3")
|
|
230
230
|
|
|
231
|
-
|
|
232
|
-
pages = paginator.paginate(Bucket=self.bucket_name, Delimiter="/")
|
|
231
|
+
bucket = self.s3_resource.Bucket(self.bucket_name)
|
|
233
232
|
|
|
234
|
-
for
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
folder = prefix["Prefix"]
|
|
238
|
-
if schema_name in folder:
|
|
239
|
-
self._delete_folder_contents(folder)
|
|
233
|
+
for prefix in [schema_name, f"clone_{schema_name}"]:
|
|
234
|
+
objects_to_delete = bucket.objects.filter(Prefix=prefix)
|
|
235
|
+
objects_to_delete.delete()
|
|
240
236
|
|
|
241
237
|
logger.info(f"Deleted paths containing '{schema_name}' from S3")
|
|
242
238
|
|
|
243
|
-
def _delete_folder_contents(self, folder):
|
|
244
|
-
logger.info(f"Deleting contents of folder: {folder}")
|
|
245
|
-
|
|
246
|
-
paginator = self.s3_client.get_paginator("list_objects_v2")
|
|
247
|
-
pages = paginator.paginate(Bucket=self.bucket_name, Prefix=folder)
|
|
248
|
-
|
|
249
|
-
delete_us = dict(Objects=[])
|
|
250
|
-
for page in pages:
|
|
251
|
-
if "Contents" in page:
|
|
252
|
-
for obj in page["Contents"]:
|
|
253
|
-
delete_us["Objects"].append(dict(Key=obj["Key"]))
|
|
254
|
-
|
|
255
|
-
# AWS limits to deleting 1000 objects at a time
|
|
256
|
-
if len(delete_us["Objects"]) >= 1000:
|
|
257
|
-
self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
|
|
258
|
-
delete_us = dict(Objects=[])
|
|
259
|
-
|
|
260
|
-
if len(delete_us["Objects"]):
|
|
261
|
-
self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
|
|
262
|
-
|
|
263
|
-
logger.info(f"Deleted contents of folder: {folder}")
|
|
264
|
-
|
|
265
239
|
|
|
266
240
|
class PeerDBInterface:
|
|
267
241
|
def __init__(self, config):
|
|
@@ -308,11 +282,11 @@ class PeerDBInterface:
|
|
|
308
282
|
if not self.docker_host_mapping or not host:
|
|
309
283
|
return host
|
|
310
284
|
|
|
311
|
-
if host in [
|
|
285
|
+
if host in ["localhost", "127.0.0.1"]:
|
|
312
286
|
logger.debug(f"Mapping host {host} to {self.docker_host_mapping} for Docker environment")
|
|
313
287
|
return self.docker_host_mapping
|
|
314
288
|
|
|
315
|
-
url_pattern = r
|
|
289
|
+
url_pattern = r"(localhost|127\.0\.0\.1)"
|
|
316
290
|
match = re.search(url_pattern, host)
|
|
317
291
|
if match:
|
|
318
292
|
original_host = match.group(1)
|
datamarket/utils/main.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
########################################################################################################################
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
|
+
import asyncio
|
|
4
5
|
import configparser
|
|
5
6
|
import inspect
|
|
6
7
|
import logging
|
|
@@ -67,7 +68,8 @@ def get_config(config_file: Path, tz: str = "Europe/Madrid"):
|
|
|
67
68
|
if Path(config_file).suffix == ".ini":
|
|
68
69
|
logger.warning("Using legacy INI config reader. Please migrate to TOML")
|
|
69
70
|
cfg = configparser.RawConfigParser()
|
|
70
|
-
|
|
71
|
+
cfg.read(config_file)
|
|
72
|
+
return cfg
|
|
71
73
|
|
|
72
74
|
add_converter("read", read_converter)
|
|
73
75
|
|
|
@@ -124,6 +126,12 @@ def ban_sleep(max_time, min_time=0):
|
|
|
124
126
|
time.sleep(sleep_time)
|
|
125
127
|
|
|
126
128
|
|
|
129
|
+
async def ban_sleep_async(max_time, min_time=0):
|
|
130
|
+
sleep_time = int(random.uniform(min_time, max_time)) # noqa: S311
|
|
131
|
+
logger.info(f"sleeping for {sleep_time} seconds...")
|
|
132
|
+
await asyncio.sleep(sleep_time)
|
|
133
|
+
|
|
134
|
+
|
|
127
135
|
def run_bash_command(command):
|
|
128
136
|
p = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
129
137
|
|
|
@@ -5,7 +5,7 @@ datamarket/interfaces/aws.py,sha256=UztVuBn561DnU1AcjyJ16UAIS1BUD5HUxiQ4gc9EhtM,
|
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=9GQgiNBBK7njkv8ytHQaP9YLB9kI5vnUFA5gtz9J7As,1859
|
|
7
7
|
datamarket/interfaces/nominatim.py,sha256=_gFJ04D-ju5xn3wuaGT5Pj5jhf4F5eINpxOpuQL_dIQ,3664
|
|
8
|
-
datamarket/interfaces/peerdb.py,sha256=
|
|
8
|
+
datamarket/interfaces/peerdb.py,sha256=hGQ9TXKq9k2xEShz7n6iV-x66bhYylBaWoCc-I2VtN0,20705
|
|
9
9
|
datamarket/interfaces/proxy.py,sha256=8EJaW8zAMzUMIRLkdAcMkTO9qZXPIubE6vyB5ZXcRtU,3352
|
|
10
10
|
datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
|
|
11
11
|
datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -13,11 +13,11 @@ datamarket/params/nominatim.py,sha256=pBYRfoBkkLBg2INbFymefmYSzaAVujQSpEro5c1hD_
|
|
|
13
13
|
datamarket/utils/__init__.py,sha256=8D5a8oKgqd6WA1RUkiKCn4l_PVemtyuckxQut0vDHXM,20
|
|
14
14
|
datamarket/utils/airflow.py,sha256=al0vc0YUikNu3Oy51VSn52I7pMU40akFBOl_UlHa2E4,795
|
|
15
15
|
datamarket/utils/alchemy.py,sha256=SRq6kgh1aANXVShBPgAuglmNhZssPWwWEY503gKSia8,635
|
|
16
|
-
datamarket/utils/main.py,sha256=
|
|
16
|
+
datamarket/utils/main.py,sha256=agWVJ5ZFZjVrBNuMpnxN2F_edA3mMJop6dVHPBBkOqU,5775
|
|
17
17
|
datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
|
-
datamarket-0.8.
|
|
21
|
-
datamarket-0.8.
|
|
22
|
-
datamarket-0.8.
|
|
23
|
-
datamarket-0.8.
|
|
20
|
+
datamarket-0.8.8.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
21
|
+
datamarket-0.8.8.dist-info/METADATA,sha256=Kgq1Quj4MHqEk0xauolE8YM8wJBMR3ULQwvkSGfLt7s,6284
|
|
22
|
+
datamarket-0.8.8.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
23
|
+
datamarket-0.8.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|