datamarket 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/aws.py +15 -6
- datamarket/interfaces/proxy.py +2 -4
- {datamarket-0.9.0.dist-info → datamarket-0.9.2.dist-info}/METADATA +3 -2
- {datamarket-0.9.0.dist-info → datamarket-0.9.2.dist-info}/RECORD +6 -6
- {datamarket-0.9.0.dist-info → datamarket-0.9.2.dist-info}/WHEEL +1 -1
- {datamarket-0.9.0.dist-info → datamarket-0.9.2.dist-info}/LICENSE +0 -0
datamarket/interfaces/aws.py
CHANGED
|
@@ -20,7 +20,7 @@ class AWSInterface:
|
|
|
20
20
|
self.profiles.append(
|
|
21
21
|
{
|
|
22
22
|
"profile": profile_name,
|
|
23
|
-
"
|
|
23
|
+
"buckets": values["buckets"],
|
|
24
24
|
"session": boto3.Session(profile_name=profile_name),
|
|
25
25
|
}
|
|
26
26
|
)
|
|
@@ -35,9 +35,9 @@ class AWSInterface:
|
|
|
35
35
|
if self.current_profile:
|
|
36
36
|
self.s3 = self.current_profile["session"].resource("s3")
|
|
37
37
|
self.s3_client = self.s3.meta.client
|
|
38
|
-
self.bucket = self.current_profile["
|
|
38
|
+
self.bucket = self.current_profile["buckets"][0]
|
|
39
39
|
|
|
40
|
-
def switch_profile(self, profile_name):
|
|
40
|
+
def switch_profile(self, profile_name: str):
|
|
41
41
|
for profile in self.profiles:
|
|
42
42
|
if profile["profile"] == profile_name:
|
|
43
43
|
self.current_profile = profile
|
|
@@ -45,14 +45,23 @@ class AWSInterface:
|
|
|
45
45
|
return
|
|
46
46
|
logger.warning(f"Profile {profile_name} not found")
|
|
47
47
|
|
|
48
|
-
def
|
|
48
|
+
def switch_bucket(self, bucket: str):
|
|
49
|
+
if bucket not in self.current_profile["buckets"]:
|
|
50
|
+
logger.warning(
|
|
51
|
+
f"Bucket {bucket} not found in profile {self.current_profile['profile']}"
|
|
52
|
+
)
|
|
53
|
+
return
|
|
54
|
+
|
|
55
|
+
self.bucket = bucket
|
|
56
|
+
|
|
57
|
+
def get_file(self, s3_path: str):
|
|
49
58
|
try:
|
|
50
59
|
return self.s3.Object(self.bucket, s3_path).get()
|
|
51
60
|
except self.s3_client.exceptions.NoSuchKey:
|
|
52
61
|
logger.info(f"{s3_path} does not exist")
|
|
53
62
|
|
|
54
|
-
def read_file_as_bytes(self, s3_path):
|
|
63
|
+
def read_file_as_bytes(self, s3_path: str):
|
|
55
64
|
return io.BytesIO(self.get_file(s3_path)["Body"].read())
|
|
56
65
|
|
|
57
|
-
def upload_file(self, local_path, s3_path):
|
|
66
|
+
def upload_file(self, local_path: str, s3_path: str):
|
|
58
67
|
self.s3.Bucket(self.bucket).upload_file(local_path, s3_path)
|
datamarket/interfaces/proxy.py
CHANGED
|
@@ -58,15 +58,13 @@ class ProxyInterface:
|
|
|
58
58
|
}
|
|
59
59
|
|
|
60
60
|
def get_current_host_port(self):
|
|
61
|
-
|
|
62
|
-
host_port_pairs = [hp.split(":") for hp in hosts]
|
|
61
|
+
host_port_pairs = [hp.split(":") for hp in self.config["hosts"]]
|
|
63
62
|
current_host, current_port = host_port_pairs[self.current_index]
|
|
64
63
|
self.current_index = (self.current_index + 1) % len(host_port_pairs)
|
|
65
64
|
return current_host, current_port
|
|
66
65
|
|
|
67
66
|
def get_random_host_port(self):
|
|
68
|
-
|
|
69
|
-
host_port_pairs = [hp.split(":") for hp in hosts]
|
|
67
|
+
host_port_pairs = [hp.split(":") for hp in self.config["hosts"]]
|
|
70
68
|
self.current_index = random.randint(0, len(host_port_pairs) - 1) # noqa: S311
|
|
71
69
|
return host_port_pairs[self.current_index]
|
|
72
70
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.2
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
6
|
Author: DataMarket
|
|
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Provides-Extra: alchemy
|
|
18
19
|
Provides-Extra: aws
|
|
19
20
|
Provides-Extra: azure-storage-blob
|
|
20
21
|
Provides-Extra: boto3
|
|
@@ -62,7 +63,7 @@ Provides-Extra: tqdm
|
|
|
62
63
|
Provides-Extra: undetected-chromedriver
|
|
63
64
|
Provides-Extra: unidecode
|
|
64
65
|
Provides-Extra: xmltodict
|
|
65
|
-
Requires-Dist: SQLAlchemy (==2.0.36)
|
|
66
|
+
Requires-Dist: SQLAlchemy (==2.0.36) ; extra == "alchemy"
|
|
66
67
|
Requires-Dist: azure-storage-blob (==12.23.1) ; extra == "azure-storage-blob"
|
|
67
68
|
Requires-Dist: beautifulsoup4 (==4.12.3)
|
|
68
69
|
Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
|
|
2
2
|
datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
datamarket/interfaces/alchemy.py,sha256=V8E1GtokxUNmrUftKTFkIpNoXaqJME7ACES2BY0znQM,4214
|
|
4
|
-
datamarket/interfaces/aws.py,sha256=
|
|
4
|
+
datamarket/interfaces/aws.py,sha256=UV7FPZNC1S66mqmUZ8jgQUe8y1G0B7YVJzGw_dq-FUQ,2281
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=9GQgiNBBK7njkv8ytHQaP9YLB9kI5vnUFA5gtz9J7As,1859
|
|
7
7
|
datamarket/interfaces/nominatim.py,sha256=_gFJ04D-ju5xn3wuaGT5Pj5jhf4F5eINpxOpuQL_dIQ,3664
|
|
8
8
|
datamarket/interfaces/peerdb.py,sha256=rNQ1-THcVvrej8BEPJs9zM4VfH5dlByafOIHYN9sB2A,21833
|
|
9
|
-
datamarket/interfaces/proxy.py,sha256=
|
|
9
|
+
datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
|
|
10
10
|
datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
|
|
11
11
|
datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
datamarket/params/nominatim.py,sha256=pBYRfoBkkLBg2INbFymefmYSzaAVujQSpEro5c1hD_I,1143
|
|
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=agWVJ5ZFZjVrBNuMpnxN2F_edA3mMJop6dVHPBBkOqU,5775
|
|
|
17
17
|
datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
|
-
datamarket-0.9.
|
|
21
|
-
datamarket-0.9.
|
|
22
|
-
datamarket-0.9.
|
|
23
|
-
datamarket-0.9.
|
|
20
|
+
datamarket-0.9.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
21
|
+
datamarket-0.9.2.dist-info/METADATA,sha256=GK7Pc3NbQ0Adn4_McbjFQ7cbwjpBLTpJjUCfOx3aUkw,6329
|
|
22
|
+
datamarket-0.9.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
23
|
+
datamarket-0.9.2.dist-info/RECORD,,
|
|
File without changes
|