datamarket 0.7.21__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/aws.py +8 -10
- datamarket/utils/main.py +78 -5
- {datamarket-0.7.21.dist-info → datamarket-0.8.0.dist-info}/METADATA +7 -7
- {datamarket-0.7.21.dist-info → datamarket-0.8.0.dist-info}/RECORD +6 -6
- {datamarket-0.7.21.dist-info → datamarket-0.8.0.dist-info}/WHEEL +1 -1
- {datamarket-0.7.21.dist-info → datamarket-0.8.0.dist-info}/LICENSE +0 -0
datamarket/interfaces/aws.py
CHANGED
|
@@ -16,16 +16,14 @@ class AWSInterface:
|
|
|
16
16
|
self.profiles = []
|
|
17
17
|
self.config = config
|
|
18
18
|
|
|
19
|
-
for
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
)
|
|
19
|
+
for profile_name, values in self.config.get("aws", {}).items():
|
|
20
|
+
self.profiles.append(
|
|
21
|
+
{
|
|
22
|
+
"profile": profile_name,
|
|
23
|
+
"bucket": values["bucket"],
|
|
24
|
+
"session": boto3.Session(profile_name=profile_name),
|
|
25
|
+
}
|
|
26
|
+
)
|
|
29
27
|
|
|
30
28
|
if not self.profiles:
|
|
31
29
|
logger.warning("No AWS profiles found in config file")
|
datamarket/utils/main.py
CHANGED
|
@@ -2,14 +2,20 @@
|
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
4
|
import configparser
|
|
5
|
+
import inspect
|
|
5
6
|
import logging
|
|
6
7
|
import random
|
|
7
8
|
import re
|
|
8
9
|
import shlex
|
|
10
|
+
import shutil
|
|
9
11
|
import subprocess
|
|
10
12
|
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Literal, Union
|
|
11
15
|
|
|
12
16
|
import pendulum
|
|
17
|
+
from croniter import croniter
|
|
18
|
+
from dynaconf import Dynaconf, add_converter
|
|
13
19
|
|
|
14
20
|
########################################################################################################################
|
|
15
21
|
# FUNCTIONS
|
|
@@ -17,10 +23,77 @@ import pendulum
|
|
|
17
23
|
logger = logging.getLogger(__name__)
|
|
18
24
|
|
|
19
25
|
|
|
20
|
-
def
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
26
|
+
def get_granular_date(
    granularity: Union[Literal["monthly", "biweekly", "weekly", "daily"], str], tz: str = "Europe/Madrid"
) -> str:
    """
    Return the most recent date matching a granularity or a custom cron expression.

    Args:
        granularity: Either a predefined value ("monthly", "biweekly", "weekly",
            "daily") or a custom cron expression.
        tz: Timezone used to obtain the current time (default: "Europe/Madrid").

    Returns:
        A string representing the most recent matching date in the format "YYYY-MM-DD".

    Raises:
        ValueError: If the provided granularity or cron expression is invalid.
    """
    now = pendulum.now(tz)

    # Named granularities are shorthand for cron expressions; anything else is
    # passed to croniter unchanged and treated as a cron expression.
    predefined_patterns = {
        "monthly": "0 0 1 * *",
        "biweekly": "0 0 1,15 * *",
        "weekly": "0 0 * * MON",
        "daily": "0 0 * * *",
    }

    try:
        # The lookup lives inside the try so that an unhashable granularity is
        # reported as the documented ValueError instead of a raw TypeError.
        cron_pattern = predefined_patterns.get(granularity, granularity)
        cron = croniter(cron_pattern, now)
        return cron.get_prev(pendulum.DateTime).strftime("%Y-%m-%d")
    except Exception as e:
        raise ValueError("Invalid cron expression or granularity specified.") from e
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def read_converter(path_str: str) -> str:
    """
    Return the full text content of the file at ``path_str``.

    Args:
        path_str: Path of the file to read.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform locale.
    with open(path_str, encoding="utf-8") as f:
        return f.read()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_config(config_file: Union[str, Path], tz: str = "Europe/Madrid"):
    """
    Load the project configuration from ``config_file``.

    Files with an ``.ini`` extension are parsed with the legacy
    ``configparser`` reader. Any other file is loaded through Dynaconf,
    followed by a file of the same name located in the user's home directory.

    Args:
        config_file: Path to the configuration file (``str`` or ``Path``).
        tz: Timezone forwarded to ``get_granular_date`` when computing the
            ``today`` and ``biweekly_date`` values (default: "Europe/Madrid").

    Returns:
        A ``configparser.RawConfigParser`` for ``.ini`` files, otherwise the
        populated ``Dynaconf`` settings object.
    """
    # Normalise once so both str and Path inputs work everywhere below.
    config_path = Path(config_file)

    # Path.suffix includes the leading dot, so the comparison must be ".ini".
    if config_path.suffix == ".ini":
        logger.warning("Using legacy INI config reader. Please migrate to TOML")
        cfg = configparser.RawConfigParser()
        # read() returns the list of files it parsed, not the parser itself,
        # so the parser has to be returned explicitly.
        cfg.read(config_path)
        return cfg

    add_converter("read", read_converter)

    config = Dynaconf(
        environments=True,
        env_switcher="SYSTYPE",
        # NOTE(review): presumably exposed to the config files for templating — confirm.
        vars={
            "today": get_granular_date("daily", tz),
            "biweekly_date": get_granular_date("biweekly", tz),
        },
    )

    config.load_file(path=config_path)
    # Second load: same file name, looked up in the user's home directory.
    config.load_file(path=Path.home() / config_path.name)
    return config
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_project_metadata():
    """
    Derive project metadata from the location of the calling module.

    The caller's file path is expected to contain a ``src`` directory; the
    component right after it is taken as the package name and everything
    before it as the project path.

    Returns:
        A dict with the keys:
            cmd_prefix: "dix vnc run --" when a ``dix`` executable is found on
                PATH, otherwise an empty string.
            pkg_name: Path component immediately following ``src``.
            env_name: ``pkg_name`` with an ``_env`` suffix.
            project_path: ``Path`` made of the components preceding ``src``.

    Raises:
        ValueError: If the caller's path has no ``src`` component.
    """
    # context=0 avoids reading source lines for every frame; only the
    # caller's filename is needed.
    caller_frame = inspect.stack(context=0)[1]
    current_file_parts = Path(caller_frame.filename).resolve().parts

    try:
        src_index = current_file_parts.index("src")
    except ValueError:
        # Same exception type as before, but with an actionable message.
        raise ValueError(f"No 'src' directory found in caller path: {caller_frame.filename}") from None

    cmd_prefix = "dix vnc run --" if shutil.which("dix") else ""
    pkg_name = current_file_parts[src_index + 1]
    env_name = f"{pkg_name}_env"
    project_path = Path(*current_file_parts[:src_index])

    return {"cmd_prefix": cmd_prefix, "pkg_name": pkg_name, "env_name": env_name, "project_path": project_path}
|
|
24
97
|
|
|
25
98
|
|
|
26
99
|
def set_logger(level):
|
|
@@ -34,7 +107,7 @@ def set_logger(level):
|
|
|
34
107
|
|
|
35
108
|
|
|
36
109
|
def ban_sleep(max_time, min_time=0):
    """
    Sleep for a random number of whole seconds.

    A float is drawn with ``random.uniform(min_time, max_time)`` and truncated
    with ``int``; the chosen duration is logged before sleeping.

    Args:
        max_time: Upper bound passed to ``random.uniform``.
        min_time: Lower bound passed to ``random.uniform`` (default: 0).
    """
    drawn = random.uniform(min_time, max_time)  # noqa: S311
    sleep_time = int(drawn)
    logger.info(f"sleeping for {sleep_time} seconds...")
    time.sleep(sleep_time)
|
|
40
113
|
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.7.21
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
-
Home-page: https://datamarket.es
|
|
6
5
|
License: GPL-3.0-or-later
|
|
7
6
|
Author: DataMarket
|
|
8
7
|
Author-email: techsupport@datamarket.es
|
|
@@ -22,7 +21,6 @@ Provides-Extra: boto3
|
|
|
22
21
|
Provides-Extra: chompjs
|
|
23
22
|
Provides-Extra: click
|
|
24
23
|
Provides-Extra: clickhouse-driver
|
|
25
|
-
Provides-Extra: croniter
|
|
26
24
|
Provides-Extra: datetime
|
|
27
25
|
Provides-Extra: demjson3
|
|
28
26
|
Provides-Extra: dnspython
|
|
@@ -44,7 +42,6 @@ Provides-Extra: openpyxl
|
|
|
44
42
|
Provides-Extra: pandas
|
|
45
43
|
Provides-Extra: pandera
|
|
46
44
|
Provides-Extra: peerdb
|
|
47
|
-
Provides-Extra: pendulum
|
|
48
45
|
Provides-Extra: pillow
|
|
49
46
|
Provides-Extra: playwright
|
|
50
47
|
Provides-Extra: playwright-stealth
|
|
@@ -71,11 +68,12 @@ Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra =
|
|
|
71
68
|
Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
|
|
72
69
|
Requires-Dist: click (==8.1.7) ; extra == "click"
|
|
73
70
|
Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
74
|
-
Requires-Dist: croniter (==3.0.4)
|
|
71
|
+
Requires-Dist: croniter (==3.0.4)
|
|
75
72
|
Requires-Dist: datetime (==5.5) ; extra == "datetime"
|
|
76
73
|
Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
|
|
77
74
|
Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
|
|
78
75
|
Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
|
|
76
|
+
Requires-Dist: dynaconf (==3.2.6)
|
|
79
77
|
Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
|
|
80
78
|
Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
|
|
81
79
|
Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
|
|
@@ -85,13 +83,14 @@ Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
|
|
|
85
83
|
Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
|
|
86
84
|
Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
|
|
87
85
|
Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
|
|
86
|
+
Requires-Dist: jinja2 (==3.1.5)
|
|
88
87
|
Requires-Dist: json5 (==0.9.25) ; extra == "json5"
|
|
89
88
|
Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
|
|
90
89
|
Requires-Dist: nodriver (==0.37) ; extra == "nodriver"
|
|
91
90
|
Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
|
|
92
91
|
Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
|
|
93
92
|
Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
|
|
94
|
-
Requires-Dist: pendulum (==3.0.0)
|
|
93
|
+
Requires-Dist: pendulum (==3.0.0)
|
|
95
94
|
Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
|
|
96
95
|
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
97
96
|
Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
|
|
@@ -116,6 +115,7 @@ Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedr
|
|
|
116
115
|
Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
|
|
117
116
|
Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
|
|
118
117
|
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
118
|
+
Project-URL: Homepage, https://datamarket.es
|
|
119
119
|
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
120
120
|
Description-Content-Type: text/markdown
|
|
121
121
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
|
|
2
2
|
datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
datamarket/interfaces/alchemy.py,sha256=V8E1GtokxUNmrUftKTFkIpNoXaqJME7ACES2BY0znQM,4214
|
|
4
|
-
datamarket/interfaces/aws.py,sha256=
|
|
4
|
+
datamarket/interfaces/aws.py,sha256=UztVuBn561DnU1AcjyJ16UAIS1BUD5HUxiQ4gc9EhtM,1968
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=9GQgiNBBK7njkv8ytHQaP9YLB9kI5vnUFA5gtz9J7As,1859
|
|
7
7
|
datamarket/interfaces/nominatim.py,sha256=WkPXaug-oH5zJkuE6aXMu4-MEkGYIY7S6TekfZ2FnHY,3658
|
|
@@ -13,11 +13,11 @@ datamarket/params/nominatim.py,sha256=pBYRfoBkkLBg2INbFymefmYSzaAVujQSpEro5c1hD_
|
|
|
13
13
|
datamarket/utils/__init__.py,sha256=8D5a8oKgqd6WA1RUkiKCn4l_PVemtyuckxQut0vDHXM,20
|
|
14
14
|
datamarket/utils/airflow.py,sha256=al0vc0YUikNu3Oy51VSn52I7pMU40akFBOl_UlHa2E4,795
|
|
15
15
|
datamarket/utils/alchemy.py,sha256=SRq6kgh1aANXVShBPgAuglmNhZssPWwWEY503gKSia8,635
|
|
16
|
-
datamarket/utils/main.py,sha256=
|
|
16
|
+
datamarket/utils/main.py,sha256=z6gbwR5RhFXYFLkkfCRk14DQsjvSnmJ_GDRd0G5PKgg,5144
|
|
17
17
|
datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
|
-
datamarket-0.
|
|
21
|
-
datamarket-0.
|
|
22
|
-
datamarket-0.
|
|
23
|
-
datamarket-0.
|
|
20
|
+
datamarket-0.8.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
21
|
+
datamarket-0.8.0.dist-info/METADATA,sha256=1dr2cvGcPu3WVR-lAkWkoHRtQ31eS9uImDwvLtWTi0Q,6176
|
|
22
|
+
datamarket-0.8.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
23
|
+
datamarket-0.8.0.dist-info/RECORD,,
|
|
File without changes
|