datamarket 0.7.21__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -16,16 +16,14 @@ class AWSInterface:
16
16
  self.profiles = []
17
17
  self.config = config
18
18
 
19
- for section in self.config.sections():
20
- if section.startswith("aws:"):
21
- profile_name = section.split(":", 1)[1]
22
- self.profiles.append(
23
- {
24
- "profile": profile_name,
25
- "bucket": self.config[section]["bucket"],
26
- "session": boto3.Session(profile_name=profile_name),
27
- }
28
- )
19
+ for profile_name, values in self.config.get("aws", {}).items():
20
+ self.profiles.append(
21
+ {
22
+ "profile": profile_name,
23
+ "bucket": values["bucket"],
24
+ "session": boto3.Session(profile_name=profile_name),
25
+ }
26
+ )
29
27
 
30
28
  if not self.profiles:
31
29
  logger.warning("No AWS profiles found in config file")
datamarket/utils/main.py CHANGED
@@ -2,14 +2,20 @@
2
2
  # IMPORTS
3
3
 
4
4
  import configparser
5
+ import inspect
5
6
  import logging
6
7
  import random
7
8
  import re
8
9
  import shlex
10
+ import shutil
9
11
  import subprocess
10
12
  import time
13
+ from pathlib import Path
14
+ from typing import Literal, Union
11
15
 
12
16
  import pendulum
17
+ from croniter import croniter
18
+ from dynaconf import Dynaconf, add_converter
13
19
 
14
20
  ########################################################################################################################
15
21
  # FUNCTIONS
@@ -17,10 +23,77 @@ import pendulum
17
23
  logger = logging.getLogger(__name__)
18
24
 
19
25
 
20
- def get_config(config_path):
21
- cfg = configparser.RawConfigParser()
22
- cfg.read(config_path)
23
- return cfg
26
def get_granular_date(
    granularity: Union[Literal["monthly", "biweekly", "weekly", "daily"], str], tz: str = "Europe/Madrid"
) -> str:
    """
    Return the latest date matching a schedule, formatted as "YYYY-MM-DD".

    Args:
        granularity: One of the named schedules ("monthly", "biweekly", "weekly", "daily").
            Any other value is handed to croniter unchanged as a cron expression.
        tz: Timezone name used to obtain the current moment (default: "Europe/Madrid").

    Returns:
        The date of the schedule's previous occurrence, as reported by croniter's
        ``get_prev`` relative to now, in the format "YYYY-MM-DD".

    Raises:
        ValueError: If building the croniter or computing the previous occurrence fails.
    """
    reference = pendulum.now(tz)

    # Named schedules and the cron expression each one stands for.
    named_schedules = {
        "monthly": "0 0 1 * *",
        "biweekly": "0 0 1,15 * *",
        "weekly": "0 0 * * MON",
        "daily": "0 0 * * *",
    }

    # Unknown names fall through untouched and are treated as raw cron expressions.
    expression = named_schedules.get(granularity, granularity)

    try:
        previous_run = croniter(expression, reference).get_prev(pendulum.DateTime)
        return previous_run.strftime("%Y-%m-%d")
    except Exception as e:
        raise ValueError("Invalid cron expression or granularity specified.") from e
58
+
59
+
60
def read_converter(path_str: str):
    """
    Return the full text content of the file at ``path_str``.

    The file is opened in text mode with the platform default encoding.
    """
    with open(path_str) as handle:
        contents = handle.read()
    return contents
63
+
64
+
65
def get_config(config_file: Path, tz: str = "Europe/Madrid"):
    """
    Load project configuration from ``config_file``.

    Files whose suffix is ".ini" are read with the legacy ``configparser`` reader.
    Any other file is loaded into a Dynaconf settings object, followed by a file
    with the same name located in the user's home directory.

    Args:
        config_file: Location of the configuration file (a ``Path`` or a plain string).
        tz: Timezone forwarded to ``get_granular_date`` when computing the
            ``today`` and ``biweekly_date`` variables (default: "Europe/Madrid").

    Returns:
        A ``configparser.RawConfigParser`` for ".ini" files, otherwise a ``Dynaconf`` instance.
    """
    # Normalise once so both `.suffix` and `.name` work even when a string is passed.
    config_path = Path(config_file)

    # `Path.suffix` includes the leading dot, so the comparison must be against ".ini".
    if config_path.suffix == ".ini":
        logger.warning("Using legacy INI config reader. Please migrate to TOML")
        cfg = configparser.RawConfigParser()
        # `read()` returns the list of files it parsed, not the parser; return `cfg` itself.
        cfg.read(config_path)
        return cfg

    # Register the "read" converter so config values can reference file contents.
    add_converter("read", read_converter)

    config = Dynaconf(
        environments=True,
        env_switcher="SYSTYPE",
        vars={
            "today": get_granular_date("daily", tz),
            "biweekly_date": get_granular_date("biweekly", tz),
        },
    )

    config.load_file(path=config_path)
    # NOTE(review): presumably lets a same-named file in the home directory override
    # project values — confirm against Dynaconf's `load_file` merge semantics.
    config.load_file(path=Path.home() / config_path.name)
    return config
85
+
86
+
87
def get_project_metadata():
    """
    Derive project metadata from the location of the calling module.

    The caller's file path must contain a "src" component: the component right
    after it is taken as the package name, and everything before it as the
    project path.

    Returns:
        A dict with the keys "cmd_prefix", "pkg_name", "env_name" and "project_path".

    Raises:
        ValueError: If the caller's resolved path has no "src" component.
    """
    # Stack entry 1 is whoever called this function; keep this lookup in this
    # function's own body so the index keeps pointing at the caller.
    parts = Path(inspect.stack()[1].filename).resolve().parts
    src_pos = parts.index("src")

    # Prefix commands with the dix wrapper only when the executable is on PATH.
    if shutil.which("dix"):
        prefix = "dix vnc run --"
    else:
        prefix = ""

    package = parts[src_pos + 1]

    return {
        "cmd_prefix": prefix,
        "pkg_name": package,
        "env_name": f"{package}_env",
        "project_path": Path(*parts[:src_pos]),
    }
24
97
 
25
98
 
26
99
  def set_logger(level):
@@ -34,7 +107,7 @@ def set_logger(level):
34
107
 
35
108
 
36
109
def ban_sleep(max_time, min_time=0):
    """
    Pause for a random whole number of seconds.

    The duration is drawn uniformly between ``min_time`` and ``max_time`` and
    truncated to an integer before sleeping.
    """
    delay = int(random.uniform(min_time, max_time))  # noqa: S311
    logger.info(f"sleeping for {delay} seconds...")
    time.sleep(delay)
40
113
 
@@ -1,8 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.7.21
3
+ Version: 0.8.0
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
- Home-page: https://datamarket.es
6
5
  License: GPL-3.0-or-later
7
6
  Author: DataMarket
8
7
  Author-email: techsupport@datamarket.es
@@ -22,7 +21,6 @@ Provides-Extra: boto3
22
21
  Provides-Extra: chompjs
23
22
  Provides-Extra: click
24
23
  Provides-Extra: clickhouse-driver
25
- Provides-Extra: croniter
26
24
  Provides-Extra: datetime
27
25
  Provides-Extra: demjson3
28
26
  Provides-Extra: dnspython
@@ -44,7 +42,6 @@ Provides-Extra: openpyxl
44
42
  Provides-Extra: pandas
45
43
  Provides-Extra: pandera
46
44
  Provides-Extra: peerdb
47
- Provides-Extra: pendulum
48
45
  Provides-Extra: pillow
49
46
  Provides-Extra: playwright
50
47
  Provides-Extra: playwright-stealth
@@ -71,11 +68,12 @@ Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra =
71
68
  Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
72
69
  Requires-Dist: click (==8.1.7) ; extra == "click"
73
70
  Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
74
- Requires-Dist: croniter (==3.0.4) ; extra == "croniter"
71
+ Requires-Dist: croniter (==3.0.4)
75
72
  Requires-Dist: datetime (==5.5) ; extra == "datetime"
76
73
  Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
77
74
  Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
78
75
  Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
76
+ Requires-Dist: dynaconf (==3.2.6)
79
77
  Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
80
78
  Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
81
79
  Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
@@ -85,13 +83,14 @@ Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
85
83
  Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
86
84
  Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
87
85
  Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
86
+ Requires-Dist: jinja2 (==3.1.5)
88
87
  Requires-Dist: json5 (==0.9.25) ; extra == "json5"
89
88
  Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
90
89
  Requires-Dist: nodriver (==0.37) ; extra == "nodriver"
91
90
  Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
92
91
  Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
93
92
  Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
94
- Requires-Dist: pendulum (==3.0.0) ; extra == "pendulum"
93
+ Requires-Dist: pendulum (==3.0.0)
95
94
  Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
96
95
  Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
97
96
  Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
@@ -116,6 +115,7 @@ Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedr
116
115
  Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
117
116
  Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
118
117
  Project-URL: Documentation, https://github.com/Data-Market/datamarket
118
+ Project-URL: Homepage, https://datamarket.es
119
119
  Project-URL: Repository, https://github.com/Data-Market/datamarket
120
120
  Description-Content-Type: text/markdown
121
121
 
@@ -1,7 +1,7 @@
1
1
  datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
2
2
  datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  datamarket/interfaces/alchemy.py,sha256=V8E1GtokxUNmrUftKTFkIpNoXaqJME7ACES2BY0znQM,4214
4
- datamarket/interfaces/aws.py,sha256=Mk9h-UcdbyNPUaYaG9wlKdd0R95xzTwtX1-_PgsBkjo,2084
4
+ datamarket/interfaces/aws.py,sha256=UztVuBn561DnU1AcjyJ16UAIS1BUD5HUxiQ4gc9EhtM,1968
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
6
  datamarket/interfaces/ftp.py,sha256=9GQgiNBBK7njkv8ytHQaP9YLB9kI5vnUFA5gtz9J7As,1859
7
7
  datamarket/interfaces/nominatim.py,sha256=WkPXaug-oH5zJkuE6aXMu4-MEkGYIY7S6TekfZ2FnHY,3658
@@ -13,11 +13,11 @@ datamarket/params/nominatim.py,sha256=pBYRfoBkkLBg2INbFymefmYSzaAVujQSpEro5c1hD_
13
13
  datamarket/utils/__init__.py,sha256=8D5a8oKgqd6WA1RUkiKCn4l_PVemtyuckxQut0vDHXM,20
14
14
  datamarket/utils/airflow.py,sha256=al0vc0YUikNu3Oy51VSn52I7pMU40akFBOl_UlHa2E4,795
15
15
  datamarket/utils/alchemy.py,sha256=SRq6kgh1aANXVShBPgAuglmNhZssPWwWEY503gKSia8,635
16
- datamarket/utils/main.py,sha256=jumvHgbPiUNUTGZ2ebGcGhV6wPWV1BHig6Wranxmn4w,2696
16
+ datamarket/utils/main.py,sha256=z6gbwR5RhFXYFLkkfCRk14DQsjvSnmJ_GDRd0G5PKgg,5144
17
17
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
- datamarket-0.7.21.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
- datamarket-0.7.21.dist-info/METADATA,sha256=ecszHimSeE45_gqA5PiBAuzMmDa55cZCr2R7NJfP4bU,6193
22
- datamarket-0.7.21.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
23
- datamarket-0.7.21.dist-info/RECORD,,
20
+ datamarket-0.8.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ datamarket-0.8.0.dist-info/METADATA,sha256=1dr2cvGcPu3WVR-lAkWkoHRtQ31eS9uImDwvLtWTi0Q,6176
22
+ datamarket-0.8.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
23
+ datamarket-0.8.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: poetry-core 2.0.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any