datamarket 0.9.5__tar.gz → 0.9.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.9.5 → datamarket-0.9.7}/PKG-INFO +1 -1
- {datamarket-0.9.5 → datamarket-0.9.7}/pyproject.toml +1 -1
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/aws.py +2 -3
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/ftp.py +2 -2
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/peerdb.py +28 -18
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/main.py +30 -12
- {datamarket-0.9.5 → datamarket-0.9.7}/LICENSE +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/README.md +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.5 → datamarket-0.9.7}/src/datamarket/utils/typer.py +0 -0
|
@@ -4,8 +4,7 @@
|
|
|
4
4
|
import io
|
|
5
5
|
import logging
|
|
6
6
|
import boto3
|
|
7
|
-
|
|
8
|
-
from ..utils.main import Config
|
|
7
|
+
from dynaconf import Dynaconf
|
|
9
8
|
|
|
10
9
|
########################################################################################################################
|
|
11
10
|
# CLASSES
|
|
@@ -14,7 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class AWSInterface:
|
|
17
|
-
def __init__(self, config: Config) -> None:
|
|
16
|
+
def __init__(self, config: Dynaconf) -> None:
|
|
18
17
|
self.profiles = []
|
|
19
18
|
self.config = config
|
|
20
19
|
|
|
@@ -5,7 +5,7 @@ import logging
|
|
|
5
5
|
from ftplib import FTP, FTP_TLS
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
|
|
8
|
-
from ..utils.main import Config
|
|
8
|
+
from dynaconf import Dynaconf
|
|
9
9
|
|
|
10
10
|
########################################################################################################################
|
|
11
11
|
# CLASSES
|
|
@@ -13,7 +13,7 @@ from ..utils.main import Config
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
15
|
class FTPInterface:
|
|
16
|
-
def __init__(self, config: Config):
|
|
16
|
+
def __init__(self, config: Dynaconf):
|
|
17
17
|
if "ftp" in config:
|
|
18
18
|
self.config = config["ftp"]
|
|
19
19
|
|
|
@@ -15,6 +15,13 @@ from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponenti
|
|
|
15
15
|
|
|
16
16
|
from .alchemy import AlchemyInterface
|
|
17
17
|
|
|
18
|
+
########################################################################################################################
|
|
19
|
+
# EXCEPTIONS
|
|
20
|
+
|
|
21
|
+
class DatabaseNotConnectedError(Exception):
|
|
22
|
+
"""Custom error for when database is not connected."""
|
|
23
|
+
pass
|
|
24
|
+
|
|
18
25
|
########################################################################################################################
|
|
19
26
|
# CLASSES
|
|
20
27
|
|
|
@@ -105,32 +112,32 @@ class ClickhousePeer:
|
|
|
105
112
|
def __init__(self, config):
|
|
106
113
|
if "clickhouse" in config:
|
|
107
114
|
self.config = config["clickhouse"]
|
|
108
|
-
self.ensure_database_exists()
|
|
109
|
-
self.client = self._create_client(database=self.config["database"])
|
|
110
115
|
else:
|
|
111
116
|
logger.warning("no clickhouse section in config")
|
|
112
117
|
|
|
113
|
-
def
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
118
|
+
def connect(self, database):
|
|
119
|
+
if not database:
|
|
120
|
+
return
|
|
121
|
+
|
|
122
|
+
self.ensure_database_exists(database)
|
|
123
|
+
self.config["database"] = database
|
|
124
|
+
self.client = clickhouse_driver.Client(**self.config)
|
|
125
|
+
|
|
126
|
+
def _check_connection(self):
|
|
127
|
+
if self.client is None:
|
|
128
|
+
raise DatabaseNotConnectedError("Database not connected. Call connect() method first.")
|
|
123
129
|
|
|
124
|
-
def ensure_database_exists(self):
|
|
125
|
-
logger.info(f"Checking if database '{
|
|
126
|
-
temp_client = self.
|
|
130
|
+
def ensure_database_exists(self, database):
|
|
131
|
+
logger.info(f"Checking if database '{database}' exists in Clickhouse")
|
|
132
|
+
temp_client = clickhouse_driver.Client(**self.config)
|
|
127
133
|
databases = temp_client.execute("SHOW DATABASES")
|
|
128
|
-
if
|
|
129
|
-
logger.info(f"Creating database '{
|
|
130
|
-
temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {
|
|
134
|
+
if database not in [db[0] for db in databases]:
|
|
135
|
+
logger.info(f"Creating database '{database}'")
|
|
136
|
+
temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
|
|
131
137
|
temp_client.disconnect()
|
|
132
138
|
|
|
133
139
|
def delete_existing_tables(self, table_names):
|
|
140
|
+
self._check_connection()
|
|
134
141
|
logger.info(f"Deleting existing tables in Clickhouse for database: {self.config['database']}")
|
|
135
142
|
|
|
136
143
|
all_tables = self.client.execute("SHOW TABLES")
|
|
@@ -156,6 +163,7 @@ class ClickhousePeer:
|
|
|
156
163
|
logger.info("Finished deleting existing tables in Clickhouse")
|
|
157
164
|
|
|
158
165
|
def create_row_policies(self, schema_name, table_names):
|
|
166
|
+
self._check_connection()
|
|
159
167
|
logger.info(f"Creating row policies for schema: {schema_name}")
|
|
160
168
|
for table_name in table_names:
|
|
161
169
|
policy_name = "non_deleted"
|
|
@@ -167,6 +175,7 @@ class ClickhousePeer:
|
|
|
167
175
|
logger.info(f"Created row policy '{policy_name}' for table '{table_name}'")
|
|
168
176
|
|
|
169
177
|
def execute_sql_file(self, file_path):
|
|
178
|
+
self._check_connection()
|
|
170
179
|
try:
|
|
171
180
|
with file_path.open("r") as sql_file:
|
|
172
181
|
sql_content = sql_file.read()
|
|
@@ -501,6 +510,7 @@ class PeerDBInterface:
|
|
|
501
510
|
self.source.create_user(peerdb_user, peerdb_pwd)
|
|
502
511
|
self.source.grant_permissions(schema_name, peerdb_user)
|
|
503
512
|
self.source.create_publication(schema_name, mirror_tablenames)
|
|
513
|
+
self.destination.connect(schema_name)
|
|
504
514
|
self.create_postgres_peer()
|
|
505
515
|
self.create_clickhouse_peer(schema_name)
|
|
506
516
|
self.pre_init(schema_name, mirror_tablenames, clickhouse_sql_path, resync, hard_resync)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
4
|
import asyncio
|
|
5
|
+
from dataclasses import dataclass
|
|
5
6
|
import inspect
|
|
6
7
|
import logging
|
|
7
8
|
import random
|
|
@@ -11,23 +12,35 @@ import shutil
|
|
|
11
12
|
import subprocess
|
|
12
13
|
import time
|
|
13
14
|
from pathlib import Path
|
|
14
|
-
from typing import Literal, Union
|
|
15
|
+
from typing import Literal, Optional, Union
|
|
15
16
|
|
|
16
17
|
import pendulum
|
|
17
18
|
from croniter import croniter
|
|
18
19
|
from configparser import RawConfigParser
|
|
19
20
|
from dynaconf import Dynaconf, add_converter
|
|
20
21
|
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
21
24
|
########################################################################################################################
|
|
22
|
-
#
|
|
25
|
+
# CLASSES
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
|
|
29
|
+
class ProjectMetadata:
|
|
30
|
+
cmd_prefix: str
|
|
31
|
+
package_name: str
|
|
32
|
+
env_name: str
|
|
33
|
+
path: Path
|
|
34
|
+
config_path: Path
|
|
23
35
|
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
36
|
|
|
26
|
-
|
|
37
|
+
########################################################################################################################
|
|
38
|
+
# FUNCTIONS
|
|
27
39
|
|
|
28
40
|
|
|
29
41
|
def get_granular_date(
|
|
30
|
-
granularity: Union[Literal["monthly", "biweekly", "weekly", "daily"], str],
|
|
42
|
+
granularity: Union[Literal["monthly", "biweekly", "weekly", "daily"], str],
|
|
43
|
+
tz: str = "Europe/Madrid",
|
|
31
44
|
) -> pendulum.DateTime:
|
|
32
45
|
"""
|
|
33
46
|
Returns the most recent date based on the given granularity or a custom cron expression.
|
|
@@ -67,8 +80,10 @@ def read_converter(path_str: str):
|
|
|
67
80
|
|
|
68
81
|
|
|
69
82
|
def get_config(
|
|
70
|
-
config_file: Path, tz: str = "Europe/Madrid"
|
|
71
|
-
) ->
|
|
83
|
+
config_file: Optional[Path] = None, tz: str = "Europe/Madrid"
|
|
84
|
+
) -> Dynaconf:
|
|
85
|
+
config_file = config_file or get_project_metadata().config_path
|
|
86
|
+
|
|
72
87
|
if Path(config_file).suffix == ".ini":
|
|
73
88
|
logger.warning("Using legacy INI config reader. Please migrate to TOML")
|
|
74
89
|
cfg = RawConfigParser()
|
|
@@ -106,17 +121,20 @@ def get_config(
|
|
|
106
121
|
|
|
107
122
|
return config
|
|
108
123
|
|
|
109
|
-
|
|
110
|
-
def get_project_metadata():
|
|
124
|
+
def get_project_metadata() -> ProjectMetadata:
|
|
111
125
|
caller_frame = inspect.stack()[1]
|
|
112
126
|
current_file_parts = Path(caller_frame.filename).resolve().parts
|
|
113
127
|
src_index = current_file_parts.index("src")
|
|
128
|
+
|
|
114
129
|
cmd_prefix = "dix vnc run --" if shutil.which("dix") else ""
|
|
115
|
-
|
|
116
|
-
env_name = f"{
|
|
130
|
+
package_name = current_file_parts[src_index + 1]
|
|
131
|
+
env_name = f"{package_name}_env"
|
|
117
132
|
project_path = Path(*current_file_parts[:src_index])
|
|
133
|
+
config_path = project_path / "config.toml"
|
|
118
134
|
|
|
119
|
-
return
|
|
135
|
+
return ProjectMetadata(
|
|
136
|
+
cmd_prefix, package_name, env_name, project_path, config_path
|
|
137
|
+
)
|
|
120
138
|
|
|
121
139
|
|
|
122
140
|
def set_logger(level):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|