datamarket 0.6.0__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamarket might be problematic.

Files changed (38)
  1. datamarket/__init__.py +0 -1
  2. datamarket/exceptions/__init__.py +1 -0
  3. datamarket/exceptions/main.py +118 -0
  4. datamarket/interfaces/alchemy.py +1934 -25
  5. datamarket/interfaces/aws.py +81 -14
  6. datamarket/interfaces/azure.py +127 -0
  7. datamarket/interfaces/drive.py +60 -10
  8. datamarket/interfaces/ftp.py +37 -14
  9. datamarket/interfaces/llm.py +1220 -0
  10. datamarket/interfaces/nominatim.py +314 -42
  11. datamarket/interfaces/peerdb.py +272 -104
  12. datamarket/interfaces/proxy.py +354 -50
  13. datamarket/interfaces/tinybird.py +7 -15
  14. datamarket/params/nominatim.py +439 -0
  15. datamarket/utils/__init__.py +1 -1
  16. datamarket/utils/airflow.py +10 -7
  17. datamarket/utils/alchemy.py +2 -1
  18. datamarket/utils/logs.py +88 -0
  19. datamarket/utils/main.py +138 -10
  20. datamarket/utils/nominatim.py +201 -0
  21. datamarket/utils/playwright/__init__.py +0 -0
  22. datamarket/utils/playwright/async_api.py +274 -0
  23. datamarket/utils/playwright/sync_api.py +281 -0
  24. datamarket/utils/requests.py +655 -0
  25. datamarket/utils/selenium.py +6 -12
  26. datamarket/utils/strings/__init__.py +1 -0
  27. datamarket/utils/strings/normalization.py +217 -0
  28. datamarket/utils/strings/obfuscation.py +153 -0
  29. datamarket/utils/strings/standardization.py +40 -0
  30. datamarket/utils/typer.py +2 -1
  31. datamarket/utils/types.py +1 -0
  32. datamarket-0.10.3.dist-info/METADATA +172 -0
  33. datamarket-0.10.3.dist-info/RECORD +38 -0
  34. {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info}/WHEEL +1 -2
  35. datamarket-0.6.0.dist-info/METADATA +0 -49
  36. datamarket-0.6.0.dist-info/RECORD +0 -24
  37. datamarket-0.6.0.dist-info/top_level.txt +0 -1
  38. {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info/licenses}/LICENSE +0 -0
datamarket/interfaces/peerdb.py

@@ -3,13 +3,27 @@
 
 import base64
 import logging
+import re
 import time
 
+import boto3
 import clickhouse_driver
 import requests
-from .alchemy import AlchemyInterface
-from .aws import AWSInterface
+from requests.exceptions import HTTPError
 from sqlalchemy import text
+from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
+
+from .alchemy import AlchemyInterface
+
+########################################################################################################################
+# EXCEPTIONS
+
+
+class DatabaseNotConnectedError(Exception):
+    """Custom error for when database is not connected."""
+
+    pass
+
 
 ########################################################################################################################
 # CLASSES
@@ -19,45 +33,47 @@ logger = logging.getLogger(__name__)
 
 class PostgresPeer:
     def __init__(self, config):
-        self.config = config["db"]
-        self.alchemy_interface = AlchemyInterface(config)
-        self.engine = self.alchemy_interface.engine
+        if "db" in config:
+            self.config = config["db"]
+            self.alchemy_interface = AlchemyInterface(config)
+            self.engine = self.alchemy_interface.engine
+        else:
+            logger.warning("no db section in config")
 
-    def create_user(self):
-        user = self.config["user"]
-        password = self.config["password"]
+    def create_user(self, user, password):
+        database = self.config["database"]
 
         logger.info(f"Creating PostgreSQL user '{user}' for database: {self.config['database']}")
 
         with self.engine.connect() as conn:
             conn.execute(
                 text(f"""
-                DO $$
-                BEGIN
-                    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{user}') THEN
-                        CREATE USER {user} WITH PASSWORD '{password}';
-                        ALTER USER {user} REPLICATION;
-                        GRANT CREATE ON DATABASE datamarket TO {user};
-                    END IF;
-                END
-                $$;
-                """)
+                    DO $$
+                    BEGIN
+                        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{user}') THEN
+                            CREATE USER "{user}" WITH PASSWORD '{password}';
+                            ALTER USER "{user}" REPLICATION;
+                            GRANT CREATE ON DATABASE {database} TO "{user}";
+                        END IF;
+                    END
+                    $$;
+                """)
             )
+            conn.commit()
         logger.info(f"PostgreSQL user '{user}' created or already exists")
 
-    def grant_permissions(self, schema_name):
-        user = self.config["user"]
-
+    def grant_permissions(self, schema_name, user):
         logger.info(f"Granting permissions for schema '{schema_name}' to '{user}'")
 
         with self.engine.connect() as conn:
             conn.execute(
                 text(f"""
-                GRANT USAGE ON SCHEMA "{schema_name}" TO {user};
-                GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA "{schema_name}" TO {user};
-                ALTER DEFAULT PRIVILEGES IN SCHEMA "{schema_name}" GRANT ALL PRIVILEGES ON TABLES TO {user};
-                """)
+                    GRANT USAGE ON SCHEMA "{schema_name}" TO "{user}";
+                    GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA "{schema_name}" TO "{user}";
+                    ALTER DEFAULT PRIVILEGES IN SCHEMA "{schema_name}" GRANT ALL PRIVILEGES ON TABLES TO "{user}";
+                """)
             )
+            conn.commit()
         logger.info(f"Permissions granted for schema '{schema_name}' to '{user}'")
 
     def create_publication(self, schema_name, table_names):
@@ -65,12 +81,40 @@ class PostgresPeer:
         with self.engine.connect() as conn:
             conn.execute(text(f"DROP PUBLICATION IF EXISTS {schema_name}_peerdb"))
 
-            table_list = ", ".join([f"{schema_name}.{table}" for table in table_names])
+            table_list = []
+            for table in table_names:
+                full_table_name = f'"{schema_name}"."{table}"'
+
+                # Check current replica identity
+                query = text("""
+                    SELECT CASE c.relreplident
+                        WHEN 'd' THEN 'DEFAULT'
+                        WHEN 'n' THEN 'NOTHING'
+                        WHEN 'f' THEN 'FULL'
+                        WHEN 'i' THEN 'INDEX'
+                    END AS replica_identity
+                    FROM pg_class c
+                    JOIN pg_namespace n ON c.relnamespace = n.oid
+                    WHERE c.relname = :table_name
+                    AND n.nspname = :schema_name;
+                """)
+                result = conn.execute(query, {"table_name": table, "schema_name": schema_name}).scalar_one_or_none()
+
+                if result != "FULL":
+                    logger.info(f"Setting REPLICA IDENTITY FULL for table: {full_table_name}")
+                    conn.execute(text(f"ALTER TABLE {full_table_name} REPLICA IDENTITY FULL;"))
+                else:
+                    logger.info(f"REPLICA IDENTITY for table {full_table_name} is already FULL. Skipping ALTER TABLE.")
+
+                table_list.append(full_table_name)
+
+            table_list_str = ", ".join(table_list)
             conn.execute(
                 text(f"""
-                CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list};
-                """)
+                    CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list_str};
+                """)
             )
+            conn.commit()
         logger.info(f"Publication '{schema_name}_peerdb' created successfully")
 
     def create_tables(self, schema_tables, drop=False):
@@ -90,39 +134,42 @@ class PostgresPeer:
                 """),
                 {"slot_name": slot_name},
             )
+            conn.commit()
         logger.info(f"Replication slot '{slot_name}' dropped if it existed")
 
 
 class ClickhousePeer:
     def __init__(self, config):
-        self.config = config["clickhouse"]
-        self.ensure_database_exists()
-        self.client = clickhouse_driver.Client(
-            host=self.config["host"],
-            port=self.config["port"],
-            user=self.config["user"],
-            password=self.config["password"],
-            database=self.config["database"],
-        )
-
-    def ensure_database_exists(self):
-        logger.info(f"Checking if database '{self.config['database']}' exists in Clickhouse")
-        temp_client = clickhouse_driver.Client(
-            host=self.config["host"],
-            port=self.config["port"],
-            user=self.config["user"],
-            password=self.config["password"],
-        )
+        if "clickhouse" in config:
+            self.config = config["clickhouse"]
+            self.credentials = {key: self.config[key] for key in ["user", "password", "host", "port"]}
 
-        databases = temp_client.execute("SHOW DATABASES")
-        if (self.config["database"],) not in databases:
-            logger.info(f"Database '{self.config['database']}' does not exist. Creating it now.")
-            temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {self.config['database']}")
-            logger.info(f"Database '{self.config['database']}' created successfully")
         else:
-            logger.info(f"Database '{self.config['database']}' already exists")
+            logger.warning("no clickhouse section in config")
+
+    def connect(self, database):
+        if not database:
+            return
+
+        self.ensure_database_exists(database)
+        self.config["database"] = self.credentials["database"] = database
+        self.client = clickhouse_driver.Client(**self.credentials)
+
+    def _check_connection(self):
+        if self.client is None:
+            raise DatabaseNotConnectedError("Database not connected. Call connect() method first.")
+
+    def ensure_database_exists(self, database):
+        logger.info(f"Checking if database '{database}' exists in Clickhouse")
+        temp_client = clickhouse_driver.Client(**self.credentials)
+        databases = temp_client.execute("SHOW DATABASES")
+        if database not in [db[0] for db in databases]:
+            logger.info(f"Creating database '{database}'")
+            temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
+        temp_client.disconnect()
 
     def delete_existing_tables(self, table_names):
+        self._check_connection()
         logger.info(f"Deleting existing tables in Clickhouse for database: {self.config['database']}")
 
         all_tables = self.client.execute("SHOW TABLES")
@@ -148,6 +195,7 @@ class ClickhousePeer:
         logger.info("Finished deleting existing tables in Clickhouse")
 
     def create_row_policies(self, schema_name, table_names):
+        self._check_connection()
        logger.info(f"Creating row policies for schema: {schema_name}")
         for table_name in table_names:
             policy_name = "non_deleted"
@@ -158,34 +206,86 @@ class ClickhousePeer:
             self.client.execute(query)
             logger.info(f"Created row policy '{policy_name}' for table '{table_name}'")
 
+    def execute_sql_file(self, file_path):
+        self._check_connection()
+        try:
+            with file_path.open("r") as sql_file:
+                sql_content = sql_file.read()
+                logger.info(f"Executing SQL from file: {file_path}")
+
+                sql_statements = [stmt.strip() for stmt in sql_content.split(";") if stmt.strip()]
+
+                for statement in sql_statements:
+                    self.client.execute(statement)
+                    logger.info(f"Successfully executed SQL statement: {statement}")
+
+        except Exception as e:
+            logger.error(f"Error executing SQL from file {file_path}: {str(e)}")
+
+    def teardown_from_sql_folder(self, sql_folder):
+        logger.info("Performing ClickHouse teardown")
+        self._process_sql_files(sql_folder, teardown=True)
+        logger.info("ClickHouse teardown completed")
+
+    def initialize_from_sql_folder(self, sql_folder):
+        logger.info(f"Initializing Clickhouse database from SQL files in folder: {sql_folder}")
+        self._process_sql_files(sql_folder)
+        logger.info("Finished initializing Clickhouse database from SQL files")
+
+    def _process_sql_files(self, sql_folder, teardown=False):
+        if not sql_folder.exists():
+            logger.error(f"SQL initialization folder does not exist: {sql_folder}")
+            return
+
+        all_dirs = [sql_folder] + [d for d in sql_folder.rglob("*") if d.is_dir()]
+        sorted_dirs = sorted(all_dirs)
+
+        for directory in sorted_dirs:
+            sql_files = self._filter_sql_files(directory, teardown)
+
+            for file_path in sql_files:
+                self.execute_sql_file(file_path)
+
+    def _filter_sql_files(self, directory, teardown):
+        all_sql_files = directory.glob("*.sql")
+        return sorted(f for f in all_sql_files if ("teardown" in f.name.lower()) == teardown)
+
 
 class TransientS3:
     def __init__(self, config):
-        self.aws_interface = AWSInterface(config)
-        self.aws_interface.switch_profile("datamarket-minio")
+        if "peerdb-s3" in config:
+            self.config = config["peerdb-s3"]
+            self.bucket_name = self.config["bucket"]
+            self.session = boto3.Session(profile_name=self.config["profile"])
+            self.s3_client = self.session.client("s3")
+            self.credentials = self.session.get_credentials()
+            self.access_key = self.credentials.access_key
+            self.secret_key = self.credentials.secret_key
+            self.region_name = self.session.region_name
+            self.endpoint_url = self.s3_client.meta.endpoint_url
+        else:
+            logger.warning("no peerdb-s3 section in config")
 
     def delete_paths_with_schema(self, schema_name):
         logger.info(f"Deleting paths containing '{schema_name}' from S3")
 
-        bucket_name = self.aws_interface.current_profile["bucket"]
-
-        paginator = self.aws_interface.s3_client.get_paginator("list_objects_v2")
-        pages = paginator.paginate(Bucket=bucket_name, Delimiter="/")
+        paginator = self.s3_client.get_paginator("list_objects_v2")
+        pages = paginator.paginate(Bucket=self.bucket_name, Delimiter="/")
 
         for page in pages:
             if "CommonPrefixes" in page:
                 for prefix in page["CommonPrefixes"]:
                     folder = prefix["Prefix"]
                     if schema_name in folder:
-                        self._delete_folder_contents(bucket_name, folder)
+                        self._delete_folder_contents(folder)
 
         logger.info(f"Deleted paths containing '{schema_name}' from S3")
 
-    def _delete_folder_contents(self, bucket_name, folder):
+    def _delete_folder_contents(self, folder):
         logger.info(f"Deleting contents of folder: {folder}")
 
-        paginator = self.aws_interface.s3_client.get_paginator("list_objects_v2")
-        pages = paginator.paginate(Bucket=bucket_name, Prefix=folder)
+        paginator = self.s3_client.get_paginator("list_objects_v2")
+        pages = paginator.paginate(Bucket=self.bucket_name, Prefix=folder)
 
         delete_us = dict(Objects=[])
         for page in pages:
@@ -195,22 +295,32 @@ class TransientS3:
 
                 # AWS limits to deleting 1000 objects at a time
                 if len(delete_us["Objects"]) >= 1000:
-                    self.aws_interface.s3_client.delete_objects(Bucket=bucket_name, Delete=delete_us)
+                    self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
                     delete_us = dict(Objects=[])
 
         if len(delete_us["Objects"]):
-            self.aws_interface.s3_client.delete_objects(Bucket=bucket_name, Delete=delete_us)
+            self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
 
         logger.info(f"Deleted contents of folder: {folder}")
 
 
 class PeerDBInterface:
     def __init__(self, config):
-        self.config = config["peerdb"]
+        if "peerdb" in config:
+            self.config = config["peerdb"]
+            self.docker_host_mapping = self.config.get("docker_host_mapping")
+        else:
+            logger.warning("no peerdb section in config")
+
         self.source = PostgresPeer(config)
-        self.destination = ClickhousePeer(config)
         self.transient_s3 = TransientS3(config)
+        self.destination = ClickhousePeer(config)
 
+    @retry(
+        stop=stop_after_attempt(5),
+        wait=wait_exponential(multiplier=1, min=4, max=10),
+        before_sleep=before_sleep_log(logger, logging.WARNING),
+    )
     def _make_api_request(self, endpoint, payload):
         url = f"http://{self.config['host']}:{self.config['port']}/api/{endpoint}"
         password = self.config["password"]
@@ -219,17 +329,44 @@ class PeerDBInterface:
 
         headers = {"Authorization": f"Basic {encoded_credentials}", "Content-Type": "application/json"}
 
-        logger.info(f"Making API request to PeerDB endpoint: {endpoint}")
+        logger.debug(f"Making API request to PeerDB endpoint: {endpoint}")
         try:
             r = requests.post(url, headers=headers, json=payload, timeout=30)
+            response = r.json()
             r.raise_for_status()
-            logger.info(f"API request to {endpoint} completed successfully")
-            return r.json()
-        except requests.exceptions.HTTPError as e:
+            logger.debug(f"API request to {endpoint} completed successfully")
+            return response
+        except HTTPError as e:
             logger.error(f"HTTP error occurred: {e}")
-            logger.error(f"Response JSON: {r.json()}")
+            logger.error(f"Response JSON: {r.json() if 'r' in locals() else 'N/A'}")
+
+            if "no rows in result set" in response.get("message", ""):
+                return {"currentFlowState": "STATUS_UNKNOWN"}
+
             raise
 
+    def _resolve_host_mapping(self, host):
+        """
+        Resolves host mapping for Docker environments.
+        If host is localhost/127.0.0.1 and docker_host_mapping is configured,
+        returns the mapped host, otherwise returns original host.
+        """
+        if not self.docker_host_mapping or not host:
+            return host
+
+        if host in ["localhost", "127.0.0.1"]:
+            logger.debug(f"Mapping host {host} to {self.docker_host_mapping} for Docker environment")
+            return self.docker_host_mapping
+
+        url_pattern = r"(localhost|127\.0\.0\.1)"
+        match = re.search(url_pattern, host)
+        if match:
+            original_host = match.group(1)
+            mapped_host = self._resolve_host_mapping(original_host)
+            return host.replace(original_host, mapped_host)
+
+        return host
+
     def create_postgres_peer(self):
         logger.info(f"Creating Postgres peer for database: {self.source.config['database']}")
         payload = {
@@ -237,7 +374,7 @@ class PeerDBInterface:
                 "name": self.source.config["database"],
                 "type": 3,
                 "postgres_config": {
-                    "host": self.source.config["host"],
+                    "host": self._resolve_host_mapping(self.source.config["host"]),
                     "port": int(self.source.config["admin_port"]),
                     "user": self.config["user"],
                     "password": self.config["password"],
@@ -255,23 +392,22 @@ class PeerDBInterface:
 
     def create_clickhouse_peer(self, schema_name):
         logger.info(f"Creating Clickhouse peer for schema: {schema_name}")
-
         payload = {
             "peer": {
                 "name": f"{schema_name}",
                 "type": 8,
                 "clickhouse_config": {
-                    "host": self.destination.config["host"],
+                    "host": self._resolve_host_mapping(self.destination.config["host"]),
                     "port": int(self.destination.config["port"]),
                     "user": self.destination.config["user"],
                     "password": self.destination.config["password"],
                     "database": schema_name,
                     "disable_tls": True,
-                    "s3_path": f"s3://{self.destination.config['s3_bucket']}",
-                    "access_key_id": self.destination.config["access_key_id"],
-                    "secret_access_key": self.destination.config["secret_access_key"],
-                    "region": "local",
-                    "endpoint": f"http://{self.destination.config['s3_host']}:{self.destination.config['s3_port']}",
+                    "s3_path": f"s3://{self.transient_s3.bucket_name}",
+                    "access_key_id": self.transient_s3.access_key,
+                    "secret_access_key": self.transient_s3.secret_key,
+                    "region": self.transient_s3.region_name,
+                    "endpoint": self._resolve_host_mapping(self.transient_s3.endpoint_url),
                 },
             },
             "allow_update": True,
@@ -283,6 +419,16 @@ class PeerDBInterface:
 
         logger.info(f"Clickhouse peer for schema '{schema_name}' created successfully")
 
+    def check_mirror_status(self, schema_name):
+        current_state = "STATUS_UNKNOWN"
+        try:
+            payload = {"flowJobName": schema_name, "includeFlowInfo": False}
+            response = self._make_api_request("v1/mirrors/status", payload)
+            current_state = response.get("currentFlowState")
+        except Exception as e:
+            logger.debug(f"Error checking mirror status for schema '{schema_name}': {str(e)}")
+        return current_state
+
     def drop_mirror(self, schema_name):
         logger.info(f"Dropping mirror for schema: {schema_name}")
 
@@ -301,37 +447,42 @@ class PeerDBInterface:
 
         logger.info(f"Mirror for schema '{schema_name}' dropped successfully")
 
-    def check_mirror_status(self, schema_name):
-        logger.info(f"Checking mirror status for schema: {schema_name}")
-        max_attempts = 60
+    def wait_for_running_mirror(self, schema_name, max_attempts=360, sleep_interval=10):
+        logger.info(f"Waiting for mirror status to be 'STATUS_RUNNING' for schema: {schema_name}")
         attempt = 0
         while attempt < max_attempts:
-            payload = {"flowJobName": schema_name, "includeFlowInfo": False}
-            response = self._make_api_request("v1/mirrors/status", payload)
-            current_state = response.get("currentFlowState")
+            current_state = self.check_mirror_status(schema_name)
 
-            if current_state != "STATUS_SETUP":
-                logger.info(f"Mirror status for schema '{schema_name}' is: {current_state}")
+            if current_state == "STATUS_RUNNING":
+                logger.info(f"Mirror status for schema '{schema_name}' is now: {current_state}")
                 return current_state
 
             attempt += 1
-            time.sleep(10)
+            logger.info(f"Status is '{current_state}'. Waiting {sleep_interval} seconds before next check.")
+            time.sleep(sleep_interval)
 
         logger.warning(f"Mirror status check timed out for schema: {schema_name}")
         return None
 
-    def resync_operations(self, schema_name, table_names, resync, hard_resync):
+    def pre_init(self, schema_name, table_names, clickhouse_sql_path, resync, hard_resync):
+        logger.info("Running pre-init operations.")
         if resync:
-            logger.info(f"Resync requested. Performing {'hard' if hard_resync else 'simple'} resync operations.")
             self.drop_mirror(schema_name)
             self.transient_s3.delete_paths_with_schema(schema_name)
+            self.destination.teardown_from_sql_folder(clickhouse_sql_path)
+            self.source.drop_replication_slot(schema_name)
             if hard_resync:
                 self.destination.delete_existing_tables(table_names)
-            self.source.drop_replication_slot(schema_name)
-            logger.info("Resync operations completed.")
+        logger.info("Pre-init operations completed.")
+
+    def post_init(self, schema_name, table_names, clickhouse_sql_path, resync, hard_resync):
+        logger.info("Running post-init operations.")
+        self.destination.create_row_policies(schema_name, table_names)
+        if resync:
+            self.destination.initialize_from_sql_folder(clickhouse_sql_path)
+        logger.info("Post-init operations completed.")
 
     def create_mirror(self, schema_name, table_names, resync, hard_resync):
-        self.resync_operations(schema_name, table_names, resync, hard_resync)
         logger.info(f"Creating mirror for schema: {schema_name}")
 
         table_mappings = [
@@ -352,7 +503,7 @@ class PeerDBInterface:
             "snapshot_num_rows_per_partition": 1000000,
             "snapshot_max_parallel_workers": 1,
             "snapshot_num_tables_in_parallel": 1,
-            "resync": not hard_resync,
+            "resync": resync and not hard_resync,
             "initial_snapshot_only": False,
             "soft_delete_col_name": "_peerdb_is_deleted",
             "synced_at_col_name": "_peerdb_synced_at",
@@ -365,25 +516,42 @@ class PeerDBInterface:
                 f"Failed to create mirror for schema '{schema_name}': {response.get('errorMessage', response.get('message', 'Unknown error'))}"
             )
 
-        mirror_status = self.check_mirror_status(schema_name)
+        mirror_status = self.wait_for_running_mirror(schema_name)
         if mirror_status:
-            logger.info(f"Mirror status for schema '{schema_name}' is: {mirror_status}")
             logger.info(f"Mirror creation for schema '{schema_name}' completed successfully")
         else:
             logger.warning(f"Failed to confirm mirror status change for schema: {schema_name}")
 
-    def run_automation(self, schema_name, schema_tables, drop=False, resync=False, hard_resync=False):
+    def run_automation(
+        self,
+        schema_name,
+        schema_tables,
+        drop=False,
+        sync=False,
+        resync=False,
+        hard_resync=False,
+        clickhouse_sql_path=None,
+    ):
         logger.info(f"Starting automation for schema: {schema_name}")
 
-        table_names = [table.__tablename__ for table in schema_tables]
+        base_tables = [table for table, _ in schema_tables]
+        mirror_tablenames = [table.__tablename__ for table, should_replicate in schema_tables if should_replicate]
 
-        self.source.create_user()
-        self.source.create_tables(schema_tables, drop)
-        self.source.grant_permissions(schema_name)
-        self.source.create_publication(schema_name, table_names)
+        self.source.create_tables(base_tables, drop)
+        if not (sync or resync):
+            return
+
+        peerdb_user = self.config["user"]
+        peerdb_pwd = self.config["password"]
+
+        self.source.create_user(peerdb_user, peerdb_pwd)
+        self.source.grant_permissions(schema_name, peerdb_user)
+        self.source.create_publication(schema_name, mirror_tablenames)
+        self.destination.connect(schema_name)
         self.create_postgres_peer()
         self.create_clickhouse_peer(schema_name)
-        self.create_mirror(schema_name, table_names, resync, hard_resync)
-        self.destination.create_row_policies(schema_name, table_names)
+        self.pre_init(schema_name, mirror_tablenames, clickhouse_sql_path, resync, hard_resync)
+        self.create_mirror(schema_name, mirror_tablenames, resync, hard_resync)
+        self.post_init(schema_name, mirror_tablenames, clickhouse_sql_path, resync, hard_resync)
 
         logger.info(f"Automation completed successfully for schema: {schema_name}")