datamarket 0.9.6__tar.gz → 0.9.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamarket might be problematic.

Files changed (23)
  1. {datamarket-0.9.6 → datamarket-0.9.7}/PKG-INFO +1 -1
  2. {datamarket-0.9.6 → datamarket-0.9.7}/pyproject.toml +1 -1
  3. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/peerdb.py +28 -18
  4. {datamarket-0.9.6 → datamarket-0.9.7}/LICENSE +0 -0
  5. {datamarket-0.9.6 → datamarket-0.9.7}/README.md +0 -0
  6. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/__init__.py +0 -0
  8. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/alchemy.py +0 -0
  9. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/aws.py +0 -0
  10. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/drive.py +0 -0
  11. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/ftp.py +0 -0
  12. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/nominatim.py +0 -0
  13. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/proxy.py +0 -0
  14. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/interfaces/tinybird.py +0 -0
  15. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/params/__init__.py +0 -0
  16. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/params/nominatim.py +0 -0
  17. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/__init__.py +0 -0
  18. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/airflow.py +0 -0
  19. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/alchemy.py +0 -0
  20. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/main.py +0 -0
  21. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/selenium.py +0 -0
  22. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/soda.py +0 -0
  23. {datamarket-0.9.6 → datamarket-0.9.7}/src/datamarket/utils/typer.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamarket
-Version: 0.9.6
+Version: 0.9.7
 Summary: Utilities that integrate advanced scraping knowledge into just one library.
 License: GPL-3.0-or-later
 Author: DataMarket
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "datamarket"
-version = "0.9.6"
+version = "0.9.7"
 description = "Utilities that integrate advanced scraping knowledge into just one library."
 authors = ["DataMarket <techsupport@datamarket.es>"]
 license = "GPL-3.0-or-later"
@@ -15,6 +15,13 @@ from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
 
 from .alchemy import AlchemyInterface
 
+########################################################################################################################
+# EXCEPTIONS
+
+class DatabaseNotConnectedError(Exception):
+    """Custom error for when database is not connected."""
+    pass
+
 ########################################################################################################################
 # CLASSES
 
@@ -105,32 +112,32 @@ class ClickhousePeer:
     def __init__(self, config):
         if "clickhouse" in config:
             self.config = config["clickhouse"]
-            self.ensure_database_exists()
-            self.client = self._create_client(database=self.config["database"])
         else:
             logger.warning("no clickhouse section in config")
 
-    def _create_client(self, database=None):
-        client_config = {
-            "host": self.config["host"],
-            "port": self.config["port"],
-            "user": self.config["user"],
-            "password": self.config["password"],
-        }
-        if database:
-            client_config["database"] = database
-        return clickhouse_driver.Client(**client_config)
+    def connect(self, database):
+        if not database:
+            return
+
+        self.ensure_database_exists(database)
+        self.config["database"] = database
+        self.client = clickhouse_driver.Client(**self.config)
+
+    def _check_connection(self):
+        if self.client is None:
+            raise DatabaseNotConnectedError("Database not connected. Call connect() method first.")
 
-    def ensure_database_exists(self):
-        logger.info(f"Checking if database '{self.config['database']}' exists in Clickhouse")
-        temp_client = self._create_client()
+    def ensure_database_exists(self, database):
+        logger.info(f"Checking if database '{database}' exists in Clickhouse")
+        temp_client = clickhouse_driver.Client(**self.config)
         databases = temp_client.execute("SHOW DATABASES")
-        if self.config["database"] not in [db[0] for db in databases]:
-            logger.info(f"Creating database '{self.config['database']}'")
-            temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {self.config['database']}")
+        if database not in [db[0] for db in databases]:
+            logger.info(f"Creating database '{database}'")
+            temp_client.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
         temp_client.disconnect()
 
     def delete_existing_tables(self, table_names):
+        self._check_connection()
         logger.info(f"Deleting existing tables in Clickhouse for database: {self.config['database']}")
 
         all_tables = self.client.execute("SHOW TABLES")
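The hunk above changes how ClickhousePeer is meant to be used: the constructor no longer connects, and the new connect(database) method has to be called before helpers such as delete_existing_tables(), which now pass through _check_connection() and are intended to raise the new DatabaseNotConnectedError otherwise. A minimal sketch of the new call pattern, assuming the module path shown in the file list and a hypothetical config whose keys match those the class forwards to clickhouse_driver.Client (host, port, user, password):

    from datamarket.interfaces.peerdb import ClickhousePeer

    # Hypothetical connection settings; adjust to your ClickHouse instance.
    config = {
        "clickhouse": {
            "host": "localhost",
            "port": 9000,
            "user": "default",
            "password": "",
        }
    }

    peer = ClickhousePeer(config)
    peer.connect("analytics")  # ensures the "analytics" database exists, then opens the client
    peer.delete_existing_tables(["events", "sessions"])  # guarded by _check_connection()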
@@ -156,6 +163,7 @@ class ClickhousePeer:
         logger.info("Finished deleting existing tables in Clickhouse")
 
     def create_row_policies(self, schema_name, table_names):
+        self._check_connection()
         logger.info(f"Creating row policies for schema: {schema_name}")
         for table_name in table_names:
             policy_name = "non_deleted"
@@ -167,6 +175,7 @@ class ClickhousePeer:
             logger.info(f"Created row policy '{policy_name}' for table '{table_name}'")
 
     def execute_sql_file(self, file_path):
+        self._check_connection()
         try:
             with file_path.open("r") as sql_file:
                 sql_content = sql_file.read()
@@ -501,6 +510,7 @@ class PeerDBInterface:
         self.source.create_user(peerdb_user, peerdb_pwd)
         self.source.grant_permissions(schema_name, peerdb_user)
         self.source.create_publication(schema_name, mirror_tablenames)
+        self.destination.connect(schema_name)
         self.create_postgres_peer()
         self.create_clickhouse_peer(schema_name)
         self.pre_init(schema_name, mirror_tablenames, clickhouse_sql_path, resync, hard_resync)
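This last hunk wires the new API into PeerDBInterface: the destination ClickhousePeer is connected to a database named after the schema before the Postgres and ClickHouse peers are created, so later steps that query the destination do not trip the _check_connection() guard. Continuing the sketch above, a caller that might run before connect() could also guard on the new exception; the file path and database name here are hypothetical, and execute_sql_file() is given a pathlib.Path because it calls file_path.open():

    from pathlib import Path

    from datamarket.interfaces.peerdb import DatabaseNotConnectedError

    try:
        peer.execute_sql_file(Path("clickhouse/schema.sql"))
    except DatabaseNotConnectedError:
        peer.connect("analytics")  # connect lazily, then retry
        peer.execute_sql_file(Path("clickhouse/schema.sql"))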