datamarket 0.9.7__tar.gz → 0.9.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (23)
  1. {datamarket-0.9.7 → datamarket-0.9.9}/PKG-INFO +1 -1
  2. {datamarket-0.9.7 → datamarket-0.9.9}/pyproject.toml +1 -1
  3. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/alchemy.py +39 -5
  4. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/peerdb.py +4 -2
  5. {datamarket-0.9.7 → datamarket-0.9.9}/LICENSE +0 -0
  6. {datamarket-0.9.7 → datamarket-0.9.9}/README.md +0 -0
  7. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/__init__.py +0 -0
  8. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/__init__.py +0 -0
  9. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/aws.py +0 -0
  10. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/drive.py +0 -0
  11. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/ftp.py +0 -0
  12. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/nominatim.py +0 -0
  13. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/proxy.py +0 -0
  14. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/interfaces/tinybird.py +0 -0
  15. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/params/__init__.py +0 -0
  16. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/params/nominatim.py +0 -0
  17. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/__init__.py +0 -0
  18. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/airflow.py +0 -0
  19. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/alchemy.py +0 -0
  20. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/main.py +0 -0
  21. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/selenium.py +0 -0
  22. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/soda.py +0 -0
  23. {datamarket-0.9.7 → datamarket-0.9.9}/src/datamarket/utils/typer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.7
3
+ Version: 0.9.9
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.9.7"
3
+ version = "0.9.9"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -4,7 +4,7 @@
4
4
  import logging
5
5
  from urllib.parse import quote_plus
6
6
 
7
- from sqlalchemy import DDL, create_engine
7
+ from sqlalchemy import DDL, create_engine, text
8
8
  from sqlalchemy.exc import IntegrityError
9
9
  from sqlalchemy.orm import sessionmaker
10
10
 
@@ -14,6 +14,13 @@ from sqlalchemy.orm import sessionmaker
14
14
  logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
+ class MockContext:
18
+ def __init__(self, column):
19
+ self.current_parameters = {}
20
+ self.current_column = column
21
+ self.connection = None
22
+
23
+
17
24
  class AlchemyInterface:
18
25
  def __init__(self, config):
19
26
  if "db" in config:
@@ -28,10 +35,10 @@ class AlchemyInterface:
28
35
 
29
36
  def get_conn_str(self):
30
37
  return (
31
- f'{self.config["engine"]}://'
32
- f'{self.config["user"]}:{quote_plus(self.config["password"])}'
33
- f'@{self.config["host"]}:{self.config["port"]}'
34
- f'/{self.config["database"]}'
38
+ f"{self.config['engine']}://"
39
+ f"{self.config['user']}:{quote_plus(self.config['password'])}"
40
+ f"@{self.config['host']}:{self.config['port']}"
41
+ f"/{self.config['database']}"
35
42
  )
36
43
 
37
44
  @staticmethod
@@ -109,3 +116,30 @@ class AlchemyInterface:
109
116
  logger.info(f"{alchemy_obj} already in db")
110
117
 
111
118
  self.session.rollback()
119
+
120
+ def reset_column(self, query_results, column_name):
121
+ if not query_results:
122
+ logger.warning("No objects to reset column for.")
123
+ return
124
+
125
+ first_obj = query_results[0]
126
+ model_class = first_obj.__class__
127
+ table = model_class.__table__
128
+
129
+ if column_name not in table.columns:
130
+ logger.warning(f"Column {column_name} does not exist in table {table.name}.")
131
+ return
132
+
133
+ column = table.columns[column_name]
134
+
135
+ if column.server_default is not None:
136
+ query_results.update({column_name: text("DEFAULT")}, synchronize_session=False)
137
+ elif column.default is not None:
138
+ default_value = column.default.arg
139
+ if callable(default_value):
140
+ default_value = default_value(MockContext(column))
141
+ query_results.update({column_name: default_value}, synchronize_session=False)
142
+ else:
143
+ raise ValueError(f"Column '{column_name}' doesn't have a default value defined.")
144
+
145
+ self.session.commit()
@@ -112,6 +112,8 @@ class ClickhousePeer:
112
112
  def __init__(self, config):
113
113
  if "clickhouse" in config:
114
114
  self.config = config["clickhouse"]
115
+ self.credentials = {key: self.config[key] for key in ["user", "password", "host", "port"]}
116
+
115
117
  else:
116
118
  logger.warning("no clickhouse section in config")
117
119
 
@@ -121,7 +123,7 @@ class ClickhousePeer:
121
123
 
122
124
  self.ensure_database_exists(database)
123
125
  self.config["database"] = database
124
- self.client = clickhouse_driver.Client(**self.config)
126
+ self.client = clickhouse_driver.Client(**self.credentials)
125
127
 
126
128
  def _check_connection(self):
127
129
  if self.client is None:
@@ -129,7 +131,7 @@ class ClickhousePeer:
129
131
 
130
132
  def ensure_database_exists(self, database):
131
133
  logger.info(f"Checking if database '{database}' exists in Clickhouse")
132
- temp_client = clickhouse_driver.Client(**self.config)
134
+ temp_client = clickhouse_driver.Client(**self.credentials)
133
135
  databases = temp_client.execute("SHOW DATABASES")
134
136
  if database not in [db[0] for db in databases]:
135
137
  logger.info(f"Creating database '{database}'")
File without changes
File without changes