datamarket 0.9.8__tar.gz → 0.9.10__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (23)
  1. {datamarket-0.9.8 → datamarket-0.9.10}/PKG-INFO +1 -1
  2. {datamarket-0.9.8 → datamarket-0.9.10}/pyproject.toml +1 -1
  3. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/alchemy.py +29 -0
  4. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/peerdb.py +4 -2
  5. {datamarket-0.9.8 → datamarket-0.9.10}/LICENSE +0 -0
  6. {datamarket-0.9.8 → datamarket-0.9.10}/README.md +0 -0
  7. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/__init__.py +0 -0
  8. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/__init__.py +0 -0
  9. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/aws.py +0 -0
  10. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/drive.py +0 -0
  11. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/ftp.py +0 -0
  12. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/nominatim.py +0 -0
  13. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/proxy.py +0 -0
  14. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/interfaces/tinybird.py +0 -0
  15. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/params/__init__.py +0 -0
  16. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/params/nominatim.py +0 -0
  17. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/__init__.py +0 -0
  18. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/airflow.py +0 -0
  19. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/alchemy.py +0 -0
  20. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/main.py +0 -0
  21. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/selenium.py +0 -0
  22. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/soda.py +0 -0
  23. {datamarket-0.9.8 → datamarket-0.9.10}/src/datamarket/utils/typer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.8
3
+ Version: 0.9.10
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.9.8"
3
+ version = "0.9.10"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -5,6 +5,7 @@ import logging
5
5
  from urllib.parse import quote_plus
6
6
 
7
7
  from sqlalchemy import DDL, create_engine, text
8
+ from sqlalchemy.dialects.postgresql import insert
8
9
  from sqlalchemy.exc import IntegrityError
9
10
  from sqlalchemy.orm import sessionmaker
10
11
 
@@ -117,6 +118,34 @@ class AlchemyInterface:
117
118
 
118
119
  self.session.rollback()
119
120
 
121
+ def upsert_alchemy_obj(self, alchemy_obj, index_elements, silent=False):
122
+ if not silent:
123
+ logger.info(f"upserting {alchemy_obj}")
124
+
125
+ primary_keys = list(
126
+ column.name for column in alchemy_obj.__table__.primary_key.columns.values()
127
+ )
128
+ obj_dict = {
129
+ column.name: getattr(alchemy_obj, column.name)
130
+ for column in alchemy_obj.__table__.columns
131
+ if column.name not in primary_keys
132
+ }
133
+
134
+ statement = (
135
+ insert(alchemy_obj.__table__)
136
+ .values(obj_dict)
137
+ .on_conflict_do_update(index_elements=index_elements, set_=obj_dict)
138
+ )
139
+
140
+ try:
141
+ self.session.execute(statement)
142
+ self.session.commit()
143
+ except IntegrityError:
144
+ if not silent:
145
+ logger.info(f"could not upsert {alchemy_obj}")
146
+
147
+ self.session.rollback()
148
+
120
149
  def reset_column(self, query_results, column_name):
121
150
  if not query_results:
122
151
  logger.warning("No objects to reset column for.")
@@ -112,6 +112,8 @@ class ClickhousePeer:
112
112
  def __init__(self, config):
113
113
  if "clickhouse" in config:
114
114
  self.config = config["clickhouse"]
115
+ self.credentials = {key: self.config[key] for key in ["user", "password", "host", "port"]}
116
+
115
117
  else:
116
118
  logger.warning("no clickhouse section in config")
117
119
 
@@ -121,7 +123,7 @@ class ClickhousePeer:
121
123
 
122
124
  self.ensure_database_exists(database)
123
125
  self.config["database"] = database
124
- self.client = clickhouse_driver.Client(**self.config)
126
+ self.client = clickhouse_driver.Client(**self.credentials)
125
127
 
126
128
  def _check_connection(self):
127
129
  if self.client is None:
@@ -129,7 +131,7 @@ class ClickhousePeer:
129
131
 
130
132
  def ensure_database_exists(self, database):
131
133
  logger.info(f"Checking if database '{database}' exists in Clickhouse")
132
- temp_client = clickhouse_driver.Client(**self.config)
134
+ temp_client = clickhouse_driver.Client(**self.credentials)
133
135
  databases = temp_client.execute("SHOW DATABASES")
134
136
  if database not in [db[0] for db in databases]:
135
137
  logger.info(f"Creating database '{database}'")
File without changes
File without changes