datamarket 0.9.20__tar.gz → 0.9.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. See the release details on the package registry for more information.
- {datamarket-0.9.20 → datamarket-0.9.21}/PKG-INFO +1 -1
- {datamarket-0.9.20 → datamarket-0.9.21}/pyproject.toml +1 -1
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/alchemy.py +113 -41
- {datamarket-0.9.20 → datamarket-0.9.21}/LICENSE +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/README.md +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.20 → datamarket-0.9.21}/src/datamarket/utils/typer.py +0 -0
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections.abc import MutableMapping
|
|
6
|
-
from typing import Any, Iterator, List, Type, TypeVar
|
|
6
|
+
from typing import Any, Iterator, List, Optional, Type, TypeVar
|
|
7
7
|
from urllib.parse import quote_plus
|
|
8
8
|
|
|
9
9
|
from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
|
|
10
10
|
from sqlalchemy.dialects.postgresql import insert
|
|
11
11
|
from sqlalchemy.exc import IntegrityError
|
|
12
12
|
from sqlalchemy.ext.declarative import DeclarativeMeta
|
|
13
|
-
from sqlalchemy.orm import sessionmaker
|
|
13
|
+
from sqlalchemy.orm import Session, sessionmaker
|
|
14
14
|
|
|
15
15
|
########################################################################################################################
|
|
16
16
|
# CLASSES
|
|
@@ -29,16 +29,58 @@ class MockContext:
|
|
|
29
29
|
|
|
30
30
|
class AlchemyInterface:
|
|
31
31
|
def __init__(self, config: MutableMapping) -> None:
|
|
32
|
+
self.session: Optional[Session] = None
|
|
32
33
|
if "db" in config:
|
|
33
34
|
self.config = config["db"]
|
|
34
|
-
|
|
35
35
|
self.engine = create_engine(self.get_conn_str())
|
|
36
|
-
self.
|
|
37
|
-
self.cursor = self.session.connection().connection.cursor()
|
|
38
|
-
|
|
36
|
+
self.Session = sessionmaker(bind=self.engine)
|
|
39
37
|
else:
|
|
40
38
|
logger.warning("no db section in config")
|
|
41
39
|
|
|
40
|
+
def __enter__(self) -> "AlchemyInterface":
|
|
41
|
+
"""Enter the runtime context related to this object (starts session)."""
|
|
42
|
+
self.start()
|
|
43
|
+
return self
|
|
44
|
+
|
|
45
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
46
|
+
"""Exit the runtime context related to this object (stops session)."""
|
|
47
|
+
should_commit = exc_type is None
|
|
48
|
+
self.stop(commit=should_commit)
|
|
49
|
+
|
|
50
|
+
def start(self) -> None:
|
|
51
|
+
"""Starts a new SQLAlchemy session manually."""
|
|
52
|
+
if not hasattr(self, "Session"):
|
|
53
|
+
raise AttributeError("Database configuration not initialized. Cannot create session.")
|
|
54
|
+
if self.session is not None:
|
|
55
|
+
raise RuntimeError("Session already active.")
|
|
56
|
+
self.session = self.Session()
|
|
57
|
+
logger.debug("SQLAlchemy session started manually.")
|
|
58
|
+
|
|
59
|
+
def stop(self, commit: bool = True) -> None:
|
|
60
|
+
"""Stops the manually started SQLAlchemy session."""
|
|
61
|
+
if self.session is None:
|
|
62
|
+
logger.warning("No active session to stop.")
|
|
63
|
+
return
|
|
64
|
+
|
|
65
|
+
try:
|
|
66
|
+
if commit:
|
|
67
|
+
logger.debug("Committing SQLAlchemy session before stopping.")
|
|
68
|
+
self.session.commit()
|
|
69
|
+
else:
|
|
70
|
+
logger.debug("Rolling back SQLAlchemy session before stopping.")
|
|
71
|
+
self.session.rollback()
|
|
72
|
+
except Exception as e:
|
|
73
|
+
logger.error(f"Exception during session commit/rollback on stop: {e}", exc_info=True)
|
|
74
|
+
try:
|
|
75
|
+
self.session.rollback()
|
|
76
|
+
except Exception as rb_exc:
|
|
77
|
+
logger.error(f"Exception during secondary rollback attempt on stop: {rb_exc}", exc_info=True)
|
|
78
|
+
raise
|
|
79
|
+
finally:
|
|
80
|
+
logger.debug("Closing SQLAlchemy session.")
|
|
81
|
+
self.session.close()
|
|
82
|
+
self.session = None
|
|
83
|
+
|
|
42
84
|
def get_conn_str(self):
|
|
43
85
|
return (
|
|
44
86
|
f"{self.config['engine']}://"
|
|
@@ -110,6 +152,16 @@ class AlchemyInterface:
|
|
|
110
152
|
self.create_tables(tables)
|
|
111
153
|
|
|
112
154
|
def reset_column(self, query_results: List[Result[Any]], column_name: str) -> None:
|
|
155
|
+
"""
|
|
156
|
+
Reset a column to its default value for a list of query results.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
query_results: List of query results to update
|
|
160
|
+
column_name: Name of the column to reset
|
|
161
|
+
"""
|
|
162
|
+
if self.session is None:
|
|
163
|
+
raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
|
|
164
|
+
|
|
113
165
|
if not query_results:
|
|
114
166
|
logger.warning("No objects to reset column for.")
|
|
115
167
|
return
|
|
@@ -124,33 +176,38 @@ class AlchemyInterface:
|
|
|
124
176
|
|
|
125
177
|
column = table.columns[column_name]
|
|
126
178
|
|
|
179
|
+
# Determine the default value to use
|
|
127
180
|
if column.server_default is not None:
|
|
128
|
-
|
|
181
|
+
default_value = text("DEFAULT")
|
|
129
182
|
elif column.default is not None:
|
|
130
183
|
default_value = column.default.arg
|
|
131
184
|
if callable(default_value):
|
|
132
185
|
default_value = default_value(MockContext(column))
|
|
133
|
-
query_results.update({column_name: default_value}, synchronize_session=False)
|
|
134
186
|
else:
|
|
135
187
|
raise ValueError(f"Column '{column_name}' doesn't have a default value defined.")
|
|
136
188
|
|
|
137
|
-
|
|
189
|
+
query_results.update({column_name: default_value}, synchronize_session=False)
|
|
138
190
|
|
|
139
191
|
def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> None:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
logger.info(f"adding {alchemy_obj}...")
|
|
143
|
-
|
|
144
|
-
self.session.add(alchemy_obj)
|
|
145
|
-
self.session.commit()
|
|
192
|
+
if self.session is None:
|
|
193
|
+
raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
|
|
146
194
|
|
|
195
|
+
try:
|
|
196
|
+
# Use a savepoint (nested transaction)
|
|
197
|
+
with self.session.begin_nested():
|
|
198
|
+
if not silent:
|
|
199
|
+
logger.info(f"adding {alchemy_obj}...")
|
|
200
|
+
self.session.add(alchemy_obj)
|
|
147
201
|
except IntegrityError:
|
|
202
|
+
# Rollback is handled automatically by begin_nested() context manager on error
|
|
148
203
|
if not silent:
|
|
149
|
-
logger.info(f"{alchemy_obj} already in db")
|
|
150
|
-
|
|
151
|
-
self.session.rollback()
|
|
204
|
+
logger.info(f"{alchemy_obj} already in db (savepoint rolled back)")
|
|
205
|
+
# Do not re-raise, allow outer transaction/loop to continue
|
|
152
206
|
|
|
153
207
|
def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> None:
|
|
208
|
+
if self.session is None:
|
|
209
|
+
raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
|
|
210
|
+
|
|
154
211
|
if not silent:
|
|
155
212
|
logger.info(f"upserting {alchemy_obj}")
|
|
156
213
|
|
|
@@ -168,13 +225,14 @@ class AlchemyInterface:
|
|
|
168
225
|
)
|
|
169
226
|
|
|
170
227
|
try:
|
|
171
|
-
|
|
172
|
-
self.session.
|
|
228
|
+
# Use a savepoint (nested transaction)
|
|
229
|
+
with self.session.begin_nested():
|
|
230
|
+
self.session.execute(statement)
|
|
173
231
|
except IntegrityError:
|
|
232
|
+
# Rollback is handled automatically by begin_nested() context manager on error
|
|
174
233
|
if not silent:
|
|
175
|
-
logger.info(f"could not upsert {alchemy_obj}")
|
|
176
|
-
|
|
177
|
-
self.session.rollback()
|
|
234
|
+
logger.info(f"could not upsert {alchemy_obj} (savepoint rolled back)")
|
|
235
|
+
# Do not re-raise, allow outer transaction/loop to continue
|
|
178
236
|
|
|
179
237
|
def windowed_query(
|
|
180
238
|
self,
|
|
@@ -183,7 +241,7 @@ class AlchemyInterface:
|
|
|
183
241
|
windowsize: int,
|
|
184
242
|
) -> Iterator[Result[Any]]:
|
|
185
243
|
"""
|
|
186
|
-
Executes a windowed query
|
|
244
|
+
Executes a windowed query, fetching each window in a separate, short-lived session.
|
|
187
245
|
|
|
188
246
|
Args:
|
|
189
247
|
stmt: The SQL select statement to execute.
|
|
@@ -192,14 +250,15 @@ class AlchemyInterface:
|
|
|
192
250
|
|
|
193
251
|
Returns:
|
|
194
252
|
An iterator of Result objects, each containing a window of data.
|
|
253
|
+
The session used to fetch the Result is closed immediately after yielding.
|
|
195
254
|
|
|
196
255
|
More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
|
|
197
256
|
"""
|
|
198
257
|
# Add row_number over the specified order
|
|
199
258
|
row_number = func.row_number().over(order_by=order_by).label("row_number")
|
|
200
259
|
|
|
201
|
-
# Add the windowing column to the statement
|
|
202
|
-
inner_stmt = stmt.add_columns(row_number).order_by(row_number)
|
|
260
|
+
# Add the windowing column to the statement
|
|
261
|
+
inner_stmt = stmt.with_session(None).add_columns(row_number).order_by(row_number)
|
|
203
262
|
subq = inner_stmt.subquery()
|
|
204
263
|
|
|
205
264
|
# Create an outer query that selects from the subquery
|
|
@@ -209,24 +268,37 @@ class AlchemyInterface:
|
|
|
209
268
|
last_row_number = 0
|
|
210
269
|
|
|
211
270
|
while True:
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
271
|
+
session_active = False
|
|
272
|
+
commit_needed = False
|
|
273
|
+
try:
|
|
274
|
+
self.start()
|
|
275
|
+
session_active = True
|
|
276
|
+
|
|
277
|
+
# Filter on row_number in the outer query
|
|
278
|
+
current_query = outer_query.where(subq.c.row_number > last_row_number).limit(windowsize)
|
|
279
|
+
result = self.session.execute(current_query)
|
|
280
|
+
|
|
281
|
+
# Create a FrozenResult to allow peeking at the data without consuming
|
|
282
|
+
frozen_result: FrozenResult = result.freeze()
|
|
283
|
+
chunk = frozen_result().all()
|
|
215
284
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
chunk = frozen_result().all()
|
|
285
|
+
if not chunk:
|
|
286
|
+
break
|
|
219
287
|
|
|
220
|
-
|
|
221
|
-
|
|
288
|
+
# Update for next iteration
|
|
289
|
+
last_row_number = chunk[-1].row_number
|
|
222
290
|
|
|
223
|
-
|
|
224
|
-
|
|
291
|
+
# Create a new Result object from the FrozenResult
|
|
292
|
+
yield_result = frozen_result()
|
|
225
293
|
|
|
226
|
-
|
|
227
|
-
|
|
294
|
+
# Remove row_number from result before yielding
|
|
295
|
+
# Ensure we don't yield the row_number column itself
|
|
296
|
+
original_col_count = len(cols) - 1
|
|
297
|
+
yield_result = yield_result.columns(*range(original_col_count))
|
|
228
298
|
|
|
229
|
-
|
|
230
|
-
|
|
299
|
+
yield yield_result
|
|
300
|
+
commit_needed = True
|
|
231
301
|
|
|
232
|
-
|
|
302
|
+
finally:
|
|
303
|
+
if session_active and self.session:
|
|
304
|
+
self.stop(commit=commit_needed)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|