datamarket 0.9.22__py3-none-any.whl → 0.9.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/alchemy.py +37 -22
- {datamarket-0.9.22.dist-info → datamarket-0.9.24.dist-info}/METADATA +1 -1
- {datamarket-0.9.22.dist-info → datamarket-0.9.24.dist-info}/RECORD +5 -5
- {datamarket-0.9.22.dist-info → datamarket-0.9.24.dist-info}/WHEEL +1 -1
- {datamarket-0.9.22.dist-info → datamarket-0.9.24.dist-info}/LICENSE +0 -0
datamarket/interfaces/alchemy.py
CHANGED
|
@@ -6,11 +6,12 @@ from collections.abc import MutableMapping
|
|
|
6
6
|
from typing import Any, Iterator, List, Optional, Type, TypeVar
|
|
7
7
|
from urllib.parse import quote_plus
|
|
8
8
|
|
|
9
|
-
from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine,
|
|
9
|
+
from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, text
|
|
10
10
|
from sqlalchemy.dialects.postgresql import insert
|
|
11
11
|
from sqlalchemy.exc import IntegrityError
|
|
12
12
|
from sqlalchemy.ext.declarative import DeclarativeMeta
|
|
13
13
|
from sqlalchemy.orm import Session, sessionmaker
|
|
14
|
+
from enum import Enum
|
|
14
15
|
|
|
15
16
|
########################################################################################################################
|
|
16
17
|
# CLASSES
|
|
@@ -20,6 +21,11 @@ logger = logging.getLogger(__name__)
|
|
|
20
21
|
ModelType = TypeVar("ModelType", bound=DeclarativeMeta)
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
class CommitStrategy(Enum):
|
|
25
|
+
COMMIT_ON_SUCCESS = "commit_on_success"
|
|
26
|
+
FORCE_COMMIT = "force_commit"
|
|
27
|
+
|
|
28
|
+
|
|
23
29
|
class MockContext:
|
|
24
30
|
def __init__(self, column: SQLColumnExpression) -> None:
|
|
25
31
|
self.current_parameters = {}
|
|
@@ -188,7 +194,7 @@ class AlchemyInterface:
|
|
|
188
194
|
|
|
189
195
|
query_results.update({column_name: default_value}, synchronize_session=False)
|
|
190
196
|
|
|
191
|
-
def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> None:
|
|
197
|
+
def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> bool:
|
|
192
198
|
if self.session is None:
|
|
193
199
|
raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
|
|
194
200
|
|
|
@@ -203,8 +209,11 @@ class AlchemyInterface:
|
|
|
203
209
|
if not silent:
|
|
204
210
|
logger.info(f"{alchemy_obj} already in db (savepoint rolled back)")
|
|
205
211
|
# Do not re-raise, allow outer transaction/loop to continue
|
|
212
|
+
return False
|
|
206
213
|
|
|
207
|
-
|
|
214
|
+
return True
|
|
215
|
+
|
|
216
|
+
def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> bool:
|
|
208
217
|
if self.session is None:
|
|
209
218
|
raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
|
|
210
219
|
|
|
@@ -233,12 +242,16 @@ class AlchemyInterface:
|
|
|
233
242
|
if not silent:
|
|
234
243
|
logger.info(f"could not upsert {alchemy_obj} (savepoint rolled back)")
|
|
235
244
|
# Do not re-raise, allow outer transaction/loop to continue
|
|
245
|
+
return False
|
|
246
|
+
|
|
247
|
+
return True
|
|
236
248
|
|
|
237
249
|
def windowed_query(
|
|
238
250
|
self,
|
|
239
251
|
stmt: Select[Any],
|
|
240
252
|
order_by: List[SQLColumnExpression[Any]],
|
|
241
253
|
windowsize: int,
|
|
254
|
+
commit_strategy: CommitStrategy = CommitStrategy.COMMIT_ON_SUCCESS,
|
|
242
255
|
) -> Iterator[Result[Any]]:
|
|
243
256
|
"""
|
|
244
257
|
Executes a windowed query, fetching each window in a separate, short-lived session.
|
|
@@ -247,6 +260,8 @@ class AlchemyInterface:
|
|
|
247
260
|
stmt: The SQL select statement to execute.
|
|
248
261
|
order_by: The columns to use for ordering.
|
|
249
262
|
windowsize: The number of rows to fetch in each window.
|
|
263
|
+
commit_strategy: The strategy to use for committing the session after each window.
|
|
264
|
+
Defaults to CommitStrategy.COMMIT_ON_SUCCESS.
|
|
250
265
|
|
|
251
266
|
Returns:
|
|
252
267
|
An iterator of Result objects, each containing a window of data.
|
|
@@ -254,19 +269,13 @@ class AlchemyInterface:
|
|
|
254
269
|
|
|
255
270
|
More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
|
|
256
271
|
"""
|
|
257
|
-
# Add row_number over the specified order
|
|
258
|
-
row_number = func.row_number().over(order_by=order_by).label("row_number")
|
|
259
272
|
|
|
260
|
-
#
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
# Create an outer query that selects from the subquery
|
|
265
|
-
cols = [c for c in subq.c]
|
|
266
|
-
outer_query = select(*cols)
|
|
267
|
-
|
|
268
|
-
last_row_number = 0
|
|
273
|
+
# Find id column in stmt
|
|
274
|
+
if not any(column.get("entity").id for column in stmt.column_descriptions):
|
|
275
|
+
raise Exception("Column 'id' not found in any entity of the query.")
|
|
276
|
+
id_column = stmt.column_descriptions[0]["entity"].id
|
|
269
277
|
|
|
278
|
+
last_id = 0
|
|
270
279
|
while True:
|
|
271
280
|
session_active = False
|
|
272
281
|
commit_needed = False
|
|
@@ -275,7 +284,7 @@ class AlchemyInterface:
|
|
|
275
284
|
session_active = True
|
|
276
285
|
|
|
277
286
|
# Filter on row_number in the outer query
|
|
278
|
-
current_query =
|
|
287
|
+
current_query = stmt.where(id_column > last_id).order_by(order_by[0], *order_by[1:]).limit(windowsize)
|
|
279
288
|
result = self.session.execute(current_query)
|
|
280
289
|
|
|
281
290
|
# Create a FrozenResult to allow peeking at the data without consuming
|
|
@@ -286,19 +295,25 @@ class AlchemyInterface:
|
|
|
286
295
|
break
|
|
287
296
|
|
|
288
297
|
# Update for next iteration
|
|
289
|
-
|
|
298
|
+
last_id = chunk[-1].id
|
|
290
299
|
|
|
291
300
|
# Create a new Result object from the FrozenResult
|
|
292
301
|
yield_result = frozen_result()
|
|
293
302
|
|
|
294
|
-
# Remove row_number from result before yielding
|
|
295
|
-
# Ensure we don't yield the row_number column itself
|
|
296
|
-
original_col_count = len(cols) - 1
|
|
297
|
-
yield_result = yield_result.columns(*range(original_col_count))
|
|
298
|
-
|
|
299
303
|
yield yield_result
|
|
300
304
|
commit_needed = True
|
|
301
305
|
|
|
302
306
|
finally:
|
|
303
307
|
if session_active and self.session:
|
|
304
|
-
|
|
308
|
+
if commit_strategy == CommitStrategy.FORCE_COMMIT:
|
|
309
|
+
# For forced commit, always attempt to commit.
|
|
310
|
+
# The self.stop() method already handles potential exceptions during commit/rollback.
|
|
311
|
+
self.stop(commit=True)
|
|
312
|
+
elif commit_strategy == CommitStrategy.COMMIT_ON_SUCCESS:
|
|
313
|
+
# Commit only if no exception occurred before yielding the result.
|
|
314
|
+
self.stop(commit=commit_needed)
|
|
315
|
+
else:
|
|
316
|
+
# Fallback or error for unknown strategy, though type hinting should prevent this.
|
|
317
|
+
# For safety, default to rollback.
|
|
318
|
+
logger.warning(f"Unknown commit strategy: {commit_strategy}. Defaulting to rollback.")
|
|
319
|
+
self.stop(commit=False)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
|
|
2
2
|
datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
datamarket/interfaces/alchemy.py,sha256=
|
|
3
|
+
datamarket/interfaces/alchemy.py,sha256=pL71oG0ld2gx6uGSdgBzSv6RnQPJ3Ps2yTVPCCTXGLU,13178
|
|
4
4
|
datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
|
|
@@ -18,7 +18,7 @@ datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,
|
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
20
|
datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
|
|
21
|
-
datamarket-0.9.
|
|
22
|
-
datamarket-0.9.
|
|
23
|
-
datamarket-0.9.
|
|
24
|
-
datamarket-0.9.
|
|
21
|
+
datamarket-0.9.24.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
22
|
+
datamarket-0.9.24.dist-info/METADATA,sha256=18l6aTHI7ejx_26UJ6QtDhPdMbIGMtiToq-qFrx2EVw,6459
|
|
23
|
+
datamarket-0.9.24.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
24
|
+
datamarket-0.9.24.dist-info/RECORD,,
|
|
File without changes
|