datamarket 0.9.23__tar.gz → 0.9.25__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (24)
  1. {datamarket-0.9.23 → datamarket-0.9.25}/PKG-INFO +1 -1
  2. {datamarket-0.9.23 → datamarket-0.9.25}/pyproject.toml +1 -1
  3. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/alchemy.py +55 -8
  4. {datamarket-0.9.23 → datamarket-0.9.25}/LICENSE +0 -0
  5. {datamarket-0.9.23 → datamarket-0.9.25}/README.md +0 -0
  6. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/__init__.py +0 -0
  7. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/__init__.py +0 -0
  8. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/aws.py +0 -0
  9. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/drive.py +0 -0
  10. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/ftp.py +0 -0
  11. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/nominatim.py +0 -0
  12. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/peerdb.py +0 -0
  13. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/proxy.py +0 -0
  14. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/interfaces/tinybird.py +0 -0
  15. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/params/__init__.py +0 -0
  16. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/params/nominatim.py +0 -0
  17. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/__init__.py +0 -0
  18. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/airflow.py +0 -0
  19. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/alchemy.py +0 -0
  20. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/main.py +0 -0
  21. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/selenium.py +0 -0
  22. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/soda.py +0 -0
  23. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/typer.py +0 -0
  24. {datamarket-0.9.23 → datamarket-0.9.25}/src/datamarket/utils/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.23
3
+ Version: 0.9.25
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.9.23"
3
+ version = "0.9.25"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -6,11 +6,13 @@ from collections.abc import MutableMapping
6
6
  from typing import Any, Iterator, List, Optional, Type, TypeVar
7
7
  from urllib.parse import quote_plus
8
8
 
9
- from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
9
+ from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, text
10
10
  from sqlalchemy.dialects.postgresql import insert
11
11
  from sqlalchemy.exc import IntegrityError
12
12
  from sqlalchemy.ext.declarative import DeclarativeMeta
13
13
  from sqlalchemy.orm import Session, sessionmaker
14
+ from sqlalchemy.sql.expression import ClauseElement
15
+ from enum import Enum
14
16
 
15
17
  ########################################################################################################################
16
18
  # CLASSES
@@ -20,6 +22,11 @@ logger = logging.getLogger(__name__)
20
22
  ModelType = TypeVar("ModelType", bound=DeclarativeMeta)
21
23
 
22
24
 
25
+ class CommitStrategy(Enum):
26
+ COMMIT_ON_SUCCESS = "commit_on_success"
27
+ FORCE_COMMIT = "force_commit"
28
+
29
+
23
30
  class MockContext:
24
31
  def __init__(self, column: SQLColumnExpression) -> None:
25
32
  self.current_parameters = {}
@@ -214,17 +221,43 @@ class AlchemyInterface:
214
221
  if not silent:
215
222
  logger.info(f"upserting {alchemy_obj}")
216
223
 
217
- primary_keys = list(col.name for col in alchemy_obj.__table__.primary_key.columns.values())
218
- obj_dict = {
224
+ table = alchemy_obj.__table__
225
+ primary_keys = list(col.name for col in table.primary_key.columns.values())
226
+
227
+ # Build the dictionary for the INSERT values
228
+ insert_values = {
229
+ col.name: getattr(alchemy_obj, col.name)
230
+ for col in table.columns
231
+ if getattr(alchemy_obj, col.name) is not None # Include all non-None values for insert
232
+ }
233
+
234
+ # Build the dictionary for the UPDATE set clause
235
+ # Start with values from the object, excluding primary keys
236
+ update_set_values = {
219
237
  col.name: val
220
- for col in alchemy_obj.__table__.columns
238
+ for col in table.columns
221
239
  if col.name not in primary_keys and (val := getattr(alchemy_obj, col.name)) is not None
222
240
  }
223
241
 
242
+ # Add columns with SQL-based onupdate values explicitly to the set clause
243
+ for column in table.columns:
244
+ actual_sql_expression = None
245
+ if column.onupdate is not None:
246
+ if hasattr(column.onupdate, "arg") and isinstance(column.onupdate.arg, ClauseElement):
247
+ # This handles wrappers like ColumnElementColumnDefault,
248
+ # where the actual SQL expression is in the .arg attribute.
249
+ actual_sql_expression = column.onupdate.arg
250
+ elif isinstance(column.onupdate, ClauseElement):
251
+ # This handles cases where onupdate might be a direct SQL expression.
252
+ actual_sql_expression = column.onupdate
253
+
254
+ if actual_sql_expression is not None:
255
+ update_set_values[column.name] = actual_sql_expression
256
+
224
257
  statement = (
225
- insert(alchemy_obj.__table__)
226
- .values(obj_dict)
227
- .on_conflict_do_update(index_elements=index_elements, set_=obj_dict)
258
+ insert(table)
259
+ .values(insert_values)
260
+ .on_conflict_do_update(index_elements=index_elements, set_=update_set_values)
228
261
  )
229
262
 
230
263
  try:
@@ -245,6 +278,7 @@ class AlchemyInterface:
245
278
  stmt: Select[Any],
246
279
  order_by: List[SQLColumnExpression[Any]],
247
280
  windowsize: int,
281
+ commit_strategy: CommitStrategy = CommitStrategy.COMMIT_ON_SUCCESS,
248
282
  ) -> Iterator[Result[Any]]:
249
283
  """
250
284
  Executes a windowed query, fetching each window in a separate, short-lived session.
@@ -253,6 +287,8 @@ class AlchemyInterface:
253
287
  stmt: The SQL select statement to execute.
254
288
  order_by: The columns to use for ordering.
255
289
  windowsize: The number of rows to fetch in each window.
290
+ commit_strategy: The strategy to use for committing the session after each window.
291
+ Defaults to CommitStrategy.COMMIT_ON_SUCCESS.
256
292
 
257
293
  Returns:
258
294
  An iterator of Result objects, each containing a window of data.
@@ -296,4 +332,15 @@ class AlchemyInterface:
296
332
 
297
333
  finally:
298
334
  if session_active and self.session:
299
- self.stop(commit=commit_needed)
335
+ if commit_strategy == CommitStrategy.FORCE_COMMIT:
336
+ # For forced commit, always attempt to commit.
337
+ # The self.stop() method already handles potential exceptions during commit/rollback.
338
+ self.stop(commit=True)
339
+ elif commit_strategy == CommitStrategy.COMMIT_ON_SUCCESS:
340
+ # Commit only if no exception occurred before yielding the result.
341
+ self.stop(commit=commit_needed)
342
+ else:
343
+ # Fallback or error for unknown strategy, though type hinting should prevent this.
344
+ # For safety, default to rollback.
345
+ logger.warning(f"Unknown commit strategy: {commit_strategy}. Defaulting to rollback.")
346
+ self.stop(commit=False)
File without changes
File without changes