datamarket 0.9.22__py3-none-any.whl → 0.9.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -188,7 +188,7 @@ class AlchemyInterface:
188
188
 
189
189
  query_results.update({column_name: default_value}, synchronize_session=False)
190
190
 
191
- def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> None:
191
+ def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> bool:
192
192
  if self.session is None:
193
193
  raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
194
194
 
@@ -203,8 +203,11 @@ class AlchemyInterface:
203
203
  if not silent:
204
204
  logger.info(f"{alchemy_obj} already in db (savepoint rolled back)")
205
205
  # Do not re-raise, allow outer transaction/loop to continue
206
+ return False
206
207
 
207
- def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> None:
208
+ return True
209
+
210
+ def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> bool:
208
211
  if self.session is None:
209
212
  raise RuntimeError("Session not active. Use 'with AlchemyInterface(...):' or call start()")
210
213
 
@@ -233,6 +236,9 @@ class AlchemyInterface:
233
236
  if not silent:
234
237
  logger.info(f"could not upsert {alchemy_obj} (savepoint rolled back)")
235
238
  # Do not re-raise, allow outer transaction/loop to continue
239
+ return False
240
+
241
+ return True
236
242
 
237
243
  def windowed_query(
238
244
  self,
@@ -254,19 +260,13 @@ class AlchemyInterface:
254
260
 
255
261
  More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
256
262
  """
257
- # Add row_number over the specified order
258
- row_number = func.row_number().over(order_by=order_by).label("row_number")
259
-
260
- # Add the windowing column to the statement
261
- inner_stmt = stmt.with_session(None).add_columns(row_number).order_by(row_number)
262
- subq = inner_stmt.subquery()
263
263
 
264
- # Create an outer query that selects from the subquery
265
- cols = [c for c in subq.c]
266
- outer_query = select(*cols)
267
-
268
- last_row_number = 0
264
+ # Find id column in stmt
265
+ if not any(column.get("entity").id for column in stmt.column_descriptions):
266
+ raise Exception("Column 'id' not found in any entity of the query.")
267
+ id_column = stmt.column_descriptions[0]["entity"].id
269
268
 
269
+ last_id = 0
270
270
  while True:
271
271
  session_active = False
272
272
  commit_needed = False
@@ -275,7 +275,7 @@ class AlchemyInterface:
275
275
  session_active = True
276
276
 
277
277
  # Filter on row_number in the outer query
278
- current_query = outer_query.where(subq.c.row_number > last_row_number).limit(windowsize)
278
+ current_query = stmt.where(id_column > last_id).order_by(order_by[0], *order_by[1:]).limit(windowsize)
279
279
  result = self.session.execute(current_query)
280
280
 
281
281
  # Create a FrozenResult to allow peeking at the data without consuming
@@ -286,16 +286,11 @@ class AlchemyInterface:
286
286
  break
287
287
 
288
288
  # Update for next iteration
289
- last_row_number = chunk[-1].row_number
289
+ last_id = chunk[-1].id
290
290
 
291
291
  # Create a new Result object from the FrozenResult
292
292
  yield_result = frozen_result()
293
293
 
294
- # Remove row_number from result before yielding
295
- # Ensure we don't yield the row_number column itself
296
- original_col_count = len(cols) - 1
297
- yield_result = yield_result.columns(*range(original_col_count))
298
-
299
294
  yield yield_result
300
295
  commit_needed = True
301
296
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.22
3
+ Version: 0.9.23
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
2
2
  datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- datamarket/interfaces/alchemy.py,sha256=Qxgi_-TSzlDlUPy9SV1GoEfg1VzLx4KnPcSRVT-BFdc,12365
3
+ datamarket/interfaces/alchemy.py,sha256=-oO-ZBQcv6ixn-N4rEbJcfRjT_54YkMr37hRqc8QKfA,11993
4
4
  datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
6
  datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
@@ -18,7 +18,7 @@ datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
20
  datamarket/utils/types.py,sha256=vxdQZdwdXrfPR4Es52gBgol-tMRIOD6oK9cBo3rB0JQ,74
21
- datamarket-0.9.22.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
22
- datamarket-0.9.22.dist-info/METADATA,sha256=P21dnfivUdzY529pRAh83IVfa48at80lm7NuomuMuF4,6459
23
- datamarket-0.9.22.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
24
- datamarket-0.9.22.dist-info/RECORD,,
21
+ datamarket-0.9.23.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
22
+ datamarket-0.9.23.dist-info/METADATA,sha256=iHO101fsdYc5iuQJbhEeVe8x6LBIl_mkofNpRP4Vjew,6459
23
+ datamarket-0.9.23.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ datamarket-0.9.23.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any