datamarket 0.9.15__py3-none-any.whl → 0.9.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -2,16 +2,15 @@
2
2
  # IMPORTS
3
3
 
4
4
  import logging
5
- from urllib.parse import quote_plus
6
- from typing import Any, Iterator, List, Type, TypeVar
7
5
  from collections.abc import MutableMapping
6
+ from typing import Any, Iterator, List, Type, TypeVar
7
+ from urllib.parse import quote_plus
8
8
 
9
- from sqlalchemy import DDL, Result, Select, SQLColumnExpression, create_engine, text
9
+ from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
10
10
  from sqlalchemy.dialects.postgresql import insert
11
11
  from sqlalchemy.exc import IntegrityError
12
- from sqlalchemy.orm import sessionmaker
13
- from sqlalchemy import FrozenResult
14
12
  from sqlalchemy.ext.declarative import DeclarativeMeta
13
+ from sqlalchemy.orm import sessionmaker
15
14
 
16
15
  ########################################################################################################################
17
16
  # CLASSES
@@ -180,7 +179,7 @@ class AlchemyInterface:
180
179
  def windowed_query(
181
180
  self,
182
181
  stmt: Select[Any],
183
- column: SQLColumnExpression[Any],
182
+ order_by: List[SQLColumnExpression[Any]],
184
183
  windowsize: int,
185
184
  ) -> Iterator[Result[Any]]:
186
185
  """
@@ -188,26 +187,31 @@ class AlchemyInterface:
188
187
 
189
188
  Args:
190
189
  stmt: The SQL select statement to execute.
191
- column: The column to use for windowing and sorting.
190
+ order_by: The columns to use for ordering.
192
191
  windowsize: The number of rows to fetch in each window.
193
192
 
194
193
  Returns:
195
194
  An iterator of Result objects, each containing a window of data.
196
195
 
197
- Source: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
196
+ More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
198
197
  """
198
+ # Add row_number over the specified order
199
+ row_number = func.row_number().over(order_by=order_by).label("row_number")
199
200
 
200
201
  # Add the windowing column to the statement and sort by it for deterministic results
201
- stmt = stmt.add_columns(column).order_by(column)
202
- last_id = None
202
+ inner_stmt = stmt.add_columns(row_number).order_by(row_number)
203
+ subq = inner_stmt.subquery()
203
204
 
204
- while True:
205
- subq = stmt
205
+ # Create an outer query that selects from the subquery
206
+ cols = [c for c in subq.c]
207
+ outer_query = select(*cols)
206
208
 
207
- if last_id is not None:
208
- subq = subq.filter(column > last_id)
209
+ last_row_number = 0
209
210
 
210
- result: Result = self.session.execute(subq.limit(windowsize))
211
+ while True:
212
+ # Filter on row_number in the outer query
213
+ current_query = outer_query.where(subq.c.row_number > last_row_number).limit(windowsize)
214
+ result = self.session.execute(current_query)
211
215
 
212
216
  # Create a FrozenResult to allow peeking at the data without consuming
213
217
  frozen_result: FrozenResult = result.freeze()
@@ -216,13 +220,13 @@ class AlchemyInterface:
216
220
  if not chunk:
217
221
  break
218
222
 
219
- result_width = len(chunk)
220
- last_id = chunk[-1][-1]
223
+ # Update for next iteration
224
+ last_row_number = chunk[-1].row_number - 1
221
225
 
222
226
  # Create a new Result object from the FrozenResult
223
- yield_result: Result = frozen_result()
227
+ yield_result = frozen_result()
224
228
 
225
- # Remove the windowing column from the result
226
- yield_result = yield_result.columns(*list(range(0, result_width - 1)))
229
+ # Remove row_number from result
230
+ yield_result = yield_result.columns(*range(len(cols) - 1))
227
231
 
228
232
  yield yield_result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.15
3
+ Version: 0.9.16
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,6 +1,6 @@
1
1
  datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
2
2
  datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- datamarket/interfaces/alchemy.py,sha256=Y1wKSEBlWXCE3-6JypdaUL2i_7FAGa9PK4_zQHeM2SE,8563
3
+ datamarket/interfaces/alchemy.py,sha256=z7VarlKZ-JfsXWtuDXCYnNp_pSzEYo5IvrD7wqoRpbI,8891
4
4
  datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
6
  datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=O6rX-65h4h0j2zs9dofdTPlly5reKDnvgLtTwbLmbWg,6529
17
17
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
- datamarket-0.9.15.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
- datamarket-0.9.15.dist-info/METADATA,sha256=LzfDrq6DA-PUfIHU19ZOy0n_x_txIMqXu6PKl-pmmxI,6363
22
- datamarket-0.9.15.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
23
- datamarket-0.9.15.dist-info/RECORD,,
20
+ datamarket-0.9.16.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ datamarket-0.9.16.dist-info/METADATA,sha256=DsHOm4WsenKNFphVyqbLI3A3WluNWGGgVFZRlE8U8NI,6363
22
+ datamarket-0.9.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
23
+ datamarket-0.9.16.dist-info/RECORD,,