datamarket 0.9.15__py3-none-any.whl → 0.9.17__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -2,16 +2,15 @@
2
2
  # IMPORTS
3
3
 
4
4
  import logging
5
- from urllib.parse import quote_plus
6
- from typing import Any, Iterator, List, Type, TypeVar
7
5
  from collections.abc import MutableMapping
6
+ from typing import Any, Iterator, List, Type, TypeVar
7
+ from urllib.parse import quote_plus
8
8
 
9
- from sqlalchemy import DDL, Result, Select, SQLColumnExpression, create_engine, text
9
+ from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
10
10
  from sqlalchemy.dialects.postgresql import insert
11
11
  from sqlalchemy.exc import IntegrityError
12
- from sqlalchemy.orm import sessionmaker
13
- from sqlalchemy import FrozenResult
14
12
  from sqlalchemy.ext.declarative import DeclarativeMeta
13
+ from sqlalchemy.orm import sessionmaker
15
14
 
16
15
  ########################################################################################################################
17
16
  # CLASSES
@@ -180,7 +179,7 @@ class AlchemyInterface:
180
179
  def windowed_query(
181
180
  self,
182
181
  stmt: Select[Any],
183
- column: SQLColumnExpression[Any],
182
+ order_by: List[SQLColumnExpression[Any]],
184
183
  windowsize: int,
185
184
  ) -> Iterator[Result[Any]]:
186
185
  """
@@ -188,26 +187,31 @@ class AlchemyInterface:
188
187
 
189
188
  Args:
190
189
  stmt: The SQL select statement to execute.
191
- column: The column to use for windowing and sorting.
190
+ order_by: The columns to use for ordering.
192
191
  windowsize: The number of rows to fetch in each window.
193
192
 
194
193
  Returns:
195
194
  An iterator of Result objects, each containing a window of data.
196
195
 
197
- Source: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
196
+ More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
198
197
  """
198
+ # Add row_number over the specified order
199
+ row_number = func.row_number().over(order_by=order_by).label("row_number")
199
200
 
200
201
  # Add the windowing column to the statement and sort by it for deterministic results
201
- stmt = stmt.add_columns(column).order_by(column)
202
- last_id = None
202
+ inner_stmt = stmt.add_columns(row_number).order_by(row_number)
203
+ subq = inner_stmt.subquery()
203
204
 
204
- while True:
205
- subq = stmt
205
+ # Create an outer query that selects from the subquery
206
+ cols = [c for c in subq.c]
207
+ outer_query = select(*cols)
206
208
 
207
- if last_id is not None:
208
- subq = subq.filter(column > last_id)
209
+ last_row_number = 0
209
210
 
210
- result: Result = self.session.execute(subq.limit(windowsize))
211
+ while True:
212
+ # Filter on row_number in the outer query
213
+ current_query = outer_query.where(subq.c.row_number > last_row_number).limit(windowsize)
214
+ result = self.session.execute(current_query)
211
215
 
212
216
  # Create a FrozenResult to allow peeking at the data without consuming
213
217
  frozen_result: FrozenResult = result.freeze()
@@ -216,13 +220,13 @@ class AlchemyInterface:
216
220
  if not chunk:
217
221
  break
218
222
 
219
- result_width = len(chunk)
220
- last_id = chunk[-1][-1]
223
+ # Update for next iteration
224
+ last_row_number = chunk[-1].row_number - 1
221
225
 
222
226
  # Create a new Result object from the FrozenResult
223
- yield_result: Result = frozen_result()
227
+ yield_result = frozen_result()
224
228
 
225
- # Remove the windowing column from the result
226
- yield_result = yield_result.columns(*list(range(0, result_width - 1)))
229
+ # Remove row_number from result
230
+ yield_result = yield_result.columns(*range(len(cols) - 1))
227
231
 
228
232
  yield yield_result
@@ -18,10 +18,13 @@ from .alchemy import AlchemyInterface
18
18
  ########################################################################################################################
19
19
  # EXCEPTIONS
20
20
 
21
+
21
22
  class DatabaseNotConnectedError(Exception):
22
23
  """Custom error for when database is not connected."""
24
+
23
25
  pass
24
26
 
27
+
25
28
  ########################################################################################################################
26
29
  # CLASSES
27
30
 
@@ -78,10 +81,17 @@ class PostgresPeer:
78
81
  with self.engine.connect() as conn:
79
82
  conn.execute(text(f"DROP PUBLICATION IF EXISTS {schema_name}_peerdb"))
80
83
 
81
- table_list = ", ".join([f"{schema_name}.{table}" for table in table_names])
84
+ table_list = []
85
+ for table in table_names:
86
+ full_table_name = f'"{schema_name}"."{table}"'
87
+ logger.info(f"Setting REPLICA IDENTITY FULL for table: {full_table_name}")
88
+ conn.execute(text(f"ALTER TABLE {full_table_name} REPLICA IDENTITY FULL;"))
89
+ table_list.append(full_table_name)
90
+
91
+ table_list_str = ", ".join(table_list)
82
92
  conn.execute(
83
93
  text(f"""
84
- CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list};
94
+ CREATE PUBLICATION {schema_name}_peerdb FOR TABLE {table_list_str};
85
95
  """)
86
96
  )
87
97
  conn.commit()
@@ -319,11 +329,11 @@ class PeerDBInterface:
319
329
  if not self.docker_host_mapping or not host:
320
330
  return host
321
331
 
322
- if host in ['localhost', '127.0.0.1']:
332
+ if host in ["localhost", "127.0.0.1"]:
323
333
  logger.debug(f"Mapping host {host} to {self.docker_host_mapping} for Docker environment")
324
334
  return self.docker_host_mapping
325
335
 
326
- url_pattern = r'(localhost|127\.0\.0\.1)'
336
+ url_pattern = r"(localhost|127\.0\.0\.1)"
327
337
  match = re.search(url_pattern, host)
328
338
  if match:
329
339
  original_host = match.group(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.15
3
+ Version: 0.9.17
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
5
  License: GPL-3.0-or-later
6
6
  Author: DataMarket
@@ -1,11 +1,11 @@
1
1
  datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
2
2
  datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- datamarket/interfaces/alchemy.py,sha256=Y1wKSEBlWXCE3-6JypdaUL2i_7FAGa9PK4_zQHeM2SE,8563
3
+ datamarket/interfaces/alchemy.py,sha256=z7VarlKZ-JfsXWtuDXCYnNp_pSzEYo5IvrD7wqoRpbI,8891
4
4
  datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
6
  datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
7
7
  datamarket/interfaces/nominatim.py,sha256=ysIA2J1GhsZ0TJxD6B8N1_a7dkMEqtZQV6mT4Hayecg,3672
8
- datamarket/interfaces/peerdb.py,sha256=2FrG7E5BAGxhwoxjjlPP7ceE7bmUpQi54jtTPtjQul0,22232
8
+ datamarket/interfaces/peerdb.py,sha256=QkX8pyT5pi6g0wT5yoPiBAkCTJB0NutkxNPPxYOVqyY,22566
9
9
  datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
10
10
  datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
11
11
  datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=O6rX-65h4h0j2zs9dofdTPlly5reKDnvgLtTwbLmbWg,6529
17
17
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
- datamarket-0.9.15.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
- datamarket-0.9.15.dist-info/METADATA,sha256=LzfDrq6DA-PUfIHU19ZOy0n_x_txIMqXu6PKl-pmmxI,6363
22
- datamarket-0.9.15.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
23
- datamarket-0.9.15.dist-info/RECORD,,
20
+ datamarket-0.9.17.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ datamarket-0.9.17.dist-info/METADATA,sha256=hNvKYKsU4pTLrpjGQ6HFBn8IBicz1-uvjzuAcKE7yy0,6363
22
+ datamarket-0.9.17.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
23
+ datamarket-0.9.17.dist-info/RECORD,,