datamarket-0.9.14-py3-none-any.whl → datamarket-0.9.16-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/alchemy.py +90 -30
- {datamarket-0.9.14.dist-info → datamarket-0.9.16.dist-info}/METADATA +1 -1
- {datamarket-0.9.14.dist-info → datamarket-0.9.16.dist-info}/RECORD +5 -5
- {datamarket-0.9.14.dist-info → datamarket-0.9.16.dist-info}/LICENSE +0 -0
- {datamarket-0.9.14.dist-info → datamarket-0.9.16.dist-info}/WHEEL +0 -0
datamarket/interfaces/alchemy.py
CHANGED
|
@@ -2,11 +2,14 @@
|
|
|
2
2
|
# IMPORTS
|
|
3
3
|
|
|
4
4
|
import logging
|
|
5
|
+
from collections.abc import MutableMapping
|
|
6
|
+
from typing import Any, Iterator, List, Type, TypeVar
|
|
5
7
|
from urllib.parse import quote_plus
|
|
6
8
|
|
|
7
|
-
from sqlalchemy import DDL, create_engine, text
|
|
9
|
+
from sqlalchemy import DDL, FrozenResult, Result, Select, SQLColumnExpression, create_engine, func, select, text
|
|
8
10
|
from sqlalchemy.dialects.postgresql import insert
|
|
9
11
|
from sqlalchemy.exc import IntegrityError
|
|
12
|
+
from sqlalchemy.ext.declarative import DeclarativeMeta
|
|
10
13
|
from sqlalchemy.orm import sessionmaker
|
|
11
14
|
|
|
12
15
|
########################################################################################################################
|
|
@@ -14,16 +17,18 @@ from sqlalchemy.orm import sessionmaker
|
|
|
14
17
|
|
|
15
18
|
logger = logging.getLogger(__name__)
|
|
16
19
|
|
|
20
|
+
ModelType = TypeVar("ModelType", bound=DeclarativeMeta)
|
|
21
|
+
|
|
17
22
|
|
|
18
23
|
class MockContext:
    """
    Lightweight placeholder passed to callable column defaults.

    It carries only three attributes: an empty ``current_parameters`` dict,
    the column being processed, and a ``connection`` that is always ``None``.
    """

    def __init__(self, column: SQLColumnExpression) -> None:
        """
        Args:
            column: The column this context is being built for.
        """
        # No statement is being executed, so there are no bound parameters yet.
        self.current_parameters = {}
        self.current_column = column
        # No database connection is attached to this context.
        self.connection = None
|
|
23
28
|
|
|
24
29
|
|
|
25
30
|
class AlchemyInterface:
|
|
26
|
-
def __init__(self, config):
|
|
31
|
+
def __init__(self, config: MutableMapping) -> None:
|
|
27
32
|
if "db" in config:
|
|
28
33
|
self.config = config["db"]
|
|
29
34
|
|
|
@@ -43,7 +48,7 @@ class AlchemyInterface:
|
|
|
43
48
|
)
|
|
44
49
|
|
|
45
50
|
@staticmethod
|
|
46
|
-
def get_schema_from_table(table):
|
|
51
|
+
def get_schema_from_table(table: Type[ModelType]) -> str:
|
|
47
52
|
schema = "public"
|
|
48
53
|
|
|
49
54
|
if isinstance(table.__table_args__, tuple):
|
|
@@ -59,7 +64,7 @@ class AlchemyInterface:
|
|
|
59
64
|
|
|
60
65
|
return schema
|
|
61
66
|
|
|
62
|
-
def create_tables(self, tables):
|
|
67
|
+
def create_tables(self, tables: List[Type[ModelType]]) -> None:
|
|
63
68
|
for table in tables:
|
|
64
69
|
schema = self.get_schema_from_table(table)
|
|
65
70
|
|
|
@@ -82,7 +87,7 @@ class AlchemyInterface:
|
|
|
82
87
|
else:
|
|
83
88
|
logger.info(f"table {table.__tablename__} already exists")
|
|
84
89
|
|
|
85
|
-
def drop_tables(self, tables):
|
|
90
|
+
def drop_tables(self, tables: List[Type[ModelType]]) -> None:
|
|
86
91
|
for table in tables:
|
|
87
92
|
schema = self.get_schema_from_table(table)
|
|
88
93
|
|
|
@@ -98,13 +103,40 @@ class AlchemyInterface:
|
|
|
98
103
|
conn.execute(DDL(f"DROP TABLE {schema}.{table.__tablename__} CASCADE"))
|
|
99
104
|
conn.commit()
|
|
100
105
|
|
|
101
|
-
def reset_db(self, tables, drop):
|
|
106
|
+
def reset_db(self, tables: List[Type[ModelType]], drop: bool = False) -> None:
    """
    Create the given tables, optionally dropping them first.

    Args:
        tables: The model classes handed to ``drop_tables`` / ``create_tables``.
        drop: When truthy, ``drop_tables`` runs before ``create_tables``.
    """
    # Dropping (if requested) must come before creation, so the steps are kept in order.
    steps = (self.drop_tables, self.create_tables) if drop else (self.create_tables,)

    for step in steps:
        step(tables)
|
|
106
111
|
|
|
107
|
-
def
|
|
112
|
+
def reset_column(self, query_results: List[Result[Any]], column_name: str) -> None:
    """
    Reset one column to its declared default for everything in ``query_results``.

    The model class is taken from the first element of ``query_results`` and the
    column is looked up on that model's ``__table__``. A server-side default wins
    over a Python-side default; a callable Python-side default is invoked with a
    ``MockContext`` for the column. The session is committed after the update.

    Args:
        query_results: The objects to update.
            NOTE(review): the annotation says ``List[Result[Any]]``, but
            ``.update(..., synchronize_session=False)`` is called on this value,
            which looks like the ORM ``Query`` API -- confirm what callers pass.
        column_name: Name of the column to reset.

    Raises:
        ValueError: If the column defines neither a server default nor a default.
    """
    if not query_results:
        logger.warning("No objects to reset column for.")
        return

    # The table is derived from the class of the first returned object.
    table = query_results[0].__class__.__table__

    if column_name not in table.columns:
        logger.warning(f"Column {column_name} does not exist in table {table.name}.")
        return

    column = table.columns[column_name]

    if column.server_default is not None:
        # Let the database apply its own default expression.
        new_value = text("DEFAULT")
    elif column.default is not None:
        new_value = column.default.arg
        if callable(new_value):
            new_value = new_value(MockContext(column))
    else:
        raise ValueError(f"Column '{column_name}' doesn't have a default value defined.")

    query_results.update({column_name: new_value}, synchronize_session=False)
    self.session.commit()
|
|
138
|
+
|
|
139
|
+
def insert_alchemy_obj(self, alchemy_obj: ModelType, silent: bool = False) -> None:
|
|
108
140
|
try:
|
|
109
141
|
if not silent:
|
|
110
142
|
logger.info(f"adding {alchemy_obj}...")
|
|
@@ -118,7 +150,7 @@ class AlchemyInterface:
|
|
|
118
150
|
|
|
119
151
|
self.session.rollback()
|
|
120
152
|
|
|
121
|
-
def upsert_alchemy_obj(self, alchemy_obj, index_elements, silent=False):
|
|
153
|
+
def upsert_alchemy_obj(self, alchemy_obj: ModelType, index_elements: List[str], silent: bool = False) -> None:
|
|
122
154
|
if not silent:
|
|
123
155
|
logger.info(f"upserting {alchemy_obj}")
|
|
124
156
|
|
|
@@ -144,29 +176,57 @@ class AlchemyInterface:
|
|
|
144
176
|
|
|
145
177
|
self.session.rollback()
|
|
146
178
|
|
|
147
|
-
def
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
179
|
+
def windowed_query(
    self,
    stmt: Select[Any],
    order_by: List[SQLColumnExpression[Any]],
    windowsize: int,
) -> Iterator[Result[Any]]:
    """
    Executes a windowed query on the given statement.

    The statement is wrapped in a subquery that numbers every row with
    ``row_number()`` over ``order_by``. Windows are then fetched one at a time
    by selecting the rows whose number is greater than the last one seen,
    limited to ``windowsize`` rows. The helper ``row_number`` column is
    stripped from each yielded result.

    Args:
        stmt: The SQL select statement to execute.
        order_by: The columns to use for ordering.
        windowsize: The number of rows to fetch in each window.

    Returns:
        An iterator of Result objects, each containing a window of data.
        Iteration stops at the first empty window.

    More info: https://github.com/sqlalchemy/sqlalchemy/wiki/RangeQuery-and-WindowedRangeQuery
    """
    # Add row_number over the specified order
    row_number = func.row_number().over(order_by=order_by).label("row_number")

    # Add the windowing column to the statement and sort by it
    inner_stmt = stmt.add_columns(row_number).order_by(row_number)
    subq = inner_stmt.subquery()

    # Create an outer query that selects from the subquery; row_number is the last column
    cols = list(subq.c)
    outer_query = select(*cols)

    last_row_number = 0

    while True:
        # Filter on row_number in the outer query. The outer SELECT needs its own
        # ORDER BY: ordering inside a subquery is not guaranteed to carry over, and
        # without it LIMIT could return an arbitrary subset of the remaining rows.
        current_query = (
            outer_query.where(subq.c.row_number > last_row_number)
            .order_by(subq.c.row_number)
            .limit(windowsize)
        )
        result = self.session.execute(current_query)

        # Create a FrozenResult to allow peeking at the data without consuming
        frozen_result: FrozenResult = result.freeze()
        chunk = frozen_result().all()

        if not chunk:
            break

        # The filter above is strictly greater-than, so resume from the last row
        # number actually returned; subtracting one would fetch that row again in
        # every following window and the loop would never reach an empty window.
        last_row_number = chunk[-1].row_number

        # Create a new Result object from the FrozenResult
        yield_result = frozen_result()

        # Remove row_number from result
        yield_result = yield_result.columns(*range(len(cols) - 1))

        yield yield_result
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datamarket/__init__.py,sha256=FHS77P9qNewKMoN-p0FLEUEC60oWIYup1QkbJZP4ays,12
|
|
2
2
|
datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
datamarket/interfaces/alchemy.py,sha256=
|
|
3
|
+
datamarket/interfaces/alchemy.py,sha256=z7VarlKZ-JfsXWtuDXCYnNp_pSzEYo5IvrD7wqoRpbI,8891
|
|
4
4
|
datamarket/interfaces/aws.py,sha256=7KLUeBxmPN7avEMPsu5HC_KHB1N7W6Anp2X8fo43mlw,2383
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
6
|
datamarket/interfaces/ftp.py,sha256=o0KlJxtksbop9OjCiQRzyAa2IeG_ExVXagS6apwrAQo,1881
|
|
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=O6rX-65h4h0j2zs9dofdTPlly5reKDnvgLtTwbLmbWg,6529
|
|
|
17
17
|
datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
|
-
datamarket-0.9.
|
|
21
|
-
datamarket-0.9.
|
|
22
|
-
datamarket-0.9.
|
|
23
|
-
datamarket-0.9.
|
|
20
|
+
datamarket-0.9.16.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
21
|
+
datamarket-0.9.16.dist-info/METADATA,sha256=DsHOm4WsenKNFphVyqbLI3A3WluNWGGgVFZRlE8U8NI,6363
|
|
22
|
+
datamarket-0.9.16.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
|
23
|
+
datamarket-0.9.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|