meerschaum 2.4.13__py3-none-any.whl → 2.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/entry.py +16 -5
- meerschaum/actions/upgrade.py +3 -2
- meerschaum/api/dash/pipes.py +95 -13
- meerschaum/api/routes/_webterm.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_create_engine.py +8 -3
- meerschaum/connectors/sql/_fetch.py +2 -2
- meerschaum/connectors/sql/_instance.py +3 -3
- meerschaum/connectors/sql/_pipes.py +18 -13
- meerschaum/connectors/sql/_sql.py +59 -50
- meerschaum/core/Pipe/__init__.py +23 -1
- meerschaum/core/Pipe/_attributes.py +96 -14
- meerschaum/utils/dataframe.py +19 -7
- meerschaum/utils/packages/__init__.py +40 -22
- meerschaum/utils/packages/_packages.py +26 -9
- meerschaum/utils/schedule.py +16 -10
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/METADATA +13 -15
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/RECORD +24 -24
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/LICENSE +0 -0
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/NOTICE +0 -0
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/WHEEL +0 -0
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.4.13.dist-info → meerschaum-2.5.1.dist-info}/zip-safe +0 -0
meerschaum/_internal/entry.py
CHANGED
@@ -13,6 +13,7 @@ import os
|
|
13
13
|
import sys
|
14
14
|
import pathlib
|
15
15
|
|
16
|
+
import meerschaum as mrsm
|
16
17
|
from meerschaum.utils.typing import SuccessTuple, List, Optional, Dict, Callable, Any
|
17
18
|
from meerschaum.config.static import STATIC_CONFIG as _STATIC_CONFIG
|
18
19
|
|
@@ -229,7 +230,7 @@ def entry_with_args(
|
|
229
230
|
_ = get_shell(**kw).cmdloop()
|
230
231
|
return True, "Success"
|
231
232
|
|
232
|
-
|
233
|
+
_skip_schedule = False
|
233
234
|
|
234
235
|
executor_keys = kw.get('executor_keys', None)
|
235
236
|
if executor_keys is None:
|
@@ -246,7 +247,7 @@ def entry_with_args(
|
|
246
247
|
api_label = executor_keys.split(':')[-1]
|
247
248
|
kw['action'].insert(0, 'api')
|
248
249
|
kw['action'].insert(1, api_label)
|
249
|
-
|
250
|
+
_skip_schedule = True
|
250
251
|
|
251
252
|
### If the `--daemon` flag is present, prepend 'start job'.
|
252
253
|
if kw.get('daemon', False) and kw['action'][0] != 'stack':
|
@@ -271,7 +272,7 @@ def entry_with_args(
|
|
271
272
|
and kw['action'][0] == 'start'
|
272
273
|
and kw['action'][1] in ('job', 'jobs')
|
273
274
|
):
|
274
|
-
|
275
|
+
_skip_schedule = True
|
275
276
|
|
276
277
|
kw['action'] = remove_leading_action(kw['action'], _actions=_actions)
|
277
278
|
|
@@ -279,10 +280,11 @@ def entry_with_args(
|
|
279
280
|
_do_action_wrapper,
|
280
281
|
action_function,
|
281
282
|
plugin_name,
|
283
|
+
_skip_schedule=_skip_schedule,
|
282
284
|
**kw
|
283
285
|
)
|
284
286
|
|
285
|
-
if kw.get('schedule', None) and not
|
287
|
+
if kw.get('schedule', None) and not _skip_schedule:
|
286
288
|
from meerschaum.utils.schedule import schedule_function
|
287
289
|
from meerschaum.utils.misc import interval_str
|
288
290
|
import time
|
@@ -304,7 +306,12 @@ def entry_with_args(
|
|
304
306
|
return result
|
305
307
|
|
306
308
|
|
307
|
-
def _do_action_wrapper(
|
309
|
+
def _do_action_wrapper(
|
310
|
+
action_function,
|
311
|
+
plugin_name,
|
312
|
+
_skip_schedule: bool = False,
|
313
|
+
**kw
|
314
|
+
):
|
308
315
|
from meerschaum.plugins import Plugin
|
309
316
|
from meerschaum.utils.venv import Venv
|
310
317
|
from meerschaum.utils.misc import filter_keywords
|
@@ -328,6 +335,10 @@ def _do_action_wrapper(action_function, plugin_name, **kw):
|
|
328
335
|
)
|
329
336
|
except KeyboardInterrupt:
|
330
337
|
result = False, f"Cancelled action `{action_name.lstrip()}`."
|
338
|
+
|
339
|
+
if kw.get('schedule', None) and not _skip_schedule:
|
340
|
+
mrsm.pprint(result)
|
341
|
+
|
331
342
|
return result
|
332
343
|
|
333
344
|
_shells = []
|
meerschaum/actions/upgrade.py
CHANGED
@@ -109,6 +109,7 @@ def _upgrade_meerschaum(
|
|
109
109
|
class NoVenv:
|
110
110
|
pass
|
111
111
|
|
112
|
+
|
112
113
|
def _upgrade_packages(
|
113
114
|
action: Optional[List[str]] = None,
|
114
115
|
venv: Union[str, None, NoVenv] = NoVenv,
|
@@ -121,7 +122,7 @@ def _upgrade_packages(
|
|
121
122
|
"""
|
122
123
|
Upgrade and install dependencies.
|
123
124
|
If provided, upgrade only a dependency group, otherwise default to `full`.
|
124
|
-
|
125
|
+
|
125
126
|
Examples:
|
126
127
|
upgrade packages
|
127
128
|
upgrade packages full
|
@@ -160,7 +161,7 @@ def _upgrade_packages(
|
|
160
161
|
to_install = [
|
161
162
|
install_name
|
162
163
|
for install_name in to_install
|
163
|
-
if install_name not in prereleases_to_install
|
164
|
+
if (install_name not in prereleases_to_install) or group == '_internal'
|
164
165
|
]
|
165
166
|
|
166
167
|
success, msg = False, f"Nothing installed."
|
meerschaum/api/dash/pipes.py
CHANGED
@@ -346,6 +346,8 @@ def accordion_items_from_pipe(
|
|
346
346
|
for item in skip_items:
|
347
347
|
_ = items_titles.pop(item, None)
|
348
348
|
|
349
|
+
pipe_meta_str = json.dumps(pipe.meta, sort_keys=True)
|
350
|
+
|
349
351
|
### Only generate items if they're in the `active_items` list.
|
350
352
|
items_bodies = {}
|
351
353
|
if 'overview' in active_items:
|
@@ -358,22 +360,103 @@ def accordion_items_from_pipe(
|
|
358
360
|
html.Tr([html.Td("Instance"), html.Td(html.Pre(f"{pipe.instance_keys}"))]),
|
359
361
|
html.Tr([html.Td("Target Table"), html.Td(html.Pre(f"{pipe.target}"))]),
|
360
362
|
]
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
363
|
+
|
364
|
+
indices_header = [
|
365
|
+
html.Thead(
|
366
|
+
html.Tr(
|
367
|
+
[
|
368
|
+
html.Th(
|
369
|
+
html.Span(
|
370
|
+
"Key",
|
371
|
+
id={'type': 'key-table-header', 'id': pipe_meta_str},
|
372
|
+
style={"textDecoration": "underline", "cursor": "pointer"},
|
373
|
+
),
|
374
|
+
),
|
375
|
+
html.Th(
|
376
|
+
html.Span(
|
377
|
+
"Column",
|
378
|
+
id={'type': 'column-table-header', 'id': pipe_meta_str},
|
379
|
+
style={"textDecoration": "underline", "cursor": "pointer"},
|
380
|
+
),
|
381
|
+
),
|
382
|
+
html.Th(
|
383
|
+
html.Span(
|
384
|
+
"Index",
|
385
|
+
id={'type': 'index-table-header', 'id': pipe_meta_str},
|
386
|
+
style={"textDecoration": "underline", "cursor": "pointer"},
|
387
|
+
),
|
388
|
+
),
|
389
|
+
html.Th(
|
390
|
+
html.Span(
|
391
|
+
"Is Composite",
|
392
|
+
id={'type': 'is-composite-table-header', 'id': pipe_meta_str},
|
393
|
+
style={"textDecoration": "underline", "cursor": "pointer"},
|
394
|
+
),
|
395
|
+
),
|
396
|
+
dbc.Tooltip(
|
397
|
+
"Unique reference name for the index "
|
398
|
+
"(e.g. `datetime` for the range axis)",
|
399
|
+
target={'type': 'key-table-header', 'id': pipe_meta_str},
|
400
|
+
),
|
401
|
+
dbc.Tooltip(
|
402
|
+
"The actual column (field name) in the target dataset.",
|
403
|
+
target={'type': 'column-table-header', 'id': pipe_meta_str},
|
404
|
+
),
|
405
|
+
dbc.Tooltip(
|
406
|
+
"The name of the index created on the given columns.",
|
407
|
+
target={'type': 'index-table-header', 'id': pipe_meta_str},
|
408
|
+
),
|
409
|
+
dbc.Tooltip(
|
410
|
+
"Whether the column is used in the composite primary key "
|
411
|
+
"to determine updates.",
|
412
|
+
target={'type': 'is-composite-table-header', 'id': pipe_meta_str},
|
413
|
+
),
|
414
|
+
]
|
415
|
+
)
|
416
|
+
)
|
417
|
+
]
|
418
|
+
|
419
|
+
indices = pipe.indices
|
420
|
+
columns = pipe.columns
|
421
|
+
index_column_names = pipe.get_indices()
|
422
|
+
indices_rows = []
|
423
|
+
for ix_key, ix_name in index_column_names.items():
|
424
|
+
col = columns.get(ix_key, None)
|
425
|
+
ix_cols = indices.get(ix_key, None)
|
426
|
+
if not col and not ix_cols:
|
427
|
+
continue
|
428
|
+
if not isinstance(ix_cols, (list, tuple)):
|
429
|
+
ix_cols = [ix_cols]
|
430
|
+
if col:
|
431
|
+
col_item = html.Pre(col)
|
432
|
+
elif len(ix_cols) == 1:
|
433
|
+
col_item = html.Pre(ix_cols[0])
|
434
|
+
else:
|
435
|
+
col_item = html.Pre(html.Ul([html.Li(_c) for _c in ix_cols]))
|
436
|
+
is_composite_item = "✅" if col else ""
|
437
|
+
ix_key_item = html.Pre(ix_key) if ix_key != 'datetime' else html.Pre(f"🕓 {ix_key}")
|
438
|
+
indices_rows.append(
|
439
|
+
html.Tr([
|
440
|
+
html.Td(ix_key_item),
|
441
|
+
html.Td(col_item),
|
442
|
+
html.Td(html.Pre(ix_name)),
|
443
|
+
html.Td(is_composite_item),
|
444
|
+
])
|
445
|
+
)
|
446
|
+
indices_table = dbc.Table(
|
447
|
+
indices_header + [html.Tbody(indices_rows)],
|
448
|
+
bordered=True,
|
449
|
+
hover=False,
|
450
|
+
striped=True,
|
451
|
+
)
|
452
|
+
if indices_rows:
|
371
453
|
overview_rows.append(
|
372
454
|
html.Tr([
|
373
|
-
html.Td("Indices" if len(
|
374
|
-
html.Td(
|
455
|
+
html.Td("Indices" if len(indices_rows) != 1 else "Index"),
|
456
|
+
html.Td(indices_table),
|
375
457
|
])
|
376
458
|
)
|
459
|
+
|
377
460
|
tags = pipe.tags
|
378
461
|
if tags:
|
379
462
|
tags_items = html.Ul([
|
@@ -420,7 +503,6 @@ def accordion_items_from_pipe(
|
|
420
503
|
if newest_time is not None:
|
421
504
|
stats_rows.append(html.Tr([html.Td("Newest time"), html.Td(str(newest_time))]))
|
422
505
|
|
423
|
-
|
424
506
|
items_bodies['stats'] = dbc.Table(stats_header + [html.Tbody(stats_rows)], hover=True)
|
425
507
|
|
426
508
|
if 'columns' in active_items:
|
@@ -13,6 +13,7 @@ from meerschaum.utils.packages import attempt_import
|
|
13
13
|
from meerschaum.api.dash.sessions import is_session_authenticated
|
14
14
|
fastapi, fastapi_responses = attempt_import('fastapi', 'fastapi.responses')
|
15
15
|
import starlette
|
16
|
+
httpcore = attempt_import('httpcore')
|
16
17
|
httpx = attempt_import('httpx')
|
17
18
|
websockets = attempt_import('websockets')
|
18
19
|
Request = fastapi.Request
|
meerschaum/config/_version.py
CHANGED
@@ -63,6 +63,8 @@ flavor_configs = {
|
|
63
63
|
'fast_executemany': True,
|
64
64
|
'isolation_level': 'AUTOCOMMIT',
|
65
65
|
'use_setinputsizes': False,
|
66
|
+
'pool_pre_ping': True,
|
67
|
+
'ignore_no_transaction_on_rollback': True,
|
66
68
|
},
|
67
69
|
'omit_create_engine': {'method',},
|
68
70
|
'to_sql': {
|
@@ -189,15 +191,18 @@ def create_engine(
|
|
189
191
|
### Install and patch required drivers.
|
190
192
|
if self.flavor in install_flavor_drivers:
|
191
193
|
attempt_import(*install_flavor_drivers[self.flavor], debug=debug, lazy=False, warn=False)
|
194
|
+
if self.flavor == 'mssql':
|
195
|
+
pyodbc = attempt_import('pyodbc', debug=debug, lazy=False, warn=False)
|
196
|
+
pyodbc.pooling = False
|
192
197
|
if self.flavor in require_patching_flavors:
|
193
198
|
from meerschaum.utils.packages import determine_version, _monkey_patch_get_distribution
|
194
199
|
import pathlib
|
195
200
|
for install_name, import_name in require_patching_flavors[self.flavor]:
|
196
201
|
pkg = attempt_import(
|
197
202
|
import_name,
|
198
|
-
debug
|
199
|
-
lazy
|
200
|
-
warn
|
203
|
+
debug=debug,
|
204
|
+
lazy=False,
|
205
|
+
warn=False
|
201
206
|
)
|
202
207
|
_monkey_patch_get_distribution(
|
203
208
|
install_name, determine_version(pathlib.Path(pkg.__file__), venv='mrsm')
|
@@ -31,7 +31,7 @@ def fetch(
|
|
31
31
|
----------
|
32
32
|
pipe: mrsm.Pipe
|
33
33
|
The pipe object which contains the `fetch` metadata.
|
34
|
-
|
34
|
+
|
35
35
|
- pipe.columns['datetime']: str
|
36
36
|
- Name of the datetime column for the remote table.
|
37
37
|
- pipe.parameters['fetch']: Dict[str, Any]
|
@@ -196,7 +196,7 @@ def get_pipe_metadef(
|
|
196
196
|
dateadd_str(
|
197
197
|
flavor=self.flavor,
|
198
198
|
datepart='minute',
|
199
|
-
number=((-1 * btm) if apply_backtrack else 0),
|
199
|
+
number=((-1 * btm) if apply_backtrack else 0),
|
200
200
|
begin=begin,
|
201
201
|
)
|
202
202
|
if begin
|
@@ -88,9 +88,9 @@ def _drop_temporary_tables(self, debug: bool = False) -> SuccessTuple:
|
|
88
88
|
from meerschaum.connectors.sql.tables import get_tables
|
89
89
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
90
90
|
temp_tables_table = get_tables(
|
91
|
-
mrsm_instance
|
92
|
-
create
|
93
|
-
debug
|
91
|
+
mrsm_instance=self,
|
92
|
+
create=False,
|
93
|
+
debug=debug,
|
94
94
|
)['temp_tables']
|
95
95
|
query = (
|
96
96
|
sqlalchemy.select(temp_tables_table.c.table)
|
@@ -384,7 +384,7 @@ def get_create_index_queries(
|
|
384
384
|
|
385
385
|
Returns
|
386
386
|
-------
|
387
|
-
A dictionary of
|
387
|
+
A dictionary of index names mapping to lists of queries.
|
388
388
|
"""
|
389
389
|
### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly.
|
390
390
|
if self.flavor == 'duckdb':
|
@@ -400,7 +400,8 @@ def get_create_index_queries(
|
|
400
400
|
index_queries = {}
|
401
401
|
|
402
402
|
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
|
403
|
-
|
403
|
+
index_names = pipe.get_indices()
|
404
|
+
indices = pipe.indices
|
404
405
|
|
405
406
|
_datetime = pipe.get_columns('datetime', error=False)
|
406
407
|
_datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
|
@@ -409,8 +410,8 @@ def get_create_index_queries(
|
|
409
410
|
if _datetime is not None else None
|
410
411
|
)
|
411
412
|
_datetime_index_name = (
|
412
|
-
sql_item_name(
|
413
|
-
if
|
413
|
+
sql_item_name(index_names['datetime'], self.flavor, None)
|
414
|
+
if index_names.get('datetime', None)
|
414
415
|
else None
|
415
416
|
)
|
416
417
|
_id = pipe.get_columns('id', error=False)
|
@@ -421,8 +422,8 @@ def get_create_index_queries(
|
|
421
422
|
)
|
422
423
|
|
423
424
|
_id_index_name = (
|
424
|
-
sql_item_name(
|
425
|
-
if
|
425
|
+
sql_item_name(index_names['id'], self.flavor, None)
|
426
|
+
if index_names.get('id', None)
|
426
427
|
else None
|
427
428
|
)
|
428
429
|
_pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
@@ -491,18 +492,22 @@ def get_create_index_queries(
|
|
491
492
|
if id_query is not None:
|
492
493
|
index_queries[_id] = id_query if isinstance(id_query, list) else [id_query]
|
493
494
|
|
494
|
-
|
495
495
|
### Create indices for other labels in `pipe.columns`.
|
496
|
-
|
496
|
+
other_index_names = {
|
497
497
|
ix_key: ix_unquoted
|
498
|
-
for ix_key, ix_unquoted in
|
498
|
+
for ix_key, ix_unquoted in index_names.items()
|
499
499
|
if ix_key not in ('datetime', 'id')
|
500
500
|
}
|
501
|
-
for ix_key, ix_unquoted in
|
501
|
+
for ix_key, ix_unquoted in other_index_names.items():
|
502
502
|
ix_name = sql_item_name(ix_unquoted, self.flavor, None)
|
503
|
-
|
504
|
-
|
505
|
-
|
503
|
+
cols = indices[ix_key]
|
504
|
+
if not isinstance(cols, (list, tuple)):
|
505
|
+
cols = [cols]
|
506
|
+
cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
|
507
|
+
if not cols_names:
|
508
|
+
continue
|
509
|
+
cols_names_str = ", ".join(cols_names)
|
510
|
+
index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
|
506
511
|
|
507
512
|
existing_cols_types = pipe.get_columns_types(debug=debug)
|
508
513
|
indices_cols_str = ', '.join(
|
@@ -17,8 +17,9 @@ from meerschaum.utils.warnings import warn
|
|
17
17
|
### database flavors that can use bulk insert
|
18
18
|
_bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
|
19
19
|
### flavors that do not support chunks
|
20
|
-
_disallow_chunks_flavors =
|
20
|
+
_disallow_chunks_flavors = ['duckdb']
|
21
21
|
_max_chunks_flavors = {'sqlite': 1000,}
|
22
|
+
SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
|
22
23
|
|
23
24
|
|
24
25
|
def read(
|
@@ -97,7 +98,7 @@ def read(
|
|
97
98
|
Defaults to `SQLConnector.schema`.
|
98
99
|
|
99
100
|
as_chunks: bool, default False
|
100
|
-
If `True`, return a list of DataFrames.
|
101
|
+
If `True`, return a list of DataFrames.
|
101
102
|
Otherwise return a single DataFrame.
|
102
103
|
|
103
104
|
as_iterator: bool, default False
|
@@ -127,7 +128,6 @@ def read(
|
|
127
128
|
from meerschaum.utils.pool import get_pool
|
128
129
|
from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
|
129
130
|
import warnings
|
130
|
-
import inspect
|
131
131
|
import traceback
|
132
132
|
from decimal import Decimal
|
133
133
|
pd = import_pandas()
|
@@ -140,6 +140,7 @@ def read(
|
|
140
140
|
chunksize = None
|
141
141
|
schema = schema or self.schema
|
142
142
|
|
143
|
+
pool = get_pool(workers=workers)
|
143
144
|
sqlalchemy = attempt_import("sqlalchemy")
|
144
145
|
default_chunksize = self._sys_config.get('chunksize', None)
|
145
146
|
chunksize = chunksize if chunksize != -1 else default_chunksize
|
@@ -157,7 +158,7 @@ def read(
|
|
157
158
|
f"The specified chunksize of {chunksize} exceeds the maximum of "
|
158
159
|
+ f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
|
159
160
|
+ f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
|
160
|
-
stacklevel
|
161
|
+
stacklevel=3,
|
161
162
|
)
|
162
163
|
chunksize = _max_chunks_flavors[self.flavor]
|
163
164
|
|
@@ -184,8 +185,8 @@ def read(
|
|
184
185
|
truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
|
185
186
|
if truncated_table_name != str(query_or_table) and not silent:
|
186
187
|
warn(
|
187
|
-
f"Table '{
|
188
|
-
+ f" will instead
|
188
|
+
f"Table '{query_or_table}' is too long for '{self.flavor}',"
|
189
|
+
+ f" will instead read the table '{truncated_table_name}'."
|
189
190
|
)
|
190
191
|
|
191
192
|
query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
|
@@ -204,6 +205,34 @@ def read(
|
|
204
205
|
|
205
206
|
chunk_list = []
|
206
207
|
chunk_hook_results = []
|
208
|
+
def _process_chunk(_chunk, _retry_on_failure: bool = True):
|
209
|
+
if not as_hook_results:
|
210
|
+
chunk_list.append(_chunk)
|
211
|
+
if chunk_hook is None:
|
212
|
+
return None
|
213
|
+
|
214
|
+
result = None
|
215
|
+
try:
|
216
|
+
result = chunk_hook(
|
217
|
+
_chunk,
|
218
|
+
workers=workers,
|
219
|
+
chunksize=chunksize,
|
220
|
+
debug=debug,
|
221
|
+
**kw
|
222
|
+
)
|
223
|
+
except Exception:
|
224
|
+
result = False, traceback.format_exc()
|
225
|
+
from meerschaum.utils.formatting import get_console
|
226
|
+
if not silent:
|
227
|
+
get_console().print_exception()
|
228
|
+
|
229
|
+
### If the chunk fails to process, try it again one more time.
|
230
|
+
if isinstance(result, tuple) and result[0] is False:
|
231
|
+
if _retry_on_failure:
|
232
|
+
return _process_chunk(_chunk, _retry_on_failure=False)
|
233
|
+
|
234
|
+
return result
|
235
|
+
|
207
236
|
try:
|
208
237
|
stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
|
209
238
|
with warnings.catch_warnings():
|
@@ -235,52 +264,32 @@ def read(
|
|
235
264
|
)
|
236
265
|
else:
|
237
266
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
267
|
+
def get_chunk_generator(connectable):
|
268
|
+
chunk_generator = pd.read_sql_query(
|
269
|
+
formatted_query,
|
270
|
+
self.engine,
|
271
|
+
**read_sql_query_kwargs
|
272
|
+
)
|
273
|
+
to_return = (
|
274
|
+
chunk_generator
|
275
|
+
if as_iterator or chunksize is None
|
276
|
+
else (
|
277
|
+
list(pool.imap(_process_chunk, chunk_generator))
|
278
|
+
if as_hook_results
|
279
|
+
else None
|
244
280
|
)
|
281
|
+
)
|
282
|
+
return chunk_generator, to_return
|
283
|
+
|
284
|
+
if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
|
285
|
+
chunk_generator, to_return = get_chunk_generator(self.engine)
|
286
|
+
else:
|
287
|
+
with self.engine.begin() as transaction:
|
288
|
+
with transaction.execution_options(stream_results=stream_results) as connection:
|
289
|
+
chunk_generator, to_return = get_chunk_generator(connection)
|
245
290
|
|
246
|
-
|
247
|
-
|
248
|
-
return chunk_generator
|
249
|
-
|
250
|
-
### We must consume the generator in this context if using server-side cursors.
|
251
|
-
if stream_results:
|
252
|
-
|
253
|
-
pool = get_pool(workers=workers)
|
254
|
-
|
255
|
-
def _process_chunk(_chunk, _retry_on_failure: bool = True):
|
256
|
-
if not as_hook_results:
|
257
|
-
chunk_list.append(_chunk)
|
258
|
-
result = None
|
259
|
-
if chunk_hook is not None:
|
260
|
-
try:
|
261
|
-
result = chunk_hook(
|
262
|
-
_chunk,
|
263
|
-
workers = workers,
|
264
|
-
chunksize = chunksize,
|
265
|
-
debug = debug,
|
266
|
-
**kw
|
267
|
-
)
|
268
|
-
except Exception as e:
|
269
|
-
result = False, traceback.format_exc()
|
270
|
-
from meerschaum.utils.formatting import get_console
|
271
|
-
if not silent:
|
272
|
-
get_console().print_exception()
|
273
|
-
|
274
|
-
### If the chunk fails to process, try it again one more time.
|
275
|
-
if isinstance(result, tuple) and result[0] is False:
|
276
|
-
if _retry_on_failure:
|
277
|
-
return _process_chunk(_chunk, _retry_on_failure=False)
|
278
|
-
|
279
|
-
return result
|
280
|
-
|
281
|
-
chunk_hook_results = list(pool.imap(_process_chunk, chunk_generator))
|
282
|
-
if as_hook_results:
|
283
|
-
return chunk_hook_results
|
291
|
+
if to_return is not None:
|
292
|
+
return to_return
|
284
293
|
|
285
294
|
except Exception as e:
|
286
295
|
if debug:
|
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -98,6 +98,8 @@ class Pipe:
|
|
98
98
|
attributes,
|
99
99
|
parameters,
|
100
100
|
columns,
|
101
|
+
indices,
|
102
|
+
indexes,
|
101
103
|
dtypes,
|
102
104
|
get_columns,
|
103
105
|
get_columns_types,
|
@@ -145,6 +147,7 @@ class Pipe:
|
|
145
147
|
location: Optional[str] = None,
|
146
148
|
parameters: Optional[Dict[str, Any]] = None,
|
147
149
|
columns: Union[Dict[str, str], List[str], None] = None,
|
150
|
+
indices: Optional[Dict[str, Union[str, List[str]]]] = None,
|
148
151
|
tags: Optional[List[str]] = None,
|
149
152
|
target: Optional[str] = None,
|
150
153
|
dtypes: Optional[Dict[str, str]] = None,
|
@@ -156,6 +159,7 @@ class Pipe:
|
|
156
159
|
connector_keys: Optional[str] = None,
|
157
160
|
metric_key: Optional[str] = None,
|
158
161
|
location_key: Optional[str] = None,
|
162
|
+
indexes: Union[Dict[str, str], List[str], None] = None,
|
159
163
|
):
|
160
164
|
"""
|
161
165
|
Parameters
|
@@ -174,10 +178,14 @@ class Pipe:
|
|
174
178
|
e.g. columns and other attributes.
|
175
179
|
You can edit these parameters with `edit pipes`.
|
176
180
|
|
177
|
-
columns:
|
181
|
+
columns: Union[Dict[str, str], List[str], None], default None
|
178
182
|
Set the `columns` dictionary of `parameters`.
|
179
183
|
If `parameters` is also provided, this dictionary is added under the `'columns'` key.
|
180
184
|
|
185
|
+
indices: Optional[Dict[str, Union[str, List[str]]]], default None
|
186
|
+
Set the `indices` dictionary of `parameters`.
|
187
|
+
If `parameters` is also provided, this dictionary is added under the `'indices'` key.
|
188
|
+
|
181
189
|
tags: Optional[List[str]], default None
|
182
190
|
A list of strings to be added under the `'tags'` key of `parameters`.
|
183
191
|
You can select pipes with certain tags using `--tags`.
|
@@ -255,6 +263,20 @@ class Pipe:
|
|
255
263
|
elif columns is not None:
|
256
264
|
warn(f"The provided columns are of invalid type '{type(columns)}'.")
|
257
265
|
|
266
|
+
indices = (
|
267
|
+
indices
|
268
|
+
or indexes
|
269
|
+
or self._attributes.get('parameters', {}).get('indices', None)
|
270
|
+
or self._attributes.get('parameters', {}).get('indexes', None)
|
271
|
+
) or columns
|
272
|
+
if isinstance(indices, dict):
|
273
|
+
indices_key = (
|
274
|
+
'indexes'
|
275
|
+
if 'indexes' in self._attributes['parameters']
|
276
|
+
else 'indices'
|
277
|
+
)
|
278
|
+
self._attributes['parameters'][indices_key] = indices
|
279
|
+
|
258
280
|
if isinstance(tags, (list, tuple)):
|
259
281
|
self._attributes['parameters']['tags'] = tags
|
260
282
|
elif tags is not None:
|