meerschaum 2.9.5__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +33 -4
- meerschaum/_internal/cli/__init__.py +6 -0
- meerschaum/_internal/cli/daemons.py +103 -0
- meerschaum/_internal/cli/entry.py +220 -0
- meerschaum/_internal/cli/workers.py +435 -0
- meerschaum/_internal/docs/index.py +48 -2
- meerschaum/_internal/entry.py +50 -14
- meerschaum/_internal/shell/Shell.py +121 -29
- meerschaum/_internal/shell/__init__.py +4 -1
- meerschaum/_internal/static.py +359 -0
- meerschaum/_internal/term/TermPageHandler.py +1 -2
- meerschaum/_internal/term/__init__.py +40 -6
- meerschaum/_internal/term/tools.py +33 -8
- meerschaum/actions/__init__.py +6 -4
- meerschaum/actions/api.py +53 -13
- meerschaum/actions/attach.py +1 -0
- meerschaum/actions/bootstrap.py +8 -8
- meerschaum/actions/delete.py +4 -2
- meerschaum/actions/edit.py +171 -25
- meerschaum/actions/login.py +8 -8
- meerschaum/actions/register.py +143 -6
- meerschaum/actions/reload.py +22 -5
- meerschaum/actions/restart.py +14 -0
- meerschaum/actions/show.py +184 -31
- meerschaum/actions/start.py +166 -17
- meerschaum/actions/stop.py +38 -2
- meerschaum/actions/sync.py +7 -2
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +45 -15
- meerschaum/api/_events.py +46 -4
- meerschaum/api/_oauth2.py +162 -9
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/custom.py +4 -3
- meerschaum/api/dash/callbacks/dashboard.py +198 -118
- meerschaum/api/dash/callbacks/jobs.py +14 -7
- meerschaum/api/dash/callbacks/login.py +10 -1
- meerschaum/api/dash/callbacks/pipes.py +194 -14
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +10 -3
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/tokens.py +389 -0
- meerschaum/api/dash/components.py +36 -15
- meerschaum/api/dash/jobs.py +1 -1
- meerschaum/api/dash/keys.py +35 -93
- meerschaum/api/dash/pages/__init__.py +2 -1
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
- meerschaum/api/dash/pages/login.py +2 -2
- meerschaum/api/dash/pages/pipes.py +16 -5
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/tokens.py +53 -0
- meerschaum/api/dash/pipes.py +382 -95
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +603 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +18 -6
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +91 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/static/js/terminado.js +3 -0
- meerschaum/api/resources/static/js/xterm-addon-unicode11.js +2 -0
- meerschaum/api/resources/templates/termpage.html +13 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +26 -35
- meerschaum/api/routes/_login.py +120 -15
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +178 -143
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/api/routes/_webterm.py +3 -3
- meerschaum/config/__init__.py +100 -30
- meerschaum/config/_default.py +132 -64
- meerschaum/config/_edit.py +38 -32
- meerschaum/config/_formatting.py +2 -0
- meerschaum/config/_patch.py +10 -8
- meerschaum/config/_paths.py +133 -13
- meerschaum/config/_read_config.py +87 -36
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/environment.py +262 -0
- meerschaum/config/stack/__init__.py +37 -15
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +11 -6
- meerschaum/connectors/__init__.py +41 -22
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +12 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +23 -32
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +159 -0
- meerschaum/connectors/instance/_tokens.py +317 -0
- meerschaum/connectors/instance/_users.py +188 -0
- meerschaum/connectors/parse.py +5 -2
- meerschaum/connectors/sql/_SQLConnector.py +22 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +12 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +295 -278
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +36 -2
- meerschaum/connectors/sql/tables/__init__.py +254 -122
- meerschaum/connectors/valkey/_ValkeyConnector.py +5 -7
- meerschaum/connectors/valkey/_pipes.py +60 -31
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +115 -85
- meerschaum/core/Pipe/_attributes.py +425 -124
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_cache.py +555 -0
- meerschaum/core/Pipe/_clear.py +0 -11
- meerschaum/core/Pipe/_data.py +96 -68
- meerschaum/core/Pipe/_deduplicate.py +0 -13
- meerschaum/core/Pipe/_delete.py +12 -21
- meerschaum/core/Pipe/_drop.py +11 -23
- meerschaum/core/Pipe/_dtypes.py +49 -19
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_index.py +8 -14
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +123 -204
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +16 -12
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +35 -10
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Executor.py +88 -4
- meerschaum/jobs/_Job.py +149 -38
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +8 -3
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +301 -88
- meerschaum/plugins/bootstrap.py +510 -4
- meerschaum/utils/_get_pipes.py +97 -30
- meerschaum/utils/daemon/Daemon.py +199 -43
- meerschaum/utils/daemon/FileDescriptorInterceptor.py +0 -1
- meerschaum/utils/daemon/RotatingFile.py +63 -36
- meerschaum/utils/daemon/StdinFile.py +53 -13
- meerschaum/utils/daemon/__init__.py +47 -6
- meerschaum/utils/daemon/_names.py +6 -3
- meerschaum/utils/dataframe.py +479 -81
- meerschaum/utils/debug.py +49 -19
- meerschaum/utils/dtypes/__init__.py +476 -34
- meerschaum/utils/dtypes/sql.py +369 -29
- meerschaum/utils/formatting/__init__.py +5 -2
- meerschaum/utils/formatting/_jobs.py +1 -1
- meerschaum/utils/formatting/_pipes.py +52 -50
- meerschaum/utils/formatting/_pprint.py +1 -0
- meerschaum/utils/formatting/_shell.py +44 -18
- meerschaum/utils/misc.py +268 -186
- meerschaum/utils/packages/__init__.py +25 -40
- meerschaum/utils/packages/_packages.py +42 -34
- meerschaum/utils/pipes.py +213 -0
- meerschaum/utils/process.py +2 -2
- meerschaum/utils/prompt.py +175 -144
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +134 -47
- meerschaum/utils/threading.py +42 -0
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +7 -7
- meerschaum/utils/warnings.py +19 -13
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/METADATA +94 -96
- meerschaum-3.0.0.dist-info/RECORD +289 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/_environment.py +0 -145
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/zip-safe +0 -0
@@ -25,7 +25,6 @@ def register_pipe(
|
|
25
25
|
Register a new pipe.
|
26
26
|
A pipe's attributes must be set before registering.
|
27
27
|
"""
|
28
|
-
from meerschaum.utils.debug import dprint
|
29
28
|
from meerschaum.utils.packages import attempt_import
|
30
29
|
from meerschaum.utils.sql import json_flavors
|
31
30
|
|
@@ -45,7 +44,7 @@ def register_pipe(
|
|
45
44
|
### (which shouldn't be able to be registered anyway but that's an issue for later).
|
46
45
|
parameters = None
|
47
46
|
try:
|
48
|
-
parameters = pipe.
|
47
|
+
parameters = pipe.get_parameters(apply_symlinks=False)
|
49
48
|
except Exception as e:
|
50
49
|
if debug:
|
51
50
|
dprint(str(e))
|
@@ -76,7 +75,7 @@ def register_pipe(
|
|
76
75
|
|
77
76
|
def edit_pipe(
|
78
77
|
self,
|
79
|
-
pipe
|
78
|
+
pipe: mrsm.Pipe,
|
80
79
|
patch: bool = False,
|
81
80
|
debug: bool = False,
|
82
81
|
**kw : Any
|
@@ -108,10 +107,10 @@ def edit_pipe(
|
|
108
107
|
original_parameters = Pipe(
|
109
108
|
pipe.connector_keys, pipe.metric_key, pipe.location_key,
|
110
109
|
mrsm_instance=pipe.instance_keys
|
111
|
-
).
|
110
|
+
).get_parameters(apply_symlinks=False)
|
112
111
|
parameters = apply_patch_to_config(
|
113
112
|
original_parameters,
|
114
|
-
pipe.parameters
|
113
|
+
pipe._attributes['parameters']
|
115
114
|
)
|
116
115
|
|
117
116
|
### ensure pipes table exists
|
@@ -147,8 +146,10 @@ def fetch_pipes_keys(
|
|
147
146
|
location_keys: Optional[List[str]] = None,
|
148
147
|
tags: Optional[List[str]] = None,
|
149
148
|
params: Optional[Dict[str, Any]] = None,
|
150
|
-
debug: bool = False
|
151
|
-
) ->
|
149
|
+
debug: bool = False,
|
150
|
+
) -> List[
|
151
|
+
Tuple[str, str, Union[str, None], Dict[str, Any]]
|
152
|
+
]:
|
152
153
|
"""
|
153
154
|
Return a list of tuples corresponding to the parameters provided.
|
154
155
|
|
@@ -163,18 +164,28 @@ def fetch_pipes_keys(
|
|
163
164
|
location_keys: Optional[List[str]], default None
|
164
165
|
List of location_keys to search by.
|
165
166
|
|
167
|
+
tags: Optional[List[str]], default None
|
168
|
+
List of pipes to search by.
|
169
|
+
|
166
170
|
params: Optional[Dict[str, Any]], default None
|
167
171
|
Dictionary of additional parameters to search by.
|
168
172
|
E.g. `--params pipe_id:1`
|
169
173
|
|
170
174
|
debug: bool, default False
|
171
175
|
Verbosity toggle.
|
176
|
+
|
177
|
+
Returns
|
178
|
+
-------
|
179
|
+
A list of tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters).
|
172
180
|
"""
|
173
|
-
from meerschaum.utils.debug import dprint
|
174
181
|
from meerschaum.utils.packages import attempt_import
|
175
182
|
from meerschaum.utils.misc import separate_negation_values
|
176
|
-
from meerschaum.utils.sql import
|
177
|
-
|
183
|
+
from meerschaum.utils.sql import (
|
184
|
+
OMIT_NULLSFIRST_FLAVORS,
|
185
|
+
table_exists,
|
186
|
+
json_flavors,
|
187
|
+
)
|
188
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
178
189
|
import json
|
179
190
|
from copy import deepcopy
|
180
191
|
sqlalchemy, sqlalchemy_sql_functions = attempt_import(
|
@@ -240,11 +251,18 @@ def fetch_pipes_keys(
|
|
240
251
|
) for key, val in _params.items()
|
241
252
|
if not isinstance(val, (list, tuple)) and key in pipes_tbl.c
|
242
253
|
]
|
254
|
+
if self.flavor in json_flavors:
|
255
|
+
sqlalchemy_dialects = mrsm.attempt_import('sqlalchemy.dialects', lazy=False)
|
256
|
+
JSONB = sqlalchemy_dialects.postgresql.JSONB
|
257
|
+
else:
|
258
|
+
JSONB = sqlalchemy.String
|
259
|
+
|
243
260
|
select_cols = (
|
244
261
|
[
|
245
262
|
pipes_tbl.c.connector_keys,
|
246
263
|
pipes_tbl.c.metric_key,
|
247
264
|
pipes_tbl.c.location_key,
|
265
|
+
pipes_tbl.c.parameters,
|
248
266
|
]
|
249
267
|
)
|
250
268
|
|
@@ -261,25 +279,43 @@ def fetch_pipes_keys(
|
|
261
279
|
in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]
|
262
280
|
|
263
281
|
ors, nands = [], []
|
264
|
-
|
265
|
-
|
266
|
-
for
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
282
|
+
if self.flavor in json_flavors:
|
283
|
+
tags_jsonb = pipes_tbl.c['parameters'].cast(JSONB).op('->')('tags').cast(JSONB)
|
284
|
+
for _in_tags, _ex_tags in in_ex_tag_groups:
|
285
|
+
if _in_tags:
|
286
|
+
ors.append(
|
287
|
+
sqlalchemy.and_(
|
288
|
+
tags_jsonb.contains(_in_tags)
|
289
|
+
)
|
290
|
+
)
|
291
|
+
for xt in _ex_tags:
|
292
|
+
nands.append(
|
293
|
+
sqlalchemy.not_(
|
294
|
+
sqlalchemy.and_(
|
295
|
+
tags_jsonb.contains([xt])
|
296
|
+
)
|
297
|
+
)
|
298
|
+
)
|
299
|
+
else:
|
300
|
+
for _in_tags, _ex_tags in in_ex_tag_groups:
|
301
|
+
sub_ands = []
|
302
|
+
for nt in _in_tags:
|
303
|
+
sub_ands.append(
|
304
|
+
sqlalchemy.cast(
|
305
|
+
pipes_tbl.c['parameters'],
|
306
|
+
sqlalchemy.String,
|
307
|
+
).like(f'%"tags":%"{nt}"%')
|
308
|
+
)
|
309
|
+
if sub_ands:
|
310
|
+
ors.append(sqlalchemy.and_(*sub_ands))
|
311
|
+
|
312
|
+
for xt in _ex_tags:
|
313
|
+
nands.append(
|
314
|
+
sqlalchemy.cast(
|
315
|
+
pipes_tbl.c['parameters'],
|
316
|
+
sqlalchemy.String,
|
317
|
+
).not_like(f'%"tags":%"{xt}"%')
|
318
|
+
)
|
283
319
|
|
284
320
|
q = q.where(sqlalchemy.and_(*nands)) if nands else q
|
285
321
|
q = q.where(sqlalchemy.or_(*ors)) if ors else q
|
@@ -294,7 +330,7 @@ def fetch_pipes_keys(
|
|
294
330
|
|
295
331
|
### execute the query and return a list of tuples
|
296
332
|
if debug:
|
297
|
-
dprint(q
|
333
|
+
dprint(q)
|
298
334
|
try:
|
299
335
|
rows = (
|
300
336
|
self.execute(q).fetchall()
|
@@ -307,7 +343,7 @@ def fetch_pipes_keys(
|
|
307
343
|
except Exception as e:
|
308
344
|
error(str(e))
|
309
345
|
|
310
|
-
return
|
346
|
+
return rows
|
311
347
|
|
312
348
|
|
313
349
|
def create_pipe_indices(
|
@@ -338,7 +374,9 @@ def create_indices(
|
|
338
374
|
"""
|
339
375
|
Create a pipe's indices.
|
340
376
|
"""
|
341
|
-
|
377
|
+
if pipe.__dict__.get('_skip_check_indices', False):
|
378
|
+
return True
|
379
|
+
|
342
380
|
if debug:
|
343
381
|
dprint(f"Creating indices for {pipe}...")
|
344
382
|
|
@@ -348,7 +386,7 @@ def create_indices(
|
|
348
386
|
|
349
387
|
cols_to_include = set((columns or []) + (indices or [])) or None
|
350
388
|
|
351
|
-
|
389
|
+
pipe._clear_cache_key('_columns_indices', debug=debug)
|
352
390
|
ix_queries = {
|
353
391
|
col: queries
|
354
392
|
for col, queries in self.get_create_index_queries(pipe, debug=debug).items()
|
@@ -392,7 +430,6 @@ def drop_indices(
|
|
392
430
|
"""
|
393
431
|
Drop a pipe's indices.
|
394
432
|
"""
|
395
|
-
from meerschaum.utils.debug import dprint
|
396
433
|
if debug:
|
397
434
|
dprint(f"Dropping indices for {pipe}...")
|
398
435
|
|
@@ -425,7 +462,7 @@ def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
|
|
425
462
|
-------
|
426
463
|
A dictionary of index keys to column names.
|
427
464
|
"""
|
428
|
-
from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS
|
465
|
+
from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS, truncate_item_name
|
429
466
|
_parameters = pipe.parameters
|
430
467
|
_index_template = _parameters.get('index_template', "IX_{schema_str}{target}_{column_names}")
|
431
468
|
_schema = self.get_pipe_schema(pipe)
|
@@ -466,7 +503,7 @@ def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
|
|
466
503
|
continue
|
467
504
|
seen_index_names[index_name] = ix
|
468
505
|
return {
|
469
|
-
ix: index_name
|
506
|
+
ix: truncate_item_name(index_name, flavor=self.flavor)
|
470
507
|
for index_name, ix in seen_index_names.items()
|
471
508
|
}
|
472
509
|
|
@@ -603,7 +640,10 @@ def get_create_index_queries(
|
|
603
640
|
### create datetime index
|
604
641
|
dt_query = None
|
605
642
|
if _datetime is not None:
|
606
|
-
if
|
643
|
+
if (
|
644
|
+
self.flavor in ('timescaledb', 'timescaledb-ha')
|
645
|
+
and pipe.parameters.get('hypertable', True)
|
646
|
+
):
|
607
647
|
_id_count = (
|
608
648
|
get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
|
609
649
|
if (_id is not None and _create_space_partition) else None
|
@@ -719,7 +759,7 @@ def get_create_index_queries(
|
|
719
759
|
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
720
760
|
)
|
721
761
|
])
|
722
|
-
elif self.flavor
|
762
|
+
elif self.flavor in ('timescaledb', 'timescaledb-ha'):
|
723
763
|
primary_queries.extend([
|
724
764
|
(
|
725
765
|
f"ALTER TABLE {_pipe_name}\n"
|
@@ -758,7 +798,7 @@ def get_create_index_queries(
|
|
758
798
|
|
759
799
|
### create id index
|
760
800
|
if _id_name is not None:
|
761
|
-
if self.flavor
|
801
|
+
if self.flavor in ('timescaledb', 'timescaledb-ha'):
|
762
802
|
### Already created indices via create_hypertable.
|
763
803
|
id_query = (
|
764
804
|
None if (_id is not None and _create_space_partition)
|
@@ -797,7 +837,7 @@ def get_create_index_queries(
|
|
797
837
|
|
798
838
|
cols_names_str = ", ".join(cols_names)
|
799
839
|
index_query_params_clause = f" ({cols_names_str})"
|
800
|
-
if self.flavor
|
840
|
+
if self.flavor in ('postgis', 'timescaledb-ha'):
|
801
841
|
for col in cols:
|
802
842
|
col_typ = existing_cols_pd_types.get(cols[0], 'object')
|
803
843
|
if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
|
@@ -1005,6 +1045,8 @@ def get_pipe_data(
|
|
1005
1045
|
limit: Optional[int] = None,
|
1006
1046
|
begin_add_minutes: int = 0,
|
1007
1047
|
end_add_minutes: int = 0,
|
1048
|
+
chunksize: Optional[int] = -1,
|
1049
|
+
as_iterator: bool = False,
|
1008
1050
|
debug: bool = False,
|
1009
1051
|
**kw: Any
|
1010
1052
|
) -> Union[pd.DataFrame, None]:
|
@@ -1041,14 +1083,17 @@ def get_pipe_data(
|
|
1041
1083
|
If specified, limit the number of rows retrieved to this value.
|
1042
1084
|
|
1043
1085
|
begin_add_minutes: int, default 0
|
1044
|
-
The number of minutes to add to the `begin` datetime (i.e. `DATEADD
|
1086
|
+
The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
|
1045
1087
|
|
1046
1088
|
end_add_minutes: int, default 0
|
1047
|
-
The number of minutes to add to the `end` datetime (i.e. `DATEADD
|
1089
|
+
The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
|
1048
1090
|
|
1049
1091
|
chunksize: Optional[int], default -1
|
1050
1092
|
The size of dataframe chunks to load into memory.
|
1051
1093
|
|
1094
|
+
as_iterator: bool, default False
|
1095
|
+
If `True`, return the chunks iterator directly.
|
1096
|
+
|
1052
1097
|
debug: bool, default False
|
1053
1098
|
Verbosity toggle.
|
1054
1099
|
|
@@ -1057,43 +1102,58 @@ def get_pipe_data(
|
|
1057
1102
|
A `pd.DataFrame` of the pipe's data.
|
1058
1103
|
|
1059
1104
|
"""
|
1060
|
-
import
|
1061
|
-
from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
|
1105
|
+
import functools
|
1062
1106
|
from meerschaum.utils.packages import import_pandas
|
1063
|
-
from meerschaum.utils.dtypes import
|
1064
|
-
attempt_cast_to_numeric,
|
1065
|
-
attempt_cast_to_uuid,
|
1066
|
-
attempt_cast_to_bytes,
|
1067
|
-
attempt_cast_to_geometry,
|
1068
|
-
are_dtypes_equal,
|
1069
|
-
)
|
1107
|
+
from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
|
1070
1108
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
1071
1109
|
pd = import_pandas()
|
1072
1110
|
is_dask = 'dask' in pd.__name__
|
1073
1111
|
|
1074
1112
|
cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
|
1113
|
+
pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
|
1114
|
+
|
1115
|
+
remote_pandas_types = {
|
1116
|
+
col: to_pandas_dtype(get_pd_type_from_db_type(typ))
|
1117
|
+
for col, typ in cols_types.items()
|
1118
|
+
}
|
1119
|
+
remote_dt_cols_types = {
|
1120
|
+
col: typ
|
1121
|
+
for col, typ in remote_pandas_types.items()
|
1122
|
+
if are_dtypes_equal(typ, 'datetime')
|
1123
|
+
}
|
1124
|
+
remote_dt_tz_aware_cols_types = {
|
1125
|
+
col: typ
|
1126
|
+
for col, typ in remote_dt_cols_types.items()
|
1127
|
+
if ',' in typ or typ == 'datetime'
|
1128
|
+
}
|
1129
|
+
remote_dt_tz_naive_cols_types = {
|
1130
|
+
col: typ
|
1131
|
+
for col, typ in remote_dt_cols_types.items()
|
1132
|
+
if col not in remote_dt_tz_aware_cols_types
|
1133
|
+
}
|
1134
|
+
|
1135
|
+
configured_pandas_types = {
|
1136
|
+
col: to_pandas_dtype(typ)
|
1137
|
+
for col, typ in pipe_dtypes.items()
|
1138
|
+
}
|
1139
|
+
configured_lower_precision_dt_cols_types = {
|
1140
|
+
col: typ
|
1141
|
+
for col, typ in pipe_dtypes.items()
|
1142
|
+
if (
|
1143
|
+
are_dtypes_equal('datetime', typ)
|
1144
|
+
and '[' in typ
|
1145
|
+
and 'ns' not in typ
|
1146
|
+
)
|
1147
|
+
|
1148
|
+
}
|
1149
|
+
|
1075
1150
|
dtypes = {
|
1076
|
-
**
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
**
|
1081
|
-
col: get_pd_type_from_db_type(typ)
|
1082
|
-
for col, typ in cols_types.items()
|
1083
|
-
}
|
1151
|
+
**remote_pandas_types,
|
1152
|
+
**configured_pandas_types,
|
1153
|
+
**remote_dt_tz_aware_cols_types,
|
1154
|
+
**remote_dt_tz_naive_cols_types,
|
1155
|
+
**configured_lower_precision_dt_cols_types
|
1084
1156
|
} if pipe.enforce else {}
|
1085
|
-
if dtypes:
|
1086
|
-
if self.flavor == 'sqlite':
|
1087
|
-
if not pipe.columns.get('datetime', None):
|
1088
|
-
_dt = pipe.guess_datetime()
|
1089
|
-
else:
|
1090
|
-
_dt = pipe.get_columns('datetime')
|
1091
|
-
|
1092
|
-
if _dt:
|
1093
|
-
dt_type = dtypes.get(_dt, 'object').lower()
|
1094
|
-
if 'datetime' not in dt_type:
|
1095
|
-
if 'int' not in dt_type:
|
1096
|
-
dtypes[_dt] = 'datetime64[ns, UTC]'
|
1097
1157
|
|
1098
1158
|
existing_cols = cols_types.keys()
|
1099
1159
|
select_columns = (
|
@@ -1110,13 +1170,20 @@ def get_pipe_data(
|
|
1110
1170
|
and col not in (omit_columns or [])
|
1111
1171
|
]
|
1112
1172
|
) if pipe.enforce else select_columns
|
1173
|
+
|
1113
1174
|
if select_columns:
|
1114
1175
|
dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
|
1176
|
+
|
1115
1177
|
dtypes = {
|
1116
|
-
col:
|
1178
|
+
col: typ
|
1117
1179
|
for col, typ in dtypes.items()
|
1118
|
-
if col in select_columns and col not in (omit_columns or [])
|
1180
|
+
if col in (select_columns or [col]) and col not in (omit_columns or [])
|
1119
1181
|
} if pipe.enforce else {}
|
1182
|
+
|
1183
|
+
if debug:
|
1184
|
+
dprint(f"[{self}] `read()` dtypes:")
|
1185
|
+
mrsm.pprint(dtypes)
|
1186
|
+
|
1120
1187
|
query = self.get_pipe_data_query(
|
1121
1188
|
pipe,
|
1122
1189
|
select_columns=select_columns,
|
@@ -1132,91 +1199,25 @@ def get_pipe_data(
|
|
1132
1199
|
**kw
|
1133
1200
|
)
|
1134
1201
|
|
1202
|
+
read_kwargs = {}
|
1135
1203
|
if is_dask:
|
1136
1204
|
index_col = pipe.columns.get('datetime', None)
|
1137
|
-
|
1138
|
-
|
1139
|
-
numeric_columns = [
|
1140
|
-
col
|
1141
|
-
for col, typ in pipe.dtypes.items()
|
1142
|
-
if typ.startswith('numeric') and col in dtypes
|
1143
|
-
]
|
1144
|
-
uuid_columns = [
|
1145
|
-
col
|
1146
|
-
for col, typ in pipe.dtypes.items()
|
1147
|
-
if typ == 'uuid' and col in dtypes
|
1148
|
-
]
|
1149
|
-
bytes_columns = [
|
1150
|
-
col
|
1151
|
-
for col, typ in pipe.dtypes.items()
|
1152
|
-
if typ == 'bytes' and col in dtypes
|
1153
|
-
]
|
1154
|
-
geometry_columns = [
|
1155
|
-
col
|
1156
|
-
for col, typ in pipe.dtypes.items()
|
1157
|
-
if typ.startswith('geometry') and col in dtypes
|
1158
|
-
]
|
1159
|
-
|
1160
|
-
kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
|
1205
|
+
read_kwargs['index_col'] = index_col
|
1161
1206
|
|
1162
|
-
|
1207
|
+
chunks = self.read(
|
1163
1208
|
query,
|
1209
|
+
chunksize=chunksize,
|
1210
|
+
as_iterator=True,
|
1211
|
+
coerce_float=False,
|
1164
1212
|
dtype=dtypes,
|
1165
1213
|
debug=debug,
|
1166
|
-
**
|
1214
|
+
**read_kwargs
|
1167
1215
|
)
|
1168
|
-
for col in numeric_columns:
|
1169
|
-
if col not in df.columns:
|
1170
|
-
continue
|
1171
|
-
df[col] = df[col].apply(attempt_cast_to_numeric)
|
1172
1216
|
|
1173
|
-
|
1174
|
-
|
1175
|
-
continue
|
1176
|
-
df[col] = df[col].apply(attempt_cast_to_uuid)
|
1177
|
-
|
1178
|
-
for col in bytes_columns:
|
1179
|
-
if col not in df.columns:
|
1180
|
-
continue
|
1181
|
-
df[col] = df[col].apply(attempt_cast_to_bytes)
|
1182
|
-
|
1183
|
-
for col in geometry_columns:
|
1184
|
-
if col not in df.columns:
|
1185
|
-
continue
|
1186
|
-
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1217
|
+
if as_iterator:
|
1218
|
+
return chunks
|
1187
1219
|
|
1188
|
-
|
1189
|
-
ignore_dt_cols = [
|
1190
|
-
col
|
1191
|
-
for col, dtype in pipe.dtypes.items()
|
1192
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
1193
|
-
]
|
1194
|
-
### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
|
1195
|
-
df = (
|
1196
|
-
parse_df_datetimes(
|
1197
|
-
df,
|
1198
|
-
ignore_cols=ignore_dt_cols,
|
1199
|
-
chunksize=kw.get('chunksize', None),
|
1200
|
-
strip_timezone=(pipe.tzinfo is None),
|
1201
|
-
debug=debug,
|
1202
|
-
) if isinstance(df, pd.DataFrame) else (
|
1203
|
-
[
|
1204
|
-
parse_df_datetimes(
|
1205
|
-
c,
|
1206
|
-
ignore_cols=ignore_dt_cols,
|
1207
|
-
chunksize=kw.get('chunksize', None),
|
1208
|
-
strip_timezone=(pipe.tzinfo is None),
|
1209
|
-
debug=debug,
|
1210
|
-
)
|
1211
|
-
for c in df
|
1212
|
-
]
|
1213
|
-
)
|
1214
|
-
)
|
1215
|
-
for col, typ in dtypes.items():
|
1216
|
-
if typ != 'json':
|
1217
|
-
continue
|
1218
|
-
df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
|
1219
|
-
return df
|
1220
|
+
return pd.concat(chunks)
|
1220
1221
|
|
1221
1222
|
|
1222
1223
|
def get_pipe_data_query(
|
@@ -1419,7 +1420,7 @@ def get_pipe_data_query(
|
|
1419
1420
|
if k in existing_cols or skip_existing_cols_check
|
1420
1421
|
}
|
1421
1422
|
if valid_params:
|
1422
|
-
where += build_where(valid_params, self).replace(
|
1423
|
+
where += ' ' + build_where(valid_params, self).lstrip().replace(
|
1423
1424
|
'WHERE', (' AND' if is_dt_bound else " ")
|
1424
1425
|
)
|
1425
1426
|
|
@@ -1503,7 +1504,7 @@ def get_pipe_attributes(
|
|
1503
1504
|
"""
|
1504
1505
|
from meerschaum.connectors.sql.tables import get_tables
|
1505
1506
|
from meerschaum.utils.packages import attempt_import
|
1506
|
-
sqlalchemy = attempt_import('sqlalchemy')
|
1507
|
+
sqlalchemy = attempt_import('sqlalchemy', lazy=False)
|
1507
1508
|
|
1508
1509
|
if pipe.get_id(debug=debug) is None:
|
1509
1510
|
return {}
|
@@ -1514,16 +1515,16 @@ def get_pipe_attributes(
|
|
1514
1515
|
q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
|
1515
1516
|
if debug:
|
1516
1517
|
dprint(q)
|
1517
|
-
|
1518
|
-
|
1518
|
+
rows = (
|
1519
|
+
self.exec(q, silent=True, debug=debug).mappings().all()
|
1519
1520
|
if self.flavor != 'duckdb'
|
1520
|
-
else self.read(q, debug=debug).to_dict(orient='records')
|
1521
|
+
else self.read(q, debug=debug).to_dict(orient='records')
|
1521
1522
|
)
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1523
|
+
if not rows:
|
1524
|
+
return {}
|
1525
|
+
attributes = dict(rows[0])
|
1526
|
+
except Exception:
|
1527
|
+
warn(traceback.format_exc())
|
1527
1528
|
return {}
|
1528
1529
|
|
1529
1530
|
### handle non-PostgreSQL databases (text vs JSON)
|
@@ -1549,13 +1550,7 @@ def create_pipe_table_from_df(
|
|
1549
1550
|
"""
|
1550
1551
|
Create a pipe's table from its configured dtypes and an incoming dataframe.
|
1551
1552
|
"""
|
1552
|
-
from meerschaum.utils.dataframe import
|
1553
|
-
get_json_cols,
|
1554
|
-
get_numeric_cols,
|
1555
|
-
get_uuid_cols,
|
1556
|
-
get_datetime_cols,
|
1557
|
-
get_bytes_cols,
|
1558
|
-
)
|
1553
|
+
from meerschaum.utils.dataframe import get_special_cols
|
1559
1554
|
from meerschaum.utils.sql import (
|
1560
1555
|
get_create_table_queries,
|
1561
1556
|
sql_item_name,
|
@@ -1584,30 +1579,7 @@ def create_pipe_table_from_df(
|
|
1584
1579
|
for col_ix, col in pipe.columns.items()
|
1585
1580
|
if col and col_ix != 'primary'
|
1586
1581
|
},
|
1587
|
-
**
|
1588
|
-
col: 'uuid'
|
1589
|
-
for col in get_uuid_cols(df)
|
1590
|
-
},
|
1591
|
-
**{
|
1592
|
-
col: 'json'
|
1593
|
-
for col in get_json_cols(df)
|
1594
|
-
},
|
1595
|
-
**{
|
1596
|
-
col: 'numeric'
|
1597
|
-
for col in get_numeric_cols(df)
|
1598
|
-
},
|
1599
|
-
**{
|
1600
|
-
col: 'bytes'
|
1601
|
-
for col in get_bytes_cols(df)
|
1602
|
-
},
|
1603
|
-
**{
|
1604
|
-
col: 'datetime64[ns, UTC]'
|
1605
|
-
for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
|
1606
|
-
},
|
1607
|
-
**{
|
1608
|
-
col: 'datetime64[ns]'
|
1609
|
-
for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
|
1610
|
-
},
|
1582
|
+
**get_special_cols(df),
|
1611
1583
|
**pipe.dtypes
|
1612
1584
|
}
|
1613
1585
|
autoincrement = (
|
@@ -1648,8 +1620,8 @@ def sync_pipe(
|
|
1648
1620
|
self,
|
1649
1621
|
pipe: mrsm.Pipe,
|
1650
1622
|
df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
|
1651
|
-
begin:
|
1652
|
-
end:
|
1623
|
+
begin: Union[datetime, int, None] = None,
|
1624
|
+
end: Union[datetime, int, None] = None,
|
1653
1625
|
chunksize: Optional[int] = -1,
|
1654
1626
|
check_existing: bool = True,
|
1655
1627
|
blocking: bool = True,
|
@@ -1669,11 +1641,11 @@ def sync_pipe(
|
|
1669
1641
|
An optional DataFrame or equivalent to sync into the pipe.
|
1670
1642
|
Defaults to `None`.
|
1671
1643
|
|
1672
|
-
begin:
|
1644
|
+
begin: Union[datetime, int, None], default None
|
1673
1645
|
Optionally specify the earliest datetime to search for data.
|
1674
1646
|
Defaults to `None`.
|
1675
1647
|
|
1676
|
-
end:
|
1648
|
+
end: Union[datetime, int, None], default None
|
1677
1649
|
Optionally specify the latest datetime to search for data.
|
1678
1650
|
Defaults to `None`.
|
1679
1651
|
|
@@ -1707,8 +1679,9 @@ def sync_pipe(
|
|
1707
1679
|
UPDATE_QUERIES,
|
1708
1680
|
get_reset_autoincrement_queries,
|
1709
1681
|
)
|
1710
|
-
from meerschaum.utils.dtypes import
|
1682
|
+
from meerschaum.utils.dtypes import get_current_timestamp
|
1711
1683
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1684
|
+
from meerschaum.utils.dataframe import get_special_cols
|
1712
1685
|
from meerschaum import Pipe
|
1713
1686
|
import time
|
1714
1687
|
import copy
|
@@ -1720,6 +1693,7 @@ def sync_pipe(
|
|
1720
1693
|
|
1721
1694
|
start = time.perf_counter()
|
1722
1695
|
pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
|
1696
|
+
dtypes = pipe.get_dtypes(debug=debug)
|
1723
1697
|
|
1724
1698
|
if not pipe.temporary and not pipe.get_id(debug=debug):
|
1725
1699
|
register_tuple = pipe.register(debug=debug)
|
@@ -1736,6 +1710,7 @@ def sync_pipe(
|
|
1736
1710
|
df,
|
1737
1711
|
chunksize=chunksize,
|
1738
1712
|
safe_copy=kw.get('safe_copy', False),
|
1713
|
+
dtypes=dtypes,
|
1739
1714
|
debug=debug,
|
1740
1715
|
)
|
1741
1716
|
|
@@ -1748,36 +1723,17 @@ def sync_pipe(
|
|
1748
1723
|
### Check for new columns.
|
1749
1724
|
add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
|
1750
1725
|
if add_cols_queries:
|
1751
|
-
|
1752
|
-
|
1726
|
+
pipe._clear_cache_key('_columns_types', debug=debug)
|
1727
|
+
pipe._clear_cache_key('_columns_indices', debug=debug)
|
1753
1728
|
if not self.exec_queries(add_cols_queries, debug=debug):
|
1754
1729
|
warn(f"Failed to add new columns to {pipe}.")
|
1755
1730
|
|
1756
1731
|
alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
|
1757
1732
|
if alter_cols_queries:
|
1758
|
-
|
1759
|
-
|
1733
|
+
pipe._clear_cache_key('_columns_types', debug=debug)
|
1734
|
+
pipe._clear_cache_key('_columns_types', debug=debug)
|
1760
1735
|
if not self.exec_queries(alter_cols_queries, debug=debug):
|
1761
1736
|
warn(f"Failed to alter columns for {pipe}.")
|
1762
|
-
else:
|
1763
|
-
_ = pipe.infer_dtypes(persist=True)
|
1764
|
-
|
1765
|
-
### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
|
1766
|
-
### so infer bools and persist them to `dtypes`.
|
1767
|
-
if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
|
1768
|
-
pipe_dtypes = pipe.dtypes
|
1769
|
-
new_bool_cols = {
|
1770
|
-
col: 'bool[pyarrow]'
|
1771
|
-
for col, typ in df.dtypes.items()
|
1772
|
-
if col not in pipe_dtypes
|
1773
|
-
and are_dtypes_equal(str(typ), 'bool')
|
1774
|
-
}
|
1775
|
-
pipe_dtypes.update(new_bool_cols)
|
1776
|
-
pipe.dtypes = pipe_dtypes
|
1777
|
-
if new_bool_cols and not pipe.temporary:
|
1778
|
-
infer_bool_success, infer_bool_msg = pipe.edit(debug=debug)
|
1779
|
-
if not infer_bool_success:
|
1780
|
-
return infer_bool_success, infer_bool_msg
|
1781
1737
|
|
1782
1738
|
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
|
1783
1739
|
if upsert:
|
@@ -1807,7 +1763,7 @@ def sync_pipe(
|
|
1807
1763
|
if 'name' in kw:
|
1808
1764
|
kw.pop('name')
|
1809
1765
|
|
1810
|
-
### Insert new data into
|
1766
|
+
### Insert new data into the target table.
|
1811
1767
|
unseen_kw = copy.deepcopy(kw)
|
1812
1768
|
unseen_kw.update({
|
1813
1769
|
'name': pipe.target,
|
@@ -1828,15 +1784,17 @@ def sync_pipe(
|
|
1828
1784
|
is_new
|
1829
1785
|
and primary_key
|
1830
1786
|
and primary_key
|
1831
|
-
not in
|
1787
|
+
not in dtypes
|
1832
1788
|
and primary_key not in unseen_df.columns
|
1833
1789
|
)
|
1834
1790
|
)
|
1835
1791
|
if autoincrement and autoincrement not in pipe.parameters:
|
1836
|
-
|
1837
|
-
|
1838
|
-
|
1839
|
-
|
1792
|
+
update_success, update_msg = pipe.update_parameters(
|
1793
|
+
{'autoincrement': autoincrement},
|
1794
|
+
debug=debug,
|
1795
|
+
)
|
1796
|
+
if not update_success:
|
1797
|
+
return update_success, update_msg
|
1840
1798
|
|
1841
1799
|
def _check_pk(_df_to_clear):
|
1842
1800
|
if _df_to_clear is None:
|
@@ -1926,6 +1884,14 @@ def sync_pipe(
|
|
1926
1884
|
label=('update' if not upsert else 'upsert'),
|
1927
1885
|
)
|
1928
1886
|
self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
|
1887
|
+
update_dtypes = {
|
1888
|
+
**{
|
1889
|
+
col: str(typ)
|
1890
|
+
for col, typ in update_df.dtypes.items()
|
1891
|
+
},
|
1892
|
+
**get_special_cols(update_df)
|
1893
|
+
}
|
1894
|
+
|
1929
1895
|
temp_pipe = Pipe(
|
1930
1896
|
pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
|
1931
1897
|
instance=pipe.instance_keys,
|
@@ -1934,34 +1900,30 @@ def sync_pipe(
|
|
1934
1900
|
for ix_key, ix in pipe.columns.items()
|
1935
1901
|
if ix and ix in update_df.columns
|
1936
1902
|
},
|
1937
|
-
dtypes=
|
1938
|
-
col: typ
|
1939
|
-
for col, typ in pipe.dtypes.items()
|
1940
|
-
if col in update_df.columns
|
1941
|
-
},
|
1903
|
+
dtypes=update_dtypes,
|
1942
1904
|
target=temp_target,
|
1943
1905
|
temporary=True,
|
1944
1906
|
enforce=False,
|
1945
1907
|
static=True,
|
1946
1908
|
autoincrement=False,
|
1909
|
+
cache=False,
|
1947
1910
|
parameters={
|
1948
1911
|
'schema': self.internal_schema,
|
1949
1912
|
'hypertable': False,
|
1950
1913
|
},
|
1951
1914
|
)
|
1952
|
-
|
1953
|
-
col: get_db_type_from_pd_type(
|
1954
|
-
|
1955
|
-
self.flavor,
|
1956
|
-
)
|
1957
|
-
for col, typ in update_df.dtypes.items()
|
1915
|
+
_temp_columns_types = {
|
1916
|
+
col: get_db_type_from_pd_type(typ, self.flavor)
|
1917
|
+
for col, typ in update_dtypes.items()
|
1958
1918
|
}
|
1959
|
-
|
1960
|
-
temp_pipe.
|
1961
|
-
|
1919
|
+
temp_pipe._cache_value('_columns_types', _temp_columns_types, memory_only=True, debug=debug)
|
1920
|
+
temp_pipe._cache_value('_skip_check_indices', True, memory_only=True, debug=debug)
|
1921
|
+
now_ts = get_current_timestamp('ms', as_int=True) / 1000
|
1922
|
+
temp_pipe._cache_value('_columns_types_timestamp', now_ts, memory_only=True, debug=debug)
|
1962
1923
|
temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
|
1963
1924
|
if not temp_success:
|
1964
1925
|
return temp_success, temp_msg
|
1926
|
+
|
1965
1927
|
existing_cols = pipe.get_columns_types(debug=debug)
|
1966
1928
|
join_cols = [
|
1967
1929
|
col
|
@@ -1969,7 +1931,11 @@ def sync_pipe(
|
|
1969
1931
|
if col and col in existing_cols
|
1970
1932
|
] if not primary_key or self.flavor == 'oracle' else (
|
1971
1933
|
[dt_col, primary_key]
|
1972
|
-
if
|
1934
|
+
if (
|
1935
|
+
self.flavor in ('timescaledb', 'timescaledb-ha')
|
1936
|
+
and dt_col
|
1937
|
+
and dt_col in update_df.columns
|
1938
|
+
)
|
1973
1939
|
else [primary_key]
|
1974
1940
|
)
|
1975
1941
|
update_queries = get_update_queries(
|
@@ -1980,6 +1946,8 @@ def sync_pipe(
|
|
1980
1946
|
upsert=upsert,
|
1981
1947
|
schema=self.get_pipe_schema(pipe),
|
1982
1948
|
patch_schema=self.internal_schema,
|
1949
|
+
target_cols_types=pipe.get_columns_types(debug=debug),
|
1950
|
+
patch_cols_types=_temp_columns_types,
|
1983
1951
|
datetime_col=(dt_col if dt_col in update_df.columns else None),
|
1984
1952
|
identity_insert=(autoincrement and primary_key in update_df.columns),
|
1985
1953
|
null_indices=pipe.null_indices,
|
@@ -2267,13 +2235,13 @@ def sync_pipe_inplace(
|
|
2267
2235
|
|
2268
2236
|
add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
|
2269
2237
|
if add_cols_queries:
|
2270
|
-
|
2271
|
-
|
2238
|
+
pipe._clear_cache_key('_columns_types', debug=debug)
|
2239
|
+
pipe._clear_cache_key('_columns_indices', debug=debug)
|
2272
2240
|
self.exec_queries(add_cols_queries, debug=debug)
|
2273
2241
|
|
2274
2242
|
alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
|
2275
2243
|
if alter_cols_queries:
|
2276
|
-
|
2244
|
+
pipe._clear_cache_key('_columns_types', debug=debug)
|
2277
2245
|
self.exec_queries(alter_cols_queries, debug=debug)
|
2278
2246
|
|
2279
2247
|
insert_queries = [
|
@@ -2576,6 +2544,8 @@ def sync_pipe_inplace(
|
|
2576
2544
|
upsert=upsert,
|
2577
2545
|
schema=self.get_pipe_schema(pipe),
|
2578
2546
|
patch_schema=internal_schema,
|
2547
|
+
target_cols_types=pipe.get_columns_types(debug=debug),
|
2548
|
+
patch_cols_types=delta_cols_types,
|
2579
2549
|
datetime_col=pipe.columns.get('datetime', None),
|
2580
2550
|
flavor=self.flavor,
|
2581
2551
|
null_indices=pipe.null_indices,
|
@@ -2779,7 +2749,6 @@ def pipe_exists(
|
|
2779
2749
|
debug=debug,
|
2780
2750
|
)
|
2781
2751
|
if debug:
|
2782
|
-
from meerschaum.utils.debug import dprint
|
2783
2752
|
dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
|
2784
2753
|
return exists
|
2785
2754
|
|
@@ -2833,7 +2802,6 @@ def get_pipe_rowcount(
|
|
2833
2802
|
error(msg)
|
2834
2803
|
return None
|
2835
2804
|
|
2836
|
-
|
2837
2805
|
flavor = self.flavor if not remote else pipe.connector.flavor
|
2838
2806
|
conn = self if not remote else pipe.connector
|
2839
2807
|
_pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
|
@@ -3068,6 +3036,7 @@ def get_pipe_table(
|
|
3068
3036
|
from meerschaum.utils.sql import get_sqlalchemy_table
|
3069
3037
|
if not pipe.exists(debug=debug):
|
3070
3038
|
return None
|
3039
|
+
|
3071
3040
|
return get_sqlalchemy_table(
|
3072
3041
|
pipe.target,
|
3073
3042
|
connector=self,
|
@@ -3117,11 +3086,19 @@ def get_pipe_columns_types(
|
|
3117
3086
|
debug=debug,
|
3118
3087
|
)
|
3119
3088
|
|
3089
|
+
if debug:
|
3090
|
+
dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.")
|
3091
|
+
|
3120
3092
|
table_columns = {}
|
3121
3093
|
try:
|
3122
3094
|
pipe_table = self.get_pipe_table(pipe, debug=debug)
|
3123
3095
|
if pipe_table is None:
|
3124
3096
|
return {}
|
3097
|
+
|
3098
|
+
if debug:
|
3099
|
+
dprint("Found columns:")
|
3100
|
+
mrsm.pprint(dict(pipe_table.columns))
|
3101
|
+
|
3125
3102
|
for col in pipe_table.columns:
|
3126
3103
|
table_columns[str(col.name)] = str(col.type)
|
3127
3104
|
except Exception as e:
|
@@ -3153,6 +3130,7 @@ def get_pipe_columns_indices(
|
|
3153
3130
|
"""
|
3154
3131
|
if pipe.__dict__.get('_skip_check_indices', False):
|
3155
3132
|
return {}
|
3133
|
+
|
3156
3134
|
from meerschaum.utils.sql import get_table_cols_indices
|
3157
3135
|
return get_table_cols_indices(
|
3158
3136
|
pipe.target,
|
@@ -3207,7 +3185,6 @@ def get_add_columns_queries(
|
|
3207
3185
|
get_db_type_from_pd_type,
|
3208
3186
|
)
|
3209
3187
|
from meerschaum.utils.misc import flatten_list
|
3210
|
-
table_obj = self.get_pipe_table(pipe, debug=debug)
|
3211
3188
|
is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
|
3212
3189
|
if is_dask:
|
3213
3190
|
df = df.partitions[0].compute()
|
@@ -3231,9 +3208,6 @@ def get_add_columns_queries(
|
|
3231
3208
|
elif isinstance(val, str):
|
3232
3209
|
df_cols_types[col] = 'str'
|
3233
3210
|
db_cols_types = {
|
3234
|
-
col: get_pd_type_from_db_type(str(typ.type))
|
3235
|
-
for col, typ in table_obj.columns.items()
|
3236
|
-
} if table_obj is not None else {
|
3237
3211
|
col: get_pd_type_from_db_type(typ)
|
3238
3212
|
for col, typ in get_table_cols_types(
|
3239
3213
|
pipe.target,
|
@@ -3313,10 +3287,9 @@ def get_alter_columns_queries(
|
|
3313
3287
|
-------
|
3314
3288
|
A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
|
3315
3289
|
"""
|
3316
|
-
if not pipe.exists(debug=debug):
|
3290
|
+
if not pipe.exists(debug=debug) or pipe.static:
|
3317
3291
|
return []
|
3318
|
-
|
3319
|
-
return
|
3292
|
+
|
3320
3293
|
from meerschaum.utils.sql import (
|
3321
3294
|
sql_item_name,
|
3322
3295
|
get_table_cols_types,
|
@@ -3330,7 +3303,6 @@ def get_alter_columns_queries(
|
|
3330
3303
|
get_db_type_from_pd_type,
|
3331
3304
|
)
|
3332
3305
|
from meerschaum.utils.misc import flatten_list, generate_password, items_str
|
3333
|
-
table_obj = self.get_pipe_table(pipe, debug=debug)
|
3334
3306
|
target = pipe.target
|
3335
3307
|
session_id = generate_password(3)
|
3336
3308
|
numeric_cols = (
|
@@ -3351,9 +3323,6 @@ def get_alter_columns_queries(
|
|
3351
3323
|
else df
|
3352
3324
|
)
|
3353
3325
|
db_cols_types = {
|
3354
|
-
col: get_pd_type_from_db_type(str(typ.type))
|
3355
|
-
for col, typ in table_obj.columns.items()
|
3356
|
-
} if table_obj is not None else {
|
3357
3326
|
col: get_pd_type_from_db_type(typ)
|
3358
3327
|
for col, typ in get_table_cols_types(
|
3359
3328
|
pipe.target,
|
@@ -3362,7 +3331,8 @@ def get_alter_columns_queries(
|
|
3362
3331
|
debug=debug,
|
3363
3332
|
).items()
|
3364
3333
|
}
|
3365
|
-
|
3334
|
+
pipe_dtypes = pipe.get_dtypes(debug=debug)
|
3335
|
+
pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
|
3366
3336
|
pd_db_df_aliases = {
|
3367
3337
|
'int': 'bool',
|
3368
3338
|
'float': 'bool',
|
@@ -3370,7 +3340,11 @@ def get_alter_columns_queries(
|
|
3370
3340
|
'guid': 'object',
|
3371
3341
|
}
|
3372
3342
|
if self.flavor == 'oracle':
|
3373
|
-
pd_db_df_aliases
|
3343
|
+
pd_db_df_aliases.update({
|
3344
|
+
'int': 'numeric',
|
3345
|
+
'date': 'datetime',
|
3346
|
+
'numeric': 'int',
|
3347
|
+
})
|
3374
3348
|
|
3375
3349
|
altered_cols = {
|
3376
3350
|
col: (db_cols_types.get(col, 'object'), typ)
|
@@ -3379,11 +3353,33 @@ def get_alter_columns_queries(
|
|
3379
3353
|
and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
|
3380
3354
|
}
|
3381
3355
|
|
3356
|
+
if debug and altered_cols:
|
3357
|
+
dprint("Columns to be altered:")
|
3358
|
+
mrsm.pprint(altered_cols)
|
3359
|
+
|
3360
|
+
### NOTE: Special columns (numerics, bools, etc.) are captured and cached upon detection.
|
3361
|
+
new_special_cols = pipe._get_cached_value('new_special_cols', debug=debug) or {}
|
3362
|
+
new_special_db_cols_types = {
|
3363
|
+
col: (db_cols_types.get(col, 'object'), typ)
|
3364
|
+
for col, typ in new_special_cols.items()
|
3365
|
+
}
|
3366
|
+
if debug:
|
3367
|
+
dprint("Cached new special columns:")
|
3368
|
+
mrsm.pprint(new_special_cols)
|
3369
|
+
dprint("New special columns db types:")
|
3370
|
+
mrsm.pprint(new_special_db_cols_types)
|
3371
|
+
|
3372
|
+
altered_cols.update(new_special_db_cols_types)
|
3373
|
+
|
3382
3374
|
### NOTE: Sometimes bools are coerced into ints or floats.
|
3383
3375
|
altered_cols_to_ignore = set()
|
3384
3376
|
for col, (db_typ, df_typ) in altered_cols.items():
|
3385
3377
|
for db_alias, df_alias in pd_db_df_aliases.items():
|
3386
|
-
if
|
3378
|
+
if (
|
3379
|
+
db_alias in db_typ.lower()
|
3380
|
+
and df_alias in df_typ.lower()
|
3381
|
+
and col not in new_special_cols
|
3382
|
+
):
|
3387
3383
|
altered_cols_to_ignore.add(col)
|
3388
3384
|
|
3389
3385
|
### Oracle's bool handling sometimes mixes NUMBER and INT.
|
@@ -3405,21 +3401,29 @@ def get_alter_columns_queries(
|
|
3405
3401
|
if db_is_bool_compatible and df_is_bool_compatible:
|
3406
3402
|
altered_cols_to_ignore.add(bool_col)
|
3407
3403
|
|
3404
|
+
if debug and altered_cols_to_ignore:
|
3405
|
+
dprint("Ignoring the following altered columns (false positives).")
|
3406
|
+
mrsm.pprint(altered_cols_to_ignore)
|
3407
|
+
|
3408
3408
|
for col in altered_cols_to_ignore:
|
3409
3409
|
_ = altered_cols.pop(col, None)
|
3410
|
+
|
3410
3411
|
if not altered_cols:
|
3411
3412
|
return []
|
3412
3413
|
|
3413
3414
|
if numeric_cols:
|
3414
|
-
pipe.
|
3415
|
-
|
3416
|
-
|
3417
|
-
|
3418
|
-
|
3419
|
-
|
3420
|
-
|
3415
|
+
explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
|
3416
|
+
explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3417
|
+
pipe.dtypes = explicit_pipe_dtypes
|
3418
|
+
if not pipe.temporary:
|
3419
|
+
edit_success, edit_msg = pipe.edit(debug=debug)
|
3420
|
+
if not edit_success:
|
3421
|
+
warn(
|
3422
|
+
f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
|
3423
|
+
+ f"{edit_msg}"
|
3424
|
+
)
|
3421
3425
|
else:
|
3422
|
-
numeric_cols.extend([col for col, typ in
|
3426
|
+
numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
|
3423
3427
|
|
3424
3428
|
numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
|
3425
3429
|
text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
|
@@ -3445,12 +3449,12 @@ def get_alter_columns_queries(
|
|
3445
3449
|
+ sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
|
3446
3450
|
+ " (\n"
|
3447
3451
|
)
|
3448
|
-
for col_name,
|
3452
|
+
for col_name, col_typ in db_cols_types.items():
|
3449
3453
|
create_query += (
|
3450
3454
|
sql_item_name(col_name, self.flavor, None)
|
3451
3455
|
+ " "
|
3452
3456
|
+ (
|
3453
|
-
|
3457
|
+
col_typ
|
3454
3458
|
if col_name not in altered_cols
|
3455
3459
|
else altered_cols_types[col_name]
|
3456
3460
|
)
|
@@ -3464,12 +3468,12 @@ def get_alter_columns_queries(
|
|
3464
3468
|
+ ' ('
|
3465
3469
|
+ ', '.join([
|
3466
3470
|
sql_item_name(col_name, self.flavor, None)
|
3467
|
-
for col_name
|
3471
|
+
for col_name in db_cols_types
|
3468
3472
|
])
|
3469
3473
|
+ ')'
|
3470
3474
|
+ "\nSELECT\n"
|
3471
3475
|
)
|
3472
|
-
for col_name
|
3476
|
+
for col_name in db_cols_types:
|
3473
3477
|
new_col_str = (
|
3474
3478
|
sql_item_name(col_name, self.flavor, None)
|
3475
3479
|
if col_name not in altered_cols
|
@@ -3482,6 +3486,7 @@ def get_alter_columns_queries(
|
|
3482
3486
|
)
|
3483
3487
|
)
|
3484
3488
|
insert_query += new_col_str + ",\n"
|
3489
|
+
|
3485
3490
|
insert_query = insert_query[:-2] + (
|
3486
3491
|
f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
|
3487
3492
|
)
|
@@ -3627,20 +3632,18 @@ def get_to_sql_dtype(
|
|
3627
3632
|
>>> get_to_sql_dtype(pipe, df)
|
3628
3633
|
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
|
3629
3634
|
"""
|
3630
|
-
from meerschaum.utils.dataframe import
|
3635
|
+
from meerschaum.utils.dataframe import get_special_cols
|
3631
3636
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
3632
3637
|
df_dtypes = {
|
3633
3638
|
col: str(typ)
|
3634
3639
|
for col, typ in df.dtypes.items()
|
3635
3640
|
}
|
3636
|
-
|
3637
|
-
|
3638
|
-
|
3639
|
-
df_dtypes.update({col: 'json' for col in json_cols})
|
3640
|
-
df_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3641
|
-
df_dtypes.update({col: 'uuid' for col in uuid_cols})
|
3641
|
+
special_cols = get_special_cols(df)
|
3642
|
+
df_dtypes.update(special_cols)
|
3643
|
+
|
3642
3644
|
if update_dtypes:
|
3643
3645
|
df_dtypes.update(pipe.dtypes)
|
3646
|
+
|
3644
3647
|
return {
|
3645
3648
|
col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
|
3646
3649
|
for col, typ in df_dtypes.items()
|
@@ -3881,13 +3884,15 @@ def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
|
|
3881
3884
|
-------
|
3882
3885
|
A schema string or `None` if nothing is configured.
|
3883
3886
|
"""
|
3887
|
+
if self.flavor == 'sqlite':
|
3888
|
+
return self.schema
|
3884
3889
|
return pipe.parameters.get('schema', self.schema)
|
3885
3890
|
|
3886
3891
|
|
3887
3892
|
@staticmethod
|
3888
3893
|
def get_temporary_target(
|
3889
3894
|
target: str,
|
3890
|
-
transact_id: Optional[str
|
3895
|
+
transact_id: Optional[str] = None,
|
3891
3896
|
label: Optional[str] = None,
|
3892
3897
|
separator: Optional[str] = None,
|
3893
3898
|
) -> str:
|
@@ -3909,3 +3914,15 @@ def get_temporary_target(
|
|
3909
3914
|
+ transact_id
|
3910
3915
|
+ ((separator + label) if label else '')
|
3911
3916
|
)
|
3917
|
+
|
3918
|
+
|
3919
|
+
def _enforce_pipe_dtypes_chunks_hook(
|
3920
|
+
pipe: mrsm.Pipe,
|
3921
|
+
chunk_df: 'pd.DataFrame',
|
3922
|
+
debug: bool = False,
|
3923
|
+
**kwargs
|
3924
|
+
) -> 'pd.DataFrame':
|
3925
|
+
"""
|
3926
|
+
Enforce a pipe's dtypes on each chunk.
|
3927
|
+
"""
|
3928
|
+
return pipe.enforce_dtypes(chunk_df, debug=debug)
|