meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +6 -1
- meerschaum/_internal/entry.py +16 -5
- meerschaum/actions/edit.py +6 -6
- meerschaum/actions/sql.py +12 -11
- meerschaum/api/dash/pages/login.py +17 -17
- meerschaum/api/dash/pipes.py +104 -13
- meerschaum/api/routes/_pipes.py +58 -40
- meerschaum/api/routes/_webterm.py +1 -0
- meerschaum/config/_edit.py +46 -19
- meerschaum/config/_read_config.py +20 -9
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +1 -1
- meerschaum/config/static/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +1 -0
- meerschaum/connectors/api/_pipes.py +39 -8
- meerschaum/connectors/sql/_SQLConnector.py +4 -3
- meerschaum/connectors/sql/_pipes.py +511 -118
- meerschaum/connectors/sql/_sql.py +55 -15
- meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
- meerschaum/connectors/valkey/_pipes.py +11 -5
- meerschaum/core/Pipe/__init__.py +27 -9
- meerschaum/core/Pipe/_attributes.py +181 -18
- meerschaum/core/Pipe/_clear.py +10 -8
- meerschaum/core/Pipe/_copy.py +2 -0
- meerschaum/core/Pipe/_data.py +65 -17
- meerschaum/core/Pipe/_deduplicate.py +30 -28
- meerschaum/core/Pipe/_dtypes.py +4 -4
- meerschaum/core/Pipe/_fetch.py +12 -10
- meerschaum/core/Pipe/_sync.py +28 -11
- meerschaum/core/Pipe/_verify.py +52 -49
- meerschaum/utils/dataframe.py +64 -34
- meerschaum/utils/dtypes/__init__.py +25 -6
- meerschaum/utils/dtypes/sql.py +76 -33
- meerschaum/utils/misc.py +57 -24
- meerschaum/utils/packages/_packages.py +2 -1
- meerschaum/utils/schedule.py +7 -5
- meerschaum/utils/sql.py +697 -44
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
@@ -320,10 +320,11 @@ def create_indices(
|
|
320
320
|
from meerschaum.utils.debug import dprint
|
321
321
|
if debug:
|
322
322
|
dprint(f"Creating indices for {pipe}...")
|
323
|
-
if not pipe.
|
323
|
+
if not pipe.indices:
|
324
324
|
warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
|
325
325
|
return True
|
326
326
|
|
327
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
327
328
|
ix_queries = {
|
328
329
|
ix: queries
|
329
330
|
for ix, queries in self.get_create_index_queries(pipe, debug=debug).items()
|
@@ -394,23 +395,43 @@ def get_create_index_queries(
|
|
394
395
|
get_distinct_col_count,
|
395
396
|
update_queries,
|
396
397
|
get_null_replacement,
|
398
|
+
get_create_table_queries,
|
399
|
+
get_rename_table_queries,
|
397
400
|
COALESCE_UNIQUE_INDEX_FLAVORS,
|
398
401
|
)
|
402
|
+
from meerschaum.utils.dtypes.sql import (
|
403
|
+
get_db_type_from_pd_type,
|
404
|
+
get_pd_type_from_db_type,
|
405
|
+
AUTO_INCREMENT_COLUMN_FLAVORS,
|
406
|
+
)
|
399
407
|
from meerschaum.config import get_config
|
400
408
|
index_queries = {}
|
401
409
|
|
402
410
|
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
|
411
|
+
static = pipe.parameters.get('static', False)
|
403
412
|
index_names = pipe.get_indices()
|
404
413
|
indices = pipe.indices
|
414
|
+
existing_cols_types = pipe.get_columns_types(debug=debug)
|
415
|
+
existing_cols_pd_types = {
|
416
|
+
col: get_pd_type_from_db_type(typ)
|
417
|
+
for col, typ in existing_cols_types.items()
|
418
|
+
}
|
419
|
+
existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
|
420
|
+
existing_ix_names = set()
|
421
|
+
existing_primary_keys = []
|
422
|
+
for col, col_indices in existing_cols_indices.items():
|
423
|
+
for col_ix_doc in col_indices:
|
424
|
+
existing_ix_names.add(col_ix_doc.get('name', None))
|
425
|
+
if col_ix_doc.get('type', None) == 'PRIMARY KEY':
|
426
|
+
existing_primary_keys.append(col)
|
405
427
|
|
406
428
|
_datetime = pipe.get_columns('datetime', error=False)
|
407
|
-
_datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
|
408
429
|
_datetime_name = (
|
409
430
|
sql_item_name(_datetime, self.flavor, None)
|
410
431
|
if _datetime is not None else None
|
411
432
|
)
|
412
433
|
_datetime_index_name = (
|
413
|
-
sql_item_name(index_names['datetime'], self.flavor, None)
|
434
|
+
sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
|
414
435
|
if index_names.get('datetime', None)
|
415
436
|
else None
|
416
437
|
)
|
@@ -420,6 +441,29 @@ def get_create_index_queries(
|
|
420
441
|
if _id is not None
|
421
442
|
else None
|
422
443
|
)
|
444
|
+
primary_key = pipe.columns.get('primary', None)
|
445
|
+
primary_key_name = (
|
446
|
+
sql_item_name(primary_key, flavor=self.flavor, schema=None)
|
447
|
+
if primary_key
|
448
|
+
else None
|
449
|
+
)
|
450
|
+
autoincrement = (
|
451
|
+
pipe.parameters.get('autoincrement', False)
|
452
|
+
or (
|
453
|
+
primary_key is not None
|
454
|
+
and primary_key not in existing_cols_pd_types
|
455
|
+
)
|
456
|
+
)
|
457
|
+
primary_key_db_type = (
|
458
|
+
get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor)
|
459
|
+
if primary_key
|
460
|
+
else None
|
461
|
+
)
|
462
|
+
primary_key_constraint_name = (
|
463
|
+
sql_item_name(f'pk_{pipe.target}', self.flavor, None)
|
464
|
+
if primary_key is not None
|
465
|
+
else None
|
466
|
+
)
|
423
467
|
|
424
468
|
_id_index_name = (
|
425
469
|
sql_item_name(index_names['id'], self.flavor, None)
|
@@ -462,8 +506,10 @@ def get_create_index_queries(
|
|
462
506
|
)
|
463
507
|
elif self.flavor == 'mssql':
|
464
508
|
dt_query = (
|
465
|
-
|
466
|
-
|
509
|
+
"CREATE "
|
510
|
+
+ ("CLUSTERED " if not primary_key else '')
|
511
|
+
+ f"INDEX {_datetime_index_name} "
|
512
|
+
+ f"ON {_pipe_name} ({_datetime_name})"
|
467
513
|
)
|
468
514
|
else: ### mssql, sqlite, etc.
|
469
515
|
dt_query = (
|
@@ -473,6 +519,115 @@ def get_create_index_queries(
|
|
473
519
|
|
474
520
|
index_queries[_datetime] = [dt_query]
|
475
521
|
|
522
|
+
primary_queries = []
|
523
|
+
if (
|
524
|
+
primary_key is not None
|
525
|
+
and primary_key not in existing_primary_keys
|
526
|
+
and not static
|
527
|
+
):
|
528
|
+
if autoincrement and primary_key not in existing_cols_pd_types:
|
529
|
+
autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
|
530
|
+
self.flavor,
|
531
|
+
AUTO_INCREMENT_COLUMN_FLAVORS['default']
|
532
|
+
)
|
533
|
+
primary_queries.extend([
|
534
|
+
(
|
535
|
+
f"ALTER TABLE {_pipe_name}\n"
|
536
|
+
f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
|
537
|
+
),
|
538
|
+
])
|
539
|
+
elif not autoincrement and primary_key in existing_cols_pd_types:
|
540
|
+
if self.flavor == 'sqlite':
|
541
|
+
new_table_name = sql_item_name(
|
542
|
+
f'_new_{pipe.target}',
|
543
|
+
self.flavor,
|
544
|
+
self.get_pipe_schema(pipe)
|
545
|
+
)
|
546
|
+
select_cols_str = ', '.join(
|
547
|
+
[
|
548
|
+
sql_item_name(col, self.flavor, None)
|
549
|
+
for col in existing_cols_types
|
550
|
+
]
|
551
|
+
)
|
552
|
+
primary_queries.extend(
|
553
|
+
get_create_table_queries(
|
554
|
+
existing_cols_pd_types,
|
555
|
+
f'_new_{pipe.target}',
|
556
|
+
self.flavor,
|
557
|
+
schema=self.get_pipe_schema(pipe),
|
558
|
+
primary_key=primary_key,
|
559
|
+
) + [
|
560
|
+
(
|
561
|
+
f"INSERT INTO {new_table_name} ({select_cols_str})\n"
|
562
|
+
f"SELECT {select_cols_str}\nFROM {_pipe_name}"
|
563
|
+
),
|
564
|
+
f"DROP TABLE {_pipe_name}",
|
565
|
+
] + get_rename_table_queries(
|
566
|
+
f'_new_{pipe.target}',
|
567
|
+
pipe.target,
|
568
|
+
self.flavor,
|
569
|
+
schema=self.get_pipe_schema(pipe),
|
570
|
+
)
|
571
|
+
)
|
572
|
+
elif self.flavor == 'oracle':
|
573
|
+
primary_queries.extend([
|
574
|
+
(
|
575
|
+
f"ALTER TABLE {_pipe_name}\n"
|
576
|
+
f"MODIFY {primary_key_name} NOT NULL"
|
577
|
+
),
|
578
|
+
(
|
579
|
+
f"ALTER TABLE {_pipe_name}\n"
|
580
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
581
|
+
)
|
582
|
+
])
|
583
|
+
elif self.flavor in ('mysql', 'mariadb'):
|
584
|
+
primary_queries.extend([
|
585
|
+
(
|
586
|
+
f"ALTER TABLE {_pipe_name}\n"
|
587
|
+
f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
|
588
|
+
),
|
589
|
+
(
|
590
|
+
f"ALTER TABLE {_pipe_name}\n"
|
591
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
592
|
+
)
|
593
|
+
])
|
594
|
+
elif self.flavor == 'timescaledb':
|
595
|
+
primary_queries.extend([
|
596
|
+
(
|
597
|
+
f"ALTER TABLE {_pipe_name}\n"
|
598
|
+
f"ALTER COLUMN {primary_key_name} SET NOT NULL"
|
599
|
+
),
|
600
|
+
(
|
601
|
+
f"ALTER TABLE {_pipe_name}\n"
|
602
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
|
603
|
+
f"{_datetime_name}, " if _datetime_name else ""
|
604
|
+
) + f"{primary_key_name})"
|
605
|
+
),
|
606
|
+
])
|
607
|
+
elif self.flavor in ('citus', 'postgresql', 'duckdb'):
|
608
|
+
primary_queries.extend([
|
609
|
+
(
|
610
|
+
f"ALTER TABLE {_pipe_name}\n"
|
611
|
+
f"ALTER COLUMN {primary_key_name} SET NOT NULL"
|
612
|
+
),
|
613
|
+
(
|
614
|
+
f"ALTER TABLE {_pipe_name}\n"
|
615
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
616
|
+
),
|
617
|
+
])
|
618
|
+
else:
|
619
|
+
primary_queries.extend([
|
620
|
+
(
|
621
|
+
f"ALTER TABLE {_pipe_name}\n"
|
622
|
+
f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
|
623
|
+
),
|
624
|
+
(
|
625
|
+
f"ALTER TABLE {_pipe_name}\n"
|
626
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
627
|
+
),
|
628
|
+
])
|
629
|
+
index_queries[primary_key] = primary_queries
|
630
|
+
|
476
631
|
### create id index
|
477
632
|
if _id_name is not None:
|
478
633
|
if self.flavor == 'timescaledb':
|
@@ -496,7 +651,7 @@ def get_create_index_queries(
|
|
496
651
|
other_index_names = {
|
497
652
|
ix_key: ix_unquoted
|
498
653
|
for ix_key, ix_unquoted in index_names.items()
|
499
|
-
if ix_key not in ('datetime', 'id')
|
654
|
+
if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names
|
500
655
|
}
|
501
656
|
for ix_key, ix_unquoted in other_index_names.items():
|
502
657
|
ix_name = sql_item_name(ix_unquoted, self.flavor, None)
|
@@ -509,13 +664,12 @@ def get_create_index_queries(
|
|
509
664
|
cols_names_str = ", ".join(cols_names)
|
510
665
|
index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
|
511
666
|
|
512
|
-
existing_cols_types = pipe.get_columns_types(debug=debug)
|
513
667
|
indices_cols_str = ', '.join(
|
514
|
-
|
668
|
+
list({
|
515
669
|
sql_item_name(ix, self.flavor)
|
516
670
|
for ix_key, ix in pipe.columns.items()
|
517
671
|
if ix and ix in existing_cols_types
|
518
|
-
|
672
|
+
})
|
519
673
|
)
|
520
674
|
coalesce_indices_cols_str = ', '.join(
|
521
675
|
[
|
@@ -738,7 +892,7 @@ def get_pipe_data(
|
|
738
892
|
dt_type = dtypes.get(_dt, 'object').lower()
|
739
893
|
if 'datetime' not in dt_type:
|
740
894
|
if 'int' not in dt_type:
|
741
|
-
dtypes[_dt] = 'datetime64[ns]'
|
895
|
+
dtypes[_dt] = 'datetime64[ns, UTC]'
|
742
896
|
existing_cols = pipe.get_columns_types(debug=debug)
|
743
897
|
select_columns = (
|
744
898
|
[
|
@@ -855,6 +1009,7 @@ def get_pipe_data_query(
|
|
855
1009
|
begin_add_minutes: int = 0,
|
856
1010
|
end_add_minutes: int = 0,
|
857
1011
|
replace_nulls: Optional[str] = None,
|
1012
|
+
skip_existing_cols_check: bool = False,
|
858
1013
|
debug: bool = False,
|
859
1014
|
**kw: Any
|
860
1015
|
) -> Union[str, None]:
|
@@ -905,6 +1060,9 @@ def get_pipe_data_query(
|
|
905
1060
|
replace_nulls: Optional[str], default None
|
906
1061
|
If provided, replace null values with this value.
|
907
1062
|
|
1063
|
+
skip_existing_cols_check: bool, default False
|
1064
|
+
If `True`, do not verify that querying columns are actually on the table.
|
1065
|
+
|
908
1066
|
debug: bool, default False
|
909
1067
|
Verbosity toggle.
|
910
1068
|
|
@@ -912,16 +1070,13 @@ def get_pipe_data_query(
|
|
912
1070
|
-------
|
913
1071
|
A `SELECT` query to retrieve a pipe's data.
|
914
1072
|
"""
|
915
|
-
from meerschaum.utils.debug import dprint
|
916
1073
|
from meerschaum.utils.misc import items_str
|
917
1074
|
from meerschaum.utils.sql import sql_item_name, dateadd_str
|
918
|
-
from meerschaum.utils.packages import import_pandas
|
919
|
-
pd = import_pandas()
|
920
1075
|
existing_cols = pipe.get_columns_types(debug=debug)
|
921
1076
|
select_columns = (
|
922
1077
|
[col for col in existing_cols]
|
923
1078
|
if not select_columns
|
924
|
-
else [col for col in select_columns if col in existing_cols]
|
1079
|
+
else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
|
925
1080
|
)
|
926
1081
|
if omit_columns:
|
927
1082
|
select_columns = [col for col in select_columns if col not in omit_columns]
|
@@ -935,7 +1090,10 @@ def get_pipe_data_query(
|
|
935
1090
|
if begin is not None:
|
936
1091
|
begin -= backtrack_interval
|
937
1092
|
|
938
|
-
cols_names = [
|
1093
|
+
cols_names = [
|
1094
|
+
sql_item_name(col, self.flavor, None)
|
1095
|
+
for col in select_columns
|
1096
|
+
]
|
939
1097
|
select_cols_str = (
|
940
1098
|
'SELECT\n '
|
941
1099
|
+ ',\n '.join(
|
@@ -948,7 +1106,7 @@ def get_pipe_data_query(
|
|
948
1106
|
for col_name in cols_names
|
949
1107
|
]
|
950
1108
|
)
|
951
|
-
)
|
1109
|
+
) if cols_names else 'SELECT *'
|
952
1110
|
pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
953
1111
|
query = f"{select_cols_str}\nFROM {pipe_table_name}"
|
954
1112
|
where = ""
|
@@ -972,7 +1130,7 @@ def get_pipe_data_query(
|
|
972
1130
|
quoted_indices = {
|
973
1131
|
key: sql_item_name(val, self.flavor, None)
|
974
1132
|
for key, val in pipe.columns.items()
|
975
|
-
if val in existing_cols
|
1133
|
+
if val in existing_cols or skip_existing_cols_check
|
976
1134
|
}
|
977
1135
|
|
978
1136
|
if begin is not None or end is not None:
|
@@ -992,7 +1150,7 @@ def get_pipe_data_query(
|
|
992
1150
|
)
|
993
1151
|
|
994
1152
|
is_dt_bound = False
|
995
|
-
if begin is not None and _dt in existing_cols:
|
1153
|
+
if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
|
996
1154
|
begin_da = dateadd_str(
|
997
1155
|
flavor=self.flavor,
|
998
1156
|
datepart='minute',
|
@@ -1002,7 +1160,7 @@ def get_pipe_data_query(
|
|
1002
1160
|
where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
|
1003
1161
|
is_dt_bound = True
|
1004
1162
|
|
1005
|
-
if end is not None and _dt in existing_cols:
|
1163
|
+
if end is not None and (_dt in existing_cols or skip_existing_cols_check):
|
1006
1164
|
if 'int' in str(type(end)).lower() and end == begin:
|
1007
1165
|
end += 1
|
1008
1166
|
end_da = dateadd_str(
|
@@ -1016,7 +1174,11 @@ def get_pipe_data_query(
|
|
1016
1174
|
|
1017
1175
|
if params is not None:
|
1018
1176
|
from meerschaum.utils.sql import build_where
|
1019
|
-
valid_params = {
|
1177
|
+
valid_params = {
|
1178
|
+
k: v
|
1179
|
+
for k, v in params.items()
|
1180
|
+
if k in existing_cols or skip_existing_cols_check
|
1181
|
+
}
|
1020
1182
|
if valid_params:
|
1021
1183
|
where += build_where(valid_params, self).replace(
|
1022
1184
|
'WHERE', ('AND' if is_dt_bound else "")
|
@@ -1030,7 +1192,7 @@ def get_pipe_data_query(
|
|
1030
1192
|
order_by = ""
|
1031
1193
|
if quoted_indices:
|
1032
1194
|
order_by += "\nORDER BY "
|
1033
|
-
if _dt and _dt in existing_cols:
|
1195
|
+
if _dt and (_dt in existing_cols or skip_existing_cols_check):
|
1034
1196
|
order_by += dt + ' ' + order + ','
|
1035
1197
|
for key, quoted_col_name in quoted_indices.items():
|
1036
1198
|
if dt == quoted_col_name:
|
@@ -1140,6 +1302,70 @@ def get_pipe_attributes(
|
|
1140
1302
|
return attributes
|
1141
1303
|
|
1142
1304
|
|
1305
|
+
def create_pipe_table_from_df(
|
1306
|
+
self,
|
1307
|
+
pipe: mrsm.Pipe,
|
1308
|
+
df: 'pd.DataFrame',
|
1309
|
+
debug: bool = False,
|
1310
|
+
) -> mrsm.SuccessTuple:
|
1311
|
+
"""
|
1312
|
+
Create a pipe's table from its configured dtypes and an incoming dataframe.
|
1313
|
+
"""
|
1314
|
+
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
1315
|
+
from meerschaum.utils.sql import get_create_table_queries, sql_item_name
|
1316
|
+
primary_key = pipe.columns.get('primary', None)
|
1317
|
+
dt_col = pipe.columns.get('datetime', None)
|
1318
|
+
new_dtypes = {
|
1319
|
+
**{
|
1320
|
+
col: str(typ)
|
1321
|
+
for col, typ in df.dtypes.items()
|
1322
|
+
},
|
1323
|
+
**{
|
1324
|
+
col: 'int'
|
1325
|
+
for col_ix, col in pipe.columns.items()
|
1326
|
+
if col_ix != 'primary'
|
1327
|
+
},
|
1328
|
+
**{
|
1329
|
+
col: 'uuid'
|
1330
|
+
for col in get_uuid_cols(df)
|
1331
|
+
},
|
1332
|
+
**{
|
1333
|
+
col: 'json'
|
1334
|
+
for col in get_json_cols(df)
|
1335
|
+
},
|
1336
|
+
**{
|
1337
|
+
col: 'numeric'
|
1338
|
+
for col in get_numeric_cols(df)
|
1339
|
+
},
|
1340
|
+
**pipe.dtypes
|
1341
|
+
}
|
1342
|
+
autoincrement = (
|
1343
|
+
pipe.parameters.get('autoincrement', False)
|
1344
|
+
or (primary_key and primary_key not in new_dtypes)
|
1345
|
+
)
|
1346
|
+
if autoincrement:
|
1347
|
+
_ = new_dtypes.pop(primary_key, None)
|
1348
|
+
|
1349
|
+
create_table_queries = get_create_table_queries(
|
1350
|
+
new_dtypes,
|
1351
|
+
pipe.target,
|
1352
|
+
self.flavor,
|
1353
|
+
schema=self.get_pipe_schema(pipe),
|
1354
|
+
primary_key=primary_key,
|
1355
|
+
datetime_column=dt_col,
|
1356
|
+
)
|
1357
|
+
success = all(
|
1358
|
+
self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
|
1359
|
+
)
|
1360
|
+
target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
|
1361
|
+
msg = (
|
1362
|
+
"Success"
|
1363
|
+
if success
|
1364
|
+
else f"Failed to create {target_name}."
|
1365
|
+
)
|
1366
|
+
return success, msg
|
1367
|
+
|
1368
|
+
|
1143
1369
|
def sync_pipe(
|
1144
1370
|
self,
|
1145
1371
|
pipe: mrsm.Pipe,
|
@@ -1197,10 +1423,17 @@ def sync_pipe(
|
|
1197
1423
|
A `SuccessTuple` of success (`bool`) and message (`str`).
|
1198
1424
|
"""
|
1199
1425
|
from meerschaum.utils.packages import import_pandas
|
1200
|
-
from meerschaum.utils.sql import
|
1426
|
+
from meerschaum.utils.sql import (
|
1427
|
+
get_update_queries,
|
1428
|
+
sql_item_name,
|
1429
|
+
update_queries,
|
1430
|
+
get_create_table_queries,
|
1431
|
+
get_reset_autoincrement_queries,
|
1432
|
+
)
|
1201
1433
|
from meerschaum.utils.misc import generate_password
|
1202
|
-
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
|
1434
|
+
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
1203
1435
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
1436
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1204
1437
|
from meerschaum import Pipe
|
1205
1438
|
import time
|
1206
1439
|
import copy
|
@@ -1211,6 +1444,7 @@ def sync_pipe(
|
|
1211
1444
|
return False, msg
|
1212
1445
|
|
1213
1446
|
start = time.perf_counter()
|
1447
|
+
pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
|
1214
1448
|
|
1215
1449
|
if not pipe.temporary and not pipe.get_id(debug=debug):
|
1216
1450
|
register_tuple = pipe.register(debug=debug)
|
@@ -1232,7 +1466,6 @@ def sync_pipe(
|
|
1232
1466
|
|
1233
1467
|
### if table does not exist, create it with indices
|
1234
1468
|
is_new = False
|
1235
|
-
add_cols_query = None
|
1236
1469
|
if not pipe.exists(debug=debug):
|
1237
1470
|
check_existing = False
|
1238
1471
|
is_new = True
|
@@ -1240,11 +1473,15 @@ def sync_pipe(
|
|
1240
1473
|
### Check for new columns.
|
1241
1474
|
add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
|
1242
1475
|
if add_cols_queries:
|
1476
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1477
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1243
1478
|
if not self.exec_queries(add_cols_queries, debug=debug):
|
1244
1479
|
warn(f"Failed to add new columns to {pipe}.")
|
1245
1480
|
|
1246
1481
|
alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
|
1247
1482
|
if alter_cols_queries:
|
1483
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1484
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1248
1485
|
if not self.exec_queries(alter_cols_queries, debug=debug):
|
1249
1486
|
warn(f"Failed to alter columns for {pipe}.")
|
1250
1487
|
else:
|
@@ -1252,9 +1489,7 @@ def sync_pipe(
|
|
1252
1489
|
|
1253
1490
|
### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
|
1254
1491
|
### so infer bools and persist them to `dtypes`.
|
1255
|
-
|
1256
|
-
### to avoid merge issues.
|
1257
|
-
if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
|
1492
|
+
if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
|
1258
1493
|
pipe_dtypes = pipe.dtypes
|
1259
1494
|
new_bool_cols = {
|
1260
1495
|
col: 'bool[pyarrow]'
|
@@ -1309,47 +1544,131 @@ def sync_pipe(
|
|
1309
1544
|
'schema': self.get_pipe_schema(pipe),
|
1310
1545
|
})
|
1311
1546
|
|
1312
|
-
|
1547
|
+
primary_key = pipe.columns.get('primary', None)
|
1548
|
+
autoincrement = (
|
1549
|
+
pipe.parameters.get('autoincrement', False)
|
1550
|
+
or (
|
1551
|
+
is_new
|
1552
|
+
and primary_key
|
1553
|
+
and primary_key
|
1554
|
+
not in pipe.dtypes
|
1555
|
+
and primary_key not in unseen_df.columns
|
1556
|
+
)
|
1557
|
+
)
|
1558
|
+
if autoincrement and autoincrement not in pipe.parameters:
|
1559
|
+
pipe.parameters['autoincrement'] = autoincrement
|
1560
|
+
edit_success, edit_msg = pipe.edit(debug=debug)
|
1561
|
+
if not edit_success:
|
1562
|
+
return edit_success, edit_msg
|
1563
|
+
|
1564
|
+
autoincrement_needs_reset = False
|
1565
|
+
if autoincrement and primary_key:
|
1566
|
+
if primary_key not in df.columns:
|
1567
|
+
if unseen_df is not None and primary_key in unseen_df.columns:
|
1568
|
+
del unseen_df[primary_key]
|
1569
|
+
if update_df is not None and primary_key in update_df.columns:
|
1570
|
+
del update_df[primary_key]
|
1571
|
+
if delta_df is not None and primary_key in delta_df.columns:
|
1572
|
+
del delta_df[primary_key]
|
1573
|
+
elif unseen_df[primary_key].notnull().any():
|
1574
|
+
autoincrement_needs_reset = True
|
1575
|
+
|
1576
|
+
if is_new:
|
1577
|
+
create_success, create_msg = self.create_pipe_table_from_df(
|
1578
|
+
pipe,
|
1579
|
+
unseen_df,
|
1580
|
+
debug=debug,
|
1581
|
+
)
|
1582
|
+
if not create_success:
|
1583
|
+
return create_success, create_msg
|
1584
|
+
|
1585
|
+
do_identity_insert = bool(
|
1586
|
+
self.flavor in ('mssql',)
|
1587
|
+
and primary_key in unseen_df.columns
|
1588
|
+
and autoincrement
|
1589
|
+
)
|
1590
|
+
with self.engine.connect() as connection:
|
1591
|
+
with connection.begin():
|
1592
|
+
if do_identity_insert:
|
1593
|
+
identity_on_result = self.exec(
|
1594
|
+
f"SET IDENTITY_INSERT {pipe_name} ON",
|
1595
|
+
commit=False,
|
1596
|
+
_connection=connection,
|
1597
|
+
close=False,
|
1598
|
+
debug=debug,
|
1599
|
+
)
|
1600
|
+
if identity_on_result is None:
|
1601
|
+
return False, f"Could not enable identity inserts on {pipe}."
|
1602
|
+
|
1603
|
+
stats = self.to_sql(
|
1604
|
+
unseen_df,
|
1605
|
+
_connection=connection,
|
1606
|
+
**unseen_kw
|
1607
|
+
)
|
1608
|
+
|
1609
|
+
if do_identity_insert:
|
1610
|
+
identity_off_result = self.exec(
|
1611
|
+
f"SET IDENTITY_INSERT {pipe_name} OFF",
|
1612
|
+
commit=False,
|
1613
|
+
_connection=connection,
|
1614
|
+
close=False,
|
1615
|
+
debug=debug,
|
1616
|
+
)
|
1617
|
+
if identity_off_result is None:
|
1618
|
+
return False, f"Could not disable identity inserts on {pipe}."
|
1619
|
+
|
1313
1620
|
if is_new:
|
1314
1621
|
if not self.create_indices(pipe, debug=debug):
|
1315
1622
|
warn(f"Failed to create indices for {pipe}. Continuing...")
|
1316
1623
|
|
1317
|
-
if
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1329
|
-
else 1
|
1330
|
-
)
|
1331
|
-
) if dt_col else None
|
1624
|
+
if autoincrement_needs_reset:
|
1625
|
+
reset_autoincrement_queries = get_reset_autoincrement_queries(
|
1626
|
+
pipe.target,
|
1627
|
+
primary_key,
|
1628
|
+
self,
|
1629
|
+
schema=self.get_pipe_schema(pipe),
|
1630
|
+
debug=debug,
|
1631
|
+
)
|
1632
|
+
results = self.exec_queries(reset_autoincrement_queries, debug=debug)
|
1633
|
+
for result in results:
|
1634
|
+
if result is None:
|
1635
|
+
warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
|
1332
1636
|
|
1637
|
+
if update_df is not None and len(update_df) > 0:
|
1333
1638
|
transact_id = generate_password(3)
|
1334
|
-
|
1639
|
+
temp_prefix = '##' if self.flavor != 'oracle' else ''
|
1640
|
+
temp_target = temp_prefix + transact_id + '_' + pipe.target
|
1335
1641
|
self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
|
1336
1642
|
temp_pipe = Pipe(
|
1337
1643
|
pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
|
1338
1644
|
instance=pipe.instance_keys,
|
1339
1645
|
columns={
|
1340
|
-
ix_key: ix
|
1646
|
+
(ix_key if ix_key != 'primary' else 'primary_'): ix
|
1341
1647
|
for ix_key, ix in pipe.columns.items()
|
1342
1648
|
if ix and ix in update_df.columns
|
1343
1649
|
},
|
1344
|
-
dtypes=
|
1650
|
+
dtypes={
|
1651
|
+
col: typ
|
1652
|
+
for col, typ in pipe.dtypes.items()
|
1653
|
+
if col in update_df.columns
|
1654
|
+
},
|
1345
1655
|
target=temp_target,
|
1346
1656
|
temporary=True,
|
1347
1657
|
parameters={
|
1658
|
+
'static': True,
|
1348
1659
|
'schema': self.internal_schema,
|
1349
1660
|
'hypertable': False,
|
1661
|
+
'autoincrement': False,
|
1350
1662
|
},
|
1351
1663
|
)
|
1352
|
-
temp_pipe.
|
1664
|
+
temp_pipe._columns_types = {
|
1665
|
+
col: get_db_type_from_pd_type(str(typ), self.flavor)
|
1666
|
+
for col, typ in update_df.dtypes.items()
|
1667
|
+
}
|
1668
|
+
temp_pipe._columns_types_timestamp = time.perf_counter()
|
1669
|
+
temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
|
1670
|
+
if not temp_success:
|
1671
|
+
return temp_success, temp_msg
|
1353
1672
|
existing_cols = pipe.get_columns_types(debug=debug)
|
1354
1673
|
join_cols = [
|
1355
1674
|
col
|
@@ -1358,7 +1677,7 @@ def sync_pipe(
|
|
1358
1677
|
]
|
1359
1678
|
update_queries = get_update_queries(
|
1360
1679
|
pipe.target,
|
1361
|
-
temp_target,
|
1680
|
+
temp_target,
|
1362
1681
|
self,
|
1363
1682
|
join_cols,
|
1364
1683
|
upsert=upsert,
|
@@ -1475,28 +1794,23 @@ def sync_pipe_inplace(
|
|
1475
1794
|
)
|
1476
1795
|
from meerschaum.utils.sql import (
|
1477
1796
|
sql_item_name,
|
1478
|
-
get_sqlalchemy_table,
|
1479
1797
|
get_update_queries,
|
1480
1798
|
get_null_replacement,
|
1481
|
-
|
1482
|
-
NO_SELECT_INTO_FLAVORS,
|
1483
|
-
format_cte_subquery,
|
1484
|
-
get_create_table_query,
|
1799
|
+
get_create_table_queries,
|
1485
1800
|
get_table_cols_types,
|
1486
|
-
truncate_item_name,
|
1487
1801
|
session_execute,
|
1488
|
-
table_exists,
|
1489
1802
|
update_queries,
|
1490
1803
|
)
|
1804
|
+
from meerschaum.utils.dtypes import coerce_timezone, are_dtypes_equal
|
1491
1805
|
from meerschaum.utils.dtypes.sql import (
|
1492
1806
|
get_pd_type_from_db_type,
|
1493
1807
|
)
|
1494
1808
|
from meerschaum.utils.misc import generate_password
|
1495
|
-
from meerschaum.utils.debug import dprint
|
1496
1809
|
|
1497
1810
|
transact_id = generate_password(3)
|
1498
1811
|
def get_temp_table_name(label: str) -> str:
|
1499
|
-
|
1812
|
+
temp_prefix = '##' if self.flavor != 'oracle' else ''
|
1813
|
+
return temp_prefix + transact_id + '_' + label + '_' + pipe.target
|
1500
1814
|
|
1501
1815
|
internal_schema = self.internal_schema
|
1502
1816
|
temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
|
@@ -1523,6 +1837,11 @@ def sync_pipe_inplace(
|
|
1523
1837
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1524
1838
|
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
|
1525
1839
|
database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
|
1840
|
+
primary_key = pipe.columns.get('primary', None)
|
1841
|
+
autoincrement = pipe.parameters.get('autoincrement', False)
|
1842
|
+
dt_col = pipe.columns.get('datetime', None)
|
1843
|
+
dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
1844
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
|
1526
1845
|
|
1527
1846
|
def clean_up_temp_tables(ready_to_drop: bool = False):
|
1528
1847
|
log_success, log_msg = self._log_temporary_tables_creation(
|
@@ -1546,13 +1865,16 @@ def sync_pipe_inplace(
|
|
1546
1865
|
|
1547
1866
|
sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
|
1548
1867
|
if not pipe.exists(debug=debug):
|
1549
|
-
|
1868
|
+
create_pipe_queries = get_create_table_queries(
|
1550
1869
|
metadef,
|
1551
1870
|
pipe.target,
|
1552
1871
|
self.flavor,
|
1553
1872
|
schema=self.get_pipe_schema(pipe),
|
1873
|
+
primary_key=primary_key,
|
1874
|
+
autoincrement=autoincrement,
|
1875
|
+
datetime_column=dt_col,
|
1554
1876
|
)
|
1555
|
-
result = self.
|
1877
|
+
result = self.exec_queries(create_pipe_queries, debug=debug)
|
1556
1878
|
if result is None:
|
1557
1879
|
_ = clean_up_temp_tables()
|
1558
1880
|
return False, f"Could not insert new data into {pipe} from its SQL query definition."
|
@@ -1567,12 +1889,12 @@ def sync_pipe_inplace(
|
|
1567
1889
|
session = sqlalchemy_orm.Session(self.engine)
|
1568
1890
|
connectable = session if self.flavor != 'duckdb' else self
|
1569
1891
|
|
1570
|
-
create_new_query =
|
1892
|
+
create_new_query = get_create_table_queries(
|
1571
1893
|
metadef,
|
1572
1894
|
temp_tables[('new') if not upsert else 'update'],
|
1573
1895
|
self.flavor,
|
1574
1896
|
schema=internal_schema,
|
1575
|
-
)
|
1897
|
+
)[0]
|
1576
1898
|
(create_new_success, create_new_msg), create_new_results = session_execute(
|
1577
1899
|
session,
|
1578
1900
|
create_new_query,
|
@@ -1603,13 +1925,20 @@ def sync_pipe_inplace(
|
|
1603
1925
|
sql_item_name(col, self.flavor)
|
1604
1926
|
for col in new_cols
|
1605
1927
|
])
|
1928
|
+
def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
|
1929
|
+
if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
|
1930
|
+
return new_cols_types[col]
|
1931
|
+
return cols_types[col]
|
1606
1932
|
|
1607
1933
|
add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
|
1608
1934
|
if add_cols_queries:
|
1935
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1936
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1609
1937
|
self.exec_queries(add_cols_queries, debug=debug)
|
1610
1938
|
|
1611
1939
|
alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
|
1612
1940
|
if alter_cols_queries:
|
1941
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1613
1942
|
self.exec_queries(alter_cols_queries, debug=debug)
|
1614
1943
|
|
1615
1944
|
insert_queries = [
|
@@ -1634,6 +1963,26 @@ def sync_pipe_inplace(
|
|
1634
1963
|
_ = clean_up_temp_tables()
|
1635
1964
|
return True, f"Inserted {new_count}, updated 0 rows."
|
1636
1965
|
|
1966
|
+
(new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
|
1967
|
+
session,
|
1968
|
+
[
|
1969
|
+
"SELECT\n"
|
1970
|
+
f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
|
1971
|
+
f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
|
1972
|
+
f"FROM {temp_table_names['new']}\n"
|
1973
|
+
f"WHERE {dt_col_name} IS NOT NULL"
|
1974
|
+
],
|
1975
|
+
with_results=True,
|
1976
|
+
debug=debug,
|
1977
|
+
)
|
1978
|
+
if not new_dt_bounds_success:
|
1979
|
+
return (
|
1980
|
+
new_dt_bounds_success,
|
1981
|
+
f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
|
1982
|
+
)
|
1983
|
+
|
1984
|
+
begin, end = new_dt_bounds_results[0].fetchone()
|
1985
|
+
|
1637
1986
|
backtrack_def = self.get_pipe_data_query(
|
1638
1987
|
pipe,
|
1639
1988
|
begin=begin,
|
@@ -1644,19 +1993,18 @@ def sync_pipe_inplace(
|
|
1644
1993
|
debug=debug,
|
1645
1994
|
order=None,
|
1646
1995
|
)
|
1647
|
-
|
1648
|
-
create_backtrack_query = get_create_table_query(
|
1996
|
+
create_backtrack_query = get_create_table_queries(
|
1649
1997
|
backtrack_def,
|
1650
1998
|
temp_tables['backtrack'],
|
1651
1999
|
self.flavor,
|
1652
2000
|
schema=internal_schema,
|
1653
|
-
)
|
1654
|
-
(create_backtrack_success, create_backtrack_msg),
|
2001
|
+
)[0]
|
2002
|
+
(create_backtrack_success, create_backtrack_msg), create_new_results = session_execute(
|
1655
2003
|
session,
|
1656
2004
|
create_backtrack_query,
|
1657
2005
|
with_results=True,
|
1658
2006
|
debug=debug,
|
1659
|
-
) if not upsert else (True, "Success"), None
|
2007
|
+
) if not upsert else ((True, "Success"), None)
|
1660
2008
|
|
1661
2009
|
if not create_backtrack_success:
|
1662
2010
|
_ = clean_up_temp_tables()
|
@@ -1673,7 +2021,7 @@ def sync_pipe_inplace(
|
|
1673
2021
|
|
1674
2022
|
common_cols = [col for col in new_cols if col in backtrack_cols_types]
|
1675
2023
|
on_cols = {
|
1676
|
-
col: new_cols.get(col
|
2024
|
+
col: new_cols.get(col)
|
1677
2025
|
for col_key, col in pipe.columns.items()
|
1678
2026
|
if (
|
1679
2027
|
col
|
@@ -1687,7 +2035,8 @@ def sync_pipe_inplace(
|
|
1687
2035
|
null_replace_new_cols_str = (
|
1688
2036
|
', '.join([
|
1689
2037
|
f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
|
1690
|
-
+
|
2038
|
+
+ get_null_replacement(get_col_typ(col, new_cols), self.flavor)
|
2039
|
+
+ ") AS "
|
1691
2040
|
+ sql_item_name(col, self.flavor, None)
|
1692
2041
|
for col, typ in new_cols.items()
|
1693
2042
|
])
|
@@ -1703,7 +2052,7 @@ def sync_pipe_inplace(
|
|
1703
2052
|
f"COALESCE({temp_table_names['new']}."
|
1704
2053
|
+ sql_item_name(c, self.flavor, None)
|
1705
2054
|
+ ", "
|
1706
|
-
+ get_null_replacement(
|
2055
|
+
+ get_null_replacement(get_col_typ(c, new_cols), self.flavor)
|
1707
2056
|
+ ") "
|
1708
2057
|
+ ' = '
|
1709
2058
|
+ f"COALESCE({temp_table_names['backtrack']}."
|
@@ -1720,12 +2069,12 @@ def sync_pipe_inplace(
|
|
1720
2069
|
) for c in common_cols
|
1721
2070
|
])
|
1722
2071
|
)
|
1723
|
-
create_delta_query =
|
2072
|
+
create_delta_query = get_create_table_queries(
|
1724
2073
|
select_delta_query,
|
1725
2074
|
temp_tables['delta'],
|
1726
2075
|
self.flavor,
|
1727
2076
|
schema=internal_schema,
|
1728
|
-
)
|
2077
|
+
)[0]
|
1729
2078
|
create_delta_success, create_delta_msg = session_execute(
|
1730
2079
|
session,
|
1731
2080
|
create_delta_query,
|
@@ -1778,20 +2127,28 @@ def sync_pipe_inplace(
|
|
1778
2127
|
+ '\nAND\n'.join([
|
1779
2128
|
(
|
1780
2129
|
f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
|
1781
|
-
+ ", "
|
2130
|
+
+ ", "
|
2131
|
+
+ get_null_replacement(
|
2132
|
+
get_col_typ(c, on_cols),
|
2133
|
+
self.flavor
|
2134
|
+
) + ")"
|
1782
2135
|
+ ' = '
|
1783
2136
|
+ f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
|
1784
|
-
+ ", "
|
2137
|
+
+ ", "
|
2138
|
+
+ get_null_replacement(
|
2139
|
+
get_col_typ(c, on_cols),
|
2140
|
+
self.flavor
|
2141
|
+
) + ")"
|
1785
2142
|
) for c, typ in on_cols.items()
|
1786
2143
|
])
|
1787
2144
|
)
|
1788
2145
|
|
1789
|
-
create_joined_query =
|
2146
|
+
create_joined_query = get_create_table_queries(
|
1790
2147
|
select_joined_query,
|
1791
2148
|
temp_tables['joined'],
|
1792
2149
|
self.flavor,
|
1793
|
-
schema
|
1794
|
-
)
|
2150
|
+
schema=internal_schema,
|
2151
|
+
)[0]
|
1795
2152
|
create_joined_success, create_joined_msg = session_execute(
|
1796
2153
|
session,
|
1797
2154
|
create_joined_query,
|
@@ -1806,7 +2163,7 @@ def sync_pipe_inplace(
|
|
1806
2163
|
+ (', '.join([
|
1807
2164
|
(
|
1808
2165
|
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1809
|
-
+ " != " + get_null_replacement(
|
2166
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
|
1810
2167
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1811
2168
|
+ "\n ELSE NULL\nEND "
|
1812
2169
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
@@ -1820,18 +2177,18 @@ def sync_pipe_inplace(
|
|
1820
2177
|
) for c in delta_cols
|
1821
2178
|
])
|
1822
2179
|
)
|
1823
|
-
create_unseen_query =
|
2180
|
+
create_unseen_query = get_create_table_queries(
|
1824
2181
|
select_unseen_query,
|
1825
2182
|
temp_tables['unseen'],
|
1826
2183
|
self.flavor,
|
1827
2184
|
internal_schema,
|
1828
|
-
)
|
2185
|
+
)[0]
|
1829
2186
|
(create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
|
1830
2187
|
session,
|
1831
2188
|
create_unseen_query,
|
1832
2189
|
with_results=True,
|
1833
2190
|
debug=debug
|
1834
|
-
) if not upsert else (True, "Success"), None
|
2191
|
+
) if not upsert else ((True, "Success"), None)
|
1835
2192
|
if not create_unseen_success:
|
1836
2193
|
_ = clean_up_temp_tables()
|
1837
2194
|
return create_unseen_success, create_unseen_msg
|
@@ -1841,7 +2198,7 @@ def sync_pipe_inplace(
|
|
1841
2198
|
+ (', '.join([
|
1842
2199
|
(
|
1843
2200
|
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1844
|
-
+ " != " + get_null_replacement(
|
2201
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
|
1845
2202
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1846
2203
|
+ "\n ELSE NULL\nEND "
|
1847
2204
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
@@ -1856,12 +2213,12 @@ def sync_pipe_inplace(
|
|
1856
2213
|
])
|
1857
2214
|
)
|
1858
2215
|
|
1859
|
-
create_update_query =
|
2216
|
+
create_update_query = get_create_table_queries(
|
1860
2217
|
select_update_query,
|
1861
2218
|
temp_tables['update'],
|
1862
2219
|
self.flavor,
|
1863
2220
|
internal_schema,
|
1864
|
-
)
|
2221
|
+
)[0]
|
1865
2222
|
(create_update_success, create_update_msg), create_update_results = session_execute(
|
1866
2223
|
session,
|
1867
2224
|
create_update_query,
|
@@ -1901,7 +2258,7 @@ def sync_pipe_inplace(
|
|
1901
2258
|
apply_unseen_queries,
|
1902
2259
|
with_results=True,
|
1903
2260
|
debug=debug,
|
1904
|
-
) if not upsert else (True, "Success"), None
|
2261
|
+
) if not upsert else ((True, "Success"), None)
|
1905
2262
|
if not apply_unseen_success:
|
1906
2263
|
_ = clean_up_temp_tables()
|
1907
2264
|
return apply_unseen_success, apply_unseen_msg
|
@@ -1931,12 +2288,12 @@ def sync_pipe_inplace(
|
|
1931
2288
|
|
1932
2289
|
|
1933
2290
|
def get_sync_time(
|
1934
|
-
|
1935
|
-
|
1936
|
-
|
1937
|
-
|
1938
|
-
|
1939
|
-
|
2291
|
+
self,
|
2292
|
+
pipe: 'mrsm.Pipe',
|
2293
|
+
params: Optional[Dict[str, Any]] = None,
|
2294
|
+
newest: bool = True,
|
2295
|
+
debug: bool = False,
|
2296
|
+
) -> Union[datetime, int, None]:
|
1940
2297
|
"""Get a Pipe's most recent datetime value.
|
1941
2298
|
|
1942
2299
|
Parameters
|
@@ -1960,7 +2317,7 @@ def get_sync_time(
|
|
1960
2317
|
table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1961
2318
|
|
1962
2319
|
dt_col = pipe.columns.get('datetime', None)
|
1963
|
-
dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns]')
|
2320
|
+
dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
|
1964
2321
|
if not dt_col:
|
1965
2322
|
_dt = pipe.guess_datetime()
|
1966
2323
|
dt = sql_item_name(_dt, self.flavor, None) if _dt else None
|
@@ -2031,10 +2388,10 @@ def get_sync_time(
|
|
2031
2388
|
|
2032
2389
|
|
2033
2390
|
def pipe_exists(
|
2034
|
-
|
2035
|
-
|
2036
|
-
|
2037
|
-
|
2391
|
+
self,
|
2392
|
+
pipe: mrsm.Pipe,
|
2393
|
+
debug: bool = False
|
2394
|
+
) -> bool:
|
2038
2395
|
"""
|
2039
2396
|
Check that a Pipe's table exists.
|
2040
2397
|
|
@@ -2042,7 +2399,7 @@ def pipe_exists(
|
|
2042
2399
|
----------
|
2043
2400
|
pipe: mrsm.Pipe:
|
2044
2401
|
The pipe to check.
|
2045
|
-
|
2402
|
+
|
2046
2403
|
debug: bool, default False
|
2047
2404
|
Verbosity toggle.
|
2048
2405
|
|
@@ -2055,8 +2412,8 @@ def pipe_exists(
|
|
2055
2412
|
exists = table_exists(
|
2056
2413
|
pipe.target,
|
2057
2414
|
self,
|
2058
|
-
schema
|
2059
|
-
debug
|
2415
|
+
schema=self.get_pipe_schema(pipe),
|
2416
|
+
debug=debug,
|
2060
2417
|
)
|
2061
2418
|
if debug:
|
2062
2419
|
from meerschaum.utils.debug import dprint
|
@@ -2366,7 +2723,7 @@ def get_pipe_columns_types(
|
|
2366
2723
|
----------
|
2367
2724
|
pipe: mrsm.Pipe:
|
2368
2725
|
The pipe to get the columns for.
|
2369
|
-
|
2726
|
+
|
2370
2727
|
Returns
|
2371
2728
|
-------
|
2372
2729
|
A dictionary of columns names (`str`) and types (`str`).
|
@@ -2381,16 +2738,17 @@ def get_pipe_columns_types(
|
|
2381
2738
|
}
|
2382
2739
|
>>>
|
2383
2740
|
"""
|
2741
|
+
from meerschaum.utils.sql import get_table_cols_types
|
2384
2742
|
if not pipe.exists(debug=debug):
|
2385
2743
|
return {}
|
2386
2744
|
|
2387
|
-
if self.flavor
|
2388
|
-
from meerschaum.utils.sql import get_table_cols_types
|
2745
|
+
if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'):
|
2389
2746
|
return get_table_cols_types(
|
2390
2747
|
pipe.target,
|
2391
2748
|
self,
|
2392
2749
|
flavor=self.flavor,
|
2393
2750
|
schema=self.get_pipe_schema(pipe),
|
2751
|
+
debug=debug,
|
2394
2752
|
)
|
2395
2753
|
|
2396
2754
|
table_columns = {}
|
@@ -2409,6 +2767,35 @@ def get_pipe_columns_types(
|
|
2409
2767
|
return table_columns
|
2410
2768
|
|
2411
2769
|
|
2770
|
+
def get_pipe_columns_indices(
|
2771
|
+
self,
|
2772
|
+
pipe: mrsm.Pipe,
|
2773
|
+
debug: bool = False,
|
2774
|
+
) -> Dict[str, List[Dict[str, str]]]:
|
2775
|
+
"""
|
2776
|
+
Return a dictionary mapping columns to the indices created on those columns.
|
2777
|
+
|
2778
|
+
Parameters
|
2779
|
+
----------
|
2780
|
+
pipe: mrsm.Pipe
|
2781
|
+
The pipe to be queried against.
|
2782
|
+
|
2783
|
+
|
2784
|
+
Returns
|
2785
|
+
-------
|
2786
|
+
A dictionary mapping columns names to lists of dictionaries.
|
2787
|
+
The dictionaries in the lists contain the name and type of the indices.
|
2788
|
+
"""
|
2789
|
+
from meerschaum.utils.sql import get_table_cols_indices
|
2790
|
+
return get_table_cols_indices(
|
2791
|
+
pipe.target,
|
2792
|
+
self,
|
2793
|
+
flavor=self.flavor,
|
2794
|
+
schema=self.get_pipe_schema(pipe),
|
2795
|
+
debug=debug,
|
2796
|
+
)
|
2797
|
+
|
2798
|
+
|
2412
2799
|
def get_add_columns_queries(
|
2413
2800
|
self,
|
2414
2801
|
pipe: mrsm.Pipe,
|
@@ -2438,6 +2825,9 @@ def get_add_columns_queries(
|
|
2438
2825
|
if not pipe.exists(debug=debug):
|
2439
2826
|
return []
|
2440
2827
|
|
2828
|
+
if pipe.parameters.get('static', False):
|
2829
|
+
return []
|
2830
|
+
|
2441
2831
|
from decimal import Decimal
|
2442
2832
|
import copy
|
2443
2833
|
from meerschaum.utils.sql import (
|
@@ -2556,6 +2946,8 @@ def get_alter_columns_queries(
|
|
2556
2946
|
"""
|
2557
2947
|
if not pipe.exists(debug=debug):
|
2558
2948
|
return []
|
2949
|
+
if pipe.static:
|
2950
|
+
return
|
2559
2951
|
from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
|
2560
2952
|
from meerschaum.utils.dataframe import get_numeric_cols
|
2561
2953
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
@@ -2789,7 +3181,6 @@ def get_alter_columns_queries(
|
|
2789
3181
|
|
2790
3182
|
return queries
|
2791
3183
|
|
2792
|
-
|
2793
3184
|
query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
|
2794
3185
|
for col, typ in altered_cols_types.items():
|
2795
3186
|
alter_col_prefix = (
|
@@ -2823,11 +3214,11 @@ def get_alter_columns_queries(
|
|
2823
3214
|
|
2824
3215
|
|
2825
3216
|
def get_to_sql_dtype(
|
2826
|
-
|
2827
|
-
|
2828
|
-
|
2829
|
-
|
2830
|
-
|
3217
|
+
self,
|
3218
|
+
pipe: 'mrsm.Pipe',
|
3219
|
+
df: 'pd.DataFrame',
|
3220
|
+
update_dtypes: bool = True,
|
3221
|
+
) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
|
2831
3222
|
"""
|
2832
3223
|
Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
|
2833
3224
|
|
@@ -2857,7 +3248,7 @@ def get_to_sql_dtype(
|
|
2857
3248
|
>>> get_to_sql_dtype(pipe, df)
|
2858
3249
|
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
|
2859
3250
|
"""
|
2860
|
-
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
|
3251
|
+
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
2861
3252
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2862
3253
|
df_dtypes = {
|
2863
3254
|
col: str(typ)
|
@@ -2865,8 +3256,10 @@ def get_to_sql_dtype(
|
|
2865
3256
|
}
|
2866
3257
|
json_cols = get_json_cols(df)
|
2867
3258
|
numeric_cols = get_numeric_cols(df)
|
3259
|
+
uuid_cols = get_uuid_cols(df)
|
2868
3260
|
df_dtypes.update({col: 'json' for col in json_cols})
|
2869
3261
|
df_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3262
|
+
df_dtypes.update({col: 'uuid' for col in uuid_cols})
|
2870
3263
|
if update_dtypes:
|
2871
3264
|
df_dtypes.update(pipe.dtypes)
|
2872
3265
|
return {
|
@@ -2876,14 +3269,14 @@ def get_to_sql_dtype(
|
|
2876
3269
|
|
2877
3270
|
|
2878
3271
|
def deduplicate_pipe(
|
2879
|
-
|
2880
|
-
|
2881
|
-
|
2882
|
-
|
2883
|
-
|
2884
|
-
|
2885
|
-
|
2886
|
-
|
3272
|
+
self,
|
3273
|
+
pipe: mrsm.Pipe,
|
3274
|
+
begin: Union[datetime, int, None] = None,
|
3275
|
+
end: Union[datetime, int, None] = None,
|
3276
|
+
params: Optional[Dict[str, Any]] = None,
|
3277
|
+
debug: bool = False,
|
3278
|
+
**kwargs: Any
|
3279
|
+
) -> SuccessTuple:
|
2887
3280
|
"""
|
2888
3281
|
Delete duplicate values within a pipe's table.
|
2889
3282
|
|
@@ -2947,7 +3340,7 @@ def deduplicate_pipe(
|
|
2947
3340
|
duplicates_cte_name = sql_item_name('dups', self.flavor, None)
|
2948
3341
|
duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
|
2949
3342
|
previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
|
2950
|
-
|
3343
|
+
|
2951
3344
|
index_list_str = (
|
2952
3345
|
sql_item_name(dt_col, self.flavor, None)
|
2953
3346
|
if dt_col
|
@@ -3038,7 +3431,7 @@ def deduplicate_pipe(
|
|
3038
3431
|
temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
|
3039
3432
|
|
3040
3433
|
create_temporary_table_query = get_create_table_query(
|
3041
|
-
duplicates_cte_subquery,
|
3434
|
+
duplicates_cte_subquery,
|
3042
3435
|
dedup_table,
|
3043
3436
|
self.flavor,
|
3044
3437
|
) + f"""
|