meerschaum 2.6.0.dev1__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/dash/pages/login.py +17 -17
- meerschaum/api/dash/pipes.py +13 -4
- meerschaum/api/routes/_pipes.py +162 -136
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +1 -0
- meerschaum/connectors/api/_pipes.py +46 -13
- meerschaum/connectors/sql/_SQLConnector.py +4 -3
- meerschaum/connectors/sql/_fetch.py +4 -2
- meerschaum/connectors/sql/_pipes.py +496 -148
- meerschaum/connectors/sql/_sql.py +37 -16
- meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
- meerschaum/connectors/valkey/_pipes.py +13 -5
- meerschaum/core/Pipe/__init__.py +20 -0
- meerschaum/core/Pipe/_attributes.py +179 -9
- meerschaum/core/Pipe/_clear.py +10 -8
- meerschaum/core/Pipe/_copy.py +2 -0
- meerschaum/core/Pipe/_data.py +57 -28
- meerschaum/core/Pipe/_deduplicate.py +30 -28
- meerschaum/core/Pipe/_dtypes.py +12 -2
- meerschaum/core/Pipe/_fetch.py +11 -9
- meerschaum/core/Pipe/_sync.py +24 -7
- meerschaum/core/Pipe/_verify.py +51 -48
- meerschaum/utils/dataframe.py +16 -8
- meerschaum/utils/dtypes/__init__.py +9 -1
- meerschaum/utils/dtypes/sql.py +32 -6
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +485 -16
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/METADATA +1 -1
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/RECORD +36 -36
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/zip-safe +0 -0
@@ -320,10 +320,11 @@ def create_indices(
|
|
320
320
|
from meerschaum.utils.debug import dprint
|
321
321
|
if debug:
|
322
322
|
dprint(f"Creating indices for {pipe}...")
|
323
|
-
if not pipe.
|
323
|
+
if not pipe.indices:
|
324
324
|
warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
|
325
325
|
return True
|
326
326
|
|
327
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
327
328
|
ix_queries = {
|
328
329
|
ix: queries
|
329
330
|
for ix, queries in self.get_create_index_queries(pipe, debug=debug).items()
|
@@ -394,23 +395,43 @@ def get_create_index_queries(
|
|
394
395
|
get_distinct_col_count,
|
395
396
|
update_queries,
|
396
397
|
get_null_replacement,
|
398
|
+
get_create_table_queries,
|
399
|
+
get_rename_table_queries,
|
397
400
|
COALESCE_UNIQUE_INDEX_FLAVORS,
|
398
401
|
)
|
402
|
+
from meerschaum.utils.dtypes.sql import (
|
403
|
+
get_db_type_from_pd_type,
|
404
|
+
get_pd_type_from_db_type,
|
405
|
+
AUTO_INCREMENT_COLUMN_FLAVORS,
|
406
|
+
)
|
399
407
|
from meerschaum.config import get_config
|
400
408
|
index_queries = {}
|
401
409
|
|
402
410
|
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
|
411
|
+
static = pipe.parameters.get('static', False)
|
403
412
|
index_names = pipe.get_indices()
|
404
413
|
indices = pipe.indices
|
414
|
+
existing_cols_types = pipe.get_columns_types(debug=debug)
|
415
|
+
existing_cols_pd_types = {
|
416
|
+
col: get_pd_type_from_db_type(typ)
|
417
|
+
for col, typ in existing_cols_types.items()
|
418
|
+
}
|
419
|
+
existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
|
420
|
+
existing_ix_names = set()
|
421
|
+
existing_primary_keys = []
|
422
|
+
for col, col_indices in existing_cols_indices.items():
|
423
|
+
for col_ix_doc in col_indices:
|
424
|
+
existing_ix_names.add(col_ix_doc.get('name', None))
|
425
|
+
if col_ix_doc.get('type', None) == 'PRIMARY KEY':
|
426
|
+
existing_primary_keys.append(col)
|
405
427
|
|
406
428
|
_datetime = pipe.get_columns('datetime', error=False)
|
407
|
-
_datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns, UTC]')
|
408
429
|
_datetime_name = (
|
409
430
|
sql_item_name(_datetime, self.flavor, None)
|
410
431
|
if _datetime is not None else None
|
411
432
|
)
|
412
433
|
_datetime_index_name = (
|
413
|
-
sql_item_name(index_names['datetime'], self.flavor, None)
|
434
|
+
sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
|
414
435
|
if index_names.get('datetime', None)
|
415
436
|
else None
|
416
437
|
)
|
@@ -420,6 +441,29 @@ def get_create_index_queries(
|
|
420
441
|
if _id is not None
|
421
442
|
else None
|
422
443
|
)
|
444
|
+
primary_key = pipe.columns.get('primary', None)
|
445
|
+
primary_key_name = (
|
446
|
+
sql_item_name(primary_key, flavor=self.flavor, schema=None)
|
447
|
+
if primary_key
|
448
|
+
else None
|
449
|
+
)
|
450
|
+
autoincrement = (
|
451
|
+
pipe.parameters.get('autoincrement', False)
|
452
|
+
or (
|
453
|
+
primary_key is not None
|
454
|
+
and primary_key not in existing_cols_pd_types
|
455
|
+
)
|
456
|
+
)
|
457
|
+
primary_key_db_type = (
|
458
|
+
get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor)
|
459
|
+
if primary_key
|
460
|
+
else None
|
461
|
+
)
|
462
|
+
primary_key_constraint_name = (
|
463
|
+
sql_item_name(f'pk_{pipe.target}', self.flavor, None)
|
464
|
+
if primary_key is not None
|
465
|
+
else None
|
466
|
+
)
|
423
467
|
|
424
468
|
_id_index_name = (
|
425
469
|
sql_item_name(index_names['id'], self.flavor, None)
|
@@ -462,8 +506,10 @@ def get_create_index_queries(
|
|
462
506
|
)
|
463
507
|
elif self.flavor == 'mssql':
|
464
508
|
dt_query = (
|
465
|
-
|
466
|
-
|
509
|
+
"CREATE "
|
510
|
+
+ ("CLUSTERED " if not primary_key else '')
|
511
|
+
+ f"INDEX {_datetime_index_name} "
|
512
|
+
+ f"ON {_pipe_name} ({_datetime_name})"
|
467
513
|
)
|
468
514
|
else: ### mssql, sqlite, etc.
|
469
515
|
dt_query = (
|
@@ -473,6 +519,115 @@ def get_create_index_queries(
|
|
473
519
|
|
474
520
|
index_queries[_datetime] = [dt_query]
|
475
521
|
|
522
|
+
primary_queries = []
|
523
|
+
if (
|
524
|
+
primary_key is not None
|
525
|
+
and primary_key not in existing_primary_keys
|
526
|
+
and not static
|
527
|
+
):
|
528
|
+
if autoincrement and primary_key not in existing_cols_pd_types:
|
529
|
+
autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
|
530
|
+
self.flavor,
|
531
|
+
AUTO_INCREMENT_COLUMN_FLAVORS['default']
|
532
|
+
)
|
533
|
+
primary_queries.extend([
|
534
|
+
(
|
535
|
+
f"ALTER TABLE {_pipe_name}\n"
|
536
|
+
f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
|
537
|
+
),
|
538
|
+
])
|
539
|
+
elif not autoincrement and primary_key in existing_cols_pd_types:
|
540
|
+
if self.flavor == 'sqlite':
|
541
|
+
new_table_name = sql_item_name(
|
542
|
+
f'_new_{pipe.target}',
|
543
|
+
self.flavor,
|
544
|
+
self.get_pipe_schema(pipe)
|
545
|
+
)
|
546
|
+
select_cols_str = ', '.join(
|
547
|
+
[
|
548
|
+
sql_item_name(col, self.flavor, None)
|
549
|
+
for col in existing_cols_types
|
550
|
+
]
|
551
|
+
)
|
552
|
+
primary_queries.extend(
|
553
|
+
get_create_table_queries(
|
554
|
+
existing_cols_pd_types,
|
555
|
+
f'_new_{pipe.target}',
|
556
|
+
self.flavor,
|
557
|
+
schema=self.get_pipe_schema(pipe),
|
558
|
+
primary_key=primary_key,
|
559
|
+
) + [
|
560
|
+
(
|
561
|
+
f"INSERT INTO {new_table_name} ({select_cols_str})\n"
|
562
|
+
f"SELECT {select_cols_str}\nFROM {_pipe_name}"
|
563
|
+
),
|
564
|
+
f"DROP TABLE {_pipe_name}",
|
565
|
+
] + get_rename_table_queries(
|
566
|
+
f'_new_{pipe.target}',
|
567
|
+
pipe.target,
|
568
|
+
self.flavor,
|
569
|
+
schema=self.get_pipe_schema(pipe),
|
570
|
+
)
|
571
|
+
)
|
572
|
+
elif self.flavor == 'oracle':
|
573
|
+
primary_queries.extend([
|
574
|
+
(
|
575
|
+
f"ALTER TABLE {_pipe_name}\n"
|
576
|
+
f"MODIFY {primary_key_name} NOT NULL"
|
577
|
+
),
|
578
|
+
(
|
579
|
+
f"ALTER TABLE {_pipe_name}\n"
|
580
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
581
|
+
)
|
582
|
+
])
|
583
|
+
elif self.flavor in ('mysql', 'mariadb'):
|
584
|
+
primary_queries.extend([
|
585
|
+
(
|
586
|
+
f"ALTER TABLE {_pipe_name}\n"
|
587
|
+
f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
|
588
|
+
),
|
589
|
+
(
|
590
|
+
f"ALTER TABLE {_pipe_name}\n"
|
591
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
592
|
+
)
|
593
|
+
])
|
594
|
+
elif self.flavor == 'timescaledb':
|
595
|
+
primary_queries.extend([
|
596
|
+
(
|
597
|
+
f"ALTER TABLE {_pipe_name}\n"
|
598
|
+
f"ALTER COLUMN {primary_key_name} SET NOT NULL"
|
599
|
+
),
|
600
|
+
(
|
601
|
+
f"ALTER TABLE {_pipe_name}\n"
|
602
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
|
603
|
+
f"{_datetime_name}, " if _datetime_name else ""
|
604
|
+
) + f"{primary_key_name})"
|
605
|
+
),
|
606
|
+
])
|
607
|
+
elif self.flavor in ('citus', 'postgresql', 'duckdb'):
|
608
|
+
primary_queries.extend([
|
609
|
+
(
|
610
|
+
f"ALTER TABLE {_pipe_name}\n"
|
611
|
+
f"ALTER COLUMN {primary_key_name} SET NOT NULL"
|
612
|
+
),
|
613
|
+
(
|
614
|
+
f"ALTER TABLE {_pipe_name}\n"
|
615
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
616
|
+
),
|
617
|
+
])
|
618
|
+
else:
|
619
|
+
primary_queries.extend([
|
620
|
+
(
|
621
|
+
f"ALTER TABLE {_pipe_name}\n"
|
622
|
+
f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
|
623
|
+
),
|
624
|
+
(
|
625
|
+
f"ALTER TABLE {_pipe_name}\n"
|
626
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
627
|
+
),
|
628
|
+
])
|
629
|
+
index_queries[primary_key] = primary_queries
|
630
|
+
|
476
631
|
### create id index
|
477
632
|
if _id_name is not None:
|
478
633
|
if self.flavor == 'timescaledb':
|
@@ -496,7 +651,7 @@ def get_create_index_queries(
|
|
496
651
|
other_index_names = {
|
497
652
|
ix_key: ix_unquoted
|
498
653
|
for ix_key, ix_unquoted in index_names.items()
|
499
|
-
if ix_key not in ('datetime', 'id')
|
654
|
+
if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names
|
500
655
|
}
|
501
656
|
for ix_key, ix_unquoted in other_index_names.items():
|
502
657
|
ix_name = sql_item_name(ix_unquoted, self.flavor, None)
|
@@ -509,13 +664,12 @@ def get_create_index_queries(
|
|
509
664
|
cols_names_str = ", ".join(cols_names)
|
510
665
|
index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
|
511
666
|
|
512
|
-
existing_cols_types = pipe.get_columns_types(debug=debug)
|
513
667
|
indices_cols_str = ', '.join(
|
514
|
-
|
668
|
+
list({
|
515
669
|
sql_item_name(ix, self.flavor)
|
516
670
|
for ix_key, ix in pipe.columns.items()
|
517
671
|
if ix and ix in existing_cols_types
|
518
|
-
|
672
|
+
})
|
519
673
|
)
|
520
674
|
coalesce_indices_cols_str = ', '.join(
|
521
675
|
[
|
@@ -718,7 +872,11 @@ def get_pipe_data(
|
|
718
872
|
from meerschaum.utils.sql import sql_item_name
|
719
873
|
from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
|
720
874
|
from meerschaum.utils.packages import import_pandas
|
721
|
-
from meerschaum.utils.dtypes import
|
875
|
+
from meerschaum.utils.dtypes import (
|
876
|
+
attempt_cast_to_numeric,
|
877
|
+
attempt_cast_to_uuid,
|
878
|
+
are_dtypes_equal,
|
879
|
+
)
|
722
880
|
pd = import_pandas()
|
723
881
|
is_dask = 'dask' in pd.__name__
|
724
882
|
|
@@ -813,7 +971,7 @@ def get_pipe_data(
|
|
813
971
|
ignore_dt_cols = [
|
814
972
|
col
|
815
973
|
for col, dtype in pipe.dtypes.items()
|
816
|
-
if
|
974
|
+
if not are_dtypes_equal(str(dtype), 'datetime')
|
817
975
|
]
|
818
976
|
### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
|
819
977
|
df = (
|
@@ -821,6 +979,7 @@ def get_pipe_data(
|
|
821
979
|
df,
|
822
980
|
ignore_cols=ignore_dt_cols,
|
823
981
|
chunksize=kw.get('chunksize', None),
|
982
|
+
strip_timezone=(pipe.tzinfo is None),
|
824
983
|
debug=debug,
|
825
984
|
) if isinstance(df, pd.DataFrame) else (
|
826
985
|
[
|
@@ -828,6 +987,7 @@ def get_pipe_data(
|
|
828
987
|
c,
|
829
988
|
ignore_cols=ignore_dt_cols,
|
830
989
|
chunksize=kw.get('chunksize', None),
|
990
|
+
strip_timezone=(pipe.tzinfo is None),
|
831
991
|
debug=debug,
|
832
992
|
)
|
833
993
|
for c in df
|
@@ -855,6 +1015,7 @@ def get_pipe_data_query(
|
|
855
1015
|
begin_add_minutes: int = 0,
|
856
1016
|
end_add_minutes: int = 0,
|
857
1017
|
replace_nulls: Optional[str] = None,
|
1018
|
+
skip_existing_cols_check: bool = False,
|
858
1019
|
debug: bool = False,
|
859
1020
|
**kw: Any
|
860
1021
|
) -> Union[str, None]:
|
@@ -905,6 +1066,9 @@ def get_pipe_data_query(
|
|
905
1066
|
replace_nulls: Optional[str], default None
|
906
1067
|
If provided, replace null values with this value.
|
907
1068
|
|
1069
|
+
skip_existing_cols_check: bool, default False
|
1070
|
+
If `True`, do not verify that querying columns are actually on the table.
|
1071
|
+
|
908
1072
|
debug: bool, default False
|
909
1073
|
Verbosity toggle.
|
910
1074
|
|
@@ -912,16 +1076,13 @@ def get_pipe_data_query(
|
|
912
1076
|
-------
|
913
1077
|
A `SELECT` query to retrieve a pipe's data.
|
914
1078
|
"""
|
915
|
-
from meerschaum.utils.debug import dprint
|
916
1079
|
from meerschaum.utils.misc import items_str
|
917
1080
|
from meerschaum.utils.sql import sql_item_name, dateadd_str
|
918
|
-
from meerschaum.utils.packages import import_pandas
|
919
|
-
pd = import_pandas()
|
920
1081
|
existing_cols = pipe.get_columns_types(debug=debug)
|
921
1082
|
select_columns = (
|
922
1083
|
[col for col in existing_cols]
|
923
1084
|
if not select_columns
|
924
|
-
else [col for col in select_columns if col in existing_cols]
|
1085
|
+
else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
|
925
1086
|
)
|
926
1087
|
if omit_columns:
|
927
1088
|
select_columns = [col for col in select_columns if col not in omit_columns]
|
@@ -935,7 +1096,12 @@ def get_pipe_data_query(
|
|
935
1096
|
if begin is not None:
|
936
1097
|
begin -= backtrack_interval
|
937
1098
|
|
938
|
-
|
1099
|
+
begin, end = pipe.parse_date_bounds(begin, end)
|
1100
|
+
|
1101
|
+
cols_names = [
|
1102
|
+
sql_item_name(col, self.flavor, None)
|
1103
|
+
for col in select_columns
|
1104
|
+
]
|
939
1105
|
select_cols_str = (
|
940
1106
|
'SELECT\n '
|
941
1107
|
+ ',\n '.join(
|
@@ -948,7 +1114,7 @@ def get_pipe_data_query(
|
|
948
1114
|
for col_name in cols_names
|
949
1115
|
]
|
950
1116
|
)
|
951
|
-
)
|
1117
|
+
) if cols_names else 'SELECT *'
|
952
1118
|
pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
953
1119
|
query = f"{select_cols_str}\nFROM {pipe_table_name}"
|
954
1120
|
where = ""
|
@@ -972,7 +1138,7 @@ def get_pipe_data_query(
|
|
972
1138
|
quoted_indices = {
|
973
1139
|
key: sql_item_name(val, self.flavor, None)
|
974
1140
|
for key, val in pipe.columns.items()
|
975
|
-
if val in existing_cols
|
1141
|
+
if val in existing_cols or skip_existing_cols_check
|
976
1142
|
}
|
977
1143
|
|
978
1144
|
if begin is not None or end is not None:
|
@@ -992,7 +1158,7 @@ def get_pipe_data_query(
|
|
992
1158
|
)
|
993
1159
|
|
994
1160
|
is_dt_bound = False
|
995
|
-
if begin is not None and _dt in existing_cols:
|
1161
|
+
if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
|
996
1162
|
begin_da = dateadd_str(
|
997
1163
|
flavor=self.flavor,
|
998
1164
|
datepart='minute',
|
@@ -1002,7 +1168,7 @@ def get_pipe_data_query(
|
|
1002
1168
|
where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
|
1003
1169
|
is_dt_bound = True
|
1004
1170
|
|
1005
|
-
if end is not None and _dt in existing_cols:
|
1171
|
+
if end is not None and (_dt in existing_cols or skip_existing_cols_check):
|
1006
1172
|
if 'int' in str(type(end)).lower() and end == begin:
|
1007
1173
|
end += 1
|
1008
1174
|
end_da = dateadd_str(
|
@@ -1016,7 +1182,11 @@ def get_pipe_data_query(
|
|
1016
1182
|
|
1017
1183
|
if params is not None:
|
1018
1184
|
from meerschaum.utils.sql import build_where
|
1019
|
-
valid_params = {
|
1185
|
+
valid_params = {
|
1186
|
+
k: v
|
1187
|
+
for k, v in params.items()
|
1188
|
+
if k in existing_cols or skip_existing_cols_check
|
1189
|
+
}
|
1020
1190
|
if valid_params:
|
1021
1191
|
where += build_where(valid_params, self).replace(
|
1022
1192
|
'WHERE', ('AND' if is_dt_bound else "")
|
@@ -1030,7 +1200,7 @@ def get_pipe_data_query(
|
|
1030
1200
|
order_by = ""
|
1031
1201
|
if quoted_indices:
|
1032
1202
|
order_by += "\nORDER BY "
|
1033
|
-
if _dt and _dt in existing_cols:
|
1203
|
+
if _dt and (_dt in existing_cols or skip_existing_cols_check):
|
1034
1204
|
order_by += dt + ' ' + order + ','
|
1035
1205
|
for key, quoted_col_name in quoted_indices.items():
|
1036
1206
|
if dt == quoted_col_name:
|
@@ -1140,6 +1310,70 @@ def get_pipe_attributes(
|
|
1140
1310
|
return attributes
|
1141
1311
|
|
1142
1312
|
|
1313
|
+
def create_pipe_table_from_df(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame',
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Create a pipe's table from its configured dtypes and an incoming dataframe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table should be created.

    df: pd.DataFrame
        The incoming dataframe. Its columns and dtypes seed the table's schema.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of the success flag and a message.
    """
    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
    from meerschaum.utils.sql import get_create_table_queries, sql_item_name
    primary_key = pipe.columns.get('primary', None)
    dt_col = pipe.columns.get('datetime', None)
    df_dtypes = {
        col: str(typ)
        for col, typ in df.dtypes.items()
    }

    ### NOTE: Later entries take precedence, so the pipe's configured dtypes always win.
    new_dtypes = {
        **df_dtypes,
        ### Index columns default to `int` only when they are missing from the dataframe;
        ### otherwise keep the dataframe's dtype. Unset index columns are ignored.
        **{
            col: df_dtypes.get(col, 'int')
            for col_ix, col in pipe.columns.items()
            if col and col_ix != 'primary'
        },
        **{
            col: 'uuid'
            for col in get_uuid_cols(df)
        },
        **{
            col: 'json'
            for col in get_json_cols(df)
        },
        **{
            col: 'numeric'
            for col in get_numeric_cols(df)
        },
        **pipe.dtypes
    }

    ### A primary key without a known dtype is treated as auto-incrementing.
    autoincrement = (
        pipe.parameters.get('autoincrement', False)
        or (primary_key and primary_key not in new_dtypes)
    )
    if autoincrement:
        ### Omit the primary key from the dtypes passed to `get_create_table_queries()`.
        _ = new_dtypes.pop(primary_key, None)

    create_table_queries = get_create_table_queries(
        new_dtypes,
        pipe.target,
        self.flavor,
        schema=self.get_pipe_schema(pipe),
        primary_key=primary_key,
        datetime_column=dt_col,
    )
    success = all(
        self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
    )
    target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
    msg = (
        "Success"
        if success
        else f"Failed to create {target_name}."
    )
    return success, msg
|
1375
|
+
|
1376
|
+
|
1143
1377
|
def sync_pipe(
|
1144
1378
|
self,
|
1145
1379
|
pipe: mrsm.Pipe,
|
@@ -1202,10 +1436,12 @@ def sync_pipe(
|
|
1202
1436
|
sql_item_name,
|
1203
1437
|
update_queries,
|
1204
1438
|
get_create_table_queries,
|
1439
|
+
get_reset_autoincrement_queries,
|
1205
1440
|
)
|
1206
1441
|
from meerschaum.utils.misc import generate_password
|
1207
|
-
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
|
1442
|
+
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
1208
1443
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
1444
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1209
1445
|
from meerschaum import Pipe
|
1210
1446
|
import time
|
1211
1447
|
import copy
|
@@ -1216,6 +1452,7 @@ def sync_pipe(
|
|
1216
1452
|
return False, msg
|
1217
1453
|
|
1218
1454
|
start = time.perf_counter()
|
1455
|
+
pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
|
1219
1456
|
|
1220
1457
|
if not pipe.temporary and not pipe.get_id(debug=debug):
|
1221
1458
|
register_tuple = pipe.register(debug=debug)
|
@@ -1244,11 +1481,15 @@ def sync_pipe(
|
|
1244
1481
|
### Check for new columns.
|
1245
1482
|
add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
|
1246
1483
|
if add_cols_queries:
|
1484
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1485
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1247
1486
|
if not self.exec_queries(add_cols_queries, debug=debug):
|
1248
1487
|
warn(f"Failed to add new columns to {pipe}.")
|
1249
1488
|
|
1250
1489
|
alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
|
1251
1490
|
if alter_cols_queries:
|
1491
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1492
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1252
1493
|
if not self.exec_queries(alter_cols_queries, debug=debug):
|
1253
1494
|
warn(f"Failed to alter columns for {pipe}.")
|
1254
1495
|
else:
|
@@ -1312,21 +1553,15 @@ def sync_pipe(
|
|
1312
1553
|
})
|
1313
1554
|
|
1314
1555
|
primary_key = pipe.columns.get('primary', None)
|
1315
|
-
new_dtypes = {
|
1316
|
-
**{
|
1317
|
-
col: str(typ)
|
1318
|
-
for col, typ in unseen_df.dtypes.items()
|
1319
|
-
},
|
1320
|
-
**{
|
1321
|
-
col: 'int'
|
1322
|
-
for col_ix, col in pipe.columns.items()
|
1323
|
-
if col_ix != 'primary'
|
1324
|
-
},
|
1325
|
-
**pipe.dtypes
|
1326
|
-
} if is_new else {}
|
1327
1556
|
autoincrement = (
|
1328
1557
|
pipe.parameters.get('autoincrement', False)
|
1329
|
-
or (
|
1558
|
+
or (
|
1559
|
+
is_new
|
1560
|
+
and primary_key
|
1561
|
+
and primary_key
|
1562
|
+
not in pipe.dtypes
|
1563
|
+
and primary_key not in unseen_df.columns
|
1564
|
+
)
|
1330
1565
|
)
|
1331
1566
|
if autoincrement and autoincrement not in pipe.parameters:
|
1332
1567
|
pipe.parameters['autoincrement'] = autoincrement
|
@@ -1334,77 +1569,117 @@ def sync_pipe(
|
|
1334
1569
|
if not edit_success:
|
1335
1570
|
return edit_success, edit_msg
|
1336
1571
|
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1572
|
+
autoincrement_needs_reset = False
|
1573
|
+
if autoincrement and primary_key:
|
1574
|
+
if primary_key not in df.columns:
|
1575
|
+
if unseen_df is not None and primary_key in unseen_df.columns:
|
1576
|
+
del unseen_df[primary_key]
|
1577
|
+
if update_df is not None and primary_key in update_df.columns:
|
1578
|
+
del update_df[primary_key]
|
1579
|
+
if delta_df is not None and primary_key in delta_df.columns:
|
1580
|
+
del delta_df[primary_key]
|
1581
|
+
elif unseen_df[primary_key].notnull().any():
|
1582
|
+
autoincrement_needs_reset = True
|
1344
1583
|
|
1345
1584
|
if is_new:
|
1346
|
-
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
if 'datetime' in pipe.columns and self.flavor == 'timescaledb':
|
1351
|
-
primary_key = None
|
1352
|
-
|
1353
|
-
create_table_queries = get_create_table_queries(
|
1354
|
-
new_dtypes,
|
1355
|
-
pipe.target,
|
1356
|
-
self.flavor,
|
1357
|
-
schema=self.get_pipe_schema(pipe),
|
1358
|
-
primary_key=primary_key,
|
1359
|
-
)
|
1360
|
-
create_success = all(
|
1361
|
-
self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
|
1585
|
+
create_success, create_msg = self.create_pipe_table_from_df(
|
1586
|
+
pipe,
|
1587
|
+
unseen_df,
|
1588
|
+
debug=debug,
|
1362
1589
|
)
|
1363
1590
|
if not create_success:
|
1364
|
-
|
1591
|
+
return create_success, create_msg
|
1592
|
+
|
1593
|
+
do_identity_insert = bool(
|
1594
|
+
self.flavor in ('mssql',)
|
1595
|
+
and primary_key in unseen_df.columns
|
1596
|
+
and autoincrement
|
1597
|
+
)
|
1598
|
+
with self.engine.connect() as connection:
|
1599
|
+
with connection.begin():
|
1600
|
+
if do_identity_insert:
|
1601
|
+
identity_on_result = self.exec(
|
1602
|
+
f"SET IDENTITY_INSERT {pipe_name} ON",
|
1603
|
+
commit=False,
|
1604
|
+
_connection=connection,
|
1605
|
+
close=False,
|
1606
|
+
debug=debug,
|
1607
|
+
)
|
1608
|
+
if identity_on_result is None:
|
1609
|
+
return False, f"Could not enable identity inserts on {pipe}."
|
1365
1610
|
|
1366
|
-
|
1611
|
+
stats = self.to_sql(
|
1612
|
+
unseen_df,
|
1613
|
+
_connection=connection,
|
1614
|
+
**unseen_kw
|
1615
|
+
)
|
1616
|
+
|
1617
|
+
if do_identity_insert:
|
1618
|
+
identity_off_result = self.exec(
|
1619
|
+
f"SET IDENTITY_INSERT {pipe_name} OFF",
|
1620
|
+
commit=False,
|
1621
|
+
_connection=connection,
|
1622
|
+
close=False,
|
1623
|
+
debug=debug,
|
1624
|
+
)
|
1625
|
+
if identity_off_result is None:
|
1626
|
+
return False, f"Could not disable identity inserts on {pipe}."
|
1367
1627
|
|
1368
1628
|
if is_new:
|
1369
1629
|
if not self.create_indices(pipe, debug=debug):
|
1370
1630
|
warn(f"Failed to create indices for {pipe}. Continuing...")
|
1371
1631
|
|
1372
|
-
if
|
1373
|
-
|
1374
|
-
|
1375
|
-
|
1376
|
-
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
else 1
|
1385
|
-
)
|
1386
|
-
) if dt_col else None
|
1632
|
+
if autoincrement_needs_reset:
|
1633
|
+
reset_autoincrement_queries = get_reset_autoincrement_queries(
|
1634
|
+
pipe.target,
|
1635
|
+
primary_key,
|
1636
|
+
self,
|
1637
|
+
schema=self.get_pipe_schema(pipe),
|
1638
|
+
debug=debug,
|
1639
|
+
)
|
1640
|
+
results = self.exec_queries(reset_autoincrement_queries, debug=debug)
|
1641
|
+
for result in results:
|
1642
|
+
if result is None:
|
1643
|
+
warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
|
1387
1644
|
|
1645
|
+
if update_df is not None and len(update_df) > 0:
|
1388
1646
|
transact_id = generate_password(3)
|
1389
|
-
|
1647
|
+
temp_prefix = '##' if self.flavor != 'oracle' else ''
|
1648
|
+
temp_target = temp_prefix + transact_id + '_' + pipe.target
|
1390
1649
|
self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
|
1391
1650
|
temp_pipe = Pipe(
|
1392
1651
|
pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
|
1393
1652
|
instance=pipe.instance_keys,
|
1394
1653
|
columns={
|
1395
|
-
ix_key: ix
|
1654
|
+
(ix_key if ix_key != 'primary' else 'primary_'): ix
|
1396
1655
|
for ix_key, ix in pipe.columns.items()
|
1397
1656
|
if ix and ix in update_df.columns
|
1398
1657
|
},
|
1399
|
-
dtypes=
|
1658
|
+
dtypes={
|
1659
|
+
col: typ
|
1660
|
+
for col, typ in pipe.dtypes.items()
|
1661
|
+
if col in update_df.columns
|
1662
|
+
},
|
1400
1663
|
target=temp_target,
|
1401
1664
|
temporary=True,
|
1402
1665
|
parameters={
|
1666
|
+
'static': True,
|
1403
1667
|
'schema': self.internal_schema,
|
1404
1668
|
'hypertable': False,
|
1669
|
+
'autoincrement': False,
|
1405
1670
|
},
|
1406
1671
|
)
|
1407
|
-
temp_pipe.
|
1672
|
+
temp_pipe.__dict__['_columns_types'] = {
|
1673
|
+
col: get_db_type_from_pd_type(
|
1674
|
+
pipe.dtypes.get(col, str(typ)),
|
1675
|
+
self.flavor,
|
1676
|
+
)
|
1677
|
+
for col, typ in update_df.dtypes.items()
|
1678
|
+
}
|
1679
|
+
temp_pipe.__dict__['_columns_types_timestamp'] = time.perf_counter()
|
1680
|
+
temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
|
1681
|
+
if not temp_success:
|
1682
|
+
return temp_success, temp_msg
|
1408
1683
|
existing_cols = pipe.get_columns_types(debug=debug)
|
1409
1684
|
join_cols = [
|
1410
1685
|
col
|
@@ -1530,28 +1805,23 @@ def sync_pipe_inplace(
|
|
1530
1805
|
)
|
1531
1806
|
from meerschaum.utils.sql import (
|
1532
1807
|
sql_item_name,
|
1533
|
-
get_sqlalchemy_table,
|
1534
1808
|
get_update_queries,
|
1535
1809
|
get_null_replacement,
|
1536
|
-
|
1537
|
-
NO_SELECT_INTO_FLAVORS,
|
1538
|
-
format_cte_subquery,
|
1539
|
-
get_create_table_query,
|
1810
|
+
get_create_table_queries,
|
1540
1811
|
get_table_cols_types,
|
1541
|
-
truncate_item_name,
|
1542
1812
|
session_execute,
|
1543
|
-
table_exists,
|
1544
1813
|
update_queries,
|
1545
1814
|
)
|
1815
|
+
from meerschaum.utils.dtypes import coerce_timezone, are_dtypes_equal
|
1546
1816
|
from meerschaum.utils.dtypes.sql import (
|
1547
1817
|
get_pd_type_from_db_type,
|
1548
1818
|
)
|
1549
1819
|
from meerschaum.utils.misc import generate_password
|
1550
|
-
from meerschaum.utils.debug import dprint
|
1551
1820
|
|
1552
1821
|
transact_id = generate_password(3)
|
1553
1822
|
def get_temp_table_name(label: str) -> str:
|
1554
|
-
|
1823
|
+
temp_prefix = '##' if self.flavor != 'oracle' else ''
|
1824
|
+
return temp_prefix + transact_id + '_' + label + '_' + pipe.target
|
1555
1825
|
|
1556
1826
|
internal_schema = self.internal_schema
|
1557
1827
|
temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
|
@@ -1578,6 +1848,11 @@ def sync_pipe_inplace(
|
|
1578
1848
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1579
1849
|
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
|
1580
1850
|
database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
|
1851
|
+
primary_key = pipe.columns.get('primary', None)
|
1852
|
+
autoincrement = pipe.parameters.get('autoincrement', False)
|
1853
|
+
dt_col = pipe.columns.get('datetime', None)
|
1854
|
+
dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
1855
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
|
1581
1856
|
|
1582
1857
|
def clean_up_temp_tables(ready_to_drop: bool = False):
|
1583
1858
|
log_success, log_msg = self._log_temporary_tables_creation(
|
@@ -1601,13 +1876,16 @@ def sync_pipe_inplace(
|
|
1601
1876
|
|
1602
1877
|
sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
|
1603
1878
|
if not pipe.exists(debug=debug):
|
1604
|
-
|
1879
|
+
create_pipe_queries = get_create_table_queries(
|
1605
1880
|
metadef,
|
1606
1881
|
pipe.target,
|
1607
1882
|
self.flavor,
|
1608
1883
|
schema=self.get_pipe_schema(pipe),
|
1884
|
+
primary_key=primary_key,
|
1885
|
+
autoincrement=autoincrement,
|
1886
|
+
datetime_column=dt_col,
|
1609
1887
|
)
|
1610
|
-
result = self.
|
1888
|
+
result = self.exec_queries(create_pipe_queries, debug=debug)
|
1611
1889
|
if result is None:
|
1612
1890
|
_ = clean_up_temp_tables()
|
1613
1891
|
return False, f"Could not insert new data into {pipe} from its SQL query definition."
|
@@ -1622,12 +1900,12 @@ def sync_pipe_inplace(
|
|
1622
1900
|
session = sqlalchemy_orm.Session(self.engine)
|
1623
1901
|
connectable = session if self.flavor != 'duckdb' else self
|
1624
1902
|
|
1625
|
-
create_new_query =
|
1903
|
+
create_new_query = get_create_table_queries(
|
1626
1904
|
metadef,
|
1627
1905
|
temp_tables[('new') if not upsert else 'update'],
|
1628
1906
|
self.flavor,
|
1629
1907
|
schema=internal_schema,
|
1630
|
-
)
|
1908
|
+
)[0]
|
1631
1909
|
(create_new_success, create_new_msg), create_new_results = session_execute(
|
1632
1910
|
session,
|
1633
1911
|
create_new_query,
|
@@ -1658,13 +1936,20 @@ def sync_pipe_inplace(
|
|
1658
1936
|
sql_item_name(col, self.flavor)
|
1659
1937
|
for col in new_cols
|
1660
1938
|
])
|
1939
|
+
def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
|
1940
|
+
if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
|
1941
|
+
return new_cols_types[col]
|
1942
|
+
return cols_types[col]
|
1661
1943
|
|
1662
1944
|
add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
|
1663
1945
|
if add_cols_queries:
|
1946
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1947
|
+
_ = pipe.__dict__.pop('_columns_indices', None)
|
1664
1948
|
self.exec_queries(add_cols_queries, debug=debug)
|
1665
1949
|
|
1666
1950
|
alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
|
1667
1951
|
if alter_cols_queries:
|
1952
|
+
_ = pipe.__dict__.pop('_columns_types', None)
|
1668
1953
|
self.exec_queries(alter_cols_queries, debug=debug)
|
1669
1954
|
|
1670
1955
|
insert_queries = [
|
@@ -1689,6 +1974,26 @@ def sync_pipe_inplace(
|
|
1689
1974
|
_ = clean_up_temp_tables()
|
1690
1975
|
return True, f"Inserted {new_count}, updated 0 rows."
|
1691
1976
|
|
1977
|
+
(new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
|
1978
|
+
session,
|
1979
|
+
[
|
1980
|
+
"SELECT\n"
|
1981
|
+
f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
|
1982
|
+
f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
|
1983
|
+
f"FROM {temp_table_names['new']}\n"
|
1984
|
+
f"WHERE {dt_col_name} IS NOT NULL"
|
1985
|
+
],
|
1986
|
+
with_results=True,
|
1987
|
+
debug=debug,
|
1988
|
+
)
|
1989
|
+
if not new_dt_bounds_success:
|
1990
|
+
return (
|
1991
|
+
new_dt_bounds_success,
|
1992
|
+
f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
|
1993
|
+
)
|
1994
|
+
|
1995
|
+
begin, end = new_dt_bounds_results[0].fetchone()
|
1996
|
+
|
1692
1997
|
backtrack_def = self.get_pipe_data_query(
|
1693
1998
|
pipe,
|
1694
1999
|
begin=begin,
|
@@ -1699,19 +2004,18 @@ def sync_pipe_inplace(
|
|
1699
2004
|
debug=debug,
|
1700
2005
|
order=None,
|
1701
2006
|
)
|
1702
|
-
|
1703
|
-
create_backtrack_query = get_create_table_query(
|
2007
|
+
create_backtrack_query = get_create_table_queries(
|
1704
2008
|
backtrack_def,
|
1705
2009
|
temp_tables['backtrack'],
|
1706
2010
|
self.flavor,
|
1707
2011
|
schema=internal_schema,
|
1708
|
-
)
|
1709
|
-
(create_backtrack_success, create_backtrack_msg),
|
2012
|
+
)[0]
|
2013
|
+
(create_backtrack_success, create_backtrack_msg), create_new_results = session_execute(
|
1710
2014
|
session,
|
1711
2015
|
create_backtrack_query,
|
1712
2016
|
with_results=True,
|
1713
2017
|
debug=debug,
|
1714
|
-
) if not upsert else (True, "Success"), None
|
2018
|
+
) if not upsert else ((True, "Success"), None)
|
1715
2019
|
|
1716
2020
|
if not create_backtrack_success:
|
1717
2021
|
_ = clean_up_temp_tables()
|
@@ -1728,7 +2032,7 @@ def sync_pipe_inplace(
|
|
1728
2032
|
|
1729
2033
|
common_cols = [col for col in new_cols if col in backtrack_cols_types]
|
1730
2034
|
on_cols = {
|
1731
|
-
col: new_cols.get(col
|
2035
|
+
col: new_cols.get(col)
|
1732
2036
|
for col_key, col in pipe.columns.items()
|
1733
2037
|
if (
|
1734
2038
|
col
|
@@ -1742,7 +2046,8 @@ def sync_pipe_inplace(
|
|
1742
2046
|
null_replace_new_cols_str = (
|
1743
2047
|
', '.join([
|
1744
2048
|
f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
|
1745
|
-
+
|
2049
|
+
+ get_null_replacement(get_col_typ(col, new_cols), self.flavor)
|
2050
|
+
+ ") AS "
|
1746
2051
|
+ sql_item_name(col, self.flavor, None)
|
1747
2052
|
for col, typ in new_cols.items()
|
1748
2053
|
])
|
@@ -1758,7 +2063,7 @@ def sync_pipe_inplace(
|
|
1758
2063
|
f"COALESCE({temp_table_names['new']}."
|
1759
2064
|
+ sql_item_name(c, self.flavor, None)
|
1760
2065
|
+ ", "
|
1761
|
-
+ get_null_replacement(
|
2066
|
+
+ get_null_replacement(get_col_typ(c, new_cols), self.flavor)
|
1762
2067
|
+ ") "
|
1763
2068
|
+ ' = '
|
1764
2069
|
+ f"COALESCE({temp_table_names['backtrack']}."
|
@@ -1775,12 +2080,12 @@ def sync_pipe_inplace(
|
|
1775
2080
|
) for c in common_cols
|
1776
2081
|
])
|
1777
2082
|
)
|
1778
|
-
create_delta_query =
|
2083
|
+
create_delta_query = get_create_table_queries(
|
1779
2084
|
select_delta_query,
|
1780
2085
|
temp_tables['delta'],
|
1781
2086
|
self.flavor,
|
1782
2087
|
schema=internal_schema,
|
1783
|
-
)
|
2088
|
+
)[0]
|
1784
2089
|
create_delta_success, create_delta_msg = session_execute(
|
1785
2090
|
session,
|
1786
2091
|
create_delta_query,
|
@@ -1833,20 +2138,28 @@ def sync_pipe_inplace(
|
|
1833
2138
|
+ '\nAND\n'.join([
|
1834
2139
|
(
|
1835
2140
|
f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
|
1836
|
-
+ ", "
|
2141
|
+
+ ", "
|
2142
|
+
+ get_null_replacement(
|
2143
|
+
get_col_typ(c, on_cols),
|
2144
|
+
self.flavor
|
2145
|
+
) + ")"
|
1837
2146
|
+ ' = '
|
1838
2147
|
+ f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
|
1839
|
-
+ ", "
|
2148
|
+
+ ", "
|
2149
|
+
+ get_null_replacement(
|
2150
|
+
get_col_typ(c, on_cols),
|
2151
|
+
self.flavor
|
2152
|
+
) + ")"
|
1840
2153
|
) for c, typ in on_cols.items()
|
1841
2154
|
])
|
1842
2155
|
)
|
1843
2156
|
|
1844
|
-
create_joined_query =
|
2157
|
+
create_joined_query = get_create_table_queries(
|
1845
2158
|
select_joined_query,
|
1846
2159
|
temp_tables['joined'],
|
1847
2160
|
self.flavor,
|
1848
|
-
schema
|
1849
|
-
)
|
2161
|
+
schema=internal_schema,
|
2162
|
+
)[0]
|
1850
2163
|
create_joined_success, create_joined_msg = session_execute(
|
1851
2164
|
session,
|
1852
2165
|
create_joined_query,
|
@@ -1861,7 +2174,7 @@ def sync_pipe_inplace(
|
|
1861
2174
|
+ (', '.join([
|
1862
2175
|
(
|
1863
2176
|
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1864
|
-
+ " != " + get_null_replacement(
|
2177
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
|
1865
2178
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1866
2179
|
+ "\n ELSE NULL\nEND "
|
1867
2180
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
@@ -1875,18 +2188,18 @@ def sync_pipe_inplace(
|
|
1875
2188
|
) for c in delta_cols
|
1876
2189
|
])
|
1877
2190
|
)
|
1878
|
-
create_unseen_query =
|
2191
|
+
create_unseen_query = get_create_table_queries(
|
1879
2192
|
select_unseen_query,
|
1880
2193
|
temp_tables['unseen'],
|
1881
2194
|
self.flavor,
|
1882
2195
|
internal_schema,
|
1883
|
-
)
|
2196
|
+
)[0]
|
1884
2197
|
(create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
|
1885
2198
|
session,
|
1886
2199
|
create_unseen_query,
|
1887
2200
|
with_results=True,
|
1888
2201
|
debug=debug
|
1889
|
-
) if not upsert else (True, "Success"), None
|
2202
|
+
) if not upsert else ((True, "Success"), None)
|
1890
2203
|
if not create_unseen_success:
|
1891
2204
|
_ = clean_up_temp_tables()
|
1892
2205
|
return create_unseen_success, create_unseen_msg
|
@@ -1896,7 +2209,7 @@ def sync_pipe_inplace(
|
|
1896
2209
|
+ (', '.join([
|
1897
2210
|
(
|
1898
2211
|
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1899
|
-
+ " != " + get_null_replacement(
|
2212
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
|
1900
2213
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
1901
2214
|
+ "\n ELSE NULL\nEND "
|
1902
2215
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
@@ -1911,12 +2224,12 @@ def sync_pipe_inplace(
|
|
1911
2224
|
])
|
1912
2225
|
)
|
1913
2226
|
|
1914
|
-
create_update_query =
|
2227
|
+
create_update_query = get_create_table_queries(
|
1915
2228
|
select_update_query,
|
1916
2229
|
temp_tables['update'],
|
1917
2230
|
self.flavor,
|
1918
2231
|
internal_schema,
|
1919
|
-
)
|
2232
|
+
)[0]
|
1920
2233
|
(create_update_success, create_update_msg), create_update_results = session_execute(
|
1921
2234
|
session,
|
1922
2235
|
create_update_query,
|
@@ -1956,7 +2269,7 @@ def sync_pipe_inplace(
|
|
1956
2269
|
apply_unseen_queries,
|
1957
2270
|
with_results=True,
|
1958
2271
|
debug=debug,
|
1959
|
-
) if not upsert else (True, "Success"), None
|
2272
|
+
) if not upsert else ((True, "Success"), None)
|
1960
2273
|
if not apply_unseen_success:
|
1961
2274
|
_ = clean_up_temp_tables()
|
1962
2275
|
return apply_unseen_success, apply_unseen_msg
|
@@ -1986,12 +2299,12 @@ def sync_pipe_inplace(
|
|
1986
2299
|
|
1987
2300
|
|
1988
2301
|
def get_sync_time(
|
1989
|
-
|
1990
|
-
|
1991
|
-
|
1992
|
-
|
1993
|
-
|
1994
|
-
|
2302
|
+
self,
|
2303
|
+
pipe: 'mrsm.Pipe',
|
2304
|
+
params: Optional[Dict[str, Any]] = None,
|
2305
|
+
newest: bool = True,
|
2306
|
+
debug: bool = False,
|
2307
|
+
) -> Union[datetime, int, None]:
|
1995
2308
|
"""Get a Pipe's most recent datetime value.
|
1996
2309
|
|
1997
2310
|
Parameters
|
@@ -2086,10 +2399,10 @@ def get_sync_time(
|
|
2086
2399
|
|
2087
2400
|
|
2088
2401
|
def pipe_exists(
|
2089
|
-
|
2090
|
-
|
2091
|
-
|
2092
|
-
|
2402
|
+
self,
|
2403
|
+
pipe: mrsm.Pipe,
|
2404
|
+
debug: bool = False
|
2405
|
+
) -> bool:
|
2093
2406
|
"""
|
2094
2407
|
Check that a Pipe's table exists.
|
2095
2408
|
|
@@ -2097,7 +2410,7 @@ def pipe_exists(
|
|
2097
2410
|
----------
|
2098
2411
|
pipe: mrsm.Pipe:
|
2099
2412
|
The pipe to check.
|
2100
|
-
|
2413
|
+
|
2101
2414
|
debug: bool, default False
|
2102
2415
|
Verbosity toggle.
|
2103
2416
|
|
@@ -2110,8 +2423,8 @@ def pipe_exists(
|
|
2110
2423
|
exists = table_exists(
|
2111
2424
|
pipe.target,
|
2112
2425
|
self,
|
2113
|
-
schema
|
2114
|
-
debug
|
2426
|
+
schema=self.get_pipe_schema(pipe),
|
2427
|
+
debug=debug,
|
2115
2428
|
)
|
2116
2429
|
if debug:
|
2117
2430
|
from meerschaum.utils.debug import dprint
|
@@ -2440,14 +2753,14 @@ def get_pipe_columns_types(
|
|
2440
2753
|
if not pipe.exists(debug=debug):
|
2441
2754
|
return {}
|
2442
2755
|
|
2443
|
-
|
2444
|
-
|
2445
|
-
|
2446
|
-
|
2447
|
-
|
2448
|
-
|
2449
|
-
|
2450
|
-
|
2756
|
+
if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'):
|
2757
|
+
return get_table_cols_types(
|
2758
|
+
pipe.target,
|
2759
|
+
self,
|
2760
|
+
flavor=self.flavor,
|
2761
|
+
schema=self.get_pipe_schema(pipe),
|
2762
|
+
debug=debug,
|
2763
|
+
)
|
2451
2764
|
|
2452
2765
|
table_columns = {}
|
2453
2766
|
try:
|
@@ -2465,6 +2778,35 @@ def get_pipe_columns_types(
|
|
2465
2778
|
return table_columns
|
2466
2779
|
|
2467
2780
|
|
2781
|
+
def get_pipe_columns_indices(
|
2782
|
+
self,
|
2783
|
+
pipe: mrsm.Pipe,
|
2784
|
+
debug: bool = False,
|
2785
|
+
) -> Dict[str, List[Dict[str, str]]]:
|
2786
|
+
"""
|
2787
|
+
Return a dictionary mapping columns to the indices created on those columns.
|
2788
|
+
|
2789
|
+
Parameters
|
2790
|
+
----------
|
2791
|
+
pipe: mrsm.Pipe
|
2792
|
+
The pipe to be queried against.
|
2793
|
+
|
2794
|
+
|
2795
|
+
Returns
|
2796
|
+
-------
|
2797
|
+
A dictionary mapping columns names to lists of dictionaries.
|
2798
|
+
The dictionaries in the lists contain the name and type of the indices.
|
2799
|
+
"""
|
2800
|
+
from meerschaum.utils.sql import get_table_cols_indices
|
2801
|
+
return get_table_cols_indices(
|
2802
|
+
pipe.target,
|
2803
|
+
self,
|
2804
|
+
flavor=self.flavor,
|
2805
|
+
schema=self.get_pipe_schema(pipe),
|
2806
|
+
debug=debug,
|
2807
|
+
)
|
2808
|
+
|
2809
|
+
|
2468
2810
|
def get_add_columns_queries(
|
2469
2811
|
self,
|
2470
2812
|
pipe: mrsm.Pipe,
|
@@ -2494,6 +2836,9 @@ def get_add_columns_queries(
|
|
2494
2836
|
if not pipe.exists(debug=debug):
|
2495
2837
|
return []
|
2496
2838
|
|
2839
|
+
if pipe.parameters.get('static', False):
|
2840
|
+
return []
|
2841
|
+
|
2497
2842
|
from decimal import Decimal
|
2498
2843
|
import copy
|
2499
2844
|
from meerschaum.utils.sql import (
|
@@ -2612,6 +2957,8 @@ def get_alter_columns_queries(
|
|
2612
2957
|
"""
|
2613
2958
|
if not pipe.exists(debug=debug):
|
2614
2959
|
return []
|
2960
|
+
if pipe.static:
|
2961
|
+
return
|
2615
2962
|
from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
|
2616
2963
|
from meerschaum.utils.dataframe import get_numeric_cols
|
2617
2964
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
@@ -2845,7 +3192,6 @@ def get_alter_columns_queries(
|
|
2845
3192
|
|
2846
3193
|
return queries
|
2847
3194
|
|
2848
|
-
|
2849
3195
|
query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
|
2850
3196
|
for col, typ in altered_cols_types.items():
|
2851
3197
|
alter_col_prefix = (
|
@@ -2913,7 +3259,7 @@ def get_to_sql_dtype(
|
|
2913
3259
|
>>> get_to_sql_dtype(pipe, df)
|
2914
3260
|
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
|
2915
3261
|
"""
|
2916
|
-
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
|
3262
|
+
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
2917
3263
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2918
3264
|
df_dtypes = {
|
2919
3265
|
col: str(typ)
|
@@ -2921,8 +3267,10 @@ def get_to_sql_dtype(
|
|
2921
3267
|
}
|
2922
3268
|
json_cols = get_json_cols(df)
|
2923
3269
|
numeric_cols = get_numeric_cols(df)
|
3270
|
+
uuid_cols = get_uuid_cols(df)
|
2924
3271
|
df_dtypes.update({col: 'json' for col in json_cols})
|
2925
3272
|
df_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3273
|
+
df_dtypes.update({col: 'uuid' for col in uuid_cols})
|
2926
3274
|
if update_dtypes:
|
2927
3275
|
df_dtypes.update(pipe.dtypes)
|
2928
3276
|
return {
|
@@ -2932,14 +3280,14 @@ def get_to_sql_dtype(
|
|
2932
3280
|
|
2933
3281
|
|
2934
3282
|
def deduplicate_pipe(
|
2935
|
-
|
2936
|
-
|
2937
|
-
|
2938
|
-
|
2939
|
-
|
2940
|
-
|
2941
|
-
|
2942
|
-
|
3283
|
+
self,
|
3284
|
+
pipe: mrsm.Pipe,
|
3285
|
+
begin: Union[datetime, int, None] = None,
|
3286
|
+
end: Union[datetime, int, None] = None,
|
3287
|
+
params: Optional[Dict[str, Any]] = None,
|
3288
|
+
debug: bool = False,
|
3289
|
+
**kwargs: Any
|
3290
|
+
) -> SuccessTuple:
|
2943
3291
|
"""
|
2944
3292
|
Delete duplicate values within a pipe's table.
|
2945
3293
|
|
@@ -3094,7 +3442,7 @@ def deduplicate_pipe(
|
|
3094
3442
|
temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
|
3095
3443
|
|
3096
3444
|
create_temporary_table_query = get_create_table_query(
|
3097
|
-
duplicates_cte_subquery,
|
3445
|
+
duplicates_cte_subquery,
|
3098
3446
|
dedup_table,
|
3099
3447
|
self.flavor,
|
3100
3448
|
) + f"""
|