meerschaum 2.6.0.dev1__py3-none-any.whl → 2.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. meerschaum/api/dash/pages/login.py +17 -17
  2. meerschaum/api/dash/pipes.py +13 -4
  3. meerschaum/api/routes/_pipes.py +162 -136
  4. meerschaum/config/_version.py +1 -1
  5. meerschaum/config/static/__init__.py +1 -0
  6. meerschaum/connectors/api/_APIConnector.py +1 -0
  7. meerschaum/connectors/api/_pipes.py +46 -13
  8. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  9. meerschaum/connectors/sql/_fetch.py +4 -2
  10. meerschaum/connectors/sql/_pipes.py +496 -148
  11. meerschaum/connectors/sql/_sql.py +37 -16
  12. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  13. meerschaum/connectors/valkey/_pipes.py +13 -5
  14. meerschaum/core/Pipe/__init__.py +20 -0
  15. meerschaum/core/Pipe/_attributes.py +179 -9
  16. meerschaum/core/Pipe/_clear.py +10 -8
  17. meerschaum/core/Pipe/_copy.py +2 -0
  18. meerschaum/core/Pipe/_data.py +57 -28
  19. meerschaum/core/Pipe/_deduplicate.py +30 -28
  20. meerschaum/core/Pipe/_dtypes.py +12 -2
  21. meerschaum/core/Pipe/_fetch.py +11 -9
  22. meerschaum/core/Pipe/_sync.py +24 -7
  23. meerschaum/core/Pipe/_verify.py +51 -48
  24. meerschaum/utils/dataframe.py +16 -8
  25. meerschaum/utils/dtypes/__init__.py +9 -1
  26. meerschaum/utils/dtypes/sql.py +32 -6
  27. meerschaum/utils/misc.py +8 -8
  28. meerschaum/utils/sql.py +485 -16
  29. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/METADATA +1 -1
  30. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/RECORD +36 -36
  31. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/LICENSE +0 -0
  32. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/NOTICE +0 -0
  33. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/WHEEL +0 -0
  34. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/entry_points.txt +0 -0
  35. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/top_level.txt +0 -0
  36. {meerschaum-2.6.0.dev1.dist-info → meerschaum-2.6.1.dist-info}/zip-safe +0 -0
@@ -320,10 +320,11 @@ def create_indices(
320
320
  from meerschaum.utils.debug import dprint
321
321
  if debug:
322
322
  dprint(f"Creating indices for {pipe}...")
323
- if not pipe.columns:
323
+ if not pipe.indices:
324
324
  warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
325
325
  return True
326
326
 
327
+ _ = pipe.__dict__.pop('_columns_indices', None)
327
328
  ix_queries = {
328
329
  ix: queries
329
330
  for ix, queries in self.get_create_index_queries(pipe, debug=debug).items()
@@ -394,23 +395,43 @@ def get_create_index_queries(
394
395
  get_distinct_col_count,
395
396
  update_queries,
396
397
  get_null_replacement,
398
+ get_create_table_queries,
399
+ get_rename_table_queries,
397
400
  COALESCE_UNIQUE_INDEX_FLAVORS,
398
401
  )
402
+ from meerschaum.utils.dtypes.sql import (
403
+ get_db_type_from_pd_type,
404
+ get_pd_type_from_db_type,
405
+ AUTO_INCREMENT_COLUMN_FLAVORS,
406
+ )
399
407
  from meerschaum.config import get_config
400
408
  index_queries = {}
401
409
 
402
410
  upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
411
+ static = pipe.parameters.get('static', False)
403
412
  index_names = pipe.get_indices()
404
413
  indices = pipe.indices
414
+ existing_cols_types = pipe.get_columns_types(debug=debug)
415
+ existing_cols_pd_types = {
416
+ col: get_pd_type_from_db_type(typ)
417
+ for col, typ in existing_cols_types.items()
418
+ }
419
+ existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
420
+ existing_ix_names = set()
421
+ existing_primary_keys = []
422
+ for col, col_indices in existing_cols_indices.items():
423
+ for col_ix_doc in col_indices:
424
+ existing_ix_names.add(col_ix_doc.get('name', None))
425
+ if col_ix_doc.get('type', None) == 'PRIMARY KEY':
426
+ existing_primary_keys.append(col)
405
427
 
406
428
  _datetime = pipe.get_columns('datetime', error=False)
407
- _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns, UTC]')
408
429
  _datetime_name = (
409
430
  sql_item_name(_datetime, self.flavor, None)
410
431
  if _datetime is not None else None
411
432
  )
412
433
  _datetime_index_name = (
413
- sql_item_name(index_names['datetime'], self.flavor, None)
434
+ sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
414
435
  if index_names.get('datetime', None)
415
436
  else None
416
437
  )
@@ -420,6 +441,29 @@ def get_create_index_queries(
420
441
  if _id is not None
421
442
  else None
422
443
  )
444
+ primary_key = pipe.columns.get('primary', None)
445
+ primary_key_name = (
446
+ sql_item_name(primary_key, flavor=self.flavor, schema=None)
447
+ if primary_key
448
+ else None
449
+ )
450
+ autoincrement = (
451
+ pipe.parameters.get('autoincrement', False)
452
+ or (
453
+ primary_key is not None
454
+ and primary_key not in existing_cols_pd_types
455
+ )
456
+ )
457
+ primary_key_db_type = (
458
+ get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor)
459
+ if primary_key
460
+ else None
461
+ )
462
+ primary_key_constraint_name = (
463
+ sql_item_name(f'pk_{pipe.target}', self.flavor, None)
464
+ if primary_key is not None
465
+ else None
466
+ )
423
467
 
424
468
  _id_index_name = (
425
469
  sql_item_name(index_names['id'], self.flavor, None)
@@ -462,8 +506,10 @@ def get_create_index_queries(
462
506
  )
463
507
  elif self.flavor == 'mssql':
464
508
  dt_query = (
465
- f"CREATE CLUSTERED INDEX {_datetime_index_name} "
466
- f"ON {_pipe_name} ({_datetime_name})"
509
+ "CREATE "
510
+ + ("CLUSTERED " if not primary_key else '')
511
+ + f"INDEX {_datetime_index_name} "
512
+ + f"ON {_pipe_name} ({_datetime_name})"
467
513
  )
468
514
  else: ### mssql, sqlite, etc.
469
515
  dt_query = (
@@ -473,6 +519,115 @@ def get_create_index_queries(
473
519
 
474
520
  index_queries[_datetime] = [dt_query]
475
521
 
522
+ primary_queries = []
523
+ if (
524
+ primary_key is not None
525
+ and primary_key not in existing_primary_keys
526
+ and not static
527
+ ):
528
+ if autoincrement and primary_key not in existing_cols_pd_types:
529
+ autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
530
+ self.flavor,
531
+ AUTO_INCREMENT_COLUMN_FLAVORS['default']
532
+ )
533
+ primary_queries.extend([
534
+ (
535
+ f"ALTER TABLE {_pipe_name}\n"
536
+ f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
537
+ ),
538
+ ])
539
+ elif not autoincrement and primary_key in existing_cols_pd_types:
540
+ if self.flavor == 'sqlite':
541
+ new_table_name = sql_item_name(
542
+ f'_new_{pipe.target}',
543
+ self.flavor,
544
+ self.get_pipe_schema(pipe)
545
+ )
546
+ select_cols_str = ', '.join(
547
+ [
548
+ sql_item_name(col, self.flavor, None)
549
+ for col in existing_cols_types
550
+ ]
551
+ )
552
+ primary_queries.extend(
553
+ get_create_table_queries(
554
+ existing_cols_pd_types,
555
+ f'_new_{pipe.target}',
556
+ self.flavor,
557
+ schema=self.get_pipe_schema(pipe),
558
+ primary_key=primary_key,
559
+ ) + [
560
+ (
561
+ f"INSERT INTO {new_table_name} ({select_cols_str})\n"
562
+ f"SELECT {select_cols_str}\nFROM {_pipe_name}"
563
+ ),
564
+ f"DROP TABLE {_pipe_name}",
565
+ ] + get_rename_table_queries(
566
+ f'_new_{pipe.target}',
567
+ pipe.target,
568
+ self.flavor,
569
+ schema=self.get_pipe_schema(pipe),
570
+ )
571
+ )
572
+ elif self.flavor == 'oracle':
573
+ primary_queries.extend([
574
+ (
575
+ f"ALTER TABLE {_pipe_name}\n"
576
+ f"MODIFY {primary_key_name} NOT NULL"
577
+ ),
578
+ (
579
+ f"ALTER TABLE {_pipe_name}\n"
580
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
581
+ )
582
+ ])
583
+ elif self.flavor in ('mysql', 'mariadb'):
584
+ primary_queries.extend([
585
+ (
586
+ f"ALTER TABLE {_pipe_name}\n"
587
+ f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
588
+ ),
589
+ (
590
+ f"ALTER TABLE {_pipe_name}\n"
591
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
592
+ )
593
+ ])
594
+ elif self.flavor == 'timescaledb':
595
+ primary_queries.extend([
596
+ (
597
+ f"ALTER TABLE {_pipe_name}\n"
598
+ f"ALTER COLUMN {primary_key_name} SET NOT NULL"
599
+ ),
600
+ (
601
+ f"ALTER TABLE {_pipe_name}\n"
602
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
603
+ f"{_datetime_name}, " if _datetime_name else ""
604
+ ) + f"{primary_key_name})"
605
+ ),
606
+ ])
607
+ elif self.flavor in ('citus', 'postgresql', 'duckdb'):
608
+ primary_queries.extend([
609
+ (
610
+ f"ALTER TABLE {_pipe_name}\n"
611
+ f"ALTER COLUMN {primary_key_name} SET NOT NULL"
612
+ ),
613
+ (
614
+ f"ALTER TABLE {_pipe_name}\n"
615
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
616
+ ),
617
+ ])
618
+ else:
619
+ primary_queries.extend([
620
+ (
621
+ f"ALTER TABLE {_pipe_name}\n"
622
+ f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
623
+ ),
624
+ (
625
+ f"ALTER TABLE {_pipe_name}\n"
626
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
627
+ ),
628
+ ])
629
+ index_queries[primary_key] = primary_queries
630
+
476
631
  ### create id index
477
632
  if _id_name is not None:
478
633
  if self.flavor == 'timescaledb':
@@ -496,7 +651,7 @@ def get_create_index_queries(
496
651
  other_index_names = {
497
652
  ix_key: ix_unquoted
498
653
  for ix_key, ix_unquoted in index_names.items()
499
- if ix_key not in ('datetime', 'id')
654
+ if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names
500
655
  }
501
656
  for ix_key, ix_unquoted in other_index_names.items():
502
657
  ix_name = sql_item_name(ix_unquoted, self.flavor, None)
@@ -509,13 +664,12 @@ def get_create_index_queries(
509
664
  cols_names_str = ", ".join(cols_names)
510
665
  index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
511
666
 
512
- existing_cols_types = pipe.get_columns_types(debug=debug)
513
667
  indices_cols_str = ', '.join(
514
- [
668
+ list({
515
669
  sql_item_name(ix, self.flavor)
516
670
  for ix_key, ix in pipe.columns.items()
517
671
  if ix and ix in existing_cols_types
518
- ]
672
+ })
519
673
  )
520
674
  coalesce_indices_cols_str = ', '.join(
521
675
  [
@@ -718,7 +872,11 @@ def get_pipe_data(
718
872
  from meerschaum.utils.sql import sql_item_name
719
873
  from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
720
874
  from meerschaum.utils.packages import import_pandas
721
- from meerschaum.utils.dtypes import attempt_cast_to_numeric, attempt_cast_to_uuid
875
+ from meerschaum.utils.dtypes import (
876
+ attempt_cast_to_numeric,
877
+ attempt_cast_to_uuid,
878
+ are_dtypes_equal,
879
+ )
722
880
  pd = import_pandas()
723
881
  is_dask = 'dask' in pd.__name__
724
882
 
@@ -813,7 +971,7 @@ def get_pipe_data(
813
971
  ignore_dt_cols = [
814
972
  col
815
973
  for col, dtype in pipe.dtypes.items()
816
- if 'datetime' not in str(dtype)
974
+ if not are_dtypes_equal(str(dtype), 'datetime')
817
975
  ]
818
976
  ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
819
977
  df = (
@@ -821,6 +979,7 @@ def get_pipe_data(
821
979
  df,
822
980
  ignore_cols=ignore_dt_cols,
823
981
  chunksize=kw.get('chunksize', None),
982
+ strip_timezone=(pipe.tzinfo is None),
824
983
  debug=debug,
825
984
  ) if isinstance(df, pd.DataFrame) else (
826
985
  [
@@ -828,6 +987,7 @@ def get_pipe_data(
828
987
  c,
829
988
  ignore_cols=ignore_dt_cols,
830
989
  chunksize=kw.get('chunksize', None),
990
+ strip_timezone=(pipe.tzinfo is None),
831
991
  debug=debug,
832
992
  )
833
993
  for c in df
@@ -855,6 +1015,7 @@ def get_pipe_data_query(
855
1015
  begin_add_minutes: int = 0,
856
1016
  end_add_minutes: int = 0,
857
1017
  replace_nulls: Optional[str] = None,
1018
+ skip_existing_cols_check: bool = False,
858
1019
  debug: bool = False,
859
1020
  **kw: Any
860
1021
  ) -> Union[str, None]:
@@ -905,6 +1066,9 @@ def get_pipe_data_query(
905
1066
  replace_nulls: Optional[str], default None
906
1067
  If provided, replace null values with this value.
907
1068
 
1069
+ skip_existing_cols_check: bool, default False
1070
+ If `True`, do not verify that querying columns are actually on the table.
1071
+
908
1072
  debug: bool, default False
909
1073
  Verbosity toggle.
910
1074
 
@@ -912,16 +1076,13 @@ def get_pipe_data_query(
912
1076
  -------
913
1077
  A `SELECT` query to retrieve a pipe's data.
914
1078
  """
915
- from meerschaum.utils.debug import dprint
916
1079
  from meerschaum.utils.misc import items_str
917
1080
  from meerschaum.utils.sql import sql_item_name, dateadd_str
918
- from meerschaum.utils.packages import import_pandas
919
- pd = import_pandas()
920
1081
  existing_cols = pipe.get_columns_types(debug=debug)
921
1082
  select_columns = (
922
1083
  [col for col in existing_cols]
923
1084
  if not select_columns
924
- else [col for col in select_columns if col in existing_cols]
1085
+ else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
925
1086
  )
926
1087
  if omit_columns:
927
1088
  select_columns = [col for col in select_columns if col not in omit_columns]
@@ -935,7 +1096,12 @@ def get_pipe_data_query(
935
1096
  if begin is not None:
936
1097
  begin -= backtrack_interval
937
1098
 
938
- cols_names = [sql_item_name(col, self.flavor, None) for col in select_columns]
1099
+ begin, end = pipe.parse_date_bounds(begin, end)
1100
+
1101
+ cols_names = [
1102
+ sql_item_name(col, self.flavor, None)
1103
+ for col in select_columns
1104
+ ]
939
1105
  select_cols_str = (
940
1106
  'SELECT\n '
941
1107
  + ',\n '.join(
@@ -948,7 +1114,7 @@ def get_pipe_data_query(
948
1114
  for col_name in cols_names
949
1115
  ]
950
1116
  )
951
- )
1117
+ ) if cols_names else 'SELECT *'
952
1118
  pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
953
1119
  query = f"{select_cols_str}\nFROM {pipe_table_name}"
954
1120
  where = ""
@@ -972,7 +1138,7 @@ def get_pipe_data_query(
972
1138
  quoted_indices = {
973
1139
  key: sql_item_name(val, self.flavor, None)
974
1140
  for key, val in pipe.columns.items()
975
- if val in existing_cols
1141
+ if val in existing_cols or skip_existing_cols_check
976
1142
  }
977
1143
 
978
1144
  if begin is not None or end is not None:
@@ -992,7 +1158,7 @@ def get_pipe_data_query(
992
1158
  )
993
1159
 
994
1160
  is_dt_bound = False
995
- if begin is not None and _dt in existing_cols:
1161
+ if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
996
1162
  begin_da = dateadd_str(
997
1163
  flavor=self.flavor,
998
1164
  datepart='minute',
@@ -1002,7 +1168,7 @@ def get_pipe_data_query(
1002
1168
  where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
1003
1169
  is_dt_bound = True
1004
1170
 
1005
- if end is not None and _dt in existing_cols:
1171
+ if end is not None and (_dt in existing_cols or skip_existing_cols_check):
1006
1172
  if 'int' in str(type(end)).lower() and end == begin:
1007
1173
  end += 1
1008
1174
  end_da = dateadd_str(
@@ -1016,7 +1182,11 @@ def get_pipe_data_query(
1016
1182
 
1017
1183
  if params is not None:
1018
1184
  from meerschaum.utils.sql import build_where
1019
- valid_params = {k: v for k, v in params.items() if k in existing_cols}
1185
+ valid_params = {
1186
+ k: v
1187
+ for k, v in params.items()
1188
+ if k in existing_cols or skip_existing_cols_check
1189
+ }
1020
1190
  if valid_params:
1021
1191
  where += build_where(valid_params, self).replace(
1022
1192
  'WHERE', ('AND' if is_dt_bound else "")
@@ -1030,7 +1200,7 @@ def get_pipe_data_query(
1030
1200
  order_by = ""
1031
1201
  if quoted_indices:
1032
1202
  order_by += "\nORDER BY "
1033
- if _dt and _dt in existing_cols:
1203
+ if _dt and (_dt in existing_cols or skip_existing_cols_check):
1034
1204
  order_by += dt + ' ' + order + ','
1035
1205
  for key, quoted_col_name in quoted_indices.items():
1036
1206
  if dt == quoted_col_name:
@@ -1140,6 +1310,70 @@ def get_pipe_attributes(
1140
1310
  return attributes
1141
1311
 
1142
1312
 
1313
+ def create_pipe_table_from_df(
1314
+ self,
1315
+ pipe: mrsm.Pipe,
1316
+ df: 'pd.DataFrame',
1317
+ debug: bool = False,
1318
+ ) -> mrsm.SuccessTuple:
1319
+ """
1320
+ Create a pipe's table from its configured dtypes and an incoming dataframe.
1321
+ """
1322
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1323
+ from meerschaum.utils.sql import get_create_table_queries, sql_item_name
1324
+ primary_key = pipe.columns.get('primary', None)
1325
+ dt_col = pipe.columns.get('datetime', None)
1326
+ new_dtypes = {
1327
+ **{
1328
+ col: str(typ)
1329
+ for col, typ in df.dtypes.items()
1330
+ },
1331
+ **{
1332
+ col: 'int'
1333
+ for col_ix, col in pipe.columns.items()
1334
+ if col_ix != 'primary'
1335
+ },
1336
+ **{
1337
+ col: 'uuid'
1338
+ for col in get_uuid_cols(df)
1339
+ },
1340
+ **{
1341
+ col: 'json'
1342
+ for col in get_json_cols(df)
1343
+ },
1344
+ **{
1345
+ col: 'numeric'
1346
+ for col in get_numeric_cols(df)
1347
+ },
1348
+ **pipe.dtypes
1349
+ }
1350
+ autoincrement = (
1351
+ pipe.parameters.get('autoincrement', False)
1352
+ or (primary_key and primary_key not in new_dtypes)
1353
+ )
1354
+ if autoincrement:
1355
+ _ = new_dtypes.pop(primary_key, None)
1356
+
1357
+ create_table_queries = get_create_table_queries(
1358
+ new_dtypes,
1359
+ pipe.target,
1360
+ self.flavor,
1361
+ schema=self.get_pipe_schema(pipe),
1362
+ primary_key=primary_key,
1363
+ datetime_column=dt_col,
1364
+ )
1365
+ success = all(
1366
+ self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1367
+ )
1368
+ target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
1369
+ msg = (
1370
+ "Success"
1371
+ if success
1372
+ else f"Failed to create {target_name}."
1373
+ )
1374
+ return success, msg
1375
+
1376
+
1143
1377
  def sync_pipe(
1144
1378
  self,
1145
1379
  pipe: mrsm.Pipe,
@@ -1202,10 +1436,12 @@ def sync_pipe(
1202
1436
  sql_item_name,
1203
1437
  update_queries,
1204
1438
  get_create_table_queries,
1439
+ get_reset_autoincrement_queries,
1205
1440
  )
1206
1441
  from meerschaum.utils.misc import generate_password
1207
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
1442
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1208
1443
  from meerschaum.utils.dtypes import are_dtypes_equal
1444
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1209
1445
  from meerschaum import Pipe
1210
1446
  import time
1211
1447
  import copy
@@ -1216,6 +1452,7 @@ def sync_pipe(
1216
1452
  return False, msg
1217
1453
 
1218
1454
  start = time.perf_counter()
1455
+ pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
1219
1456
 
1220
1457
  if not pipe.temporary and not pipe.get_id(debug=debug):
1221
1458
  register_tuple = pipe.register(debug=debug)
@@ -1244,11 +1481,15 @@ def sync_pipe(
1244
1481
  ### Check for new columns.
1245
1482
  add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
1246
1483
  if add_cols_queries:
1484
+ _ = pipe.__dict__.pop('_columns_indices', None)
1485
+ _ = pipe.__dict__.pop('_columns_types', None)
1247
1486
  if not self.exec_queries(add_cols_queries, debug=debug):
1248
1487
  warn(f"Failed to add new columns to {pipe}.")
1249
1488
 
1250
1489
  alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
1251
1490
  if alter_cols_queries:
1491
+ _ = pipe.__dict__.pop('_columns_indices', None)
1492
+ _ = pipe.__dict__.pop('_columns_types', None)
1252
1493
  if not self.exec_queries(alter_cols_queries, debug=debug):
1253
1494
  warn(f"Failed to alter columns for {pipe}.")
1254
1495
  else:
@@ -1312,21 +1553,15 @@ def sync_pipe(
1312
1553
  })
1313
1554
 
1314
1555
  primary_key = pipe.columns.get('primary', None)
1315
- new_dtypes = {
1316
- **{
1317
- col: str(typ)
1318
- for col, typ in unseen_df.dtypes.items()
1319
- },
1320
- **{
1321
- col: 'int'
1322
- for col_ix, col in pipe.columns.items()
1323
- if col_ix != 'primary'
1324
- },
1325
- **pipe.dtypes
1326
- } if is_new else {}
1327
1556
  autoincrement = (
1328
1557
  pipe.parameters.get('autoincrement', False)
1329
- or (is_new and primary_key and primary_key not in new_dtypes)
1558
+ or (
1559
+ is_new
1560
+ and primary_key
1561
+ and primary_key
1562
+ not in pipe.dtypes
1563
+ and primary_key not in unseen_df.columns
1564
+ )
1330
1565
  )
1331
1566
  if autoincrement and autoincrement not in pipe.parameters:
1332
1567
  pipe.parameters['autoincrement'] = autoincrement
@@ -1334,77 +1569,117 @@ def sync_pipe(
1334
1569
  if not edit_success:
1335
1570
  return edit_success, edit_msg
1336
1571
 
1337
- if autoincrement and primary_key and primary_key not in df.columns:
1338
- if unseen_df is not None and primary_key in unseen_df.columns:
1339
- del unseen_df[primary_key]
1340
- if update_df is not None and primary_key in update_df.columns:
1341
- del update_df[primary_key]
1342
- if delta_df is not None and primary_key in delta_df.columns:
1343
- del delta_df[primary_key]
1572
+ autoincrement_needs_reset = False
1573
+ if autoincrement and primary_key:
1574
+ if primary_key not in df.columns:
1575
+ if unseen_df is not None and primary_key in unseen_df.columns:
1576
+ del unseen_df[primary_key]
1577
+ if update_df is not None and primary_key in update_df.columns:
1578
+ del update_df[primary_key]
1579
+ if delta_df is not None and primary_key in delta_df.columns:
1580
+ del delta_df[primary_key]
1581
+ elif unseen_df[primary_key].notnull().any():
1582
+ autoincrement_needs_reset = True
1344
1583
 
1345
1584
  if is_new:
1346
- if autoincrement:
1347
- _ = new_dtypes.pop(primary_key, None)
1348
-
1349
- ### TODO: see if this can be removed
1350
- if 'datetime' in pipe.columns and self.flavor == 'timescaledb':
1351
- primary_key = None
1352
-
1353
- create_table_queries = get_create_table_queries(
1354
- new_dtypes,
1355
- pipe.target,
1356
- self.flavor,
1357
- schema=self.get_pipe_schema(pipe),
1358
- primary_key=primary_key,
1359
- )
1360
- create_success = all(
1361
- self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1585
+ create_success, create_msg = self.create_pipe_table_from_df(
1586
+ pipe,
1587
+ unseen_df,
1588
+ debug=debug,
1362
1589
  )
1363
1590
  if not create_success:
1364
- warn(f"Failed to create '{pipe.target}'. Continuing...")
1591
+ return create_success, create_msg
1592
+
1593
+ do_identity_insert = bool(
1594
+ self.flavor in ('mssql',)
1595
+ and primary_key in unseen_df.columns
1596
+ and autoincrement
1597
+ )
1598
+ with self.engine.connect() as connection:
1599
+ with connection.begin():
1600
+ if do_identity_insert:
1601
+ identity_on_result = self.exec(
1602
+ f"SET IDENTITY_INSERT {pipe_name} ON",
1603
+ commit=False,
1604
+ _connection=connection,
1605
+ close=False,
1606
+ debug=debug,
1607
+ )
1608
+ if identity_on_result is None:
1609
+ return False, f"Could not enable identity inserts on {pipe}."
1365
1610
 
1366
- stats = self.to_sql(unseen_df, **unseen_kw)
1611
+ stats = self.to_sql(
1612
+ unseen_df,
1613
+ _connection=connection,
1614
+ **unseen_kw
1615
+ )
1616
+
1617
+ if do_identity_insert:
1618
+ identity_off_result = self.exec(
1619
+ f"SET IDENTITY_INSERT {pipe_name} OFF",
1620
+ commit=False,
1621
+ _connection=connection,
1622
+ close=False,
1623
+ debug=debug,
1624
+ )
1625
+ if identity_off_result is None:
1626
+ return False, f"Could not disable identity inserts on {pipe}."
1367
1627
 
1368
1628
  if is_new:
1369
1629
  if not self.create_indices(pipe, debug=debug):
1370
1630
  warn(f"Failed to create indices for {pipe}. Continuing...")
1371
1631
 
1372
- if update_df is not None and len(update_df) > 0:
1373
- dt_col = pipe.columns.get('datetime', None)
1374
- dt_typ = pipe.dtypes.get(dt_col, None)
1375
- dt_name = sql_item_name(dt_col, self.flavor) if dt_col else None
1376
- update_min = update_df[dt_col].min() if dt_col and dt_col in update_df.columns else None
1377
- update_max = update_df[dt_col].max() if dt_col and dt_col in update_df.columns else None
1378
- update_begin = update_min
1379
- update_end = (
1380
- update_max
1381
- + (
1382
- timedelta(minutes=1)
1383
- if are_dtypes_equal(str(dt_typ), 'datetime')
1384
- else 1
1385
- )
1386
- ) if dt_col else None
1632
+ if autoincrement_needs_reset:
1633
+ reset_autoincrement_queries = get_reset_autoincrement_queries(
1634
+ pipe.target,
1635
+ primary_key,
1636
+ self,
1637
+ schema=self.get_pipe_schema(pipe),
1638
+ debug=debug,
1639
+ )
1640
+ results = self.exec_queries(reset_autoincrement_queries, debug=debug)
1641
+ for result in results:
1642
+ if result is None:
1643
+ warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1387
1644
 
1645
+ if update_df is not None and len(update_df) > 0:
1388
1646
  transact_id = generate_password(3)
1389
- temp_target = '##' + transact_id + '_' + pipe.target
1647
+ temp_prefix = '##' if self.flavor != 'oracle' else ''
1648
+ temp_target = temp_prefix + transact_id + '_' + pipe.target
1390
1649
  self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1391
1650
  temp_pipe = Pipe(
1392
1651
  pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1393
1652
  instance=pipe.instance_keys,
1394
1653
  columns={
1395
- ix_key: ix
1654
+ (ix_key if ix_key != 'primary' else 'primary_'): ix
1396
1655
  for ix_key, ix in pipe.columns.items()
1397
1656
  if ix and ix in update_df.columns
1398
1657
  },
1399
- dtypes=pipe.dtypes,
1658
+ dtypes={
1659
+ col: typ
1660
+ for col, typ in pipe.dtypes.items()
1661
+ if col in update_df.columns
1662
+ },
1400
1663
  target=temp_target,
1401
1664
  temporary=True,
1402
1665
  parameters={
1666
+ 'static': True,
1403
1667
  'schema': self.internal_schema,
1404
1668
  'hypertable': False,
1669
+ 'autoincrement': False,
1405
1670
  },
1406
1671
  )
1407
- temp_pipe.sync(update_df, check_existing=False, debug=debug)
1672
+ temp_pipe.__dict__['_columns_types'] = {
1673
+ col: get_db_type_from_pd_type(
1674
+ pipe.dtypes.get(col, str(typ)),
1675
+ self.flavor,
1676
+ )
1677
+ for col, typ in update_df.dtypes.items()
1678
+ }
1679
+ temp_pipe.__dict__['_columns_types_timestamp'] = time.perf_counter()
1680
+ temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
1681
+ if not temp_success:
1682
+ return temp_success, temp_msg
1408
1683
  existing_cols = pipe.get_columns_types(debug=debug)
1409
1684
  join_cols = [
1410
1685
  col
@@ -1530,28 +1805,23 @@ def sync_pipe_inplace(
1530
1805
  )
1531
1806
  from meerschaum.utils.sql import (
1532
1807
  sql_item_name,
1533
- get_sqlalchemy_table,
1534
1808
  get_update_queries,
1535
1809
  get_null_replacement,
1536
- NO_CTE_FLAVORS,
1537
- NO_SELECT_INTO_FLAVORS,
1538
- format_cte_subquery,
1539
- get_create_table_query,
1810
+ get_create_table_queries,
1540
1811
  get_table_cols_types,
1541
- truncate_item_name,
1542
1812
  session_execute,
1543
- table_exists,
1544
1813
  update_queries,
1545
1814
  )
1815
+ from meerschaum.utils.dtypes import coerce_timezone, are_dtypes_equal
1546
1816
  from meerschaum.utils.dtypes.sql import (
1547
1817
  get_pd_type_from_db_type,
1548
1818
  )
1549
1819
  from meerschaum.utils.misc import generate_password
1550
- from meerschaum.utils.debug import dprint
1551
1820
 
1552
1821
  transact_id = generate_password(3)
1553
1822
  def get_temp_table_name(label: str) -> str:
1554
- return '##' + transact_id + '_' + label + '_' + pipe.target
1823
+ temp_prefix = '##' if self.flavor != 'oracle' else ''
1824
+ return temp_prefix + transact_id + '_' + label + '_' + pipe.target
1555
1825
 
1556
1826
  internal_schema = self.internal_schema
1557
1827
  temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
@@ -1578,6 +1848,11 @@ def sync_pipe_inplace(
1578
1848
  pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1579
1849
  upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
1580
1850
  database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
1851
+ primary_key = pipe.columns.get('primary', None)
1852
+ autoincrement = pipe.parameters.get('autoincrement', False)
1853
+ dt_col = pipe.columns.get('datetime', None)
1854
+ dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
1855
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
1581
1856
 
1582
1857
  def clean_up_temp_tables(ready_to_drop: bool = False):
1583
1858
  log_success, log_msg = self._log_temporary_tables_creation(
@@ -1601,13 +1876,16 @@ def sync_pipe_inplace(
1601
1876
 
1602
1877
  sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
1603
1878
  if not pipe.exists(debug=debug):
1604
- create_pipe_query = get_create_table_query(
1879
+ create_pipe_queries = get_create_table_queries(
1605
1880
  metadef,
1606
1881
  pipe.target,
1607
1882
  self.flavor,
1608
1883
  schema=self.get_pipe_schema(pipe),
1884
+ primary_key=primary_key,
1885
+ autoincrement=autoincrement,
1886
+ datetime_column=dt_col,
1609
1887
  )
1610
- result = self.exec(create_pipe_query, debug=debug)
1888
+ result = self.exec_queries(create_pipe_queries, debug=debug)
1611
1889
  if result is None:
1612
1890
  _ = clean_up_temp_tables()
1613
1891
  return False, f"Could not insert new data into {pipe} from its SQL query definition."
@@ -1622,12 +1900,12 @@ def sync_pipe_inplace(
1622
1900
  session = sqlalchemy_orm.Session(self.engine)
1623
1901
  connectable = session if self.flavor != 'duckdb' else self
1624
1902
 
1625
- create_new_query = get_create_table_query(
1903
+ create_new_query = get_create_table_queries(
1626
1904
  metadef,
1627
1905
  temp_tables[('new') if not upsert else 'update'],
1628
1906
  self.flavor,
1629
1907
  schema=internal_schema,
1630
- )
1908
+ )[0]
1631
1909
  (create_new_success, create_new_msg), create_new_results = session_execute(
1632
1910
  session,
1633
1911
  create_new_query,
@@ -1658,13 +1936,20 @@ def sync_pipe_inplace(
1658
1936
  sql_item_name(col, self.flavor)
1659
1937
  for col in new_cols
1660
1938
  ])
1939
+ def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
1940
+ if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
1941
+ return new_cols_types[col]
1942
+ return cols_types[col]
1661
1943
 
1662
1944
  add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
1663
1945
  if add_cols_queries:
1946
+ _ = pipe.__dict__.pop('_columns_types', None)
1947
+ _ = pipe.__dict__.pop('_columns_indices', None)
1664
1948
  self.exec_queries(add_cols_queries, debug=debug)
1665
1949
 
1666
1950
  alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
1667
1951
  if alter_cols_queries:
1952
+ _ = pipe.__dict__.pop('_columns_types', None)
1668
1953
  self.exec_queries(alter_cols_queries, debug=debug)
1669
1954
 
1670
1955
  insert_queries = [
@@ -1689,6 +1974,26 @@ def sync_pipe_inplace(
1689
1974
  _ = clean_up_temp_tables()
1690
1975
  return True, f"Inserted {new_count}, updated 0 rows."
1691
1976
 
1977
+ (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
1978
+ session,
1979
+ [
1980
+ "SELECT\n"
1981
+ f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
1982
+ f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
1983
+ f"FROM {temp_table_names['new']}\n"
1984
+ f"WHERE {dt_col_name} IS NOT NULL"
1985
+ ],
1986
+ with_results=True,
1987
+ debug=debug,
1988
+ )
1989
+ if not new_dt_bounds_success:
1990
+ return (
1991
+ new_dt_bounds_success,
1992
+ f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
1993
+ )
1994
+
1995
+ begin, end = new_dt_bounds_results[0].fetchone()
1996
+
1692
1997
  backtrack_def = self.get_pipe_data_query(
1693
1998
  pipe,
1694
1999
  begin=begin,
@@ -1699,19 +2004,18 @@ def sync_pipe_inplace(
1699
2004
  debug=debug,
1700
2005
  order=None,
1701
2006
  )
1702
-
1703
- create_backtrack_query = get_create_table_query(
2007
+ create_backtrack_query = get_create_table_queries(
1704
2008
  backtrack_def,
1705
2009
  temp_tables['backtrack'],
1706
2010
  self.flavor,
1707
2011
  schema=internal_schema,
1708
- )
1709
- (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
2012
+ )[0]
2013
+ (create_backtrack_success, create_backtrack_msg), create_new_results = session_execute(
1710
2014
  session,
1711
2015
  create_backtrack_query,
1712
2016
  with_results=True,
1713
2017
  debug=debug,
1714
- ) if not upsert else (True, "Success"), None
2018
+ ) if not upsert else ((True, "Success"), None)
1715
2019
 
1716
2020
  if not create_backtrack_success:
1717
2021
  _ = clean_up_temp_tables()
@@ -1728,7 +2032,7 @@ def sync_pipe_inplace(
1728
2032
 
1729
2033
  common_cols = [col for col in new_cols if col in backtrack_cols_types]
1730
2034
  on_cols = {
1731
- col: new_cols.get(col, 'object')
2035
+ col: new_cols.get(col)
1732
2036
  for col_key, col in pipe.columns.items()
1733
2037
  if (
1734
2038
  col
@@ -1742,7 +2046,8 @@ def sync_pipe_inplace(
1742
2046
  null_replace_new_cols_str = (
1743
2047
  ', '.join([
1744
2048
  f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
1745
- + f"{get_null_replacement(typ, self.flavor)}) AS "
2049
+ + get_null_replacement(get_col_typ(col, new_cols), self.flavor)
2050
+ + ") AS "
1746
2051
  + sql_item_name(col, self.flavor, None)
1747
2052
  for col, typ in new_cols.items()
1748
2053
  ])
@@ -1758,7 +2063,7 @@ def sync_pipe_inplace(
1758
2063
  f"COALESCE({temp_table_names['new']}."
1759
2064
  + sql_item_name(c, self.flavor, None)
1760
2065
  + ", "
1761
- + get_null_replacement(new_cols[c], self.flavor)
2066
+ + get_null_replacement(get_col_typ(c, new_cols), self.flavor)
1762
2067
  + ") "
1763
2068
  + ' = '
1764
2069
  + f"COALESCE({temp_table_names['backtrack']}."
@@ -1775,12 +2080,12 @@ def sync_pipe_inplace(
1775
2080
  ) for c in common_cols
1776
2081
  ])
1777
2082
  )
1778
- create_delta_query = get_create_table_query(
2083
+ create_delta_query = get_create_table_queries(
1779
2084
  select_delta_query,
1780
2085
  temp_tables['delta'],
1781
2086
  self.flavor,
1782
2087
  schema=internal_schema,
1783
- )
2088
+ )[0]
1784
2089
  create_delta_success, create_delta_msg = session_execute(
1785
2090
  session,
1786
2091
  create_delta_query,
@@ -1833,20 +2138,28 @@ def sync_pipe_inplace(
1833
2138
  + '\nAND\n'.join([
1834
2139
  (
1835
2140
  f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
1836
- + ", " + get_null_replacement(typ, self.flavor) + ")"
2141
+ + ", "
2142
+ + get_null_replacement(
2143
+ get_col_typ(c, on_cols),
2144
+ self.flavor
2145
+ ) + ")"
1837
2146
  + ' = '
1838
2147
  + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
1839
- + ", " + get_null_replacement(typ, self.flavor) + ")"
2148
+ + ", "
2149
+ + get_null_replacement(
2150
+ get_col_typ(c, on_cols),
2151
+ self.flavor
2152
+ ) + ")"
1840
2153
  ) for c, typ in on_cols.items()
1841
2154
  ])
1842
2155
  )
1843
2156
 
1844
- create_joined_query = get_create_table_query(
2157
+ create_joined_query = get_create_table_queries(
1845
2158
  select_joined_query,
1846
2159
  temp_tables['joined'],
1847
2160
  self.flavor,
1848
- schema = internal_schema,
1849
- )
2161
+ schema=internal_schema,
2162
+ )[0]
1850
2163
  create_joined_success, create_joined_msg = session_execute(
1851
2164
  session,
1852
2165
  create_joined_query,
@@ -1861,7 +2174,7 @@ def sync_pipe_inplace(
1861
2174
  + (', '.join([
1862
2175
  (
1863
2176
  "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
1864
- + " != " + get_null_replacement(typ, self.flavor)
2177
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
1865
2178
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
1866
2179
  + "\n ELSE NULL\nEND "
1867
2180
  + " AS " + sql_item_name(c, self.flavor, None)
@@ -1875,18 +2188,18 @@ def sync_pipe_inplace(
1875
2188
  ) for c in delta_cols
1876
2189
  ])
1877
2190
  )
1878
- create_unseen_query = get_create_table_query(
2191
+ create_unseen_query = get_create_table_queries(
1879
2192
  select_unseen_query,
1880
2193
  temp_tables['unseen'],
1881
2194
  self.flavor,
1882
2195
  internal_schema,
1883
- )
2196
+ )[0]
1884
2197
  (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
1885
2198
  session,
1886
2199
  create_unseen_query,
1887
2200
  with_results=True,
1888
2201
  debug=debug
1889
- ) if not upsert else (True, "Success"), None
2202
+ ) if not upsert else ((True, "Success"), None)
1890
2203
  if not create_unseen_success:
1891
2204
  _ = clean_up_temp_tables()
1892
2205
  return create_unseen_success, create_unseen_msg
@@ -1896,7 +2209,7 @@ def sync_pipe_inplace(
1896
2209
  + (', '.join([
1897
2210
  (
1898
2211
  "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
1899
- + " != " + get_null_replacement(typ, self.flavor)
2212
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
1900
2213
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
1901
2214
  + "\n ELSE NULL\nEND "
1902
2215
  + " AS " + sql_item_name(c, self.flavor, None)
@@ -1911,12 +2224,12 @@ def sync_pipe_inplace(
1911
2224
  ])
1912
2225
  )
1913
2226
 
1914
- create_update_query = get_create_table_query(
2227
+ create_update_query = get_create_table_queries(
1915
2228
  select_update_query,
1916
2229
  temp_tables['update'],
1917
2230
  self.flavor,
1918
2231
  internal_schema,
1919
- )
2232
+ )[0]
1920
2233
  (create_update_success, create_update_msg), create_update_results = session_execute(
1921
2234
  session,
1922
2235
  create_update_query,
@@ -1956,7 +2269,7 @@ def sync_pipe_inplace(
1956
2269
  apply_unseen_queries,
1957
2270
  with_results=True,
1958
2271
  debug=debug,
1959
- ) if not upsert else (True, "Success"), None
2272
+ ) if not upsert else ((True, "Success"), None)
1960
2273
  if not apply_unseen_success:
1961
2274
  _ = clean_up_temp_tables()
1962
2275
  return apply_unseen_success, apply_unseen_msg
@@ -1986,12 +2299,12 @@ def sync_pipe_inplace(
1986
2299
 
1987
2300
 
1988
2301
  def get_sync_time(
1989
- self,
1990
- pipe: 'mrsm.Pipe',
1991
- params: Optional[Dict[str, Any]] = None,
1992
- newest: bool = True,
1993
- debug: bool = False,
1994
- ) -> Union[datetime, int, None]:
2302
+ self,
2303
+ pipe: 'mrsm.Pipe',
2304
+ params: Optional[Dict[str, Any]] = None,
2305
+ newest: bool = True,
2306
+ debug: bool = False,
2307
+ ) -> Union[datetime, int, None]:
1995
2308
  """Get a Pipe's most recent datetime value.
1996
2309
 
1997
2310
  Parameters
@@ -2086,10 +2399,10 @@ def get_sync_time(
2086
2399
 
2087
2400
 
2088
2401
  def pipe_exists(
2089
- self,
2090
- pipe: mrsm.Pipe,
2091
- debug: bool = False
2092
- ) -> bool:
2402
+ self,
2403
+ pipe: mrsm.Pipe,
2404
+ debug: bool = False
2405
+ ) -> bool:
2093
2406
  """
2094
2407
  Check that a Pipe's table exists.
2095
2408
 
@@ -2097,7 +2410,7 @@ def pipe_exists(
2097
2410
  ----------
2098
2411
  pipe: mrsm.Pipe
2099
2412
  The pipe to check.
2100
-
2413
+
2101
2414
  debug: bool, default False
2102
2415
  Verbosity toggle.
2103
2416
 
@@ -2110,8 +2423,8 @@ def pipe_exists(
2110
2423
  exists = table_exists(
2111
2424
  pipe.target,
2112
2425
  self,
2113
- schema = self.get_pipe_schema(pipe),
2114
- debug = debug,
2426
+ schema=self.get_pipe_schema(pipe),
2427
+ debug=debug,
2115
2428
  )
2116
2429
  if debug:
2117
2430
  from meerschaum.utils.debug import dprint
@@ -2440,14 +2753,14 @@ def get_pipe_columns_types(
2440
2753
  if not pipe.exists(debug=debug):
2441
2754
  return {}
2442
2755
 
2443
- # if self.flavor not in ('oracle', 'mysql', 'mariadb'):
2444
- return get_table_cols_types(
2445
- pipe.target,
2446
- self,
2447
- flavor=self.flavor,
2448
- schema=self.get_pipe_schema(pipe),
2449
- debug=debug,
2450
- )
2756
+ if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'):
2757
+ return get_table_cols_types(
2758
+ pipe.target,
2759
+ self,
2760
+ flavor=self.flavor,
2761
+ schema=self.get_pipe_schema(pipe),
2762
+ debug=debug,
2763
+ )
2451
2764
 
2452
2765
  table_columns = {}
2453
2766
  try:
@@ -2465,6 +2778,35 @@ def get_pipe_columns_types(
2465
2778
  return table_columns
2466
2779
 
2467
2780
 
2781
+ def get_pipe_columns_indices(
2782
+ self,
2783
+ pipe: mrsm.Pipe,
2784
+ debug: bool = False,
2785
+ ) -> Dict[str, List[Dict[str, str]]]:
2786
+ """
2787
+ Return a dictionary mapping columns to the indices created on those columns.
2788
+
2789
+ Parameters
2790
+ ----------
2791
+ pipe: mrsm.Pipe
2792
+ The pipe to be queried against.
2793
+
2794
+
2795
+ Returns
2796
+ -------
2797
+ A dictionary mapping column names to lists of dictionaries.
2798
+ The dictionaries in the lists contain the name and type of the indices.
2799
+ """
2800
+ from meerschaum.utils.sql import get_table_cols_indices
2801
+ return get_table_cols_indices(
2802
+ pipe.target,
2803
+ self,
2804
+ flavor=self.flavor,
2805
+ schema=self.get_pipe_schema(pipe),
2806
+ debug=debug,
2807
+ )
2808
+
2809
+
2468
2810
  def get_add_columns_queries(
2469
2811
  self,
2470
2812
  pipe: mrsm.Pipe,
@@ -2494,6 +2836,9 @@ def get_add_columns_queries(
2494
2836
  if not pipe.exists(debug=debug):
2495
2837
  return []
2496
2838
 
2839
+ if pipe.parameters.get('static', False):
2840
+ return []
2841
+
2497
2842
  from decimal import Decimal
2498
2843
  import copy
2499
2844
  from meerschaum.utils.sql import (
@@ -2612,6 +2957,8 @@ def get_alter_columns_queries(
2612
2957
  """
2613
2958
  if not pipe.exists(debug=debug):
2614
2959
  return []
2960
+ if pipe.static:
2961
+ return
2615
2962
  from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
2616
2963
  from meerschaum.utils.dataframe import get_numeric_cols
2617
2964
  from meerschaum.utils.dtypes import are_dtypes_equal
@@ -2845,7 +3192,6 @@ def get_alter_columns_queries(
2845
3192
 
2846
3193
  return queries
2847
3194
 
2848
-
2849
3195
  query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
2850
3196
  for col, typ in altered_cols_types.items():
2851
3197
  alter_col_prefix = (
@@ -2913,7 +3259,7 @@ def get_to_sql_dtype(
2913
3259
  >>> get_to_sql_dtype(pipe, df)
2914
3260
  {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
2915
3261
  """
2916
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
3262
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
2917
3263
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2918
3264
  df_dtypes = {
2919
3265
  col: str(typ)
@@ -2921,8 +3267,10 @@ def get_to_sql_dtype(
2921
3267
  }
2922
3268
  json_cols = get_json_cols(df)
2923
3269
  numeric_cols = get_numeric_cols(df)
3270
+ uuid_cols = get_uuid_cols(df)
2924
3271
  df_dtypes.update({col: 'json' for col in json_cols})
2925
3272
  df_dtypes.update({col: 'numeric' for col in numeric_cols})
3273
+ df_dtypes.update({col: 'uuid' for col in uuid_cols})
2926
3274
  if update_dtypes:
2927
3275
  df_dtypes.update(pipe.dtypes)
2928
3276
  return {
@@ -2932,14 +3280,14 @@ def get_to_sql_dtype(
2932
3280
 
2933
3281
 
2934
3282
  def deduplicate_pipe(
2935
- self,
2936
- pipe: mrsm.Pipe,
2937
- begin: Union[datetime, int, None] = None,
2938
- end: Union[datetime, int, None] = None,
2939
- params: Optional[Dict[str, Any]] = None,
2940
- debug: bool = False,
2941
- **kwargs: Any
2942
- ) -> SuccessTuple:
3283
+ self,
3284
+ pipe: mrsm.Pipe,
3285
+ begin: Union[datetime, int, None] = None,
3286
+ end: Union[datetime, int, None] = None,
3287
+ params: Optional[Dict[str, Any]] = None,
3288
+ debug: bool = False,
3289
+ **kwargs: Any
3290
+ ) -> SuccessTuple:
2943
3291
  """
2944
3292
  Delete duplicate values within a pipe's table.
2945
3293
 
@@ -3094,7 +3442,7 @@ def deduplicate_pipe(
3094
3442
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3095
3443
 
3096
3444
  create_temporary_table_query = get_create_table_query(
3097
- duplicates_cte_subquery,
3445
+ duplicates_cte_subquery,
3098
3446
  dedup_table,
3099
3447
  self.flavor,
3100
3448
  ) + f"""