meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/_internal/entry.py +16 -5
  3. meerschaum/actions/edit.py +6 -6
  4. meerschaum/actions/sql.py +12 -11
  5. meerschaum/api/dash/pages/login.py +17 -17
  6. meerschaum/api/dash/pipes.py +104 -13
  7. meerschaum/api/routes/_pipes.py +58 -40
  8. meerschaum/api/routes/_webterm.py +1 -0
  9. meerschaum/config/_edit.py +46 -19
  10. meerschaum/config/_read_config.py +20 -9
  11. meerschaum/config/_version.py +1 -1
  12. meerschaum/config/stack/__init__.py +1 -1
  13. meerschaum/config/static/__init__.py +1 -0
  14. meerschaum/connectors/api/_APIConnector.py +1 -0
  15. meerschaum/connectors/api/_pipes.py +39 -8
  16. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  17. meerschaum/connectors/sql/_pipes.py +511 -118
  18. meerschaum/connectors/sql/_sql.py +55 -15
  19. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  20. meerschaum/connectors/valkey/_pipes.py +11 -5
  21. meerschaum/core/Pipe/__init__.py +27 -9
  22. meerschaum/core/Pipe/_attributes.py +181 -18
  23. meerschaum/core/Pipe/_clear.py +10 -8
  24. meerschaum/core/Pipe/_copy.py +2 -0
  25. meerschaum/core/Pipe/_data.py +65 -17
  26. meerschaum/core/Pipe/_deduplicate.py +30 -28
  27. meerschaum/core/Pipe/_dtypes.py +4 -4
  28. meerschaum/core/Pipe/_fetch.py +12 -10
  29. meerschaum/core/Pipe/_sync.py +28 -11
  30. meerschaum/core/Pipe/_verify.py +52 -49
  31. meerschaum/utils/dataframe.py +64 -34
  32. meerschaum/utils/dtypes/__init__.py +25 -6
  33. meerschaum/utils/dtypes/sql.py +76 -33
  34. meerschaum/utils/misc.py +57 -24
  35. meerschaum/utils/packages/_packages.py +2 -1
  36. meerschaum/utils/schedule.py +7 -5
  37. meerschaum/utils/sql.py +697 -44
  38. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
  39. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
  40. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
@@ -320,10 +320,11 @@ def create_indices(
320
320
  from meerschaum.utils.debug import dprint
321
321
  if debug:
322
322
  dprint(f"Creating indices for {pipe}...")
323
- if not pipe.columns:
323
+ if not pipe.indices:
324
324
  warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
325
325
  return True
326
326
 
327
+ _ = pipe.__dict__.pop('_columns_indices', None)
327
328
  ix_queries = {
328
329
  ix: queries
329
330
  for ix, queries in self.get_create_index_queries(pipe, debug=debug).items()
@@ -394,23 +395,43 @@ def get_create_index_queries(
394
395
  get_distinct_col_count,
395
396
  update_queries,
396
397
  get_null_replacement,
398
+ get_create_table_queries,
399
+ get_rename_table_queries,
397
400
  COALESCE_UNIQUE_INDEX_FLAVORS,
398
401
  )
402
+ from meerschaum.utils.dtypes.sql import (
403
+ get_db_type_from_pd_type,
404
+ get_pd_type_from_db_type,
405
+ AUTO_INCREMENT_COLUMN_FLAVORS,
406
+ )
399
407
  from meerschaum.config import get_config
400
408
  index_queries = {}
401
409
 
402
410
  upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
411
+ static = pipe.parameters.get('static', False)
403
412
  index_names = pipe.get_indices()
404
413
  indices = pipe.indices
414
+ existing_cols_types = pipe.get_columns_types(debug=debug)
415
+ existing_cols_pd_types = {
416
+ col: get_pd_type_from_db_type(typ)
417
+ for col, typ in existing_cols_types.items()
418
+ }
419
+ existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
420
+ existing_ix_names = set()
421
+ existing_primary_keys = []
422
+ for col, col_indices in existing_cols_indices.items():
423
+ for col_ix_doc in col_indices:
424
+ existing_ix_names.add(col_ix_doc.get('name', None))
425
+ if col_ix_doc.get('type', None) == 'PRIMARY KEY':
426
+ existing_primary_keys.append(col)
405
427
 
406
428
  _datetime = pipe.get_columns('datetime', error=False)
407
- _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
408
429
  _datetime_name = (
409
430
  sql_item_name(_datetime, self.flavor, None)
410
431
  if _datetime is not None else None
411
432
  )
412
433
  _datetime_index_name = (
413
- sql_item_name(index_names['datetime'], self.flavor, None)
434
+ sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
414
435
  if index_names.get('datetime', None)
415
436
  else None
416
437
  )
@@ -420,6 +441,29 @@ def get_create_index_queries(
420
441
  if _id is not None
421
442
  else None
422
443
  )
444
+ primary_key = pipe.columns.get('primary', None)
445
+ primary_key_name = (
446
+ sql_item_name(primary_key, flavor=self.flavor, schema=None)
447
+ if primary_key
448
+ else None
449
+ )
450
+ autoincrement = (
451
+ pipe.parameters.get('autoincrement', False)
452
+ or (
453
+ primary_key is not None
454
+ and primary_key not in existing_cols_pd_types
455
+ )
456
+ )
457
+ primary_key_db_type = (
458
+ get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor)
459
+ if primary_key
460
+ else None
461
+ )
462
+ primary_key_constraint_name = (
463
+ sql_item_name(f'pk_{pipe.target}', self.flavor, None)
464
+ if primary_key is not None
465
+ else None
466
+ )
423
467
 
424
468
  _id_index_name = (
425
469
  sql_item_name(index_names['id'], self.flavor, None)
@@ -462,8 +506,10 @@ def get_create_index_queries(
462
506
  )
463
507
  elif self.flavor == 'mssql':
464
508
  dt_query = (
465
- f"CREATE CLUSTERED INDEX {_datetime_index_name} "
466
- f"ON {_pipe_name} ({_datetime_name})"
509
+ "CREATE "
510
+ + ("CLUSTERED " if not primary_key else '')
511
+ + f"INDEX {_datetime_index_name} "
512
+ + f"ON {_pipe_name} ({_datetime_name})"
467
513
  )
468
514
  else: ### mssql, sqlite, etc.
469
515
  dt_query = (
@@ -473,6 +519,115 @@ def get_create_index_queries(
473
519
 
474
520
  index_queries[_datetime] = [dt_query]
475
521
 
522
+ primary_queries = []
523
+ if (
524
+ primary_key is not None
525
+ and primary_key not in existing_primary_keys
526
+ and not static
527
+ ):
528
+ if autoincrement and primary_key not in existing_cols_pd_types:
529
+ autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
530
+ self.flavor,
531
+ AUTO_INCREMENT_COLUMN_FLAVORS['default']
532
+ )
533
+ primary_queries.extend([
534
+ (
535
+ f"ALTER TABLE {_pipe_name}\n"
536
+ f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
537
+ ),
538
+ ])
539
+ elif not autoincrement and primary_key in existing_cols_pd_types:
540
+ if self.flavor == 'sqlite':
541
+ new_table_name = sql_item_name(
542
+ f'_new_{pipe.target}',
543
+ self.flavor,
544
+ self.get_pipe_schema(pipe)
545
+ )
546
+ select_cols_str = ', '.join(
547
+ [
548
+ sql_item_name(col, self.flavor, None)
549
+ for col in existing_cols_types
550
+ ]
551
+ )
552
+ primary_queries.extend(
553
+ get_create_table_queries(
554
+ existing_cols_pd_types,
555
+ f'_new_{pipe.target}',
556
+ self.flavor,
557
+ schema=self.get_pipe_schema(pipe),
558
+ primary_key=primary_key,
559
+ ) + [
560
+ (
561
+ f"INSERT INTO {new_table_name} ({select_cols_str})\n"
562
+ f"SELECT {select_cols_str}\nFROM {_pipe_name}"
563
+ ),
564
+ f"DROP TABLE {_pipe_name}",
565
+ ] + get_rename_table_queries(
566
+ f'_new_{pipe.target}',
567
+ pipe.target,
568
+ self.flavor,
569
+ schema=self.get_pipe_schema(pipe),
570
+ )
571
+ )
572
+ elif self.flavor == 'oracle':
573
+ primary_queries.extend([
574
+ (
575
+ f"ALTER TABLE {_pipe_name}\n"
576
+ f"MODIFY {primary_key_name} NOT NULL"
577
+ ),
578
+ (
579
+ f"ALTER TABLE {_pipe_name}\n"
580
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
581
+ )
582
+ ])
583
+ elif self.flavor in ('mysql', 'mariadb'):
584
+ primary_queries.extend([
585
+ (
586
+ f"ALTER TABLE {_pipe_name}\n"
587
+ f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
588
+ ),
589
+ (
590
+ f"ALTER TABLE {_pipe_name}\n"
591
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
592
+ )
593
+ ])
594
+ elif self.flavor == 'timescaledb':
595
+ primary_queries.extend([
596
+ (
597
+ f"ALTER TABLE {_pipe_name}\n"
598
+ f"ALTER COLUMN {primary_key_name} SET NOT NULL"
599
+ ),
600
+ (
601
+ f"ALTER TABLE {_pipe_name}\n"
602
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
603
+ f"{_datetime_name}, " if _datetime_name else ""
604
+ ) + f"{primary_key_name})"
605
+ ),
606
+ ])
607
+ elif self.flavor in ('citus', 'postgresql', 'duckdb'):
608
+ primary_queries.extend([
609
+ (
610
+ f"ALTER TABLE {_pipe_name}\n"
611
+ f"ALTER COLUMN {primary_key_name} SET NOT NULL"
612
+ ),
613
+ (
614
+ f"ALTER TABLE {_pipe_name}\n"
615
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
616
+ ),
617
+ ])
618
+ else:
619
+ primary_queries.extend([
620
+ (
621
+ f"ALTER TABLE {_pipe_name}\n"
622
+ f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
623
+ ),
624
+ (
625
+ f"ALTER TABLE {_pipe_name}\n"
626
+ f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
627
+ ),
628
+ ])
629
+ index_queries[primary_key] = primary_queries
630
+
476
631
  ### create id index
477
632
  if _id_name is not None:
478
633
  if self.flavor == 'timescaledb':
@@ -496,7 +651,7 @@ def get_create_index_queries(
496
651
  other_index_names = {
497
652
  ix_key: ix_unquoted
498
653
  for ix_key, ix_unquoted in index_names.items()
499
- if ix_key not in ('datetime', 'id')
654
+ if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names
500
655
  }
501
656
  for ix_key, ix_unquoted in other_index_names.items():
502
657
  ix_name = sql_item_name(ix_unquoted, self.flavor, None)
@@ -509,13 +664,12 @@ def get_create_index_queries(
509
664
  cols_names_str = ", ".join(cols_names)
510
665
  index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]
511
666
 
512
- existing_cols_types = pipe.get_columns_types(debug=debug)
513
667
  indices_cols_str = ', '.join(
514
- [
668
+ list({
515
669
  sql_item_name(ix, self.flavor)
516
670
  for ix_key, ix in pipe.columns.items()
517
671
  if ix and ix in existing_cols_types
518
- ]
672
+ })
519
673
  )
520
674
  coalesce_indices_cols_str = ', '.join(
521
675
  [
@@ -738,7 +892,7 @@ def get_pipe_data(
738
892
  dt_type = dtypes.get(_dt, 'object').lower()
739
893
  if 'datetime' not in dt_type:
740
894
  if 'int' not in dt_type:
741
- dtypes[_dt] = 'datetime64[ns]'
895
+ dtypes[_dt] = 'datetime64[ns, UTC]'
742
896
  existing_cols = pipe.get_columns_types(debug=debug)
743
897
  select_columns = (
744
898
  [
@@ -855,6 +1009,7 @@ def get_pipe_data_query(
855
1009
  begin_add_minutes: int = 0,
856
1010
  end_add_minutes: int = 0,
857
1011
  replace_nulls: Optional[str] = None,
1012
+ skip_existing_cols_check: bool = False,
858
1013
  debug: bool = False,
859
1014
  **kw: Any
860
1015
  ) -> Union[str, None]:
@@ -905,6 +1060,9 @@ def get_pipe_data_query(
905
1060
  replace_nulls: Optional[str], default None
906
1061
  If provided, replace null values with this value.
907
1062
 
1063
+ skip_existing_cols_check: bool, default False
1064
+ If `True`, do not verify that querying columns are actually on the table.
1065
+
908
1066
  debug: bool, default False
909
1067
  Verbosity toggle.
910
1068
 
@@ -912,16 +1070,13 @@ def get_pipe_data_query(
912
1070
  -------
913
1071
  A `SELECT` query to retrieve a pipe's data.
914
1072
  """
915
- from meerschaum.utils.debug import dprint
916
1073
  from meerschaum.utils.misc import items_str
917
1074
  from meerschaum.utils.sql import sql_item_name, dateadd_str
918
- from meerschaum.utils.packages import import_pandas
919
- pd = import_pandas()
920
1075
  existing_cols = pipe.get_columns_types(debug=debug)
921
1076
  select_columns = (
922
1077
  [col for col in existing_cols]
923
1078
  if not select_columns
924
- else [col for col in select_columns if col in existing_cols]
1079
+ else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
925
1080
  )
926
1081
  if omit_columns:
927
1082
  select_columns = [col for col in select_columns if col not in omit_columns]
@@ -935,7 +1090,10 @@ def get_pipe_data_query(
935
1090
  if begin is not None:
936
1091
  begin -= backtrack_interval
937
1092
 
938
- cols_names = [sql_item_name(col, self.flavor, None) for col in select_columns]
1093
+ cols_names = [
1094
+ sql_item_name(col, self.flavor, None)
1095
+ for col in select_columns
1096
+ ]
939
1097
  select_cols_str = (
940
1098
  'SELECT\n '
941
1099
  + ',\n '.join(
@@ -948,7 +1106,7 @@ def get_pipe_data_query(
948
1106
  for col_name in cols_names
949
1107
  ]
950
1108
  )
951
- )
1109
+ ) if cols_names else 'SELECT *'
952
1110
  pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
953
1111
  query = f"{select_cols_str}\nFROM {pipe_table_name}"
954
1112
  where = ""
@@ -972,7 +1130,7 @@ def get_pipe_data_query(
972
1130
  quoted_indices = {
973
1131
  key: sql_item_name(val, self.flavor, None)
974
1132
  for key, val in pipe.columns.items()
975
- if val in existing_cols
1133
+ if val in existing_cols or skip_existing_cols_check
976
1134
  }
977
1135
 
978
1136
  if begin is not None or end is not None:
@@ -992,7 +1150,7 @@ def get_pipe_data_query(
992
1150
  )
993
1151
 
994
1152
  is_dt_bound = False
995
- if begin is not None and _dt in existing_cols:
1153
+ if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
996
1154
  begin_da = dateadd_str(
997
1155
  flavor=self.flavor,
998
1156
  datepart='minute',
@@ -1002,7 +1160,7 @@ def get_pipe_data_query(
1002
1160
  where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
1003
1161
  is_dt_bound = True
1004
1162
 
1005
- if end is not None and _dt in existing_cols:
1163
+ if end is not None and (_dt in existing_cols or skip_existing_cols_check):
1006
1164
  if 'int' in str(type(end)).lower() and end == begin:
1007
1165
  end += 1
1008
1166
  end_da = dateadd_str(
@@ -1016,7 +1174,11 @@ def get_pipe_data_query(
1016
1174
 
1017
1175
  if params is not None:
1018
1176
  from meerschaum.utils.sql import build_where
1019
- valid_params = {k: v for k, v in params.items() if k in existing_cols}
1177
+ valid_params = {
1178
+ k: v
1179
+ for k, v in params.items()
1180
+ if k in existing_cols or skip_existing_cols_check
1181
+ }
1020
1182
  if valid_params:
1021
1183
  where += build_where(valid_params, self).replace(
1022
1184
  'WHERE', ('AND' if is_dt_bound else "")
@@ -1030,7 +1192,7 @@ def get_pipe_data_query(
1030
1192
  order_by = ""
1031
1193
  if quoted_indices:
1032
1194
  order_by += "\nORDER BY "
1033
- if _dt and _dt in existing_cols:
1195
+ if _dt and (_dt in existing_cols or skip_existing_cols_check):
1034
1196
  order_by += dt + ' ' + order + ','
1035
1197
  for key, quoted_col_name in quoted_indices.items():
1036
1198
  if dt == quoted_col_name:
@@ -1140,6 +1302,70 @@ def get_pipe_attributes(
1140
1302
  return attributes
1141
1303
 
1142
1304
 
1305
+ def create_pipe_table_from_df(
1306
+ self,
1307
+ pipe: mrsm.Pipe,
1308
+ df: 'pd.DataFrame',
1309
+ debug: bool = False,
1310
+ ) -> mrsm.SuccessTuple:
1311
+ """
1312
+ Create a pipe's table from its configured dtypes and an incoming dataframe.
1313
+ """
1314
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1315
+ from meerschaum.utils.sql import get_create_table_queries, sql_item_name
1316
+ primary_key = pipe.columns.get('primary', None)
1317
+ dt_col = pipe.columns.get('datetime', None)
1318
+ new_dtypes = {
1319
+ **{
1320
+ col: str(typ)
1321
+ for col, typ in df.dtypes.items()
1322
+ },
1323
+ **{
1324
+ col: 'int'
1325
+ for col_ix, col in pipe.columns.items()
1326
+ if col_ix != 'primary'
1327
+ },
1328
+ **{
1329
+ col: 'uuid'
1330
+ for col in get_uuid_cols(df)
1331
+ },
1332
+ **{
1333
+ col: 'json'
1334
+ for col in get_json_cols(df)
1335
+ },
1336
+ **{
1337
+ col: 'numeric'
1338
+ for col in get_numeric_cols(df)
1339
+ },
1340
+ **pipe.dtypes
1341
+ }
1342
+ autoincrement = (
1343
+ pipe.parameters.get('autoincrement', False)
1344
+ or (primary_key and primary_key not in new_dtypes)
1345
+ )
1346
+ if autoincrement:
1347
+ _ = new_dtypes.pop(primary_key, None)
1348
+
1349
+ create_table_queries = get_create_table_queries(
1350
+ new_dtypes,
1351
+ pipe.target,
1352
+ self.flavor,
1353
+ schema=self.get_pipe_schema(pipe),
1354
+ primary_key=primary_key,
1355
+ datetime_column=dt_col,
1356
+ )
1357
+ success = all(
1358
+ self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1359
+ )
1360
+ target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
1361
+ msg = (
1362
+ "Success"
1363
+ if success
1364
+ else f"Failed to create {target_name}."
1365
+ )
1366
+ return success, msg
1367
+
1368
+
1143
1369
  def sync_pipe(
1144
1370
  self,
1145
1371
  pipe: mrsm.Pipe,
@@ -1197,10 +1423,17 @@ def sync_pipe(
1197
1423
  A `SuccessTuple` of success (`bool`) and message (`str`).
1198
1424
  """
1199
1425
  from meerschaum.utils.packages import import_pandas
1200
- from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors, update_queries
1426
+ from meerschaum.utils.sql import (
1427
+ get_update_queries,
1428
+ sql_item_name,
1429
+ update_queries,
1430
+ get_create_table_queries,
1431
+ get_reset_autoincrement_queries,
1432
+ )
1201
1433
  from meerschaum.utils.misc import generate_password
1202
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
1434
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
1203
1435
  from meerschaum.utils.dtypes import are_dtypes_equal
1436
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1204
1437
  from meerschaum import Pipe
1205
1438
  import time
1206
1439
  import copy
@@ -1211,6 +1444,7 @@ def sync_pipe(
1211
1444
  return False, msg
1212
1445
 
1213
1446
  start = time.perf_counter()
1447
+ pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
1214
1448
 
1215
1449
  if not pipe.temporary and not pipe.get_id(debug=debug):
1216
1450
  register_tuple = pipe.register(debug=debug)
@@ -1232,7 +1466,6 @@ def sync_pipe(
1232
1466
 
1233
1467
  ### if table does not exist, create it with indices
1234
1468
  is_new = False
1235
- add_cols_query = None
1236
1469
  if not pipe.exists(debug=debug):
1237
1470
  check_existing = False
1238
1471
  is_new = True
@@ -1240,11 +1473,15 @@ def sync_pipe(
1240
1473
  ### Check for new columns.
1241
1474
  add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
1242
1475
  if add_cols_queries:
1476
+ _ = pipe.__dict__.pop('_columns_indices', None)
1477
+ _ = pipe.__dict__.pop('_columns_types', None)
1243
1478
  if not self.exec_queries(add_cols_queries, debug=debug):
1244
1479
  warn(f"Failed to add new columns to {pipe}.")
1245
1480
 
1246
1481
  alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
1247
1482
  if alter_cols_queries:
1483
+ _ = pipe.__dict__.pop('_columns_indices', None)
1484
+ _ = pipe.__dict__.pop('_columns_types', None)
1248
1485
  if not self.exec_queries(alter_cols_queries, debug=debug):
1249
1486
  warn(f"Failed to alter columns for {pipe}.")
1250
1487
  else:
@@ -1252,9 +1489,7 @@ def sync_pipe(
1252
1489
 
1253
1490
  ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
1254
1491
  ### so infer bools and persist them to `dtypes`.
1255
- ### MSSQL supports `BIT` for booleans, but we coerce bools to int for MSSQL
1256
- ### to avoid merge issues.
1257
- if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
1492
+ if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1258
1493
  pipe_dtypes = pipe.dtypes
1259
1494
  new_bool_cols = {
1260
1495
  col: 'bool[pyarrow]'
@@ -1309,47 +1544,131 @@ def sync_pipe(
1309
1544
  'schema': self.get_pipe_schema(pipe),
1310
1545
  })
1311
1546
 
1312
- stats = self.to_sql(unseen_df, **unseen_kw)
1547
+ primary_key = pipe.columns.get('primary', None)
1548
+ autoincrement = (
1549
+ pipe.parameters.get('autoincrement', False)
1550
+ or (
1551
+ is_new
1552
+ and primary_key
1553
+ and primary_key
1554
+ not in pipe.dtypes
1555
+ and primary_key not in unseen_df.columns
1556
+ )
1557
+ )
1558
+ if autoincrement and autoincrement not in pipe.parameters:
1559
+ pipe.parameters['autoincrement'] = autoincrement
1560
+ edit_success, edit_msg = pipe.edit(debug=debug)
1561
+ if not edit_success:
1562
+ return edit_success, edit_msg
1563
+
1564
+ autoincrement_needs_reset = False
1565
+ if autoincrement and primary_key:
1566
+ if primary_key not in df.columns:
1567
+ if unseen_df is not None and primary_key in unseen_df.columns:
1568
+ del unseen_df[primary_key]
1569
+ if update_df is not None and primary_key in update_df.columns:
1570
+ del update_df[primary_key]
1571
+ if delta_df is not None and primary_key in delta_df.columns:
1572
+ del delta_df[primary_key]
1573
+ elif unseen_df[primary_key].notnull().any():
1574
+ autoincrement_needs_reset = True
1575
+
1576
+ if is_new:
1577
+ create_success, create_msg = self.create_pipe_table_from_df(
1578
+ pipe,
1579
+ unseen_df,
1580
+ debug=debug,
1581
+ )
1582
+ if not create_success:
1583
+ return create_success, create_msg
1584
+
1585
+ do_identity_insert = bool(
1586
+ self.flavor in ('mssql',)
1587
+ and primary_key in unseen_df.columns
1588
+ and autoincrement
1589
+ )
1590
+ with self.engine.connect() as connection:
1591
+ with connection.begin():
1592
+ if do_identity_insert:
1593
+ identity_on_result = self.exec(
1594
+ f"SET IDENTITY_INSERT {pipe_name} ON",
1595
+ commit=False,
1596
+ _connection=connection,
1597
+ close=False,
1598
+ debug=debug,
1599
+ )
1600
+ if identity_on_result is None:
1601
+ return False, f"Could not enable identity inserts on {pipe}."
1602
+
1603
+ stats = self.to_sql(
1604
+ unseen_df,
1605
+ _connection=connection,
1606
+ **unseen_kw
1607
+ )
1608
+
1609
+ if do_identity_insert:
1610
+ identity_off_result = self.exec(
1611
+ f"SET IDENTITY_INSERT {pipe_name} OFF",
1612
+ commit=False,
1613
+ _connection=connection,
1614
+ close=False,
1615
+ debug=debug,
1616
+ )
1617
+ if identity_off_result is None:
1618
+ return False, f"Could not disable identity inserts on {pipe}."
1619
+
1313
1620
  if is_new:
1314
1621
  if not self.create_indices(pipe, debug=debug):
1315
1622
  warn(f"Failed to create indices for {pipe}. Continuing...")
1316
1623
 
1317
- if update_df is not None and len(update_df) > 0:
1318
- dt_col = pipe.columns.get('datetime', None)
1319
- dt_typ = pipe.dtypes.get(dt_col, None)
1320
- dt_name = sql_item_name(dt_col, self.flavor) if dt_col else None
1321
- update_min = update_df[dt_col].min() if dt_col and dt_col in update_df.columns else None
1322
- update_max = update_df[dt_col].max() if dt_col and dt_col in update_df.columns else None
1323
- update_begin = update_min
1324
- update_end = (
1325
- update_max
1326
- + (
1327
- timedelta(minutes=1)
1328
- if are_dtypes_equal(str(dt_typ), 'datetime')
1329
- else 1
1330
- )
1331
- ) if dt_col else None
1624
+ if autoincrement_needs_reset:
1625
+ reset_autoincrement_queries = get_reset_autoincrement_queries(
1626
+ pipe.target,
1627
+ primary_key,
1628
+ self,
1629
+ schema=self.get_pipe_schema(pipe),
1630
+ debug=debug,
1631
+ )
1632
+ results = self.exec_queries(reset_autoincrement_queries, debug=debug)
1633
+ for result in results:
1634
+ if result is None:
1635
+ warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1332
1636
 
1637
+ if update_df is not None and len(update_df) > 0:
1333
1638
  transact_id = generate_password(3)
1334
- temp_target = '##' + transact_id + '_' + pipe.target
1639
+ temp_prefix = '##' if self.flavor != 'oracle' else ''
1640
+ temp_target = temp_prefix + transact_id + '_' + pipe.target
1335
1641
  self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1336
1642
  temp_pipe = Pipe(
1337
1643
  pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1338
1644
  instance=pipe.instance_keys,
1339
1645
  columns={
1340
- ix_key: ix
1646
+ (ix_key if ix_key != 'primary' else 'primary_'): ix
1341
1647
  for ix_key, ix in pipe.columns.items()
1342
1648
  if ix and ix in update_df.columns
1343
1649
  },
1344
- dtypes=pipe.dtypes,
1650
+ dtypes={
1651
+ col: typ
1652
+ for col, typ in pipe.dtypes.items()
1653
+ if col in update_df.columns
1654
+ },
1345
1655
  target=temp_target,
1346
1656
  temporary=True,
1347
1657
  parameters={
1658
+ 'static': True,
1348
1659
  'schema': self.internal_schema,
1349
1660
  'hypertable': False,
1661
+ 'autoincrement': False,
1350
1662
  },
1351
1663
  )
1352
- temp_pipe.sync(update_df, check_existing=False, debug=debug)
1664
+ temp_pipe._columns_types = {
1665
+ col: get_db_type_from_pd_type(str(typ), self.flavor)
1666
+ for col, typ in update_df.dtypes.items()
1667
+ }
1668
+ temp_pipe._columns_types_timestamp = time.perf_counter()
1669
+ temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
1670
+ if not temp_success:
1671
+ return temp_success, temp_msg
1353
1672
  existing_cols = pipe.get_columns_types(debug=debug)
1354
1673
  join_cols = [
1355
1674
  col
@@ -1358,7 +1677,7 @@ def sync_pipe(
1358
1677
  ]
1359
1678
  update_queries = get_update_queries(
1360
1679
  pipe.target,
1361
- temp_target,
1680
+ temp_target,
1362
1681
  self,
1363
1682
  join_cols,
1364
1683
  upsert=upsert,
@@ -1475,28 +1794,23 @@ def sync_pipe_inplace(
1475
1794
  )
1476
1795
  from meerschaum.utils.sql import (
1477
1796
  sql_item_name,
1478
- get_sqlalchemy_table,
1479
1797
  get_update_queries,
1480
1798
  get_null_replacement,
1481
- NO_CTE_FLAVORS,
1482
- NO_SELECT_INTO_FLAVORS,
1483
- format_cte_subquery,
1484
- get_create_table_query,
1799
+ get_create_table_queries,
1485
1800
  get_table_cols_types,
1486
- truncate_item_name,
1487
1801
  session_execute,
1488
- table_exists,
1489
1802
  update_queries,
1490
1803
  )
1804
+ from meerschaum.utils.dtypes import coerce_timezone, are_dtypes_equal
1491
1805
  from meerschaum.utils.dtypes.sql import (
1492
1806
  get_pd_type_from_db_type,
1493
1807
  )
1494
1808
  from meerschaum.utils.misc import generate_password
1495
- from meerschaum.utils.debug import dprint
1496
1809
 
1497
1810
  transact_id = generate_password(3)
1498
1811
  def get_temp_table_name(label: str) -> str:
1499
- return '##' + transact_id + '_' + label + '_' + pipe.target
1812
+ temp_prefix = '##' if self.flavor != 'oracle' else ''
1813
+ return temp_prefix + transact_id + '_' + label + '_' + pipe.target
1500
1814
 
1501
1815
  internal_schema = self.internal_schema
1502
1816
  temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
@@ -1523,6 +1837,11 @@ def sync_pipe_inplace(
1523
1837
  pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1524
1838
  upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
1525
1839
  database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
1840
+ primary_key = pipe.columns.get('primary', None)
1841
+ autoincrement = pipe.parameters.get('autoincrement', False)
1842
+ dt_col = pipe.columns.get('datetime', None)
1843
+ dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
1844
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
1526
1845
 
1527
1846
  def clean_up_temp_tables(ready_to_drop: bool = False):
1528
1847
  log_success, log_msg = self._log_temporary_tables_creation(
@@ -1546,13 +1865,16 @@ def sync_pipe_inplace(
1546
1865
 
1547
1866
  sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
1548
1867
  if not pipe.exists(debug=debug):
1549
- create_pipe_query = get_create_table_query(
1868
+ create_pipe_queries = get_create_table_queries(
1550
1869
  metadef,
1551
1870
  pipe.target,
1552
1871
  self.flavor,
1553
1872
  schema=self.get_pipe_schema(pipe),
1873
+ primary_key=primary_key,
1874
+ autoincrement=autoincrement,
1875
+ datetime_column=dt_col,
1554
1876
  )
1555
- result = self.exec(create_pipe_query, debug=debug)
1877
+ result = self.exec_queries(create_pipe_queries, debug=debug)
1556
1878
  if result is None:
1557
1879
  _ = clean_up_temp_tables()
1558
1880
  return False, f"Could not insert new data into {pipe} from its SQL query definition."
@@ -1567,12 +1889,12 @@ def sync_pipe_inplace(
1567
1889
  session = sqlalchemy_orm.Session(self.engine)
1568
1890
  connectable = session if self.flavor != 'duckdb' else self
1569
1891
 
1570
- create_new_query = get_create_table_query(
1892
+ create_new_query = get_create_table_queries(
1571
1893
  metadef,
1572
1894
  temp_tables[('new') if not upsert else 'update'],
1573
1895
  self.flavor,
1574
1896
  schema=internal_schema,
1575
- )
1897
+ )[0]
1576
1898
  (create_new_success, create_new_msg), create_new_results = session_execute(
1577
1899
  session,
1578
1900
  create_new_query,
@@ -1603,13 +1925,20 @@ def sync_pipe_inplace(
1603
1925
  sql_item_name(col, self.flavor)
1604
1926
  for col in new_cols
1605
1927
  ])
1928
+ def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
1929
+ if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
1930
+ return new_cols_types[col]
1931
+ return cols_types[col]
1606
1932
 
1607
1933
  add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
1608
1934
  if add_cols_queries:
1935
+ _ = pipe.__dict__.pop('_columns_types', None)
1936
+ _ = pipe.__dict__.pop('_columns_indices', None)
1609
1937
  self.exec_queries(add_cols_queries, debug=debug)
1610
1938
 
1611
1939
  alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
1612
1940
  if alter_cols_queries:
1941
+ _ = pipe.__dict__.pop('_columns_types', None)
1613
1942
  self.exec_queries(alter_cols_queries, debug=debug)
1614
1943
 
1615
1944
  insert_queries = [
@@ -1634,6 +1963,26 @@ def sync_pipe_inplace(
1634
1963
  _ = clean_up_temp_tables()
1635
1964
  return True, f"Inserted {new_count}, updated 0 rows."
1636
1965
 
1966
+ (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
1967
+ session,
1968
+ [
1969
+ "SELECT\n"
1970
+ f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
1971
+ f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
1972
+ f"FROM {temp_table_names['new']}\n"
1973
+ f"WHERE {dt_col_name} IS NOT NULL"
1974
+ ],
1975
+ with_results=True,
1976
+ debug=debug,
1977
+ )
1978
+ if not new_dt_bounds_success:
1979
+ return (
1980
+ new_dt_bounds_success,
1981
+ f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
1982
+ )
1983
+
1984
+ begin, end = new_dt_bounds_results[0].fetchone()
1985
+
1637
1986
  backtrack_def = self.get_pipe_data_query(
1638
1987
  pipe,
1639
1988
  begin=begin,
@@ -1644,19 +1993,18 @@ def sync_pipe_inplace(
1644
1993
  debug=debug,
1645
1994
  order=None,
1646
1995
  )
1647
-
1648
- create_backtrack_query = get_create_table_query(
1996
+ create_backtrack_query = get_create_table_queries(
1649
1997
  backtrack_def,
1650
1998
  temp_tables['backtrack'],
1651
1999
  self.flavor,
1652
2000
  schema=internal_schema,
1653
- )
1654
- (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
2001
+ )[0]
2002
+ (create_backtrack_success, create_backtrack_msg), create_new_results = session_execute(
1655
2003
  session,
1656
2004
  create_backtrack_query,
1657
2005
  with_results=True,
1658
2006
  debug=debug,
1659
- ) if not upsert else (True, "Success"), None
2007
+ ) if not upsert else ((True, "Success"), None)
1660
2008
 
1661
2009
  if not create_backtrack_success:
1662
2010
  _ = clean_up_temp_tables()
@@ -1673,7 +2021,7 @@ def sync_pipe_inplace(
1673
2021
 
1674
2022
  common_cols = [col for col in new_cols if col in backtrack_cols_types]
1675
2023
  on_cols = {
1676
- col: new_cols.get(col, 'object')
2024
+ col: new_cols.get(col)
1677
2025
  for col_key, col in pipe.columns.items()
1678
2026
  if (
1679
2027
  col
@@ -1687,7 +2035,8 @@ def sync_pipe_inplace(
1687
2035
  null_replace_new_cols_str = (
1688
2036
  ', '.join([
1689
2037
  f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
1690
- + f"{get_null_replacement(typ, self.flavor)}) AS "
2038
+ + get_null_replacement(get_col_typ(col, new_cols), self.flavor)
2039
+ + ") AS "
1691
2040
  + sql_item_name(col, self.flavor, None)
1692
2041
  for col, typ in new_cols.items()
1693
2042
  ])
@@ -1703,7 +2052,7 @@ def sync_pipe_inplace(
1703
2052
  f"COALESCE({temp_table_names['new']}."
1704
2053
  + sql_item_name(c, self.flavor, None)
1705
2054
  + ", "
1706
- + get_null_replacement(new_cols[c], self.flavor)
2055
+ + get_null_replacement(get_col_typ(c, new_cols), self.flavor)
1707
2056
  + ") "
1708
2057
  + ' = '
1709
2058
  + f"COALESCE({temp_table_names['backtrack']}."
@@ -1720,12 +2069,12 @@ def sync_pipe_inplace(
1720
2069
  ) for c in common_cols
1721
2070
  ])
1722
2071
  )
1723
- create_delta_query = get_create_table_query(
2072
+ create_delta_query = get_create_table_queries(
1724
2073
  select_delta_query,
1725
2074
  temp_tables['delta'],
1726
2075
  self.flavor,
1727
2076
  schema=internal_schema,
1728
- )
2077
+ )[0]
1729
2078
  create_delta_success, create_delta_msg = session_execute(
1730
2079
  session,
1731
2080
  create_delta_query,
@@ -1778,20 +2127,28 @@ def sync_pipe_inplace(
1778
2127
  + '\nAND\n'.join([
1779
2128
  (
1780
2129
  f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
1781
- + ", " + get_null_replacement(typ, self.flavor) + ")"
2130
+ + ", "
2131
+ + get_null_replacement(
2132
+ get_col_typ(c, on_cols),
2133
+ self.flavor
2134
+ ) + ")"
1782
2135
  + ' = '
1783
2136
  + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
1784
- + ", " + get_null_replacement(typ, self.flavor) + ")"
2137
+ + ", "
2138
+ + get_null_replacement(
2139
+ get_col_typ(c, on_cols),
2140
+ self.flavor
2141
+ ) + ")"
1785
2142
  ) for c, typ in on_cols.items()
1786
2143
  ])
1787
2144
  )
1788
2145
 
1789
- create_joined_query = get_create_table_query(
2146
+ create_joined_query = get_create_table_queries(
1790
2147
  select_joined_query,
1791
2148
  temp_tables['joined'],
1792
2149
  self.flavor,
1793
- schema = internal_schema,
1794
- )
2150
+ schema=internal_schema,
2151
+ )[0]
1795
2152
  create_joined_success, create_joined_msg = session_execute(
1796
2153
  session,
1797
2154
  create_joined_query,
@@ -1806,7 +2163,7 @@ def sync_pipe_inplace(
1806
2163
  + (', '.join([
1807
2164
  (
1808
2165
  "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
1809
- + " != " + get_null_replacement(typ, self.flavor)
2166
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
1810
2167
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
1811
2168
  + "\n ELSE NULL\nEND "
1812
2169
  + " AS " + sql_item_name(c, self.flavor, None)
@@ -1820,18 +2177,18 @@ def sync_pipe_inplace(
1820
2177
  ) for c in delta_cols
1821
2178
  ])
1822
2179
  )
1823
- create_unseen_query = get_create_table_query(
2180
+ create_unseen_query = get_create_table_queries(
1824
2181
  select_unseen_query,
1825
2182
  temp_tables['unseen'],
1826
2183
  self.flavor,
1827
2184
  internal_schema,
1828
- )
2185
+ )[0]
1829
2186
  (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
1830
2187
  session,
1831
2188
  create_unseen_query,
1832
2189
  with_results=True,
1833
2190
  debug=debug
1834
- ) if not upsert else (True, "Success"), None
2191
+ ) if not upsert else ((True, "Success"), None)
1835
2192
  if not create_unseen_success:
1836
2193
  _ = clean_up_temp_tables()
1837
2194
  return create_unseen_success, create_unseen_msg
@@ -1841,7 +2198,7 @@ def sync_pipe_inplace(
1841
2198
  + (', '.join([
1842
2199
  (
1843
2200
  "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
1844
- + " != " + get_null_replacement(typ, self.flavor)
2201
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
1845
2202
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
1846
2203
  + "\n ELSE NULL\nEND "
1847
2204
  + " AS " + sql_item_name(c, self.flavor, None)
@@ -1856,12 +2213,12 @@ def sync_pipe_inplace(
1856
2213
  ])
1857
2214
  )
1858
2215
 
1859
- create_update_query = get_create_table_query(
2216
+ create_update_query = get_create_table_queries(
1860
2217
  select_update_query,
1861
2218
  temp_tables['update'],
1862
2219
  self.flavor,
1863
2220
  internal_schema,
1864
- )
2221
+ )[0]
1865
2222
  (create_update_success, create_update_msg), create_update_results = session_execute(
1866
2223
  session,
1867
2224
  create_update_query,
@@ -1901,7 +2258,7 @@ def sync_pipe_inplace(
1901
2258
  apply_unseen_queries,
1902
2259
  with_results=True,
1903
2260
  debug=debug,
1904
- ) if not upsert else (True, "Success"), None
2261
+ ) if not upsert else ((True, "Success"), None)
1905
2262
  if not apply_unseen_success:
1906
2263
  _ = clean_up_temp_tables()
1907
2264
  return apply_unseen_success, apply_unseen_msg
@@ -1931,12 +2288,12 @@ def sync_pipe_inplace(
1931
2288
 
1932
2289
 
1933
2290
  def get_sync_time(
1934
- self,
1935
- pipe: 'mrsm.Pipe',
1936
- params: Optional[Dict[str, Any]] = None,
1937
- newest: bool = True,
1938
- debug: bool = False,
1939
- ) -> Union[datetime, int, None]:
2291
+ self,
2292
+ pipe: 'mrsm.Pipe',
2293
+ params: Optional[Dict[str, Any]] = None,
2294
+ newest: bool = True,
2295
+ debug: bool = False,
2296
+ ) -> Union[datetime, int, None]:
1940
2297
  """Get a Pipe's most recent datetime value.
1941
2298
 
1942
2299
  Parameters
@@ -1960,7 +2317,7 @@ def get_sync_time(
1960
2317
  table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1961
2318
 
1962
2319
  dt_col = pipe.columns.get('datetime', None)
1963
- dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns]')
2320
+ dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
1964
2321
  if not dt_col:
1965
2322
  _dt = pipe.guess_datetime()
1966
2323
  dt = sql_item_name(_dt, self.flavor, None) if _dt else None
@@ -2031,10 +2388,10 @@ def get_sync_time(
2031
2388
 
2032
2389
 
2033
2390
  def pipe_exists(
2034
- self,
2035
- pipe: mrsm.Pipe,
2036
- debug: bool = False
2037
- ) -> bool:
2391
+ self,
2392
+ pipe: mrsm.Pipe,
2393
+ debug: bool = False
2394
+ ) -> bool:
2038
2395
  """
2039
2396
  Check that a Pipe's table exists.
2040
2397
 
@@ -2042,7 +2399,7 @@ def pipe_exists(
2042
2399
  ----------
2043
2400
  pipe: mrsm.Pipe:
2044
2401
  The pipe to check.
2045
-
2402
+
2046
2403
  debug: bool, default False
2047
2404
  Verbosity toggle.
2048
2405
 
@@ -2055,8 +2412,8 @@ def pipe_exists(
2055
2412
  exists = table_exists(
2056
2413
  pipe.target,
2057
2414
  self,
2058
- schema = self.get_pipe_schema(pipe),
2059
- debug = debug,
2415
+ schema=self.get_pipe_schema(pipe),
2416
+ debug=debug,
2060
2417
  )
2061
2418
  if debug:
2062
2419
  from meerschaum.utils.debug import dprint
@@ -2366,7 +2723,7 @@ def get_pipe_columns_types(
2366
2723
  ----------
2367
2724
  pipe: mrsm.Pipe:
2368
2725
  The pipe to get the columns for.
2369
-
2726
+
2370
2727
  Returns
2371
2728
  -------
2372
2729
  A dictionary of columns names (`str`) and types (`str`).
@@ -2381,16 +2738,17 @@ def get_pipe_columns_types(
2381
2738
  }
2382
2739
  >>>
2383
2740
  """
2741
+ from meerschaum.utils.sql import get_table_cols_types
2384
2742
  if not pipe.exists(debug=debug):
2385
2743
  return {}
2386
2744
 
2387
- if self.flavor == 'duckdb':
2388
- from meerschaum.utils.sql import get_table_cols_types
2745
+ if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'):
2389
2746
  return get_table_cols_types(
2390
2747
  pipe.target,
2391
2748
  self,
2392
2749
  flavor=self.flavor,
2393
2750
  schema=self.get_pipe_schema(pipe),
2751
+ debug=debug,
2394
2752
  )
2395
2753
 
2396
2754
  table_columns = {}
@@ -2409,6 +2767,35 @@ def get_pipe_columns_types(
2409
2767
  return table_columns
2410
2768
 
2411
2769
 
2770
+ def get_pipe_columns_indices(
2771
+ self,
2772
+ pipe: mrsm.Pipe,
2773
+ debug: bool = False,
2774
+ ) -> Dict[str, List[Dict[str, str]]]:
2775
+ """
2776
+ Return a dictionary mapping columns to the indices created on those columns.
2777
+
2778
+ Parameters
2779
+ ----------
2780
+ pipe: mrsm.Pipe
2781
+ The pipe to be queried against.
2782
+
2783
+
2784
+ Returns
2785
+ -------
2786
+ A dictionary mapping columns names to lists of dictionaries.
2787
+ The dictionaries in the lists contain the name and type of the indices.
2788
+ """
2789
+ from meerschaum.utils.sql import get_table_cols_indices
2790
+ return get_table_cols_indices(
2791
+ pipe.target,
2792
+ self,
2793
+ flavor=self.flavor,
2794
+ schema=self.get_pipe_schema(pipe),
2795
+ debug=debug,
2796
+ )
2797
+
2798
+
2412
2799
  def get_add_columns_queries(
2413
2800
  self,
2414
2801
  pipe: mrsm.Pipe,
@@ -2438,6 +2825,9 @@ def get_add_columns_queries(
2438
2825
  if not pipe.exists(debug=debug):
2439
2826
  return []
2440
2827
 
2828
+ if pipe.parameters.get('static', False):
2829
+ return []
2830
+
2441
2831
  from decimal import Decimal
2442
2832
  import copy
2443
2833
  from meerschaum.utils.sql import (
@@ -2556,6 +2946,8 @@ def get_alter_columns_queries(
2556
2946
  """
2557
2947
  if not pipe.exists(debug=debug):
2558
2948
  return []
2949
+ if pipe.static:
2950
+ return
2559
2951
  from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
2560
2952
  from meerschaum.utils.dataframe import get_numeric_cols
2561
2953
  from meerschaum.utils.dtypes import are_dtypes_equal
@@ -2789,7 +3181,6 @@ def get_alter_columns_queries(
2789
3181
 
2790
3182
  return queries
2791
3183
 
2792
-
2793
3184
  query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
2794
3185
  for col, typ in altered_cols_types.items():
2795
3186
  alter_col_prefix = (
@@ -2823,11 +3214,11 @@ def get_alter_columns_queries(
2823
3214
 
2824
3215
 
2825
3216
  def get_to_sql_dtype(
2826
- self,
2827
- pipe: 'mrsm.Pipe',
2828
- df: 'pd.DataFrame',
2829
- update_dtypes: bool = True,
2830
- ) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
3217
+ self,
3218
+ pipe: 'mrsm.Pipe',
3219
+ df: 'pd.DataFrame',
3220
+ update_dtypes: bool = True,
3221
+ ) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
2831
3222
  """
2832
3223
  Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
2833
3224
 
@@ -2857,7 +3248,7 @@ def get_to_sql_dtype(
2857
3248
  >>> get_to_sql_dtype(pipe, df)
2858
3249
  {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
2859
3250
  """
2860
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
3251
+ from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
2861
3252
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2862
3253
  df_dtypes = {
2863
3254
  col: str(typ)
@@ -2865,8 +3256,10 @@ def get_to_sql_dtype(
2865
3256
  }
2866
3257
  json_cols = get_json_cols(df)
2867
3258
  numeric_cols = get_numeric_cols(df)
3259
+ uuid_cols = get_uuid_cols(df)
2868
3260
  df_dtypes.update({col: 'json' for col in json_cols})
2869
3261
  df_dtypes.update({col: 'numeric' for col in numeric_cols})
3262
+ df_dtypes.update({col: 'uuid' for col in uuid_cols})
2870
3263
  if update_dtypes:
2871
3264
  df_dtypes.update(pipe.dtypes)
2872
3265
  return {
@@ -2876,14 +3269,14 @@ def get_to_sql_dtype(
2876
3269
 
2877
3270
 
2878
3271
  def deduplicate_pipe(
2879
- self,
2880
- pipe: mrsm.Pipe,
2881
- begin: Union[datetime, int, None] = None,
2882
- end: Union[datetime, int, None] = None,
2883
- params: Optional[Dict[str, Any]] = None,
2884
- debug: bool = False,
2885
- **kwargs: Any
2886
- ) -> SuccessTuple:
3272
+ self,
3273
+ pipe: mrsm.Pipe,
3274
+ begin: Union[datetime, int, None] = None,
3275
+ end: Union[datetime, int, None] = None,
3276
+ params: Optional[Dict[str, Any]] = None,
3277
+ debug: bool = False,
3278
+ **kwargs: Any
3279
+ ) -> SuccessTuple:
2887
3280
  """
2888
3281
  Delete duplicate values within a pipe's table.
2889
3282
 
@@ -2947,7 +3340,7 @@ def deduplicate_pipe(
2947
3340
  duplicates_cte_name = sql_item_name('dups', self.flavor, None)
2948
3341
  duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
2949
3342
  previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
2950
-
3343
+
2951
3344
  index_list_str = (
2952
3345
  sql_item_name(dt_col, self.flavor, None)
2953
3346
  if dt_col
@@ -3038,7 +3431,7 @@ def deduplicate_pipe(
3038
3431
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3039
3432
 
3040
3433
  create_temporary_table_query = get_create_table_query(
3041
- duplicates_cte_subquery,
3434
+ duplicates_cte_subquery,
3042
3435
  dedup_table,
3043
3436
  self.flavor,
3044
3437
  ) + f"""