meerschaum 2.6.16__py3-none-any.whl → 2.7.0__py3-none-any.whl

Files changed (40)
  1. meerschaum/_internal/arguments/_parse_arguments.py +1 -1
  2. meerschaum/actions/delete.py +65 -69
  3. meerschaum/actions/edit.py +22 -2
  4. meerschaum/actions/install.py +1 -2
  5. meerschaum/actions/sync.py +2 -3
  6. meerschaum/api/routes/_pipes.py +7 -8
  7. meerschaum/config/_default.py +1 -1
  8. meerschaum/config/_paths.py +2 -1
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/api/_pipes.py +18 -21
  11. meerschaum/connectors/sql/_create_engine.py +3 -3
  12. meerschaum/connectors/sql/_instance.py +11 -12
  13. meerschaum/connectors/sql/_pipes.py +143 -91
  14. meerschaum/connectors/sql/_sql.py +43 -8
  15. meerschaum/connectors/valkey/_pipes.py +12 -1
  16. meerschaum/core/Pipe/__init__.py +23 -13
  17. meerschaum/core/Pipe/_attributes.py +25 -1
  18. meerschaum/core/Pipe/_dtypes.py +23 -16
  19. meerschaum/core/Pipe/_sync.py +59 -31
  20. meerschaum/core/Pipe/_verify.py +8 -7
  21. meerschaum/jobs/_Job.py +4 -1
  22. meerschaum/plugins/_Plugin.py +11 -14
  23. meerschaum/utils/daemon/Daemon.py +22 -15
  24. meerschaum/utils/dataframe.py +178 -16
  25. meerschaum/utils/dtypes/__init__.py +149 -14
  26. meerschaum/utils/dtypes/sql.py +41 -7
  27. meerschaum/utils/misc.py +8 -8
  28. meerschaum/utils/packages/_packages.py +1 -1
  29. meerschaum/utils/schedule.py +8 -3
  30. meerschaum/utils/sql.py +180 -100
  31. meerschaum/utils/venv/_Venv.py +4 -4
  32. meerschaum/utils/venv/__init__.py +53 -20
  33. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/METADATA +2 -2
  34. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/RECORD +40 -40
  35. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
  36. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
  37. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
  38. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
  39. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
  40. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py

@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."

-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values, flatten_list
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)

     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -460,10 +461,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'pk_{pipe.target}', self.flavor, None)
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )

     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
@@ -474,6 +481,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')

     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
@@ -504,19 +512,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif self.flavor == 'mssql':
-            dt_query = (
-                "CREATE "
-                + ("CLUSTERED " if not primary_key else '')
-                + f"INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
-        else: ### mssql, sqlite, etc.
-            dt_query = (
-                f"CREATE INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )

+    if dt_query:
         index_queries[_datetime] = [dt_query]

     primary_queries = []
@@ -623,7 +631,7 @@ def get_create_index_queries(
             ),
             (
                 f"ALTER TABLE {_pipe_name}\n"
-                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
             ),
         ])
         index_queries[primary_key] = primary_queries
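The two hunks above coordinate around an MSSQL rule: a table may have at most one clustered index, so when a datetime column is configured it takes the clustered index and the primary-key constraint falls back to NONCLUSTERED. A minimal sketch of the resulting DDL, using hypothetical names (`weather`, `timestamp`, `station_id`) in place of real pipe attributes:

```python
# Illustration only: hypothetical table/column names, with the CLUSTERED /
# NONCLUSTERED selection mirroring the logic added in the hunks above.
_pipe_name, _datetime_name, primary_key_name = 'weather', 'timestamp', 'station_id'
_datetime = 'timestamp'                 # a datetime column is configured
existing_clustered_primary_keys = []    # no clustered primary key exists yet

primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
datetime_clustered = (
    "CLUSTERED"
    if not existing_clustered_primary_keys and _datetime is not None
    else "NONCLUSTERED"
)
print(f"CREATE {datetime_clustered} INDEX ix_weather_timestamp ON {_pipe_name} ({_datetime_name})")
print(f"ALTER TABLE {_pipe_name}\nADD CONSTRAINT PK_weather PRIMARY KEY {primary_key_clustered} ({primary_key_name})")
```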
@@ -658,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
@@ -785,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')

@@ -869,19 +877,19 @@ def get_pipe_data(

     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__

-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -891,24 +899,21 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False

             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
                         dtypes[_dt] = 'datetime64[ns, UTC]'
-    existing_cols = pipe.get_columns_types(debug=debug)
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -922,14 +927,14 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
@@ -959,6 +964,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]

     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))

@@ -978,6 +988,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)

+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
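The new `bytes_columns` pass mirrors the existing UUID handling: values come back from the database as text and are cast back to `bytes`. A rough stand-in for `attempt_cast_to_bytes`, assuming bytes are persisted as base64 text (the real helper lives in `meerschaum.utils.dtypes` and may differ in detail):

```python
import base64
import pandas as pd

def attempt_cast_to_bytes(value):
    """Best-effort cast: decode base64 text into bytes, else return the value unchanged."""
    if isinstance(value, bytes) or value is None:
        return value
    try:
        return base64.b64decode(str(value))
    except Exception:
        return value

df = pd.DataFrame({'blob': [base64.b64encode(b'hello').decode('utf-8'), None]})
df['blob'] = df['blob'].apply(attempt_cast_to_bytes)
print(df['blob'][0])  # b'hello'
```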
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type

     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
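Note the reordered guard in the `select_columns` comprehension: with `enforce=False`, `skip_existing_cols_check` is forced `True`, and short-circuit evaluation means the (empty) `existing_cols` never filters anything out. A tiny demonstration with toy values:

```python
# With enforcement disabled, no column metadata is fetched, so existing_cols is empty.
existing_cols = []
skip_existing_cols_check = True  # forced True when pipe.enforce is False
select_columns = ['station', 'temperature']

kept = [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
print(kept)  # ['station', 'temperature'] -- the membership test is never evaluated
```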
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True

     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} < {end_da}"
+        where += f"{dt} < {end_da}"
         is_dt_bound = True

     if params is not None:
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )

     if len(where) > 0:
@@ -1264,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
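Since `**pipe.dtypes` is unpacked last, explicitly configured dtypes still win over anything detected from the incoming dataframe; detection only fills the gaps. A toy illustration of that precedence, with hypothetical column names:

```python
# Later ** entries overwrite earlier ones, so configured dtypes take precedence.
detected = {'payload': 'bytes', 'ts': 'datetime64[ns, UTC]'}   # e.g. from get_bytes_cols / get_datetime_cols
configured = {'ts': 'datetime64[ns]'}                          # e.g. from pipe.dtypes
new_dtypes = {**detected, **configured}
print(new_dtypes)  # {'payload': 'bytes', 'ts': 'datetime64[ns]'}
```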
@@ -1455,11 +1488,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
@@ -1567,11 +1598,13 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
     })

+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
@@ -1589,17 +1622,23 @@ def sync_pipe(
         if not edit_success:
             return edit_success, edit_msg

-    autoincrement_needs_reset = False
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
     if autoincrement and primary_key:
-        if primary_key not in df.columns:
-            if unseen_df is not None and primary_key in unseen_df.columns:
-                del unseen_df[primary_key]
-            if update_df is not None and primary_key in update_df.columns:
-                del update_df[primary_key]
-            if delta_df is not None and primary_key in delta_df.columns:
-                del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)

     if is_new:
         create_success, create_msg = self.create_pipe_table_from_df(
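The refactored `_check_pk` helper only drops the primary-key column when it is entirely null (so the database can generate IDs); a partially populated column survives and instead flags `autoincrement_needs_reset`. A small demonstration with hypothetical data:

```python
import pandas as pd

def _check_pk(_df_to_clear, primary_key='id'):
    # Mirrors the helper above: drop the PK column only if every value is null.
    if _df_to_clear is None or primary_key not in _df_to_clear.columns:
        return
    if not _df_to_clear[primary_key].notnull().any():
        del _df_to_clear[primary_key]

all_null = pd.DataFrame({'id': [None, None], 'val': [1, 2]})
partial = pd.DataFrame({'id': [10, None], 'val': [1, 2]})
_check_pk(all_null)
_check_pk(partial)
print(list(all_null.columns))  # ['val'] -- dropped, so the database assigns IDs
print(list(partial.columns))   # ['id', 'val'] -- kept, and triggers an autoincrement reset
```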
@@ -1612,38 +1651,41 @@ def sync_pipe(

     do_identity_insert = bool(
         self.flavor in ('mssql',)
+        and primary_key
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    with self.engine.connect() as connection:
-        with connection.begin():
-            if do_identity_insert:
-                identity_on_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} ON",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_on_result is None:
-                    return False, f"Could not enable identity inserts on {pipe}."
-
-            stats = self.to_sql(
-                unseen_df,
-                _connection=connection,
-                **unseen_kw
-            )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."

-            if do_identity_insert:
-                identity_off_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} OFF",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_off_result is None:
-                    return False, f"Could not disable identity inserts on {pipe}."
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."

     if is_new:
         if not self.create_indices(pipe, debug=debug):
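The insert is now skipped entirely for empty dataframes, and the `SET IDENTITY_INSERT` bracketing runs inside the same transaction as the insert itself. A sketch of that transaction shape, using SQLite in place of MSSQL (so a `PRAGMA` stands in for `SET IDENTITY_INSERT`):

```python
import pandas as pd
import sqlalchemy

engine = sqlalchemy.create_engine('sqlite://')
unseen_df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})

if len(unseen_df) > 0:  # skip the connection entirely when there is nothing to insert
    with engine.connect() as connection:
        with connection.begin():  # setup, insert, and teardown commit (or roll back) together
            connection.execute(sqlalchemy.text("PRAGMA foreign_keys = ON"))  # stand-in for SET IDENTITY_INSERT ... ON
            unseen_df.to_sql('demo', connection, if_exists='replace', index=False)
            # the matching teardown statement would run here, before the commit

print(pd.read_sql('SELECT * FROM demo', engine))
```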
@@ -1682,11 +1724,12 @@ def sync_pipe(
             },
             target=temp_target,
             temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
             parameters={
-                'static': True,
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                 'hypertable': False,
-                'autoincrement': False,
             },
         )
         temp_pipe.__dict__['_columns_types'] = {
@@ -1707,7 +1750,11 @@ def sync_pipe(
             col
             for col_key, col in pipe.columns.items()
             if col and col in existing_cols
-        ]
+        ] if not primary_key or self.flavor == 'oracle' else (
+            [dt_col, primary_key]
+            if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+            else [primary_key]
+        )
         update_queries = get_update_queries(
             pipe.target,
             temp_target,
@@ -1716,12 +1763,17 @@ def sync_pipe(
             upsert=upsert,
             schema=self.get_pipe_schema(pipe),
             patch_schema=self.internal_schema,
-            datetime_col=pipe.columns.get('datetime', None),
+            datetime_col=(dt_col if dt_col in update_df.columns else None),
+            identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-        update_success = all(
-            self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,
@@ -1730,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()

     stop = time.perf_counter()
     success = stats['success']
@@ -1834,7 +1888,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
@@ -1907,8 +1960,8 @@ def sync_pipe_inplace(
             autoincrement=autoincrement,
             datetime_column=dt_col,
         )
-        result = self.exec_queries(create_pipe_queries, debug=debug)
-        if result is None:
+        results = self.exec_queries(create_pipe_queries, debug=debug)
+        if not all(results):
             _ = clean_up_temp_tables()
             return False, f"Could not insert new data into {pipe} from its SQL query definition."

@@ -2054,6 +2107,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types

     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
@@ -2064,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}

     null_replace_new_cols_str = (
         ', '.join([
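Both `sync_pipe` and `sync_pipe_inplace` now prefer the primary key as the sole join column for updates (except on Oracle, which keeps the old multi-column behavior). A sketch of the selection with toy inputs:

```python
# Toy inputs; mirrors the conditional added above.
primary_key = 'id'
flavor = 'postgresql'
new_cols = {'id': 'BIGINT', 'ts': 'TIMESTAMP', 'val': 'TEXT'}
pipe_columns = {'primary': 'id', 'datetime': 'ts'}

on_cols = {
    col: new_cols.get(col)
    for col_key, col in pipe_columns.items()
    if col in new_cols
} if not primary_key or flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
print(on_cols)  # {'id': 'BIGINT'} -- updates join on the primary key alone
```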
@@ -2591,7 +2645,7 @@ def get_pipe_rowcount(
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
-    except Exception as e:
+    except Exception:
         return None


@@ -2616,10 +2670,11 @@ def drop_pipe(
     from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
+    schema = self.get_pipe_schema(pipe)
     target_name = (
-        sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
+        sql_item_name(target, self.flavor, schema)
    )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
         if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
         success = self.exec(
             f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
@@ -3330,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
3454
3507
  dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
3455
3508
  temp_old_table = '-' + session_id + f"_old_{pipe.target}"
3456
3509
 
3457
- dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
3458
3510
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3459
3511
 
3460
3512
  create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py

@@ -624,7 +624,7 @@ def exec_queries(
    rollback: bool = True,
    silent: bool = False,
    debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.

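The annotation change tracks SQLAlchemy 2.x, where `LegacyCursorResult` was removed and `CursorResult` is the result type; the `Union` with `None` reflects that failed queries yield `None`. A quick check, assuming SQLAlchemy 2.x is installed:

```python
import sqlalchemy

engine = sqlalchemy.create_engine('sqlite://')
with engine.connect() as connection:
    result = connection.execute(sqlalchemy.text('SELECT 1'))
    print(type(result).__name__)  # CursorResult
```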
@@ -688,6 +688,7 @@ def exec_queries(
             if result is None and break_on_error:
                 if rollback:
                     session.rollback()
+                results.append(result)
                 break
             elif result is not None and hook is not None:
                 hook_queries = hook(session)
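Appending the failed (`None`) result before `break` matters for callers that compute `all(results)`: without it, the failure vanishes from the list and the earlier successes make the batch look clean. Toy stand-ins:

```python
cursor_a, cursor_b = object(), object()  # stand-ins for successful CursorResults
truncated = [cursor_a, cursor_b]         # old behavior: the failing None was never appended
complete = [cursor_a, cursor_b, None]    # new behavior: the failure is recorded
print(all(truncated))  # True -- the failure is silently masked
print(all(complete))   # False -- the failure surfaces to the caller
```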
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be uploaded.
+        The DataFrame to be inserted.

     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.

+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
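The new `safe_copy` flag decides whether `to_sql` may mutate the caller's dataframe in place (the bytes encoding and numeric-to-text casts below modify columns). Note that `sync_pipe` passes `safe_copy=False`, presumably because its dataframes are already internal copies. A sketch of the trade-off:

```python
import pandas as pd

def mutate(df, safe_copy=True):
    # Mirrors the new to_sql behavior: copy first unless the caller opts out.
    if safe_copy:
        df = df.copy()
    df['x'] = df['x'].astype(str)
    return df

original = pd.DataFrame({'x': [1, 2]})
mutate(original, safe_copy=True)
print(original['x'].dtype)   # int64 -- the caller's frame is untouched
mutate(original, safe_copy=False)
print(original['x'].dtype)   # object -- mutated in place, but no copy was made
```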
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import decimal
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -790,10 +794,21 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -803,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__

-    stats = {'target': name, }
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

     default_chunksize = self._sys_config.get('chunksize', None)
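Bytes columns must be pre-encoded before the PostgreSQL `COPY`-based bulk insert or an Oracle insert can accept them, and flavors in `NUMERIC_AS_TEXT_FLAVORS` ship numerics as text. An illustrative stand-in for `encode_bytes_for_bytea`, assuming BYTEA hex format with a leading `\x` for PostgreSQL and bare hex digits for Oracle (the real helper lives in `meerschaum.utils.dtypes` and may differ):

```python
def encode_bytes_for_bytea(value, with_prefix=True):
    # Hex-encode bytes; PostgreSQL's BYTEA hex format expects a leading '\x'.
    if not isinstance(value, bytes):
        return value
    hex_str = value.hex()
    return ('\\x' + hex_str) if with_prefix else hex_str

print(encode_bytes_for_bytea(b'\x01\xff'))                     # \x01ff
print(encode_bytes_for_bytea(b'\x01\xff', with_prefix=False))  # 01ff
```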
@@ -920,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (