meerschaum 2.6.16__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. meerschaum/_internal/arguments/_parse_arguments.py +1 -1
  2. meerschaum/actions/delete.py +65 -69
  3. meerschaum/actions/edit.py +22 -2
  4. meerschaum/actions/install.py +1 -2
  5. meerschaum/actions/sync.py +2 -3
  6. meerschaum/api/routes/_pipes.py +7 -8
  7. meerschaum/config/_default.py +1 -1
  8. meerschaum/config/_paths.py +2 -1
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/api/_pipes.py +18 -21
  11. meerschaum/connectors/sql/_create_engine.py +3 -3
  12. meerschaum/connectors/sql/_instance.py +11 -12
  13. meerschaum/connectors/sql/_pipes.py +143 -91
  14. meerschaum/connectors/sql/_sql.py +43 -8
  15. meerschaum/connectors/valkey/_pipes.py +12 -1
  16. meerschaum/core/Pipe/__init__.py +23 -13
  17. meerschaum/core/Pipe/_attributes.py +25 -1
  18. meerschaum/core/Pipe/_dtypes.py +23 -16
  19. meerschaum/core/Pipe/_sync.py +59 -31
  20. meerschaum/core/Pipe/_verify.py +8 -7
  21. meerschaum/jobs/_Job.py +4 -1
  22. meerschaum/plugins/_Plugin.py +11 -14
  23. meerschaum/utils/daemon/Daemon.py +22 -15
  24. meerschaum/utils/dataframe.py +178 -16
  25. meerschaum/utils/dtypes/__init__.py +149 -14
  26. meerschaum/utils/dtypes/sql.py +41 -7
  27. meerschaum/utils/misc.py +8 -8
  28. meerschaum/utils/packages/_packages.py +1 -1
  29. meerschaum/utils/schedule.py +8 -3
  30. meerschaum/utils/sql.py +180 -100
  31. meerschaum/utils/venv/_Venv.py +4 -4
  32. meerschaum/utils/venv/__init__.py +53 -20
  33. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/METADATA +2 -2
  34. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/RECORD +40 -40
  35. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
  36. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
  37. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
  38. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
  39. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
  40. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."

-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values, flatten_list
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)

     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -460,10 +461,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'pk_{pipe.target}', self.flavor, None)
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )

     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
@@ -474,6 +481,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')

     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
@@ -504,19 +512,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif self.flavor == 'mssql':
-            dt_query = (
-                "CREATE "
-                + ("CLUSTERED " if not primary_key else '')
-                + f"INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
-        else: ### mssql, sqlite, etc.
-            dt_query = (
-                f"CREATE INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )

+    if dt_query:
         index_queries[_datetime] = [dt_query]

     primary_queries = []
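
Note: the net effect on MSSQL is that exactly one index can claim the clustered slot. A standalone restatement of the decision above (my own sketch, not part of the diff; names mirror the diff's variables):

    def resolve_mssql_clustering(datetime_col, existing_clustered_primary_keys):
        """Return the (primary key, datetime index) CLUSTERED keywords."""
        primary_key_clustered = "CLUSTERED" if datetime_col is None else "NONCLUSTERED"
        datetime_clustered = (
            "CLUSTERED"
            if not existing_clustered_primary_keys and datetime_col is not None
            else "NONCLUSTERED"
        )
        return primary_key_clustered, datetime_clustered

    # With a datetime axis, the datetime index takes the clustered slot:
    assert resolve_mssql_clustering('dt', []) == ("NONCLUSTERED", "CLUSTERED")
    # An existing clustered primary key keeps the new datetime index nonclustered:
    assert resolve_mssql_clustering('dt', ['id']) == ("NONCLUSTERED", "NONCLUSTERED")
    # Without a datetime column, the primary key stays clustered:
    assert resolve_mssql_clustering(None, []) == ("CLUSTERED", "NONCLUSTERED")
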
@@ -623,7 +631,7 @@ def get_create_index_queries(
             ),
             (
                 f"ALTER TABLE {_pipe_name}\n"
-                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
             ),
         ])
         index_queries[primary_key] = primary_queries
@@ -658,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
@@ -785,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')

@@ -869,19 +877,19 @@ def get_pipe_data(

     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__

-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -891,24 +899,21 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False

             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
                         dtypes[_dt] = 'datetime64[ns, UTC]'
-    existing_cols = pipe.get_columns_types(debug=debug)
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -922,14 +927,14 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
@@ -959,6 +964,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]

     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
@@ -978,6 +988,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)

+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
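
Note: `bytes` is a new pipe dtype in 2.7.0, and most flavors return binary values as text, hence the cast back after fetching. A rough sketch of the round trip, assuming a base64 text representation (the shipped `attempt_cast_to_bytes` may differ in detail):

    import base64

    def attempt_cast_to_bytes_sketch(value):
        """Best-effort cast of a fetched value back to `bytes` (my assumption: base64 text)."""
        if value is None or isinstance(value, bytes):
            return value
        try:
            return base64.b64decode(value)
        except Exception:
            return value

    assert attempt_cast_to_bytes_sketch('AAE=') == b'\x00\x01'
    assert attempt_cast_to_bytes_sketch(b'\x00\x01') == b'\x00\x01'
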
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type

     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
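
Note: these `pipe.enforce` branches back the new `enforce` flag (also visible in the `Pipe(...)` call in `sync_pipe` below): with `enforce=False`, the connector skips the `get_columns_types()` lookup and forces `skip_existing_cols_check`, so requested columns pass through unverified. A minimal usage sketch (connector keys are placeholders):

    import meerschaum as mrsm

    # With enforce=False, get_pipe_data_query() no longer filters
    # select_columns against the table's actual columns.
    pipe = mrsm.Pipe('sql:main', 'demo', enforce=False)
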
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True

     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} < {end_da}"
+        where += f"{dt} < {end_da}"
         is_dt_bound = True

     if params is not None:
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )

     if len(where) > 0:
@@ -1264,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
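
Note: this hunk extends dtype inference to bytes columns and to timezone-aware vs. naive datetimes before the `CREATE TABLE` statement is built. An illustrative frame (mine, not from the diff) and the inference the comprehensions above should produce:

    import pandas as pd

    df = pd.DataFrame({
        'dt_utc': pd.to_datetime(['2024-01-01'], utc=True),  # datetime64[ns, UTC]
        'dt_naive': pd.to_datetime(['2024-01-01']),          # datetime64[ns]
        'payload': [b'\x00\x01'],                            # bytes
    })
    # Expected inference per the comprehensions above:
    #   get_bytes_cols(df)                                               -> ['payload']
    #   get_datetime_cols(df, timezone_aware=True, timezone_naive=False) -> ['dt_utc']
    #   get_datetime_cols(df, timezone_aware=False, timezone_naive=True) -> ['dt_naive']
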
@@ -1455,11 +1488,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
@@ -1567,11 +1598,13 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
     })

+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
@@ -1589,17 +1622,23 @@ def sync_pipe(
         if not edit_success:
             return edit_success, edit_msg

-    autoincrement_needs_reset = False
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
     if autoincrement and primary_key:
-        if primary_key not in df.columns:
-            if unseen_df is not None and primary_key in unseen_df.columns:
-                del unseen_df[primary_key]
-            if update_df is not None and primary_key in update_df.columns:
-                del update_df[primary_key]
-            if delta_df is not None and primary_key in delta_df.columns:
-                del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)

     if is_new:
         create_success, create_msg = self.create_pipe_table_from_df(
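
Note: the extracted `_check_pk` helper treats all three frames uniformly: an autoincrement primary-key column that is entirely null is dropped so the database generates the values. A standalone demo of that behavior:

    import pandas as pd

    def _check_pk(_df_to_clear, primary_key='id'):
        if _df_to_clear is None or primary_key not in _df_to_clear.columns:
            return
        if not _df_to_clear[primary_key].notnull().any():
            del _df_to_clear[primary_key]

    df = pd.DataFrame({'id': [None, None], 'val': [1, 2]})
    _check_pk(df)
    assert list(df.columns) == ['val']   # the all-null PK column was dropped
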
@@ -1612,38 +1651,41 @@ def sync_pipe(
     do_identity_insert = bool(
         self.flavor in ('mssql',)
+        and primary_key
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    with self.engine.connect() as connection:
-        with connection.begin():
-            if do_identity_insert:
-                identity_on_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} ON",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_on_result is None:
-                    return False, f"Could not enable identity inserts on {pipe}."
-
-            stats = self.to_sql(
-                unseen_df,
-                _connection=connection,
-                **unseen_kw
-            )
-
-            if do_identity_insert:
-                identity_off_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} OFF",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_off_result is None:
-                    return False, f"Could not disable identity inserts on {pipe}."
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
+
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."

     if is_new:
         if not self.create_indices(pipe, debug=debug):
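
Note: the new `if len(unseen_df) > 0:` guard means a sync whose delta contains only updates no longer opens a connection or transaction for the insert step; `stats` keeps its successful no-op default:

    import pandas as pd

    unseen_df = pd.DataFrame(columns=['id', 'val'])
    stats = {'success': True, 'msg': 'Success'}
    if len(unseen_df) > 0:          # False for an empty frame: no connection, no INSERT
        raise AssertionError('unreachable for an empty frame')
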
@@ -1682,11 +1724,12 @@ def sync_pipe(
             },
             target=temp_target,
             temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
             parameters={
-                'static': True,
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                 'hypertable': False,
-                'autoincrement': False,
             },
         )
         temp_pipe.__dict__['_columns_types'] = {
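
Note: the staging pipe for updates is now configured through first-class keyword arguments rather than `parameters` entries. A minimal sketch of the equivalent construction (keys and target are placeholders):

    import meerschaum as mrsm

    temp_pipe = mrsm.Pipe(
        'sql:main', 'patch', 'staging',   # placeholder keys
        target='temp_patch_table',        # placeholder target
        temporary=True,
        enforce=False,        # skip column/dtype enforcement on the scratch table
        static=True,          # columns are fixed for the life of the pipe
        autoincrement=False,  # never generate IDs for patch rows
    )
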
@@ -1707,7 +1750,11 @@ def sync_pipe(
             col
             for col_key, col in pipe.columns.items()
             if col and col in existing_cols
-        ]
+        ] if not primary_key or self.flavor == 'oracle' else (
+            [dt_col, primary_key]
+            if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+            else [primary_key]
+        )
         update_queries = get_update_queries(
             pipe.target,
             temp_target,
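
Note: join columns for the patch step now prefer the primary key when one is defined, except on Oracle (old behavior retained) and TimescaleDB (the datetime partition column must participate). Restated as a standalone function for clarity (my own sketch; names are illustrative):

    def resolve_patch_join_cols(flavor, primary_key, dt_col, update_cols, pipe_cols, existing_cols):
        if not primary_key or flavor == 'oracle':
            return [col for col in pipe_cols.values() if col and col in existing_cols]
        if flavor == 'timescaledb' and dt_col and dt_col in update_cols:
            return [dt_col, primary_key]
        return [primary_key]

    assert resolve_patch_join_cols('timescaledb', 'id', 'dt', ['dt', 'val'], {}, []) == ['dt', 'id']
    assert resolve_patch_join_cols('postgresql', 'id', 'dt', ['val'], {}, []) == ['id']
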
@@ -1716,12 +1763,17 @@ def sync_pipe(
             upsert=upsert,
             schema=self.get_pipe_schema(pipe),
             patch_schema=self.internal_schema,
-            datetime_col=pipe.columns.get('datetime', None),
+            datetime_col=(dt_col if dt_col in update_df.columns else None),
+            identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-        update_success = all(
-            self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,
@@ -1730,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()

     stop = time.perf_counter()
     success = stats['success']
@@ -1834,7 +1888,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
@@ -1907,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-    result = self.exec_queries(create_pipe_queries, debug=debug)
-    if result is None:
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."

@@ -2054,6 +2107,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types

     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
@@ -2064,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}

     null_replace_new_cols_str = (
         ', '.join([
@@ -2591,7 +2645,7 @@ def get_pipe_rowcount(
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
-    except Exception as e:
+    except Exception:
         return None


@@ -2616,10 +2670,11 @@ def drop_pipe(
     from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
+    schema = self.get_pipe_schema(pipe)
     target_name = (
-        sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
+        sql_item_name(target, self.flavor, schema)
     )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
         if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
         success = self.exec(
             f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
3330
3385
  """
3331
3386
  from meerschaum.utils.sql import (
3332
3387
  sql_item_name,
3333
- NO_CTE_FLAVORS,
3334
3388
  get_rename_table_queries,
3335
- NO_SELECT_INTO_FLAVORS,
3336
3389
  DROP_IF_EXISTS_FLAVORS,
3337
3390
  get_create_table_query,
3338
3391
  format_cte_subquery,
@@ -3454,7 +3507,6 @@ def deduplicate_pipe(
3454
3507
  dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
3455
3508
  temp_old_table = '-' + session_id + f"_old_{pipe.target}"
3456
3509
 
3457
- dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
3458
3510
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3459
3511
 
3460
3512
  create_temporary_table_query = get_create_table_query(
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.

@@ -688,6 +688,7 @@ def exec_queries(
             if result is None and break_on_error:
                 if rollback:
                     session.rollback()
+                results.append(result)
                 break
             elif result is not None and hook is not None:
                 hook_queries = hook(session)
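
Note: combined with the return-type change above, appending the `None` result before `break` makes a failed query visible in the returned list, so callers can truth-test the whole batch (as `sync_pipe_inplace` now does):

    # results = connector.exec_queries(queries, break_on_error=True, rollback=True)
    results = [object(), None]   # simulated: the second query failed and broke the loop
    assert not all(results)      # failure is detectable without a length check
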
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be uploaded.
+        The DataFrame to be inserted.

     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.

+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import decimal
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -790,10 +794,21 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -803,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__

-    stats = {'target': name, }
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

     default_chunksize = self._sys_config.get('chunksize', None)
920
956
  ### Check for numeric columns.
921
957
  numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
922
958
  if numeric_precision is not None and numeric_scale is not None:
923
- numeric_cols = get_numeric_cols(df)
924
959
  for col in numeric_cols:
925
960
  df[col] = df[col].apply(
926
961
  lambda x: (