meerschaum 2.6.17__py3-none-any.whl → 2.7.0__py3-none-any.whl

Files changed (34)
  1. meerschaum/actions/delete.py +65 -69
  2. meerschaum/actions/install.py +1 -2
  3. meerschaum/api/routes/_pipes.py +7 -8
  4. meerschaum/config/_default.py +1 -1
  5. meerschaum/config/_paths.py +2 -1
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/connectors/api/_pipes.py +18 -21
  8. meerschaum/connectors/sql/_instance.py +11 -12
  9. meerschaum/connectors/sql/_pipes.py +122 -78
  10. meerschaum/connectors/sql/_sql.py +43 -8
  11. meerschaum/connectors/valkey/_pipes.py +12 -1
  12. meerschaum/core/Pipe/__init__.py +23 -13
  13. meerschaum/core/Pipe/_attributes.py +25 -1
  14. meerschaum/core/Pipe/_dtypes.py +23 -16
  15. meerschaum/core/Pipe/_sync.py +59 -31
  16. meerschaum/core/Pipe/_verify.py +8 -7
  17. meerschaum/jobs/_Job.py +2 -0
  18. meerschaum/plugins/_Plugin.py +11 -14
  19. meerschaum/utils/daemon/Daemon.py +20 -13
  20. meerschaum/utils/dataframe.py +178 -16
  21. meerschaum/utils/dtypes/__init__.py +149 -14
  22. meerschaum/utils/dtypes/sql.py +41 -7
  23. meerschaum/utils/misc.py +8 -8
  24. meerschaum/utils/sql.py +174 -64
  25. meerschaum/utils/venv/_Venv.py +4 -4
  26. meerschaum/utils/venv/__init__.py +53 -20
  27. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/METADATA +1 -1
  28. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/RECORD +34 -34
  29. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
  30. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
  31. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
  32. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
  33. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
  34. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py

@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values, flatten_list
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json

@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (

@@ -460,10 +461,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'pk_{pipe.target}', self.flavor, None)
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
    )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )
 
     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)

@@ -474,6 +481,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')
 
     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (

@@ -504,19 +512,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif self.flavor == 'mssql':
-            dt_query = (
-                "CREATE "
-                + ("CLUSTERED " if not primary_key else '')
-                + f"INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
-        else: ### mssql, sqlite, etc.
-            dt_query = (
-                f"CREATE INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )
 
+    if dt_query:
         index_queries[_datetime] = [dt_query]
 
     primary_queries = []

@@ -623,7 +631,7 @@ def get_create_index_queries(
         ),
         (
             f"ALTER TABLE {_pipe_name}\n"
-            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
         ),
     ])
     index_queries[primary_key] = primary_queries
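
Note on the MSSQL change above: a table can hold only one clustered index, so 2.7.0 lets a configured datetime column claim it and demotes the primary-key constraint to NONCLUSTERED. A minimal sketch of how the two flags resolve (the table and column names below are hypothetical):

# How the CLUSTERED/NONCLUSTERED flags from the hunks above interact.
_datetime = 'timestamp'                  # pipe's datetime axis, or None
existing_clustered_primary_keys = []     # populated from get_pipe_columns_indices()

primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
datetime_clustered = (
    "CLUSTERED"
    if not existing_clustered_primary_keys and _datetime is not None
    else "NONCLUSTERED"
)

# Resulting DDL on MSSQL (assuming these names):
#   ALTER TABLE [weather] ADD CONSTRAINT [PK_weather] PRIMARY KEY NONCLUSTERED ([id])
#   CREATE CLUSTERED INDEX [IX_weather_timestamp] ON [weather] ([timestamp])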
@@ -658,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue

@@ -785,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')
 

@@ -869,19 +877,19 @@ def get_pipe_data(
 
     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -891,24 +899,21 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False
 
             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
                         dtypes[_dt] = 'datetime64[ns, UTC]'
-    existing_cols = pipe.get_columns_types(debug=debug)
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col

@@ -922,14 +927,14 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,

@@ -959,6 +964,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]
 
     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
 

@@ -978,6 +988,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)
 
+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
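
For context, `attempt_cast_to_bytes` (imported in the hunk above) restores real `bytes` objects for columns declared with the new 'bytes' dtype. A hedged round-trip sketch, assuming the serialized form is base64 text as produced by the 2.7.0 dtypes utilities:

import pandas as pd
from meerschaum.utils.dtypes import attempt_cast_to_bytes

df = pd.DataFrame({'payload': ['aGVsbG8=', None]})
df['payload'] = df['payload'].apply(attempt_cast_to_bytes)
# expected: b'hello' and None (nulls pass through unchanged)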
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
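
The `pipe.enforce` gating above pairs with the new `enforce` flag on `Pipe` (see meerschaum/core/Pipe/__init__.py in the file list, and the `enforce=False` passed to the temporary pipe later in this diff). A hedged usage sketch with hypothetical keys:

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'raw',
    instance='sql:main',   # hypothetical instance connector
    enforce=False,         # skip get_columns_types() lookups and dtype coercion
)
df = pipe.get_data()       # columns come back exactly as the table stores them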
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True
 
     if end is not None and (_dt in existing_cols or skip_existing_cols_check):

@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} < {end_da}"
+        where += f"{dt} < {end_da}"
         is_dt_bound = True
 
     if params is not None:

@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )
 
     if len(where) > 0:
@@ -1264,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']

@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)

@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
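
One subtlety in the dtype merge above: `**pipe.dtypes` is unpacked last, so explicitly configured dtypes always override what was sniffed from the incoming dataframe. A self-contained illustration:

# Merge precedence: later unpacking wins.
inferred = {
    'payload': 'bytes',              # e.g. from get_bytes_cols(df)
    'ts': 'datetime64[ns, UTC]',     # e.g. from get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
}
configured = {'ts': 'datetime64[ns]'}  # hypothetical pipe.dtypes entry
new_dtypes = {**inferred, **configured}
assert new_dtypes == {'payload': 'bytes', 'ts': 'datetime64[ns]'}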
@@ -1455,11 +1488,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe

@@ -1567,11 +1598,13 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
     })
 
+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)

@@ -1622,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    with self.engine.connect() as connection:
-        with connection.begin():
-            if do_identity_insert:
-                identity_on_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} ON",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_on_result is None:
-                    return False, f"Could not enable identity inserts on {pipe}."
-
-            stats = self.to_sql(
-                unseen_df,
-                _connection=connection,
-                **unseen_kw
-            )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
 
-            if do_identity_insert:
-                identity_off_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} OFF",
-                    commit=False,
+                stats = self.to_sql(
+                    unseen_df,
                     _connection=connection,
-                    close=False,
-                    debug=debug,
+                    **unseen_kw
                 )
-                if identity_off_result is None:
-                    return False, f"Could not disable identity inserts on {pipe}."
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."
 
     if is_new:
         if not self.create_indices(pipe, debug=debug):
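
The practical effect of the new guard: a sync whose rows all land in the update path no longer opens a write connection at all. A runnable control-flow sketch (the `insert_unseen` callable is a hypothetical stand-in for the connection + to_sql block above):

import pandas as pd

def sync_unseen(unseen_df: pd.DataFrame, insert_unseen) -> dict:
    stats = {'success': True, 'msg': 'Success'}
    if len(unseen_df) > 0:
        stats = insert_unseen(unseen_df)   # connect, toggle IDENTITY_INSERT, bulk-insert
    return stats

# An all-update sync passes an empty frame and never touches the database:
print(sync_unseen(pd.DataFrame(), lambda df: {'success': False, 'msg': 'unreachable'}))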
@@ -1689,11 +1724,12 @@ def sync_pipe(
             },
             target=temp_target,
             temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
             parameters={
-                'static': True,
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                 'hypertable': False,
-                'autoincrement': False,
             },
         )
         temp_pipe.__dict__['_columns_types'] = {

@@ -1714,7 +1750,11 @@ def sync_pipe(
             col
             for col_key, col in pipe.columns.items()
             if col and col in existing_cols
-        ]
+        ] if not primary_key or self.flavor == 'oracle' else (
+            [dt_col, primary_key]
+            if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+            else [primary_key]
+        )
         update_queries = get_update_queries(
             pipe.target,
             temp_target,

@@ -1723,12 +1763,17 @@ def sync_pipe(
             upsert=upsert,
             schema=self.get_pipe_schema(pipe),
             patch_schema=self.internal_schema,
-            datetime_col=pipe.columns.get('datetime', None),
+            datetime_col=(dt_col if dt_col in update_df.columns else None),
+            identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-        update_success = all(
-            self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,

@@ -1737,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
 
     stop = time.perf_counter()
     success = stats['success']

@@ -1841,7 +1888,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )

@@ -1914,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-    result = self.exec_queries(create_pipe_queries, debug=debug)
-    if result is None:
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."
 

@@ -2061,6 +2107,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types
 
     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()

@@ -2071,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([

@@ -3338,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,

@@ -3462,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py

@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.
 

@@ -688,6 +688,7 @@ def exec_queries(
         if result is None and break_on_error:
             if rollback:
                 session.rollback()
+            results.append(result)
             break
         elif result is not None and hook is not None:
             hook_queries = hook(session)
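
Because the failing result is now appended before the `break`, a `None` entry marks the failure in the returned list, which is what lets the `sync_pipe_inplace` hunk above switch from `result is None` to `not all(results)`. A hedged usage sketch (the connector keys here are hypothetical):

import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local')
results = conn.exec_queries(
    ["CREATE TABLE IF NOT EXISTS t (a INT)", "INSERT INTO t VALUES (1)"],
    break_on_error=True,
    rollback=True,
)
if not all(results):
    print("At least one query failed; the transaction was rolled back.")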
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,

@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be uploaded.
+        The DataFrame to be inserted.
 
     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import decimal
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools

@@ -790,10 +794,21 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )

@@ -803,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__
 
-    stats = {'target': name, }
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
     default_chunksize = self._sys_config.get('chunksize', None)
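
For reference, PostgreSQL's COPY path expects bytea values in hex format with a leading '\x', while the Oracle branch takes bare hex. The real encoder is meerschaum.utils.dtypes.encode_bytes_for_bytea; the sketch below is an assumed-equivalent illustration, not the library's implementation:

import binascii

def encode_bytes_sketch(data: bytes, with_prefix: bool = True) -> str:
    # Hex-encode bytes for a bytea column ('\x'-prefixed for PostgreSQL).
    hex_str = binascii.hexlify(data).decode('utf-8')
    return ('\\x' + hex_str) if with_prefix else hex_str

print(encode_bytes_sketch(b'hello'))                     # \x68656c6c6f
print(encode_bytes_sketch(b'hello', with_prefix=False))  # 68656c6c6f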
@@ -920,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/connectors/valkey/_pipes.py

@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
     -------
     A serialized string for the document.
     """
+    from meerschaum.utils.dtypes import serialize_bytes
     return json.dumps(
         doc,
-        default=(lambda x: json_serialize_datetime(x) if hasattr(x, 'tzinfo') else str(x)),
+        default=(
+            lambda x: (
+                json_serialize_datetime(x)
+                if hasattr(x, 'tzinfo')
+                else (
+                    serialize_bytes(x)
+                    if isinstance(x, bytes)
+                    else str(x)
+                )
+            )
+        ),
         separators=(',', ':'),
         sort_keys=True,
     )
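
The upshot of the serializer change: bytes values no longer fall through to str(x), which produced "b'...'" artifacts, and are instead serialized via serialize_bytes. A standalone approximation, assuming serialize_bytes is base64-based:

import json
import base64

doc = {'payload': b'hello'}
serialized = json.dumps(
    doc,
    default=(lambda x: base64.b64encode(x).decode('utf-8') if isinstance(x, bytes) else str(x)),
    separators=(',', ':'),
    sort_keys=True,
)
print(serialized)  # {"payload":"aGVsbG8="}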