meerschaum 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. meerschaum/_internal/docs/index.py +1 -0
  2. meerschaum/actions/show.py +2 -1
  3. meerschaum/actions/sql.py +11 -11
  4. meerschaum/api/dash/pipes.py +4 -2
  5. meerschaum/api/routes/_pipes.py +3 -8
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/connectors/api/_pipes.py +4 -4
  8. meerschaum/connectors/sql/_SQLConnector.py +12 -2
  9. meerschaum/connectors/sql/_create_engine.py +13 -6
  10. meerschaum/connectors/sql/_pipes.py +81 -65
  11. meerschaum/connectors/sql/_sql.py +194 -106
  12. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -5
  13. meerschaum/core/Pipe/__init__.py +1 -0
  14. meerschaum/core/Pipe/_attributes.py +1 -1
  15. meerschaum/core/Pipe/_data.py +16 -16
  16. meerschaum/core/Pipe/_deduplicate.py +27 -27
  17. meerschaum/core/Pipe/_sync.py +26 -1
  18. meerschaum/core/Pipe/_verify.py +5 -5
  19. meerschaum/utils/dataframe.py +127 -8
  20. meerschaum/utils/dtypes/__init__.py +26 -4
  21. meerschaum/utils/dtypes/sql.py +30 -0
  22. meerschaum/utils/misc.py +1 -1
  23. meerschaum/utils/sql.py +100 -64
  24. meerschaum/utils/yaml.py +3 -6
  25. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/METADATA +1 -1
  26. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/RECORD +32 -32
  27. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/LICENSE +0 -0
  28. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/NOTICE +0 -0
  29. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/WHEEL +0 -0
  30. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/entry_points.txt +0 -0
  31. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/top_level.txt +0 -0
  32. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_sql.py
@@ -20,32 +20,33 @@ _bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
 _disallow_chunks_flavors = {'duckdb', 'mssql'}
 _max_chunks_flavors = {'sqlite': 1000,}
 
+
 def read(
-        self,
-        query_or_table: Union[str, sqlalchemy.Query],
-        params: Union[Dict[str, Any], List[str], None] = None,
-        dtype: Optional[Dict[str, Any]] = None,
-        coerce_float: bool = True,
-        chunksize: Optional[int] = -1,
-        workers: Optional[int] = None,
-        chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
-        as_hook_results: bool = False,
-        chunks: Optional[int] = None,
-        schema: Optional[str] = None,
-        as_chunks: bool = False,
-        as_iterator: bool = False,
-        as_dask: bool = False,
-        index_col: Optional[str] = None,
-        silent: bool = False,
-        debug: bool = False,
-        **kw: Any
-    ) -> Union[
-        pandas.DataFrame,
-        dask.DataFrame,
-        List[pandas.DataFrame],
-        List[Any],
-        None,
-    ]:
+    self,
+    query_or_table: Union[str, sqlalchemy.Query],
+    params: Union[Dict[str, Any], List[str], None] = None,
+    dtype: Optional[Dict[str, Any]] = None,
+    coerce_float: bool = True,
+    chunksize: Optional[int] = -1,
+    workers: Optional[int] = None,
+    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
+    as_hook_results: bool = False,
+    chunks: Optional[int] = None,
+    schema: Optional[str] = None,
+    as_chunks: bool = False,
+    as_iterator: bool = False,
+    as_dask: bool = False,
+    index_col: Optional[str] = None,
+    silent: bool = False,
+    debug: bool = False,
+    **kw: Any
+) -> Union[
+    pandas.DataFrame,
+    dask.DataFrame,
+    List[pandas.DataFrame],
+    List[Any],
+    None,
+]:
     """
     Read a SQL query or table into a pandas dataframe.
 
@@ -145,7 +146,7 @@ def read(
     if chunksize is None and as_iterator:
         if not silent and self.flavor not in _disallow_chunks_flavors:
             warn(
-                f"An iterator may only be generated if chunksize is not None.\n"
+                "An iterator may only be generated if chunksize is not None.\n"
                 + "Falling back to a chunksize of 1000.", stacklevel=3,
             )
             chunksize = 1000
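
As the warning above states, requesting an iterator without a chunksize falls back to 1000 rows per chunk. A minimal sketch of a call that triggers this path (the connector label, database, and query here are hypothetical, not from this diff):

    from meerschaum.connectors.sql import SQLConnector

    # Hypothetical in-memory SQLite connector; any flavor outside
    # _disallow_chunks_flavors behaves the same way.
    conn = SQLConnector('demo', flavor='sqlite', database=':memory:')
    chunk_iter = conn.read(
        'SELECT 1 AS x',
        chunksize=None,    # no chunksize given...
        as_iterator=True,  # ...so the fallback chunksize of 1000 applies
    )
    for chunk in chunk_iter:
        print(len(chunk))
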
@@ -386,12 +387,12 @@ def read(
 
 
 def value(
-        self,
-        query: str,
-        *args: Any,
-        use_pandas: bool = False,
-        **kw: Any
-    ) -> Any:
+    self,
+    query: str,
+    *args: Any,
+    use_pandas: bool = False,
+    **kw: Any
+) -> Any:
     """
     Execute the provided query and return the first value.
 
@@ -424,18 +425,22 @@ def value(
     if use_pandas:
         try:
             return self.read(query, *args, **kw).iloc[0, 0]
-        except Exception as e:
+        except Exception:
             return None
 
     _close = kw.get('close', True)
     _commit = kw.get('commit', (self.flavor != 'mssql'))
+
+    # _close = True
+    # _commit = True
+
     try:
         result, connection = self.exec(
             query,
             *args,
-            with_connection = True,
-            close = False,
-            commit = _commit,
+            with_connection=True,
+            close=False,
+            commit=_commit,
             **kw
         )
         first = result.first() if result is not None else None
@@ -452,10 +457,10 @@ def value(
 
 
 def execute(
-        self,
-        *args : Any,
-        **kw : Any
-    ) -> Optional[sqlalchemy.engine.result.resultProxy]:
+    self,
+    *args : Any,
+    **kw : Any
+) -> Optional[sqlalchemy.engine.result.resultProxy]:
     """
     An alias for `meerschaum.connectors.sql.SQLConnector.exec`.
     """
@@ -463,22 +468,22 @@ def execute(
 
 
 def exec(
-        self,
-        query: str,
-        *args: Any,
-        silent: bool = False,
-        debug: bool = False,
-        commit: Optional[bool] = None,
-        close: Optional[bool] = None,
-        with_connection: bool = False,
-        **kw: Any
-    ) -> Union[
-        sqlalchemy.engine.result.resultProxy,
-        sqlalchemy.engine.cursor.LegacyCursorResult,
-        Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
-        Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
-        None
-    ]:
+    self,
+    query: str,
+    *args: Any,
+    silent: bool = False,
+    debug: bool = False,
+    commit: Optional[bool] = None,
+    close: Optional[bool] = None,
+    with_connection: bool = False,
+    **kw: Any
+) -> Union[
+    sqlalchemy.engine.result.resultProxy,
+    sqlalchemy.engine.cursor.LegacyCursorResult,
+    Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
+    Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
+    None
+]:
     """
     Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
 
@@ -492,7 +497,7 @@ def exec(
 
     args: Any
         Arguments passed to `sqlalchemy.engine.execute`.
-
+
     silent: bool, default False
         If `True`, suppress warnings.
 
@@ -509,7 +514,7 @@ def exec(
     with_connection: bool, default False
         If `True`, return a tuple including the connection object.
         This does not apply if `query` is a list of strings.
-
+
     Returns
     -------
     The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided.
@@ -519,8 +524,8 @@ def exec(
         return self.exec_queries(
             list(query),
             *args,
-            silent = silent,
-            debug = debug,
+            silent=silent,
+            debug=debug,
             **kw
         )
 
@@ -538,8 +543,19 @@ def exec(
     if not hasattr(query, 'compile'):
         query = sqlalchemy.text(query)
 
-    connection = self.engine.connect()
-    transaction = connection.begin() if _commit else None
+    connection = self.get_connection()
+
+    try:
+        transaction = connection.begin() if _commit else None
+    except sqlalchemy.exc.InvalidRequestError:
+        connection = self.get_connection(rebuild=True)
+        transaction = connection.begin()
+
+    if transaction is not None and not transaction.is_active:
+        connection = self.get_connection(rebuild=True)
+        transaction = connection.begin() if _commit else None
+
+    result = None
     try:
         result = connection.execute(query, *args, **kw)
         if _commit:
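
The rewritten `exec` now draws its connection from a per-thread cache (via the new `get_connection` method shown further down) and rebuilds it whenever a transaction cannot be started. A condensed sketch of that begin-with-rebuild pattern, assuming only a SQLAlchemy engine and a `get_connection` callable like the one this diff adds:

    import sqlalchemy

    def begin_with_rebuild(get_connection, commit: bool = True):
        """Begin a transaction, rebuilding the cached connection if it is stale."""
        connection = get_connection()
        try:
            transaction = connection.begin() if commit else None
        except sqlalchemy.exc.InvalidRequestError:
            # A transaction is already open (or the connection is invalid): start fresh.
            connection = get_connection(rebuild=True)
            transaction = connection.begin()
        if transaction is not None and not transaction.is_active:
            # The transaction began but is already dead: rebuild once more.
            connection = get_connection(rebuild=True)
            transaction = connection.begin() if commit else None
        return connection, transaction
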
@@ -552,29 +568,30 @@ def exec(
         result = None
         if _commit:
             transaction.rollback()
+            connection = self.get_connection(rebuild=True)
     finally:
         if _close:
             connection.close()
 
-        if with_connection:
-            return result, connection
+    if with_connection:
+        return result, connection
 
     return result
 
 
 def exec_queries(
-        self,
-        queries: List[
-            Union[
-                str,
-                Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
-            ]
-        ],
-        break_on_error: bool = False,
-        rollback: bool = True,
-        silent: bool = False,
-        debug: bool = False,
-    ) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+    self,
+    queries: List[
+        Union[
+            str,
+            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
+        ]
+    ],
+    break_on_error: bool = False,
+    rollback: bool = True,
+    silent: bool = False,
+    debug: bool = False,
+) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
     """
     Execute a list of queries in a single transaction.
 
@@ -624,7 +641,7 @@ def exec_queries(
 
             if debug:
                 dprint(f"[{self}]\n" + str(query))
-
+
             try:
                 result = session.execute(query)
                 session.flush()
@@ -645,9 +662,9 @@ def exec_queries(
                 hook_results = self.exec_queries(
                     hook_queries,
                     break_on_error = break_on_error,
-                    rollback = rollback,
-                    silent = silent,
-                    debug = debug,
+                    rollback=rollback,
+                    silent=silent,
+                    debug=debug,
                 )
                 result = (result, hook_results)
 
@@ -657,20 +674,20 @@ def exec_queries(
 
 
 def to_sql(
-        self,
-        df: pandas.DataFrame,
-        name: str = None,
-        index: bool = False,
-        if_exists: str = 'replace',
-        method: str = "",
-        chunksize: Optional[int] = -1,
-        schema: Optional[str] = None,
-        silent: bool = False,
-        debug: bool = False,
-        as_tuple: bool = False,
-        as_dict: bool = False,
-        **kw
-    ) -> Union[bool, SuccessTuple]:
+    self,
+    df: pandas.DataFrame,
+    name: str = None,
+    index: bool = False,
+    if_exists: str = 'replace',
+    method: str = "",
+    chunksize: Optional[int] = -1,
+    schema: Optional[str] = None,
+    silent: bool = False,
+    debug: bool = False,
+    as_tuple: bool = False,
+    as_dict: bool = False,
+    **kw
+) -> Union[bool, SuccessTuple]:
     """
     Upload a DataFrame's contents to the SQL server.
 
@@ -708,7 +725,7 @@ def to_sql(
         If `True`, return a dictionary of transaction information.
         The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`,
         `method`, and `target`.
-
+
     kw: Any
         Additional arguments will be passed to the DataFrame's `to_sql` function
 
@@ -737,9 +754,12 @@ def to_sql(
         json_flavors,
         truncate_item_name,
     )
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
+    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
-    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS
+    from meerschaum.utils.dtypes.sql import (
+        NUMERIC_PRECISION_FLAVORS,
+        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
+    )
     from meerschaum.connectors.sql._create_engine import flavor_configs
     from meerschaum.utils.packages import attempt_import, import_pandas
     sqlalchemy = attempt_import('sqlalchemy', debug=debug)
@@ -859,6 +879,11 @@ def to_sql(
             )
         )
 
+    if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid':
+        uuid_cols = get_uuid_cols(df)
+        for col in uuid_cols:
+            df[col] = df[col].astype(str)
+
     try:
         with warnings.catch_warnings():
             warnings.filterwarnings('ignore', 'case sensitivity issues')
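
The new block above downcasts UUID columns to strings on flavors whose `uuid` dtype does not map to SQLAlchemy's `Uuid` type. A self-contained illustration of the same cast (the column name `id` is made up):

    import uuid
    import pandas as pd

    df = pd.DataFrame({'id': [uuid.uuid4(), uuid.uuid4()]})
    print(df['id'].dtype)    # object (uuid.UUID instances)

    # Flavors without a native Uuid type receive plain strings instead.
    df['id'] = df['id'].astype(str)
    print(df['id'].iloc[0])  # e.g. '3f2a...'
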
@@ -890,12 +915,12 @@ def to_sql(
 
 
 def psql_insert_copy(
-        table: pandas.io.sql.SQLTable,
-        conn: Union[sqlalchemy.engine.Engine, sqlalchemy.engine.Connection],
-        keys: List[str],
-        data_iter: Iterable[Any],
-        schema: Optional[str] = None,
-    ) -> None:
+    table: pandas.io.sql.SQLTable,
+    conn: Union[sqlalchemy.engine.Engine, sqlalchemy.engine.Connection],
+    keys: List[str],
+    data_iter: Iterable[Any],
+    schema: Optional[str] = None,
+) -> None:
     """
     Execute SQL statement inserting data for PostgreSQL.
 
@@ -981,8 +1006,71 @@ def format_sql_query_for_dask(query: str) -> 'sqlalchemy.sql.selectable.Select':
     sqlalchemy_sql = attempt_import("sqlalchemy.sql")
     select, text = sqlalchemy_sql.select, sqlalchemy_sql.text
 
-    parts = query.rsplit('ORDER BY', maxsplit=1)
     meta_query = f"SELECT * FROM (\n{query}\n) AS s"
-    # if parts[1]:
-    #     meta_query += "\nORDER BY " + parts[1]
     return select(text(_remove_leading_select(meta_query)))
+
+
+def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
+    """
+    Return the current alive connection.
+
+    Parameters
+    ----------
+    rebuild: bool, default False
+        If `True`, close the previous connection and open a new one.
+
+    Returns
+    -------
+    A `sqlalchemy.engine.base.Connection` object.
+    """
+    import threading
+    if '_thread_connections' not in self.__dict__:
+        self.__dict__['_thread_connections'] = {}
+
+    self._cleanup_connections()
+
+    thread_id = threading.get_ident()
+
+    thread_connections = self.__dict__.get('_thread_connections', {})
+    connection = thread_connections.get(thread_id, None)
+
+    if rebuild and connection is not None:
+        try:
+            connection.close()
+        except Exception:
+            pass
+
+        _ = thread_connections.pop(thread_id, None)
+        connection = None
+
+    if connection is None or connection.closed:
+        connection = self.engine.connect()
+        thread_connections[thread_id] = connection
+
+    return connection
+
+
+def _cleanup_connections(self) -> None:
+    """
+    Remove connections for inactive threads.
+    """
+    import threading
+    thread_connections = self.__dict__.get('_thread_connections', None)
+    if not thread_connections:
+        return
+    thread_ids = set(thread_connections)
+    active_threads = [
+        thread
+        for thread in threading.enumerate()
+        if thread.ident in thread_ids
+    ]
+    active_thread_ids = {thread.ident for thread in active_threads}
+    inactive_thread_ids = thread_ids - active_thread_ids
+    for thread_id in inactive_thread_ids:
+        connection = thread_connections.pop(thread_id, None)
+        if connection is None:
+            continue
+        try:
+            connection.close()
+        except Exception:
+            pass
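
Together, `get_connection` and `_cleanup_connections` implement one cached connection per thread, reaped when the owning thread exits. A condensed standalone sketch of the caching half (the in-memory SQLite engine is illustrative only):

    import threading
    import sqlalchemy

    engine = sqlalchemy.create_engine('sqlite://')  # illustrative engine
    _thread_connections = {}

    def get_connection(rebuild: bool = False):
        """Return (and cache) one connection per calling thread."""
        thread_id = threading.get_ident()
        connection = _thread_connections.get(thread_id, None)
        if rebuild and connection is not None:
            try:
                connection.close()
            except Exception:
                pass
            _thread_connections.pop(thread_id, None)
            connection = None
        if connection is None or connection.closed:
            connection = engine.connect()
            _thread_connections[thread_id] = connection
        return connection
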
meerschaum/connectors/valkey/_ValkeyConnector.py
@@ -202,11 +202,8 @@ class ValkeyConnector(Connector):
         -------
         The current index counter value (how many docs have been pushed).
         """
-        docs_str = df.to_json(
-            date_format='iso',
-            orient='records',
-            date_unit='us',
-        )
+        from meerschaum.utils.dataframe import to_json
+        docs_str = to_json(df)
         docs = json.loads(docs_str)
         return self.push_docs(
             docs,
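
The inlined `DataFrame.to_json` call is replaced with the shared `to_json` helper from `meerschaum.utils.dataframe` (its body is not part of this diff). Judging from the removed lines, an equivalent serialization would be:

    import pandas as pd

    def to_json_equivalent(df: pd.DataFrame) -> str:
        # The same arguments the removed inline call passed to DataFrame.to_json().
        return df.to_json(date_format='iso', orient='records', date_unit='us')
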
meerschaum/core/Pipe/__init__.py
@@ -123,6 +123,7 @@ class Pipe:
         get_num_workers,
         _persist_new_json_columns,
         _persist_new_numeric_columns,
+        _persist_new_uuid_columns,
     )
     from ._verify import (
         verify,
meerschaum/core/Pipe/_attributes.py
@@ -298,7 +298,7 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:
                 break
     if not candidates:
         if debug:
-            dprint(f"No value column could be determined.")
+            dprint("No value column could be determined.")
         return None
 
     return candidates[0]
meerschaum/core/Pipe/_data.py
@@ -574,10 +574,10 @@ def get_rowcount(
 
 
 def get_chunk_interval(
-        self,
-        chunk_interval: Union[timedelta, int, None] = None,
-        debug: bool = False,
-    ) -> Union[timedelta, int]:
+    self,
+    chunk_interval: Union[timedelta, int, None] = None,
+    debug: bool = False,
+) -> Union[timedelta, int]:
     """
     Get the chunk interval to use for this pipe.
 
@@ -615,18 +615,18 @@ def get_chunk_interval(
 
 
 def get_chunk_bounds(
-        self,
-        begin: Union[datetime, int, None] = None,
-        end: Union[datetime, int, None] = None,
-        bounded: bool = False,
-        chunk_interval: Union[timedelta, int, None] = None,
-        debug: bool = False,
-    ) -> List[
-        Tuple[
-            Union[datetime, int, None],
-            Union[datetime, int, None],
-        ]
-    ]:
+    self,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    bounded: bool = False,
+    chunk_interval: Union[timedelta, int, None] = None,
+    debug: bool = False,
+) -> List[
+    Tuple[
+        Union[datetime, int, None],
+        Union[datetime, int, None],
+    ]
+]:
     """
     Return a list of datetime bounds for iterating over the pipe's `datetime` axis.
 
meerschaum/core/Pipe/_deduplicate.py
@@ -12,17 +12,17 @@ from meerschaum.utils.typing import SuccessTuple, Any, Optional, Dict, Tuple, Union
 
 
 def deduplicate(
-        self,
-        begin: Union[datetime, int, None] = None,
-        end: Union[datetime, int, None] = None,
-        params: Optional[Dict[str, Any]] = None,
-        chunk_interval: Union[datetime, int, None] = None,
-        bounded: Optional[bool] = None,
-        workers: Optional[int] = None,
-        debug: bool = False,
-        _use_instance_method: bool = True,
-        **kwargs: Any
-    ) -> SuccessTuple:
+    self,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    chunk_interval: Union[datetime, int, None] = None,
+    bounded: Optional[bool] = None,
+    workers: Optional[int] = None,
+    debug: bool = False,
+    _use_instance_method: bool = True,
+    **kwargs: Any
+) -> SuccessTuple:
     """
     Call the Pipe's instance connector's `delete_duplicates` method to delete duplicate rows.
 
@@ -158,10 +158,10 @@ def deduplicate(
         chunk_msg_body = ""
 
         full_chunk = self.get_data(
-            begin = chunk_begin,
-            end = chunk_end,
-            params = params,
-            debug = debug,
+            begin=chunk_begin,
+            end=chunk_end,
+            params=params,
+            debug=debug,
         )
         if full_chunk is None or len(full_chunk) == 0:
             return bounds, (True, f"{chunk_msg_header}\nChunk is empty, skipping...")
@@ -171,10 +171,10 @@ def deduplicate(
             return bounds, (False, f"None of {items_str(indices)} were present in chunk.")
         try:
             full_chunk = full_chunk.drop_duplicates(
-                subset = chunk_indices,
-                keep = 'last'
+                subset=chunk_indices,
+                keep='last'
             ).reset_index(
-                drop = True,
+                drop=True,
             )
         except Exception as e:
             return (
@@ -183,10 +183,10 @@ def deduplicate(
             )
 
         clear_success, clear_msg = self.clear(
-            begin = chunk_begin,
-            end = chunk_end,
-            params = params,
-            debug = debug,
+            begin=chunk_begin,
+            end=chunk_end,
+            params=params,
+            debug=debug,
         )
         if not clear_success:
             chunk_msg_body += f"Failed to clear chunk while deduplicating:\n{clear_msg}\n"
@@ -195,13 +195,13 @@ def deduplicate(
         sync_success, sync_msg = self.sync(full_chunk, debug=debug)
         if not sync_success:
             chunk_msg_body += f"Failed to sync chunk while deduplicating:\n{sync_msg}\n"
-
+
         ### Finally check if the deduplication worked.
         chunk_rowcount = self.get_rowcount(
-            begin = chunk_begin,
-            end = chunk_end,
-            params = params,
-            debug = debug,
+            begin=chunk_begin,
+            end=chunk_end,
+            params=params,
+            debug=debug,
        )
         if chunk_rowcount != deduped_chunk_len:
             return bounds, (
meerschaum/core/Pipe/_sync.py
@@ -367,9 +367,10 @@ def sync(
         ### Cast to a dataframe and ensure datatypes are what we expect.
         df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
 
-        ### Capture `numeric` and `json` columns.
+        ### Capture `numeric`, `uuid`, and `json` columns.
         self._persist_new_json_columns(df, debug=debug)
         self._persist_new_numeric_columns(df, debug=debug)
+        self._persist_new_uuid_columns(df, debug=debug)
 
         if debug:
             dprint(
@@ -928,6 +929,30 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
     return True, "Success"
 
 
+def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
+    """
+    Check for new UUID columns and update the parameters.
+    """
+    from meerschaum.utils.dataframe import get_uuid_cols
+    uuid_cols = get_uuid_cols(df)
+    existing_uuid_cols = [col for col, typ in self.dtypes.items() if typ == 'uuid']
+    new_uuid_cols = [col for col in uuid_cols if col not in existing_uuid_cols]
+    if not new_uuid_cols:
+        return True, "Success"
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({col: 'uuid' for col in uuid_cols})
+    self.parameters['dtypes'] = dtypes
+    if not self.temporary:
+        edit_success, edit_msg = self.edit(interactive=False, debug=debug)
+        if not edit_success:
+            warn(f"Unable to update UUID dtypes for {self}:\n{edit_msg}")
+
+        return edit_success, edit_msg
+
+    return True, "Success"
+
+
 def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
     """
     Check for new JSON columns and update the parameters.
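
`get_uuid_cols` is imported here but not shown in this diff. A plausible stand-in, assuming it flags object columns whose non-null values are all `uuid.UUID` instances:

    import uuid
    import pandas as pd

    def get_uuid_cols_sketch(df: pd.DataFrame) -> list:
        """Hypothetical detector for UUID-typed columns."""
        return [
            col
            for col in df.columns
            if len(df[col].dropna()) > 0
            and all(isinstance(val, uuid.UUID) for val in df[col].dropna())
        ]
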
meerschaum/core/Pipe/_verify.py
@@ -281,9 +281,9 @@ def verify(
 
 
 def get_chunks_success_message(
-        chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
-        header: str = '',
-    ) -> str:
+    chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
+    header: str = '',
+) -> str:
     """
     Sum together all of the inserts and updates from the chunks.
 
@@ -323,8 +323,8 @@ def get_chunks_success_message(
             + ([f'updated {num_updated}'] if num_updated else [])
             + ([f'upserted {num_upserted}'] if num_upserted else [])
         ) or ['synced 0'],
-        quotes = False,
-        and_ = False,
+        quotes=False,
+        and_=False,
     )
 
     success_msg = (