meerschaum 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. meerschaum/_internal/docs/index.py +1 -0
  2. meerschaum/actions/show.py +2 -1
  3. meerschaum/actions/sql.py +11 -11
  4. meerschaum/api/dash/pipes.py +4 -2
  5. meerschaum/api/routes/_pipes.py +3 -8
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/connectors/api/_pipes.py +4 -4
  8. meerschaum/connectors/sql/_SQLConnector.py +12 -2
  9. meerschaum/connectors/sql/_create_engine.py +13 -6
  10. meerschaum/connectors/sql/_pipes.py +81 -65
  11. meerschaum/connectors/sql/_sql.py +194 -106
  12. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -5
  13. meerschaum/core/Pipe/__init__.py +1 -0
  14. meerschaum/core/Pipe/_attributes.py +1 -1
  15. meerschaum/core/Pipe/_data.py +16 -16
  16. meerschaum/core/Pipe/_deduplicate.py +27 -27
  17. meerschaum/core/Pipe/_sync.py +26 -1
  18. meerschaum/core/Pipe/_verify.py +5 -5
  19. meerschaum/utils/dataframe.py +127 -8
  20. meerschaum/utils/dtypes/__init__.py +26 -4
  21. meerschaum/utils/dtypes/sql.py +30 -0
  22. meerschaum/utils/misc.py +1 -1
  23. meerschaum/utils/sql.py +100 -64
  24. meerschaum/utils/yaml.py +3 -6
  25. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/METADATA +1 -1
  26. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/RECORD +32 -32
  27. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/LICENSE +0 -0
  28. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/NOTICE +0 -0
  29. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/WHEEL +0 -0
  30. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/entry_points.txt +0 -0
  31. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/top_level.txt +0 -0
  32. {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/zip-safe +0 -0
@@ -138,12 +138,14 @@ def filter_unseen_df(
138
138
  import functools
139
139
  import traceback
140
140
  from decimal import Decimal
141
+ from uuid import UUID
141
142
  from meerschaum.utils.warnings import warn
142
143
  from meerschaum.utils.packages import import_pandas, attempt_import
143
144
  from meerschaum.utils.dtypes import (
144
145
  to_pandas_dtype,
145
146
  are_dtypes_equal,
146
147
  attempt_cast_to_numeric,
148
+ attempt_cast_to_uuid,
147
149
  coerce_timezone,
148
150
  )
149
151
  pd = import_pandas(debug=debug)
@@ -299,6 +301,18 @@ def filter_unseen_df(
299
301
  lambda x: f'{x:f}' if isinstance(x, Decimal) else x
300
302
  )
301
303
 
304
+ old_uuid_cols = get_uuid_cols(old_df)
305
+ new_uuid_cols = get_uuid_cols(new_df)
306
+ uuid_cols = set(new_uuid_cols + old_uuid_cols)
307
+ for uuid_col in old_uuid_cols:
308
+ old_df[uuid_col] = old_df[uuid_col].apply(
309
+ lambda x: f'{x}' if isinstance(x, UUID) else x
310
+ )
311
+ for uuid_col in new_uuid_cols:
312
+ new_df[uuid_col] = new_df[uuid_col].apply(
313
+ lambda x: f'{x}' if isinstance(x, UUID) else x
314
+ )
315
+
302
316
  joined_df = merge(
303
317
  new_df.fillna(NA),
304
318
  old_df.fillna(NA),
@@ -326,6 +340,14 @@ def filter_unseen_df(
326
340
  except Exception:
327
341
  warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
328
342
 
343
+ for uuid_col in uuid_cols:
344
+ if uuid_col not in delta_df.columns:
345
+ continue
346
+ try:
347
+ delta_df[uuid_col] = delta_df[uuid_col].apply(attempt_cast_to_uuid)
348
+ except Exception:
349
+ warn(f"Unable to parse numeric column '{uuid_col}':\n{traceback.format_exc()}")
350
+
329
351
  return delta_df
330
352
 
331
353
 
@@ -575,7 +597,7 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
575
597
  is_dask = 'dask' in df.__module__
576
598
  if is_dask:
577
599
  df = get_first_valid_dask_partition(df)
578
-
600
+
579
601
  if len(df) == 0:
580
602
  return []
581
603
 
@@ -594,6 +616,42 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
594
616
  ]
595
617
 
596
618
 
619
def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
    """
    Get the columns which contain `uuid.UUID` objects from a Pandas DataFrame.

    A column is reported when its first non-null value is a `uuid.UUID` instance;
    later values in the column are not inspected.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain UUID objects.
        If the DataFrame comes from a `dask` module, the result of
        `get_first_valid_dask_partition()` is inspected instead.

    Returns
    -------
    A list of columns to treat as UUIDs.
    """
    from uuid import UUID
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    if len(df) == 0:
        return []

    uuid_cols = []
    for col in df.columns:
        series = df[col]
        ### Duplicate column labels yield a DataFrame rather than a Series; skip them.
        if getattr(series, 'ndim', 1) != 1:
            continue

        valid_mask = series.notna().to_numpy()
        if not valid_mask.any():
            continue

        ### NOTE: Look up the first non-null value by position, not by label.
        ###       A label lookup returns a Series when index labels repeat,
        ###       which would hide the UUID from the `isinstance` check.
        first_valid_value = series.iloc[int(valid_mask.argmax())]
        if isinstance(first_valid_value, UUID):
            uuid_cols.append(col)

    return uuid_cols
653
+
654
+
597
655
  def enforce_dtypes(
598
656
  df: 'pd.DataFrame',
599
657
  dtypes: Dict[str, str],
@@ -640,11 +698,11 @@ def enforce_dtypes(
640
698
  to_pandas_dtype,
641
699
  is_dtype_numeric,
642
700
  attempt_cast_to_numeric,
701
+ attempt_cast_to_uuid,
643
702
  )
644
703
  if safe_copy:
645
704
  df = df.copy()
646
- df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
647
- if len(df_dtypes) == 0:
705
+ if len(df.columns) == 0:
648
706
  if debug:
649
707
  dprint("Incoming DataFrame has no columns. Skipping enforcement...")
650
708
  return df
@@ -663,12 +721,17 @@ def enforce_dtypes(
663
721
  for col, typ in dtypes.items()
664
722
  if typ == 'numeric'
665
723
  ]
724
+ uuid_cols = [
725
+ col
726
+ for col, typ in dtypes.items()
727
+ if typ == 'uuid'
728
+ ]
666
729
  df_numeric_cols = get_numeric_cols(df)
667
730
  if debug:
668
731
  dprint("Desired data types:")
669
732
  pprint(dtypes)
670
733
  dprint("Data types for incoming DataFrame:")
671
- pprint(df_dtypes)
734
+ pprint({_col: str(_typ) for _col, _typ in df.dtypes.items()})
672
735
 
673
736
  if json_cols and len(df) > 0:
674
737
  if debug:
@@ -700,9 +763,21 @@ def enforce_dtypes(
700
763
  if debug:
701
764
  dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
702
765
 
766
+ if uuid_cols:
767
+ if debug:
768
+ dprint(f"Checking for UUIDs: {uuid_cols}")
769
+ for col in uuid_cols:
770
+ if col in df.columns:
771
+ try:
772
+ df[col] = df[col].apply(attempt_cast_to_uuid)
773
+ except Exception as e:
774
+ if debug:
775
+ dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
776
+
777
+ df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
703
778
  if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
704
779
  if debug:
705
- dprint(f"Data types match. Exiting enforcement...")
780
+ dprint("Data types match. Exiting enforcement...")
706
781
  return df
707
782
 
708
783
  common_dtypes = {}
@@ -714,7 +789,7 @@ def enforce_dtypes(
714
789
  common_diff_dtypes[col] = df_dtypes[col]
715
790
 
716
791
  if debug:
717
- dprint(f"Common columns with different dtypes:")
792
+ dprint("Common columns with different dtypes:")
718
793
  pprint(common_diff_dtypes)
719
794
 
720
795
  detected_dt_cols = {}
@@ -726,7 +801,7 @@ def enforce_dtypes(
726
801
  del common_diff_dtypes[col]
727
802
 
728
803
  if debug:
729
- dprint(f"Common columns with different dtypes (after dates):")
804
+ dprint("Common columns with different dtypes (after dates):")
730
805
  pprint(common_diff_dtypes)
731
806
 
732
807
  if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
@@ -1231,5 +1306,49 @@ def query_df(
1231
1306
 
1232
1307
  _process_select_columns(result_df)
1233
1308
  _process_omit_columns(result_df)
1234
-
1309
+
1235
1310
  return result_df
1311
+
1312
+
1313
def to_json(
    df: 'pd.DataFrame',
    safe_copy: bool = True,
    orient: str = 'records',
    date_format: str = 'iso',
    date_unit: str = 'us',
    **kwargs: Any
) -> str:
    """
    Serialize the given dataframe as a JSON string.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to be serialized.

    safe_copy: bool, default True
        If `True`, copy the DataFrame before stringifying its UUID columns.
        If `False`, the UUID columns are overwritten on the provided DataFrame.

    orient: str, default 'records'
        The JSON layout, passed through to `DataFrame.to_json()`.

    date_format: str, default 'iso'
        The default format for timestamps.

    date_unit: str, default 'us'
        The precision of the timestamps.

    **kwargs: Any
        Additional keyword arguments passed through to `DataFrame.to_json()`.

    Returns
    -------
    A JSON string.
    """
    from uuid import UUID
    from meerschaum.utils.packages import import_pandas
    pd = import_pandas()
    uuid_cols = get_uuid_cols(df)
    if uuid_cols and safe_copy:
        df = df.copy()
    for col in uuid_cols:
        ### NOTE: Only stringify actual UUID objects so that nulls remain null.
        ###       `astype(str)` would turn `None` into the literal string 'None'.
        df[col] = df[col].apply(
            lambda x: f'{x}' if isinstance(x, UUID) else x
        )
    return df.fillna(pd.NA).to_json(
        date_format=date_format,
        date_unit=date_unit,
        orient=orient,
        **kwargs
    )
@@ -7,6 +7,7 @@ Utility functions for working with data types.
7
7
  """
8
8
 
9
9
  import traceback
10
+ import uuid
10
11
  from datetime import timezone
11
12
  from decimal import Decimal, Context, InvalidOperation
12
13
 
@@ -17,6 +18,7 @@ from meerschaum.utils.warnings import warn
17
18
  MRSM_PD_DTYPES: Dict[str, str] = {
18
19
  'json': 'object',
19
20
  'numeric': 'object',
21
+ 'uuid': 'object',
20
22
  'datetime': 'datetime64[ns]',
21
23
  'bool': 'bool[pyarrow]',
22
24
  'int': 'Int64',
@@ -51,8 +53,8 @@ def to_pandas_dtype(dtype: str) -> str:
51
53
  warn(
52
54
  f"Invalid dtype '{dtype}', will use 'object' instead:\n"
53
55
  + f"{traceback.format_exc()}",
54
- stack = False,
55
- )
56
+ stack=False,
57
+ )
56
58
  return 'object'
57
59
 
58
60
 
@@ -109,8 +111,12 @@ def are_dtypes_equal(
109
111
  if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
110
112
  return True
111
113
 
112
- ldtype_clean = ldtype.split('[')[0]
113
- rdtype_clean = rdtype.split('[')[0]
114
+ uuid_dtypes = ('uuid', 'object')
115
+ if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
116
+ return True
117
+
118
+ ldtype_clean = ldtype.split('[', maxsplit=1)[0]
119
+ rdtype_clean = rdtype.split('[', maxsplit=1)[0]
114
120
 
115
121
  if ldtype_clean.lower() == rdtype_clean.lower():
116
122
  return True
@@ -183,6 +189,22 @@ def attempt_cast_to_numeric(value: Any) -> Any:
183
189
  return value
184
190
 
185
191
 
192
def attempt_cast_to_uuid(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a `uuid.UUID`.

    Parameters
    ----------
    value: Any
        The value to be coerced. Existing `uuid.UUID` objects are returned as-is.

    Returns
    -------
    A `uuid.UUID` parsed from `str(value)`,
    `None` if `value_is_null(value)` is true,
    or the original value if it cannot be parsed.
    """
    if isinstance(value, uuid.UUID):
        return value

    try:
        if value_is_null(value):
            return None
        return uuid.UUID(str(value))
    except Exception:
        ### Best-effort cast: leave unparsable values untouched.
        return value
206
+
207
+
186
208
  def value_is_null(value: Any) -> bool:
187
209
  """
188
210
  Determine if a value is a null-like string.
@@ -55,6 +55,7 @@ DB_FLAVORS_CAST_DTYPES = {
55
55
  'NVARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)',
56
56
  'VARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)',
57
57
  'VARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)',
58
+ 'NVARCHAR': 'NVARCHAR(MAX)',
58
59
  },
59
60
  }
60
61
  for _flavor, (_precision, _scale) in NUMERIC_PRECISION_FLAVORS.items():
@@ -92,6 +93,8 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
92
93
  'BIT(1)': 'bool[pyarrow]',
93
94
  'JSON': 'json',
94
95
  'JSONB': 'json',
96
+ 'UUID': 'uuid',
97
+ 'UNIQUEIDENTIFIER': 'uuid',
95
98
  'substrings': {
96
99
  'CHAR': 'string[pyarrow]',
97
100
  'TIMESTAMP': 'datetime64[ns]',
@@ -239,6 +242,20 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
239
242
  'cockroachdb': 'NUMERIC',
240
243
  'default': 'NUMERIC',
241
244
  },
245
+ 'uuid': {
246
+ 'timescaledb': 'UUID',
247
+ 'postgresql': 'UUID',
248
+ 'mariadb': 'CHAR(32)',
249
+ 'mysql': 'CHAR(32)',
250
+ 'mssql': 'UNIQUEIDENTIFIER',
251
+ ### I know this is too much space, but erring on the side of caution.
252
+ 'oracle': 'NVARCHAR(2000)',
253
+ 'sqlite': 'TEXT',
254
+ 'duckdb': 'UUID',
255
+ 'citus': 'UUID',
256
+ 'cockroachdb': 'UUID',
257
+ 'default': 'TEXT',
258
+ },
242
259
  }
243
260
  PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
244
261
  'int': {
@@ -358,6 +375,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
358
375
  'cockroachdb': 'Numeric',
359
376
  'default': 'Numeric',
360
377
  },
378
+ 'uuid': {
379
+ 'timescaledb': 'Uuid',
380
+ 'postgresql': 'Uuid',
381
+ 'mariadb': 'Uuid',
382
+ 'mysql': 'Uuid',
383
+ 'mssql': 'Uuid',
384
+ 'oracle': 'UnicodeText',
385
+ 'sqlite': 'Uuid',
386
+ 'duckdb': 'Uuid',
387
+ 'citus': 'Uuid',
388
+ 'cockroachdb': 'Uuid',
389
+ 'default': 'Uuid',
390
+ },
361
391
  }
362
392
 
363
393
 
meerschaum/utils/misc.py CHANGED
@@ -959,7 +959,7 @@ def get_connector_labels(
959
959
  def json_serialize_datetime(dt: datetime) -> Union[str, None]:
960
960
  """
961
961
  Serialize a datetime object into JSON (ISO format string).
962
-
962
+
963
963
  Examples
964
964
  --------
965
965
  >>> import json
meerschaum/utils/sql.py CHANGED
@@ -107,6 +107,16 @@ update_queries = {
107
107
  UPDATE
108
108
  {sets_subquery_none};
109
109
  """,
110
+ 'mssql-upsert': """
111
+ MERGE {target_table_name} f
112
+ USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
113
+ ON {and_subquery_f}
114
+ AND {date_bounds_subquery}
115
+ {when_matched_update_sets_subquery_none}
116
+ WHEN NOT MATCHED THEN
117
+ INSERT ({patch_cols_str})
118
+ VALUES ({patch_cols_prefixed_str});
119
+ """,
110
120
  'oracle': """
111
121
  MERGE INTO {target_table_name} f
112
122
  USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
@@ -172,8 +182,9 @@ columns_types_queries = {
172
182
  TABLE_NAME AS [table],
173
183
  COLUMN_NAME AS [column],
174
184
  DATA_TYPE AS [type]
175
- FROM INFORMATION_SCHEMA.COLUMNS
176
- WHERE TABLE_NAME IN ('{table}', '{table_trunc}')
185
+ FROM {db_prefix}INFORMATION_SCHEMA.COLUMNS
186
+ WHERE TABLE_NAME LIKE '{table}%'
187
+ OR TABLE_NAME LIKE '{table_trunc}%'
177
188
  """,
178
189
  'mysql': """
179
190
  SELECT
@@ -182,7 +193,7 @@ columns_types_queries = {
182
193
  TABLE_NAME `table`,
183
194
  COLUMN_NAME `column`,
184
195
  DATA_TYPE `type`
185
- FROM INFORMATION_SCHEMA.COLUMNS
196
+ FROM INFORMATION_SCHEMA.COLUMNS
186
197
  WHERE TABLE_NAME IN ('{table}', '{table_trunc}')
187
198
  """,
188
199
  'mariadb': """
@@ -192,7 +203,7 @@ columns_types_queries = {
192
203
  TABLE_NAME `table`,
193
204
  COLUMN_NAME `column`,
194
205
  DATA_TYPE `type`
195
- FROM INFORMATION_SCHEMA.COLUMNS
206
+ FROM INFORMATION_SCHEMA.COLUMNS
196
207
  WHERE TABLE_NAME IN ('{table}', '{table_trunc}')
197
208
  """,
198
209
  'oracle': """
@@ -273,11 +284,11 @@ def clean(substring: str) -> str:
273
284
 
274
285
 
275
286
  def dateadd_str(
276
- flavor: str = 'postgresql',
277
- datepart: str = 'day',
278
- number: Union[int, float] = 0,
279
- begin: Union[str, datetime, int] = 'now'
280
- ) -> str:
287
+ flavor: str = 'postgresql',
288
+ datepart: str = 'day',
289
+ number: Union[int, float] = 0,
290
+ begin: Union[str, datetime, int] = 'now'
291
+ ) -> str:
281
292
  """
282
293
  Generate a `DATEADD` clause depending on database flavor.
283
294
 
@@ -843,17 +854,17 @@ def get_sqlalchemy_table(
843
854
 
844
855
 
845
856
  def get_table_cols_types(
846
- table: str,
847
- connectable: Union[
848
- 'mrsm.connectors.sql.SQLConnector',
849
- 'sqlalchemy.orm.session.Session',
850
- 'sqlalchemy.engine.base.Engine'
851
- ],
852
- flavor: Optional[str] = None,
853
- schema: Optional[str] = None,
854
- database: Optional[str] = None,
855
- debug: bool = False,
856
- ) -> Dict[str, str]:
857
+ table: str,
858
+ connectable: Union[
859
+ 'mrsm.connectors.sql.SQLConnector',
860
+ 'sqlalchemy.orm.session.Session',
861
+ 'sqlalchemy.engine.base.Engine'
862
+ ],
863
+ flavor: Optional[str] = None,
864
+ schema: Optional[str] = None,
865
+ database: Optional[str] = None,
866
+ debug: bool = False,
867
+ ) -> Dict[str, str]:
857
868
  """
858
869
  Return a dictionary mapping a table's columns to data types.
859
870
  This is useful for inspecting tables creating during a not-yet-committed session.
@@ -889,13 +900,12 @@ def get_table_cols_types(
889
900
  A dictionary mapping column names to data types.
890
901
  """
891
902
  from meerschaum.connectors import SQLConnector
892
- from meerschaum.utils.misc import filter_keywords
893
903
  sqlalchemy = mrsm.attempt_import('sqlalchemy')
894
904
  flavor = flavor or getattr(connectable, 'flavor', None)
895
905
  if not flavor:
896
- raise ValueError(f"Please provide a database flavor.")
906
+ raise ValueError("Please provide a database flavor.")
897
907
  if flavor == 'duckdb' and not isinstance(connectable, SQLConnector):
898
- raise ValueError(f"You must provide a SQLConnector when using DuckDB.")
908
+ raise ValueError("You must provide a SQLConnector when using DuckDB.")
899
909
  if flavor in NO_SCHEMA_FLAVORS:
900
910
  schema = None
901
911
  if schema is None:
@@ -907,18 +917,24 @@ def get_table_cols_types(
907
917
  table_upper = table.upper()
908
918
  table_lower_trunc = truncate_item_name(table_lower, flavor=flavor)
909
919
  table_upper_trunc = truncate_item_name(table_upper, flavor=flavor)
920
+ db_prefix = (
921
+ "tempdb."
922
+ if flavor == 'mssql' and table.startswith('#')
923
+ else ""
924
+ )
910
925
 
911
926
  cols_types_query = sqlalchemy.text(
912
927
  columns_types_queries.get(
913
928
  flavor,
914
929
  columns_types_queries['default']
915
930
  ).format(
916
- table = table,
917
- table_trunc = table_trunc,
918
- table_lower = table_lower,
919
- table_lower_trunc = table_lower_trunc,
920
- table_upper = table_upper,
921
- table_upper_trunc = table_upper_trunc,
931
+ table=table,
932
+ table_trunc=table_trunc,
933
+ table_lower=table_lower,
934
+ table_lower_trunc=table_lower_trunc,
935
+ table_upper=table_upper,
936
+ table_upper_trunc=table_upper_trunc,
937
+ db_prefix=db_prefix,
922
938
  )
923
939
  )
924
940
 
@@ -987,20 +1003,20 @@ def get_table_cols_types(
987
1003
 
988
1004
 
989
1005
  def get_update_queries(
990
- target: str,
991
- patch: str,
992
- connectable: Union[
993
- mrsm.connectors.sql.SQLConnector,
994
- 'sqlalchemy.orm.session.Session'
995
- ],
996
- join_cols: Iterable[str],
997
- flavor: Optional[str] = None,
998
- upsert: bool = False,
999
- datetime_col: Optional[str] = None,
1000
- schema: Optional[str] = None,
1001
- patch_schema: Optional[str] = None,
1002
- debug: bool = False,
1003
- ) -> List[str]:
1006
+ target: str,
1007
+ patch: str,
1008
+ connectable: Union[
1009
+ mrsm.connectors.sql.SQLConnector,
1010
+ 'sqlalchemy.orm.session.Session'
1011
+ ],
1012
+ join_cols: Iterable[str],
1013
+ flavor: Optional[str] = None,
1014
+ upsert: bool = False,
1015
+ datetime_col: Optional[str] = None,
1016
+ schema: Optional[str] = None,
1017
+ patch_schema: Optional[str] = None,
1018
+ debug: bool = False,
1019
+ ) -> List[str]:
1004
1020
  """
1005
1021
  Build a list of `MERGE`, `UPDATE`, `DELETE`/`INSERT` queries to apply a patch to target table.
1006
1022
 
@@ -1067,16 +1083,16 @@ def get_update_queries(
1067
1083
  target_table_columns = get_table_cols_types(
1068
1084
  target,
1069
1085
  connectable,
1070
- flavor = flavor,
1071
- schema = schema,
1072
- debug = debug,
1086
+ flavor=flavor,
1087
+ schema=schema,
1088
+ debug=debug,
1073
1089
  )
1074
1090
  patch_table_columns = get_table_cols_types(
1075
1091
  patch,
1076
1092
  connectable,
1077
- flavor = flavor,
1078
- schema = patch_schema,
1079
- debug = debug,
1093
+ flavor=flavor,
1094
+ schema=patch_schema,
1095
+ debug=debug,
1080
1096
  )
1081
1097
 
1082
1098
  patch_cols_str = ', '.join(
@@ -1085,6 +1101,13 @@ def get_update_queries(
1085
1101
  for col in patch_table_columns
1086
1102
  ]
1087
1103
  )
1104
+ patch_cols_prefixed_str = ', '.join(
1105
+ [
1106
+ 'p.' + sql_item_name(col, flavor)
1107
+ for col in patch_table_columns
1108
+ ]
1109
+ )
1110
+
1088
1111
  join_cols_str = ', '.join(
1089
1112
  [
1090
1113
  sql_item_name(col, flavor)
@@ -1095,7 +1118,7 @@ def get_update_queries(
1095
1118
  value_cols = []
1096
1119
  join_cols_types = []
1097
1120
  if debug:
1098
- dprint(f"target_table_columns:")
1121
+ dprint("target_table_columns:")
1099
1122
  mrsm.pprint(target_table_columns)
1100
1123
  for c_name, c_type in target_table_columns.items():
1101
1124
  if c_name not in patch_table_columns:
@@ -1156,7 +1179,7 @@ def get_update_queries(
1156
1179
  + ' = '
1157
1180
  + "COALESCE("
1158
1181
  + r_prefix
1159
- + sql_item_name(c_name, flavor, None)
1182
+ + sql_item_name(c_name, flavor, None)
1160
1183
  + ", "
1161
1184
  + get_null_replacement(c_type, flavor)
1162
1185
  + ")"
@@ -1175,20 +1198,28 @@ def get_update_queries(
1175
1198
  else "1 = 1"
1176
1199
  )
1177
1200
 
1201
+ ### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
1202
+ when_matched_update_sets_subquery_none = "" if not value_cols else (
1203
+ "WHEN MATCHED THEN"
1204
+ f" UPDATE {sets_subquery('', 'p.')}"
1205
+ )
1206
+
1178
1207
  return [
1179
1208
  base_query.format(
1180
- sets_subquery_none = sets_subquery('', 'p.'),
1181
- sets_subquery_none_excluded = sets_subquery('', 'EXCLUDED.'),
1182
- sets_subquery_f = sets_subquery('f.', 'p.'),
1183
- and_subquery_f = and_subquery('p.', 'f.'),
1184
- and_subquery_t = and_subquery('p.', 't.'),
1185
- target_table_name = target_table_name,
1186
- patch_table_name = patch_table_name,
1187
- patch_cols_str = patch_cols_str,
1188
- date_bounds_subquery = date_bounds_subquery,
1189
- join_cols_str = join_cols_str,
1190
- coalesce_join_cols_str = coalesce_join_cols_str,
1191
- update_or_nothing = update_or_nothing,
1209
+ sets_subquery_none=sets_subquery('', 'p.'),
1210
+ sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
1211
+ sets_subquery_f=sets_subquery('f.', 'p.'),
1212
+ and_subquery_f=and_subquery('p.', 'f.'),
1213
+ and_subquery_t=and_subquery('p.', 't.'),
1214
+ target_table_name=target_table_name,
1215
+ patch_table_name=patch_table_name,
1216
+ patch_cols_str=patch_cols_str,
1217
+ patch_cols_prefixed_str=patch_cols_prefixed_str,
1218
+ date_bounds_subquery=date_bounds_subquery,
1219
+ join_cols_str=join_cols_str,
1220
+ coalesce_join_cols_str=coalesce_join_cols_str,
1221
+ update_or_nothing=update_or_nothing,
1222
+ when_matched_update_sets_subquery_none=when_matched_update_sets_subquery_none,
1192
1223
  )
1193
1224
  for base_query in base_queries
1194
1225
  ]
@@ -1232,6 +1263,11 @@ def get_null_replacement(typ: str, flavor: str) -> str:
1232
1263
  return dateadd_str(flavor=flavor, begin='1900-01-01')
1233
1264
  if 'float' in typ.lower() or 'double' in typ.lower() or typ.lower() in ('decimal',):
1234
1265
  return '-987654321.0'
1266
+ if typ.lower() in ('uniqueidentifier', 'guid', 'uuid'):
1267
+ magic_val = 'DEADBEEF-ABBA-BABE-CAFE-DECAFC0FFEE5'
1268
+ if flavor == 'mssql':
1269
+ return f"CAST('{magic_val}' AS UNIQUEIDENTIFIER)"
1270
+ return f"'{magic_val}'"
1235
1271
  return ('n' if flavor == 'oracle' else '') + "'-987654321'"
1236
1272
 
1237
1273
 
@@ -1443,7 +1479,7 @@ def session_execute(
1443
1479
  successes, msgs, results = [], [], []
1444
1480
  for query in queries:
1445
1481
  query_text = sqlalchemy.text(query)
1446
- fail_msg = f"Failed to execute queries."
1482
+ fail_msg = "Failed to execute queries."
1447
1483
  try:
1448
1484
  result = session.execute(query_text)
1449
1485
  query_success = result is not None
meerschaum/utils/yaml.py CHANGED
@@ -61,7 +61,6 @@ class yaml:
61
61
  _yaml.add_representer(str, _string_presenter)
62
62
  _yaml.representer.SafeRepresenter.add_representer(str, _string_presenter)
63
63
 
64
-
65
64
  @staticmethod
66
65
  def safe_load(*args, **kw):
67
66
  """
@@ -71,7 +70,6 @@ class yaml:
71
70
  return _yaml.load(*args, **filter_keywords(_yaml.load, **kw))
72
71
  return _yaml.safe_load(*args, **filter_keywords(_yaml.safe_load, **kw))
73
72
 
74
-
75
73
  @staticmethod
76
74
  def load(*args, **kw):
77
75
  """
@@ -80,15 +78,14 @@ class yaml:
80
78
  (added `yaml.Loader` as a positional argument).
81
79
  """
82
80
  packaging_version = attempt_import('packaging.version')
83
- _args = list(args)
84
81
  if (
85
82
  _import_name == 'yaml'
86
83
  and packaging_version.parse(_yaml.__version__) >= packaging_version.parse('6.0')
84
+ and 'Loader' not in kw
87
85
  ):
88
- _args += [_yaml.Loader]
89
-
90
- return _yaml.load(*_args, **filter_keywords(_yaml.load, **kw))
86
+ kw['Loader'] = _yaml.Loader
91
87
 
88
+ return _yaml.load(*args, **filter_keywords(_yaml.load, **kw))
92
89
 
93
90
  @staticmethod
94
91
  def dump(data, stream=None, **kw):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: meerschaum
3
- Version: 2.4.5
3
+ Version: 2.4.7
4
4
  Summary: Sync Time-Series Pipes with Meerschaum
5
5
  Home-page: https://meerschaum.io
6
6
  Author: Bennett Meares