meerschaum 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/docs/index.py +1 -0
- meerschaum/actions/show.py +2 -1
- meerschaum/actions/sql.py +11 -11
- meerschaum/api/dash/pipes.py +4 -2
- meerschaum/api/routes/_pipes.py +3 -8
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -4
- meerschaum/connectors/sql/_SQLConnector.py +12 -2
- meerschaum/connectors/sql/_create_engine.py +13 -6
- meerschaum/connectors/sql/_pipes.py +81 -65
- meerschaum/connectors/sql/_sql.py +194 -106
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -5
- meerschaum/core/Pipe/__init__.py +1 -0
- meerschaum/core/Pipe/_attributes.py +1 -1
- meerschaum/core/Pipe/_data.py +16 -16
- meerschaum/core/Pipe/_deduplicate.py +27 -27
- meerschaum/core/Pipe/_sync.py +26 -1
- meerschaum/core/Pipe/_verify.py +5 -5
- meerschaum/utils/dataframe.py +127 -8
- meerschaum/utils/dtypes/__init__.py +26 -4
- meerschaum/utils/dtypes/sql.py +30 -0
- meerschaum/utils/misc.py +1 -1
- meerschaum/utils/sql.py +100 -64
- meerschaum/utils/yaml.py +3 -6
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/METADATA +1 -1
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/RECORD +32 -32
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/LICENSE +0 -0
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/NOTICE +0 -0
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/WHEEL +0 -0
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/top_level.txt +0 -0
- {meerschaum-2.4.5.dist-info → meerschaum-2.4.7.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -138,12 +138,14 @@ def filter_unseen_df(
|
|
138
138
|
import functools
|
139
139
|
import traceback
|
140
140
|
from decimal import Decimal
|
141
|
+
from uuid import UUID
|
141
142
|
from meerschaum.utils.warnings import warn
|
142
143
|
from meerschaum.utils.packages import import_pandas, attempt_import
|
143
144
|
from meerschaum.utils.dtypes import (
|
144
145
|
to_pandas_dtype,
|
145
146
|
are_dtypes_equal,
|
146
147
|
attempt_cast_to_numeric,
|
148
|
+
attempt_cast_to_uuid,
|
147
149
|
coerce_timezone,
|
148
150
|
)
|
149
151
|
pd = import_pandas(debug=debug)
|
@@ -299,6 +301,18 @@ def filter_unseen_df(
|
|
299
301
|
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
300
302
|
)
|
301
303
|
|
304
|
+
old_uuid_cols = get_uuid_cols(old_df)
|
305
|
+
new_uuid_cols = get_uuid_cols(new_df)
|
306
|
+
uuid_cols = set(new_uuid_cols + old_uuid_cols)
|
307
|
+
for uuid_col in old_uuid_cols:
|
308
|
+
old_df[uuid_col] = old_df[uuid_col].apply(
|
309
|
+
lambda x: f'{x}' if isinstance(x, UUID) else x
|
310
|
+
)
|
311
|
+
for uuid_col in new_uuid_cols:
|
312
|
+
new_df[uuid_col] = new_df[uuid_col].apply(
|
313
|
+
lambda x: f'{x}' if isinstance(x, UUID) else x
|
314
|
+
)
|
315
|
+
|
302
316
|
joined_df = merge(
|
303
317
|
new_df.fillna(NA),
|
304
318
|
old_df.fillna(NA),
|
@@ -326,6 +340,14 @@ def filter_unseen_df(
|
|
326
340
|
except Exception:
|
327
341
|
warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
|
328
342
|
|
343
|
+
for uuid_col in uuid_cols:
|
344
|
+
if uuid_col not in delta_df.columns:
|
345
|
+
continue
|
346
|
+
try:
|
347
|
+
delta_df[uuid_col] = delta_df[uuid_col].apply(attempt_cast_to_uuid)
|
348
|
+
except Exception:
|
349
|
+
warn(f"Unable to parse numeric column '{uuid_col}':\n{traceback.format_exc()}")
|
350
|
+
|
329
351
|
return delta_df
|
330
352
|
|
331
353
|
|
@@ -575,7 +597,7 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
|
|
575
597
|
is_dask = 'dask' in df.__module__
|
576
598
|
if is_dask:
|
577
599
|
df = get_first_valid_dask_partition(df)
|
578
|
-
|
600
|
+
|
579
601
|
if len(df) == 0:
|
580
602
|
return []
|
581
603
|
|
@@ -594,6 +616,42 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
|
|
594
616
|
]
|
595
617
|
|
596
618
|
|
619
|
+
def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
    """
    Get the columns which contain `uuid.UUID` objects from a Pandas DataFrame.

    Only the first non-null value of each column is inspected.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain UUID objects.
        If this is a Dask DataFrame, the partition returned by
        `get_first_valid_dask_partition()` is inspected instead.

    Returns
    -------
    A list of columns to treat as UUIDs.
    """
    from uuid import UUID
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    if len(df) == 0:
        return []

    uuid_cols = []
    for col in df.columns:
        ### NOTE: Find the first non-null value by position rather than by index label.
        ###       A label lookup returns a Series when index labels are duplicated,
        ###       which would cause UUID columns to be skipped.
        not_null_mask = df[col].notna().to_numpy()
        if not not_null_mask.any():
            continue
        first_valid_value = df[col].iloc[int(not_null_mask.argmax())]
        if isinstance(first_valid_value, UUID):
            uuid_cols.append(col)
    return uuid_cols
|
653
|
+
|
654
|
+
|
597
655
|
def enforce_dtypes(
|
598
656
|
df: 'pd.DataFrame',
|
599
657
|
dtypes: Dict[str, str],
|
@@ -640,11 +698,11 @@ def enforce_dtypes(
|
|
640
698
|
to_pandas_dtype,
|
641
699
|
is_dtype_numeric,
|
642
700
|
attempt_cast_to_numeric,
|
701
|
+
attempt_cast_to_uuid,
|
643
702
|
)
|
644
703
|
if safe_copy:
|
645
704
|
df = df.copy()
|
646
|
-
|
647
|
-
if len(df_dtypes) == 0:
|
705
|
+
if len(df.columns) == 0:
|
648
706
|
if debug:
|
649
707
|
dprint("Incoming DataFrame has no columns. Skipping enforcement...")
|
650
708
|
return df
|
@@ -663,12 +721,17 @@ def enforce_dtypes(
|
|
663
721
|
for col, typ in dtypes.items()
|
664
722
|
if typ == 'numeric'
|
665
723
|
]
|
724
|
+
uuid_cols = [
|
725
|
+
col
|
726
|
+
for col, typ in dtypes.items()
|
727
|
+
if typ == 'uuid'
|
728
|
+
]
|
666
729
|
df_numeric_cols = get_numeric_cols(df)
|
667
730
|
if debug:
|
668
731
|
dprint("Desired data types:")
|
669
732
|
pprint(dtypes)
|
670
733
|
dprint("Data types for incoming DataFrame:")
|
671
|
-
pprint(
|
734
|
+
pprint({_col: str(_typ) for _col, _typ in df.dtypes.items()})
|
672
735
|
|
673
736
|
if json_cols and len(df) > 0:
|
674
737
|
if debug:
|
@@ -700,9 +763,21 @@ def enforce_dtypes(
|
|
700
763
|
if debug:
|
701
764
|
dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
|
702
765
|
|
766
|
+
if uuid_cols:
|
767
|
+
if debug:
|
768
|
+
dprint(f"Checking for UUIDs: {uuid_cols}")
|
769
|
+
for col in uuid_cols:
|
770
|
+
if col in df.columns:
|
771
|
+
try:
|
772
|
+
df[col] = df[col].apply(attempt_cast_to_uuid)
|
773
|
+
except Exception as e:
|
774
|
+
if debug:
|
775
|
+
dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
|
776
|
+
|
777
|
+
df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
|
703
778
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
704
779
|
if debug:
|
705
|
-
dprint(
|
780
|
+
dprint("Data types match. Exiting enforcement...")
|
706
781
|
return df
|
707
782
|
|
708
783
|
common_dtypes = {}
|
@@ -714,7 +789,7 @@ def enforce_dtypes(
|
|
714
789
|
common_diff_dtypes[col] = df_dtypes[col]
|
715
790
|
|
716
791
|
if debug:
|
717
|
-
dprint(
|
792
|
+
dprint("Common columns with different dtypes:")
|
718
793
|
pprint(common_diff_dtypes)
|
719
794
|
|
720
795
|
detected_dt_cols = {}
|
@@ -726,7 +801,7 @@ def enforce_dtypes(
|
|
726
801
|
del common_diff_dtypes[col]
|
727
802
|
|
728
803
|
if debug:
|
729
|
-
dprint(
|
804
|
+
dprint("Common columns with different dtypes (after dates):")
|
730
805
|
pprint(common_diff_dtypes)
|
731
806
|
|
732
807
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
@@ -1231,5 +1306,49 @@ def query_df(
|
|
1231
1306
|
|
1232
1307
|
_process_select_columns(result_df)
|
1233
1308
|
_process_omit_columns(result_df)
|
1234
|
-
|
1309
|
+
|
1235
1310
|
return result_df
|
1311
|
+
|
1312
|
+
|
1313
|
+
def to_json(
    df: 'pd.DataFrame',
    safe_copy: bool = True,
    orient: str = 'records',
    date_format: str = 'iso',
    date_unit: str = 'us',
    **kwargs: Any
) -> str:
    """
    Serialize the given dataframe as a JSON string.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to be serialized.

    safe_copy: bool, default True
        If `False`, modify the DataFrame inplace.
        A copy is only made when there are UUID columns to convert.

    orient: str, default 'records'
        The JSON layout, passed through to `DataFrame.to_json()`.

    date_format: str, default 'iso'
        The default format for timestamps.

    date_unit: str, default 'us'
        The precision of the timestamps.

    **kwargs: Any
        Additional keyword arguments passed through to `DataFrame.to_json()`.

    Returns
    -------
    A JSON string.
    """
    from uuid import UUID
    from meerschaum.utils.packages import import_pandas
    pd = import_pandas()
    uuid_cols = get_uuid_cols(df)
    if uuid_cols and safe_copy:
        df = df.copy()
    for col in uuid_cols:
        ### NOTE: Only stringify actual UUID objects.
        ###       A blanket `astype(str)` would turn nulls into the string 'None'.
        df[col] = df[col].apply(
            lambda x: str(x) if isinstance(x, UUID) else x
        )
    return df.fillna(pd.NA).to_json(
        date_format=date_format,
        date_unit=date_unit,
        orient=orient,
        **kwargs
    )
|
@@ -7,6 +7,7 @@ Utility functions for working with data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import traceback
|
10
|
+
import uuid
|
10
11
|
from datetime import timezone
|
11
12
|
from decimal import Decimal, Context, InvalidOperation
|
12
13
|
|
@@ -17,6 +18,7 @@ from meerschaum.utils.warnings import warn
|
|
17
18
|
MRSM_PD_DTYPES: Dict[str, str] = {
|
18
19
|
'json': 'object',
|
19
20
|
'numeric': 'object',
|
21
|
+
'uuid': 'object',
|
20
22
|
'datetime': 'datetime64[ns]',
|
21
23
|
'bool': 'bool[pyarrow]',
|
22
24
|
'int': 'Int64',
|
@@ -51,8 +53,8 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
51
53
|
warn(
|
52
54
|
f"Invalid dtype '{dtype}', will use 'object' instead:\n"
|
53
55
|
+ f"{traceback.format_exc()}",
|
54
|
-
stack
|
55
|
-
)
|
56
|
+
stack=False,
|
57
|
+
)
|
56
58
|
return 'object'
|
57
59
|
|
58
60
|
|
@@ -109,8 +111,12 @@ def are_dtypes_equal(
|
|
109
111
|
if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
|
110
112
|
return True
|
111
113
|
|
112
|
-
|
113
|
-
|
114
|
+
uuid_dtypes = ('uuid', 'object')
|
115
|
+
if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
|
116
|
+
return True
|
117
|
+
|
118
|
+
ldtype_clean = ldtype.split('[', maxsplit=1)[0]
|
119
|
+
rdtype_clean = rdtype.split('[', maxsplit=1)[0]
|
114
120
|
|
115
121
|
if ldtype_clean.lower() == rdtype_clean.lower():
|
116
122
|
return True
|
@@ -183,6 +189,22 @@ def attempt_cast_to_numeric(value: Any) -> Any:
|
|
183
189
|
return value
|
184
190
|
|
185
191
|
|
192
|
+
def attempt_cast_to_uuid(value: Any) -> Any:
    """
    Try to coerce a value into a `uuid.UUID`.

    Existing `UUID` objects are returned as-is, null-like values
    (per `value_is_null()`) become `None`, and anything which cannot
    be parsed is returned unchanged.
    """
    if isinstance(value, uuid.UUID):
        return value

    try:
        if value_is_null(value):
            return None
        return uuid.UUID(str(value))
    except Exception:
        ### Best-effort cast: hand back the original value on failure.
        return value
|
206
|
+
|
207
|
+
|
186
208
|
def value_is_null(value: Any) -> bool:
|
187
209
|
"""
|
188
210
|
Determine if a value is a null-like string.
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -55,6 +55,7 @@ DB_FLAVORS_CAST_DTYPES = {
|
|
55
55
|
'NVARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)',
|
56
56
|
'VARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)',
|
57
57
|
'VARCHAR COLLATE "SQL_Latin1_General_CP1_CI_AS"': 'NVARCHAR(MAX)',
|
58
|
+
'NVARCHAR': 'NVARCHAR(MAX)',
|
58
59
|
},
|
59
60
|
}
|
60
61
|
for _flavor, (_precision, _scale) in NUMERIC_PRECISION_FLAVORS.items():
|
@@ -92,6 +93,8 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
92
93
|
'BIT(1)': 'bool[pyarrow]',
|
93
94
|
'JSON': 'json',
|
94
95
|
'JSONB': 'json',
|
96
|
+
'UUID': 'uuid',
|
97
|
+
'UNIQUEIDENTIFIER': 'uuid',
|
95
98
|
'substrings': {
|
96
99
|
'CHAR': 'string[pyarrow]',
|
97
100
|
'TIMESTAMP': 'datetime64[ns]',
|
@@ -239,6 +242,20 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
239
242
|
'cockroachdb': 'NUMERIC',
|
240
243
|
'default': 'NUMERIC',
|
241
244
|
},
|
245
|
+
'uuid': {
|
246
|
+
'timescaledb': 'UUID',
|
247
|
+
'postgresql': 'UUID',
|
248
|
+
'mariadb': 'CHAR(32)',
|
249
|
+
'mysql': 'CHAR(32)',
|
250
|
+
'mssql': 'UNIQUEIDENTIFIER',
|
251
|
+
### I know this is too much space, but erring on the side of caution.
|
252
|
+
'oracle': 'NVARCHAR(2000)',
|
253
|
+
'sqlite': 'TEXT',
|
254
|
+
'duckdb': 'UUID',
|
255
|
+
'citus': 'UUID',
|
256
|
+
'cockroachdb': 'UUID',
|
257
|
+
'default': 'TEXT',
|
258
|
+
},
|
242
259
|
}
|
243
260
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
244
261
|
'int': {
|
@@ -358,6 +375,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
358
375
|
'cockroachdb': 'Numeric',
|
359
376
|
'default': 'Numeric',
|
360
377
|
},
|
378
|
+
'uuid': {
|
379
|
+
'timescaledb': 'Uuid',
|
380
|
+
'postgresql': 'Uuid',
|
381
|
+
'mariadb': 'Uuid',
|
382
|
+
'mysql': 'Uuid',
|
383
|
+
'mssql': 'Uuid',
|
384
|
+
'oracle': 'UnicodeText',
|
385
|
+
'sqlite': 'Uuid',
|
386
|
+
'duckdb': 'Uuid',
|
387
|
+
'citus': 'Uuid',
|
388
|
+
'cockroachdb': 'Uuid',
|
389
|
+
'default': 'Uuid',
|
390
|
+
},
|
361
391
|
}
|
362
392
|
|
363
393
|
|
meerschaum/utils/misc.py
CHANGED
meerschaum/utils/sql.py
CHANGED
@@ -107,6 +107,16 @@ update_queries = {
|
|
107
107
|
UPDATE
|
108
108
|
{sets_subquery_none};
|
109
109
|
""",
|
110
|
+
'mssql-upsert': """
|
111
|
+
MERGE {target_table_name} f
|
112
|
+
USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
|
113
|
+
ON {and_subquery_f}
|
114
|
+
AND {date_bounds_subquery}
|
115
|
+
{when_matched_update_sets_subquery_none}
|
116
|
+
WHEN NOT MATCHED THEN
|
117
|
+
INSERT ({patch_cols_str})
|
118
|
+
VALUES ({patch_cols_prefixed_str});
|
119
|
+
""",
|
110
120
|
'oracle': """
|
111
121
|
MERGE INTO {target_table_name} f
|
112
122
|
USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
|
@@ -172,8 +182,9 @@ columns_types_queries = {
|
|
172
182
|
TABLE_NAME AS [table],
|
173
183
|
COLUMN_NAME AS [column],
|
174
184
|
DATA_TYPE AS [type]
|
175
|
-
FROM INFORMATION_SCHEMA.COLUMNS
|
176
|
-
WHERE TABLE_NAME
|
185
|
+
FROM {db_prefix}INFORMATION_SCHEMA.COLUMNS
|
186
|
+
WHERE TABLE_NAME LIKE '{table}%'
|
187
|
+
OR TABLE_NAME LIKE '{table_trunc}%'
|
177
188
|
""",
|
178
189
|
'mysql': """
|
179
190
|
SELECT
|
@@ -182,7 +193,7 @@ columns_types_queries = {
|
|
182
193
|
TABLE_NAME `table`,
|
183
194
|
COLUMN_NAME `column`,
|
184
195
|
DATA_TYPE `type`
|
185
|
-
FROM INFORMATION_SCHEMA.COLUMNS
|
196
|
+
FROM INFORMATION_SCHEMA.COLUMNS
|
186
197
|
WHERE TABLE_NAME IN ('{table}', '{table_trunc}')
|
187
198
|
""",
|
188
199
|
'mariadb': """
|
@@ -192,7 +203,7 @@ columns_types_queries = {
|
|
192
203
|
TABLE_NAME `table`,
|
193
204
|
COLUMN_NAME `column`,
|
194
205
|
DATA_TYPE `type`
|
195
|
-
FROM INFORMATION_SCHEMA.COLUMNS
|
206
|
+
FROM INFORMATION_SCHEMA.COLUMNS
|
196
207
|
WHERE TABLE_NAME IN ('{table}', '{table_trunc}')
|
197
208
|
""",
|
198
209
|
'oracle': """
|
@@ -273,11 +284,11 @@ def clean(substring: str) -> str:
|
|
273
284
|
|
274
285
|
|
275
286
|
def dateadd_str(
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
287
|
+
flavor: str = 'postgresql',
|
288
|
+
datepart: str = 'day',
|
289
|
+
number: Union[int, float] = 0,
|
290
|
+
begin: Union[str, datetime, int] = 'now'
|
291
|
+
) -> str:
|
281
292
|
"""
|
282
293
|
Generate a `DATEADD` clause depending on database flavor.
|
283
294
|
|
@@ -843,17 +854,17 @@ def get_sqlalchemy_table(
|
|
843
854
|
|
844
855
|
|
845
856
|
def get_table_cols_types(
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
+
table: str,
|
858
|
+
connectable: Union[
|
859
|
+
'mrsm.connectors.sql.SQLConnector',
|
860
|
+
'sqlalchemy.orm.session.Session',
|
861
|
+
'sqlalchemy.engine.base.Engine'
|
862
|
+
],
|
863
|
+
flavor: Optional[str] = None,
|
864
|
+
schema: Optional[str] = None,
|
865
|
+
database: Optional[str] = None,
|
866
|
+
debug: bool = False,
|
867
|
+
) -> Dict[str, str]:
|
857
868
|
"""
|
858
869
|
Return a dictionary mapping a table's columns to data types.
|
859
870
|
This is useful for inspecting tables creating during a not-yet-committed session.
|
@@ -889,13 +900,12 @@ def get_table_cols_types(
|
|
889
900
|
A dictionary mapping column names to data types.
|
890
901
|
"""
|
891
902
|
from meerschaum.connectors import SQLConnector
|
892
|
-
from meerschaum.utils.misc import filter_keywords
|
893
903
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
894
904
|
flavor = flavor or getattr(connectable, 'flavor', None)
|
895
905
|
if not flavor:
|
896
|
-
raise ValueError(
|
906
|
+
raise ValueError("Please provide a database flavor.")
|
897
907
|
if flavor == 'duckdb' and not isinstance(connectable, SQLConnector):
|
898
|
-
raise ValueError(
|
908
|
+
raise ValueError("You must provide a SQLConnector when using DuckDB.")
|
899
909
|
if flavor in NO_SCHEMA_FLAVORS:
|
900
910
|
schema = None
|
901
911
|
if schema is None:
|
@@ -907,18 +917,24 @@ def get_table_cols_types(
|
|
907
917
|
table_upper = table.upper()
|
908
918
|
table_lower_trunc = truncate_item_name(table_lower, flavor=flavor)
|
909
919
|
table_upper_trunc = truncate_item_name(table_upper, flavor=flavor)
|
920
|
+
db_prefix = (
|
921
|
+
"tempdb."
|
922
|
+
if flavor == 'mssql' and table.startswith('#')
|
923
|
+
else ""
|
924
|
+
)
|
910
925
|
|
911
926
|
cols_types_query = sqlalchemy.text(
|
912
927
|
columns_types_queries.get(
|
913
928
|
flavor,
|
914
929
|
columns_types_queries['default']
|
915
930
|
).format(
|
916
|
-
table
|
917
|
-
table_trunc
|
918
|
-
table_lower
|
919
|
-
table_lower_trunc
|
920
|
-
table_upper
|
921
|
-
table_upper_trunc
|
931
|
+
table=table,
|
932
|
+
table_trunc=table_trunc,
|
933
|
+
table_lower=table_lower,
|
934
|
+
table_lower_trunc=table_lower_trunc,
|
935
|
+
table_upper=table_upper,
|
936
|
+
table_upper_trunc=table_upper_trunc,
|
937
|
+
db_prefix=db_prefix,
|
922
938
|
)
|
923
939
|
)
|
924
940
|
|
@@ -987,20 +1003,20 @@ def get_table_cols_types(
|
|
987
1003
|
|
988
1004
|
|
989
1005
|
def get_update_queries(
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1006
|
+
target: str,
|
1007
|
+
patch: str,
|
1008
|
+
connectable: Union[
|
1009
|
+
mrsm.connectors.sql.SQLConnector,
|
1010
|
+
'sqlalchemy.orm.session.Session'
|
1011
|
+
],
|
1012
|
+
join_cols: Iterable[str],
|
1013
|
+
flavor: Optional[str] = None,
|
1014
|
+
upsert: bool = False,
|
1015
|
+
datetime_col: Optional[str] = None,
|
1016
|
+
schema: Optional[str] = None,
|
1017
|
+
patch_schema: Optional[str] = None,
|
1018
|
+
debug: bool = False,
|
1019
|
+
) -> List[str]:
|
1004
1020
|
"""
|
1005
1021
|
Build a list of `MERGE`, `UPDATE`, `DELETE`/`INSERT` queries to apply a patch to target table.
|
1006
1022
|
|
@@ -1067,16 +1083,16 @@ def get_update_queries(
|
|
1067
1083
|
target_table_columns = get_table_cols_types(
|
1068
1084
|
target,
|
1069
1085
|
connectable,
|
1070
|
-
flavor
|
1071
|
-
schema
|
1072
|
-
debug
|
1086
|
+
flavor=flavor,
|
1087
|
+
schema=schema,
|
1088
|
+
debug=debug,
|
1073
1089
|
)
|
1074
1090
|
patch_table_columns = get_table_cols_types(
|
1075
1091
|
patch,
|
1076
1092
|
connectable,
|
1077
|
-
flavor
|
1078
|
-
schema
|
1079
|
-
debug
|
1093
|
+
flavor=flavor,
|
1094
|
+
schema=patch_schema,
|
1095
|
+
debug=debug,
|
1080
1096
|
)
|
1081
1097
|
|
1082
1098
|
patch_cols_str = ', '.join(
|
@@ -1085,6 +1101,13 @@ def get_update_queries(
|
|
1085
1101
|
for col in patch_table_columns
|
1086
1102
|
]
|
1087
1103
|
)
|
1104
|
+
patch_cols_prefixed_str = ', '.join(
|
1105
|
+
[
|
1106
|
+
'p.' + sql_item_name(col, flavor)
|
1107
|
+
for col in patch_table_columns
|
1108
|
+
]
|
1109
|
+
)
|
1110
|
+
|
1088
1111
|
join_cols_str = ', '.join(
|
1089
1112
|
[
|
1090
1113
|
sql_item_name(col, flavor)
|
@@ -1095,7 +1118,7 @@ def get_update_queries(
|
|
1095
1118
|
value_cols = []
|
1096
1119
|
join_cols_types = []
|
1097
1120
|
if debug:
|
1098
|
-
dprint(
|
1121
|
+
dprint("target_table_columns:")
|
1099
1122
|
mrsm.pprint(target_table_columns)
|
1100
1123
|
for c_name, c_type in target_table_columns.items():
|
1101
1124
|
if c_name not in patch_table_columns:
|
@@ -1156,7 +1179,7 @@ def get_update_queries(
|
|
1156
1179
|
+ ' = '
|
1157
1180
|
+ "COALESCE("
|
1158
1181
|
+ r_prefix
|
1159
|
-
+ sql_item_name(c_name, flavor, None)
|
1182
|
+
+ sql_item_name(c_name, flavor, None)
|
1160
1183
|
+ ", "
|
1161
1184
|
+ get_null_replacement(c_type, flavor)
|
1162
1185
|
+ ")"
|
@@ -1175,20 +1198,28 @@ def get_update_queries(
|
|
1175
1198
|
else "1 = 1"
|
1176
1199
|
)
|
1177
1200
|
|
1201
|
+
### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
|
1202
|
+
when_matched_update_sets_subquery_none = "" if not value_cols else (
|
1203
|
+
"WHEN MATCHED THEN"
|
1204
|
+
f" UPDATE {sets_subquery('', 'p.')}"
|
1205
|
+
)
|
1206
|
+
|
1178
1207
|
return [
|
1179
1208
|
base_query.format(
|
1180
|
-
sets_subquery_none
|
1181
|
-
sets_subquery_none_excluded
|
1182
|
-
sets_subquery_f
|
1183
|
-
and_subquery_f
|
1184
|
-
and_subquery_t
|
1185
|
-
target_table_name
|
1186
|
-
patch_table_name
|
1187
|
-
patch_cols_str
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1209
|
+
sets_subquery_none=sets_subquery('', 'p.'),
|
1210
|
+
sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
|
1211
|
+
sets_subquery_f=sets_subquery('f.', 'p.'),
|
1212
|
+
and_subquery_f=and_subquery('p.', 'f.'),
|
1213
|
+
and_subquery_t=and_subquery('p.', 't.'),
|
1214
|
+
target_table_name=target_table_name,
|
1215
|
+
patch_table_name=patch_table_name,
|
1216
|
+
patch_cols_str=patch_cols_str,
|
1217
|
+
patch_cols_prefixed_str=patch_cols_prefixed_str,
|
1218
|
+
date_bounds_subquery=date_bounds_subquery,
|
1219
|
+
join_cols_str=join_cols_str,
|
1220
|
+
coalesce_join_cols_str=coalesce_join_cols_str,
|
1221
|
+
update_or_nothing=update_or_nothing,
|
1222
|
+
when_matched_update_sets_subquery_none=when_matched_update_sets_subquery_none,
|
1192
1223
|
)
|
1193
1224
|
for base_query in base_queries
|
1194
1225
|
]
|
@@ -1232,6 +1263,11 @@ def get_null_replacement(typ: str, flavor: str) -> str:
|
|
1232
1263
|
return dateadd_str(flavor=flavor, begin='1900-01-01')
|
1233
1264
|
if 'float' in typ.lower() or 'double' in typ.lower() or typ.lower() in ('decimal',):
|
1234
1265
|
return '-987654321.0'
|
1266
|
+
if typ.lower() in ('uniqueidentifier', 'guid', 'uuid'):
|
1267
|
+
magic_val = 'DEADBEEF-ABBA-BABE-CAFE-DECAFC0FFEE5'
|
1268
|
+
if flavor == 'mssql':
|
1269
|
+
return f"CAST('{magic_val}' AS UNIQUEIDENTIFIER)"
|
1270
|
+
return f"'{magic_val}'"
|
1235
1271
|
return ('n' if flavor == 'oracle' else '') + "'-987654321'"
|
1236
1272
|
|
1237
1273
|
|
@@ -1443,7 +1479,7 @@ def session_execute(
|
|
1443
1479
|
successes, msgs, results = [], [], []
|
1444
1480
|
for query in queries:
|
1445
1481
|
query_text = sqlalchemy.text(query)
|
1446
|
-
fail_msg =
|
1482
|
+
fail_msg = "Failed to execute queries."
|
1447
1483
|
try:
|
1448
1484
|
result = session.execute(query_text)
|
1449
1485
|
query_success = result is not None
|
meerschaum/utils/yaml.py
CHANGED
@@ -61,7 +61,6 @@ class yaml:
|
|
61
61
|
_yaml.add_representer(str, _string_presenter)
|
62
62
|
_yaml.representer.SafeRepresenter.add_representer(str, _string_presenter)
|
63
63
|
|
64
|
-
|
65
64
|
@staticmethod
|
66
65
|
def safe_load(*args, **kw):
|
67
66
|
"""
|
@@ -71,7 +70,6 @@ class yaml:
|
|
71
70
|
return _yaml.load(*args, **filter_keywords(_yaml.load, **kw))
|
72
71
|
return _yaml.safe_load(*args, **filter_keywords(_yaml.safe_load, **kw))
|
73
72
|
|
74
|
-
|
75
73
|
@staticmethod
|
76
74
|
def load(*args, **kw):
|
77
75
|
"""
|
@@ -80,15 +78,14 @@ class yaml:
|
|
80
78
|
(added `yaml.Loader` as a positional argument).
|
81
79
|
"""
|
82
80
|
packaging_version = attempt_import('packaging.version')
|
83
|
-
_args = list(args)
|
84
81
|
if (
|
85
82
|
_import_name == 'yaml'
|
86
83
|
and packaging_version.parse(_yaml.__version__) >= packaging_version.parse('6.0')
|
84
|
+
and 'Loader' not in kw
|
87
85
|
):
|
88
|
-
|
89
|
-
|
90
|
-
return _yaml.load(*_args, **filter_keywords(_yaml.load, **kw))
|
86
|
+
kw['Loader'] = _yaml.Loader
|
91
87
|
|
88
|
+
return _yaml.load(*args, **filter_keywords(_yaml.load, **kw))
|
92
89
|
|
93
90
|
@staticmethod
|
94
91
|
def dump(data, stream=None, **kw):
|