meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/dash/callbacks/dashboard.py +46 -37
- meerschaum/api/dash/connectors.py +7 -9
- meerschaum/api/resources/templates/termpage.html +32 -24
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/api/routes/_webterm.py +4 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +14 -18
- meerschaum/connectors/sql/_create_engine.py +6 -1
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +62 -56
- meerschaum/connectors/sql/_sql.py +37 -7
- meerschaum/core/Pipe/_attributes.py +6 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +1 -13
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/utils/daemon/Daemon.py +2 -2
- meerschaum/utils/dataframe.py +3 -3
- meerschaum/utils/dtypes/__init__.py +48 -2
- meerschaum/utils/dtypes/sql.py +15 -7
- meerschaum/utils/sql.py +114 -57
- meerschaum/utils/venv/__init__.py +22 -9
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
@@ -231,11 +231,11 @@ def attempt_cast_to_bytes(value: Any) -> Any:
|
|
231
231
|
"""
|
232
232
|
Given a value, attempt to coerce it into a bytestring.
|
233
233
|
"""
|
234
|
-
if isinstance(value,
|
234
|
+
if isinstance(value, bytes):
|
235
235
|
return value
|
236
236
|
try:
|
237
237
|
return (
|
238
|
-
|
238
|
+
deserialize_bytes_string(str(value))
|
239
239
|
if not value_is_null(value)
|
240
240
|
else None
|
241
241
|
)
|
@@ -382,9 +382,55 @@ def serialize_bytes(data: bytes) -> str:
|
|
382
382
|
return base64.b64encode(data).decode('utf-8')
|
383
383
|
|
384
384
|
|
385
|
+
def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
|
386
|
+
"""
|
387
|
+
Given a serialized ASCII string of bytes data, return the original bytes.
|
388
|
+
The input data may either be base64- or hex-encoded.
|
389
|
+
|
390
|
+
Parameters
|
391
|
+
----------
|
392
|
+
data: str | None
|
393
|
+
The string to be deserialized into bytes.
|
394
|
+
May be base64- or hex-encoded (prefixed with `'\\x'`).
|
395
|
+
|
396
|
+
force_hex: bool = False
|
397
|
+
If `True`, treat the input string as hex-encoded.
|
398
|
+
If `data` does not begin with the prefix `'\\x'`, set `force_hex` to `True`.
|
399
|
+
This will still strip the leading `'\\x'` prefix if present.
|
400
|
+
|
401
|
+
Returns
|
402
|
+
-------
|
403
|
+
The original bytes used to produce the encoded string `data`.
|
404
|
+
"""
|
405
|
+
if not isinstance(data, str) and value_is_null(data):
|
406
|
+
return data
|
407
|
+
|
408
|
+
import binascii
|
409
|
+
import base64
|
410
|
+
|
411
|
+
is_hex = force_hex or data.startswith('\\x')
|
412
|
+
|
413
|
+
if is_hex:
|
414
|
+
if data.startswith('\\x'):
|
415
|
+
data = data[2:]
|
416
|
+
return binascii.unhexlify(data)
|
417
|
+
|
418
|
+
return base64.b64decode(data)
|
419
|
+
|
420
|
+
|
385
421
|
def deserialize_base64(data: str) -> bytes:
|
386
422
|
"""
|
387
423
|
Return the original bytestring from the given base64-encoded string.
|
388
424
|
"""
|
389
425
|
import base64
|
390
426
|
return base64.b64decode(data)
|
427
|
+
|
428
|
+
|
429
|
+
def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None:
|
430
|
+
"""
|
431
|
+
Return the given bytes as a hex string for PostgreSQL's `BYTEA` type.
|
432
|
+
"""
|
433
|
+
import binascii
|
434
|
+
if not isinstance(data, bytes) and value_is_null(data):
|
435
|
+
return data
|
436
|
+
return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -13,9 +13,8 @@ NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
|
13
13
|
'mariadb': (38, 20),
|
14
14
|
'mysql': (38, 20),
|
15
15
|
'mssql': (28, 10),
|
16
|
-
'duckdb': (15, 3),
|
17
|
-
'sqlite': (15, 4),
|
18
16
|
}
|
17
|
+
NUMERIC_AS_TEXT_FLAVORS = {'sqlite', 'duckdb'}
|
19
18
|
TIMEZONE_NAIVE_FLAVORS = {'oracle', 'mysql', 'mariadb'}
|
20
19
|
|
21
20
|
### MySQL doesn't allow for casting as BIGINT, so this is a workaround.
|
@@ -102,6 +101,10 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
102
101
|
'JSONB': 'json',
|
103
102
|
'UUID': 'uuid',
|
104
103
|
'UNIQUEIDENTIFIER': 'uuid',
|
104
|
+
'BYTEA': 'bytes',
|
105
|
+
'BLOB': 'bytes',
|
106
|
+
'VARBINARY': 'bytes',
|
107
|
+
'VARBINARY(MAX)': 'bytes',
|
105
108
|
'substrings': {
|
106
109
|
'CHAR': 'string[pyarrow]',
|
107
110
|
'TIMESTAMP': 'datetime64[ns]',
|
@@ -114,6 +117,9 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
114
117
|
'INT': 'int64[pyarrow]',
|
115
118
|
'BOOL': 'bool[pyarrow]',
|
116
119
|
'JSON': 'json',
|
120
|
+
'BYTE': 'bytes',
|
121
|
+
'LOB': 'bytes',
|
122
|
+
'BINARY': 'bytes',
|
117
123
|
},
|
118
124
|
'default': 'object',
|
119
125
|
}
|
@@ -256,8 +262,8 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
256
262
|
'mysql': f'DECIMAL{NUMERIC_PRECISION_FLAVORS["mysql"]}',
|
257
263
|
'mssql': f'NUMERIC{NUMERIC_PRECISION_FLAVORS["mssql"]}',
|
258
264
|
'oracle': 'NUMBER',
|
259
|
-
'sqlite':
|
260
|
-
'duckdb': '
|
265
|
+
'sqlite': 'TEXT',
|
266
|
+
'duckdb': 'TEXT',
|
261
267
|
'citus': 'NUMERIC',
|
262
268
|
'cockroachdb': 'NUMERIC',
|
263
269
|
'default': 'NUMERIC',
|
@@ -415,7 +421,7 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
415
421
|
'mysql': 'Numeric',
|
416
422
|
'mssql': 'Numeric',
|
417
423
|
'oracle': 'Numeric',
|
418
|
-
'sqlite': '
|
424
|
+
'sqlite': 'UnicodeText',
|
419
425
|
'duckdb': 'Numeric',
|
420
426
|
'citus': 'Numeric',
|
421
427
|
'cockroachdb': 'Numeric',
|
@@ -528,7 +534,7 @@ def get_db_type_from_pd_type(
|
|
528
534
|
"""
|
529
535
|
from meerschaum.utils.warnings import warn
|
530
536
|
from meerschaum.utils.packages import attempt_import
|
531
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
537
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_ALIAS_DTYPES
|
532
538
|
from meerschaum.utils.misc import parse_arguments_str
|
533
539
|
sqlalchemy_types = attempt_import('sqlalchemy.types')
|
534
540
|
|
@@ -538,6 +544,9 @@ def get_db_type_from_pd_type(
|
|
538
544
|
else PD_TO_SQLALCHEMY_DTYPES_FLAVORS
|
539
545
|
)
|
540
546
|
|
547
|
+
if pd_type in MRSM_ALIAS_DTYPES:
|
548
|
+
pd_type = MRSM_ALIAS_DTYPES[pd_type]
|
549
|
+
|
541
550
|
### Check whether we are able to match this type (e.g. pyarrow support).
|
542
551
|
found_db_type = False
|
543
552
|
if pd_type not in types_registry:
|
@@ -594,7 +603,6 @@ def get_db_type_from_pd_type(
|
|
594
603
|
return cls(*cls_args, **cls_kwargs)
|
595
604
|
|
596
605
|
if 'numeric' in db_type.lower():
|
597
|
-
numeric_type_str = PD_TO_DB_DTYPES_FLAVORS['numeric'].get(flavor, 'NUMERIC')
|
598
606
|
if flavor not in NUMERIC_PRECISION_FLAVORS:
|
599
607
|
return sqlalchemy_types.Numeric
|
600
608
|
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
meerschaum/utils/sql.py
CHANGED
@@ -7,6 +7,7 @@ Flavor-specific SQL tools.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
+
|
10
11
|
from datetime import datetime, timezone, timedelta
|
11
12
|
import meerschaum as mrsm
|
12
13
|
from meerschaum.utils.typing import Optional, Dict, Any, Union, List, Iterable, Tuple
|
@@ -50,10 +51,12 @@ update_queries = {
|
|
50
51
|
{sets_subquery_none}
|
51
52
|
FROM {target_table_name} AS t
|
52
53
|
INNER JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
53
|
-
ON
|
54
|
+
ON
|
55
|
+
{and_subquery_t}
|
54
56
|
WHERE
|
55
57
|
{and_subquery_f}
|
56
|
-
AND
|
58
|
+
AND
|
59
|
+
{date_bounds_subquery}
|
57
60
|
""",
|
58
61
|
'timescaledb-upsert': """
|
59
62
|
INSERT INTO {target_table_name} ({patch_cols_str})
|
@@ -82,9 +85,11 @@ update_queries = {
|
|
82
85
|
'mysql': """
|
83
86
|
UPDATE {target_table_name} AS f
|
84
87
|
JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
85
|
-
ON
|
88
|
+
ON
|
89
|
+
{and_subquery_f}
|
86
90
|
{sets_subquery_f}
|
87
|
-
WHERE
|
91
|
+
WHERE
|
92
|
+
{date_bounds_subquery}
|
88
93
|
""",
|
89
94
|
'mysql-upsert': """
|
90
95
|
INSERT {ignore}INTO {target_table_name} ({patch_cols_str})
|
@@ -96,9 +101,11 @@ update_queries = {
|
|
96
101
|
'mariadb': """
|
97
102
|
UPDATE {target_table_name} AS f
|
98
103
|
JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
99
|
-
ON
|
104
|
+
ON
|
105
|
+
{and_subquery_f}
|
100
106
|
{sets_subquery_f}
|
101
|
-
WHERE
|
107
|
+
WHERE
|
108
|
+
{date_bounds_subquery}
|
102
109
|
""",
|
103
110
|
'mariadb-upsert': """
|
104
111
|
INSERT {ignore}INTO {target_table_name} ({patch_cols_str})
|
@@ -111,8 +118,10 @@ update_queries = {
|
|
111
118
|
{with_temp_date_bounds}
|
112
119
|
MERGE {target_table_name} f
|
113
120
|
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
114
|
-
ON
|
115
|
-
|
121
|
+
ON
|
122
|
+
{and_subquery_f}
|
123
|
+
AND
|
124
|
+
{date_bounds_subquery}
|
116
125
|
WHEN MATCHED THEN
|
117
126
|
UPDATE
|
118
127
|
{sets_subquery_none};
|
@@ -123,9 +132,10 @@ update_queries = {
|
|
123
132
|
{with_temp_date_bounds}
|
124
133
|
MERGE {target_table_name} f
|
125
134
|
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
126
|
-
ON
|
127
|
-
|
128
|
-
|
135
|
+
ON
|
136
|
+
{and_subquery_f}
|
137
|
+
AND
|
138
|
+
{date_bounds_subquery}{when_matched_update_sets_subquery_none}
|
129
139
|
WHEN NOT MATCHED THEN
|
130
140
|
INSERT ({patch_cols_str})
|
131
141
|
VALUES ({patch_cols_prefixed_str});
|
@@ -134,14 +144,27 @@ update_queries = {
|
|
134
144
|
],
|
135
145
|
'oracle': """
|
136
146
|
MERGE INTO {target_table_name} f
|
137
|
-
USING (SELECT
|
147
|
+
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
138
148
|
ON (
|
139
149
|
{and_subquery_f}
|
140
|
-
AND
|
150
|
+
AND
|
151
|
+
{date_bounds_subquery}
|
141
152
|
)
|
142
|
-
|
143
|
-
|
144
|
-
|
153
|
+
WHEN MATCHED THEN
|
154
|
+
UPDATE
|
155
|
+
{sets_subquery_none}
|
156
|
+
""",
|
157
|
+
'oracle-upsert': """
|
158
|
+
MERGE INTO {target_table_name} f
|
159
|
+
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
160
|
+
ON (
|
161
|
+
{and_subquery_f}
|
162
|
+
AND
|
163
|
+
{date_bounds_subquery}
|
164
|
+
){when_matched_update_sets_subquery_none}
|
165
|
+
WHEN NOT MATCHED THEN
|
166
|
+
INSERT ({patch_cols_str})
|
167
|
+
VALUES ({patch_cols_prefixed_str})
|
145
168
|
""",
|
146
169
|
'sqlite-upsert': """
|
147
170
|
INSERT INTO {target_table_name} ({patch_cols_str})
|
@@ -329,7 +352,11 @@ columns_indices_queries = {
|
|
329
352
|
CASE
|
330
353
|
WHEN kc.type = 'PK' THEN 'PRIMARY KEY'
|
331
354
|
ELSE 'INDEX'
|
332
|
-
END AS [index_type]
|
355
|
+
END AS [index_type],
|
356
|
+
CASE
|
357
|
+
WHEN i.type = 1 THEN CAST(1 AS BIT)
|
358
|
+
ELSE CAST(0 AS BIT)
|
359
|
+
END AS [clustered]
|
333
360
|
FROM
|
334
361
|
sys.schemas s
|
335
362
|
INNER JOIN sys.tables t
|
@@ -495,7 +522,8 @@ def dateadd_str(
|
|
495
522
|
flavor: str = 'postgresql',
|
496
523
|
datepart: str = 'day',
|
497
524
|
number: Union[int, float] = 0,
|
498
|
-
begin: Union[str, datetime, int] = 'now'
|
525
|
+
begin: Union[str, datetime, int] = 'now',
|
526
|
+
db_type: Optional[str] = None,
|
499
527
|
) -> str:
|
500
528
|
"""
|
501
529
|
Generate a `DATEADD` clause depending on database flavor.
|
@@ -534,6 +562,10 @@ def dateadd_str(
|
|
534
562
|
begin: Union[str, datetime], default `'now'`
|
535
563
|
Base datetime to which to add dateparts.
|
536
564
|
|
565
|
+
db_type: Optional[str], default None
|
566
|
+
If provided, cast the datetime string as the type.
|
567
|
+
Otherwise, infer this from the input datetime value.
|
568
|
+
|
537
569
|
Returns
|
538
570
|
-------
|
539
571
|
The appropriate `DATEADD` string for the corresponding database flavor.
|
@@ -545,7 +577,7 @@ def dateadd_str(
|
|
545
577
|
... begin = datetime(2022, 1, 1, 0, 0),
|
546
578
|
... number = 1,
|
547
579
|
... )
|
548
|
-
"DATEADD(day, 1, CAST('2022-01-01 00:00:00' AS
|
580
|
+
"DATEADD(day, 1, CAST('2022-01-01 00:00:00' AS DATETIME2))"
|
549
581
|
>>> dateadd_str(
|
550
582
|
... flavor = 'postgresql',
|
551
583
|
... begin = datetime(2022, 1, 1, 0, 0),
|
@@ -588,7 +620,7 @@ def dateadd_str(
|
|
588
620
|
)
|
589
621
|
|
590
622
|
dt_is_utc = begin_time.tzinfo is not None if begin_time is not None else '+' in str(begin)
|
591
|
-
db_type = get_db_type_from_pd_type(
|
623
|
+
db_type = db_type or get_db_type_from_pd_type(
|
592
624
|
('datetime64[ns, UTC]' if dt_is_utc else 'datetime64[ns]'),
|
593
625
|
flavor=flavor,
|
594
626
|
)
|
@@ -713,7 +745,7 @@ def get_distinct_col_count(
|
|
713
745
|
result = connector.value(_meta_query, debug=debug)
|
714
746
|
try:
|
715
747
|
return int(result)
|
716
|
-
except Exception
|
748
|
+
except Exception:
|
717
749
|
return None
|
718
750
|
|
719
751
|
|
@@ -723,12 +755,15 @@ def sql_item_name(item: str, flavor: str, schema: Optional[str] = None) -> str:
|
|
723
755
|
|
724
756
|
Parameters
|
725
757
|
----------
|
726
|
-
item: str
|
758
|
+
item: str
|
727
759
|
The database item (table, view, etc.) in need of quotes.
|
728
760
|
|
729
|
-
flavor: str
|
761
|
+
flavor: str
|
730
762
|
The database flavor (`'postgresql'`, `'mssql'`, `'sqllite'`, etc.).
|
731
763
|
|
764
|
+
schema: Optional[str], default None
|
765
|
+
If provided, prefix the table name with the schema.
|
766
|
+
|
732
767
|
Returns
|
733
768
|
-------
|
734
769
|
A `str` which contains the input `item` wrapped in the corresponding escape characters.
|
@@ -760,6 +795,8 @@ def sql_item_name(item: str, flavor: str, schema: Optional[str] = None) -> str:
|
|
760
795
|
### NOTE: SQLite does not support schemas.
|
761
796
|
if flavor == 'sqlite':
|
762
797
|
schema = None
|
798
|
+
elif flavor == 'mssql' and str(item).startswith('#'):
|
799
|
+
schema = None
|
763
800
|
|
764
801
|
schema_prefix = (
|
765
802
|
(wrappers[0] + schema + wrappers[1] + '.')
|
@@ -1115,6 +1152,7 @@ def get_table_cols_types(
|
|
1115
1152
|
-------
|
1116
1153
|
A dictionary mapping column names to data types.
|
1117
1154
|
"""
|
1155
|
+
import textwrap
|
1118
1156
|
from meerschaum.connectors import SQLConnector
|
1119
1157
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
1120
1158
|
flavor = flavor or getattr(connectable, 'flavor', None)
|
@@ -1140,7 +1178,7 @@ def get_table_cols_types(
|
|
1140
1178
|
)
|
1141
1179
|
|
1142
1180
|
cols_types_query = sqlalchemy.text(
|
1143
|
-
columns_types_queries.get(
|
1181
|
+
textwrap.dedent(columns_types_queries.get(
|
1144
1182
|
flavor,
|
1145
1183
|
columns_types_queries['default']
|
1146
1184
|
).format(
|
@@ -1151,7 +1189,7 @@ def get_table_cols_types(
|
|
1151
1189
|
table_upper=table_upper,
|
1152
1190
|
table_upper_trunc=table_upper_trunc,
|
1153
1191
|
db_prefix=db_prefix,
|
1154
|
-
)
|
1192
|
+
)).lstrip().rstrip()
|
1155
1193
|
)
|
1156
1194
|
|
1157
1195
|
cols = ['database', 'schema', 'table', 'column', 'type']
|
@@ -1265,6 +1303,7 @@ def get_table_cols_indices(
|
|
1265
1303
|
-------
|
1266
1304
|
A dictionary mapping column names to a list of indices.
|
1267
1305
|
"""
|
1306
|
+
import textwrap
|
1268
1307
|
from collections import defaultdict
|
1269
1308
|
from meerschaum.connectors import SQLConnector
|
1270
1309
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
@@ -1291,7 +1330,7 @@ def get_table_cols_indices(
|
|
1291
1330
|
)
|
1292
1331
|
|
1293
1332
|
cols_indices_query = sqlalchemy.text(
|
1294
|
-
columns_indices_queries.get(
|
1333
|
+
textwrap.dedent(columns_indices_queries.get(
|
1295
1334
|
flavor,
|
1296
1335
|
columns_indices_queries['default']
|
1297
1336
|
).format(
|
@@ -1303,10 +1342,12 @@ def get_table_cols_indices(
|
|
1303
1342
|
table_upper_trunc=table_upper_trunc,
|
1304
1343
|
db_prefix=db_prefix,
|
1305
1344
|
schema=schema,
|
1306
|
-
)
|
1345
|
+
)).lstrip().rstrip()
|
1307
1346
|
)
|
1308
1347
|
|
1309
1348
|
cols = ['database', 'schema', 'table', 'column', 'index', 'index_type']
|
1349
|
+
if flavor == 'mssql':
|
1350
|
+
cols.append('clustered')
|
1310
1351
|
result_cols_ix = dict(enumerate(cols))
|
1311
1352
|
|
1312
1353
|
debug_kwargs = {'debug': debug} if isinstance(connectable, SQLConnector) else {}
|
@@ -1347,7 +1388,6 @@ def get_table_cols_indices(
|
|
1347
1388
|
)
|
1348
1389
|
)
|
1349
1390
|
]
|
1350
|
-
|
1351
1391
|
### NOTE: This may return incorrect columns if the schema is not explicitly stated.
|
1352
1392
|
if cols_types_docs and not cols_types_docs_filtered:
|
1353
1393
|
cols_types_docs_filtered = cols_types_docs
|
@@ -1363,12 +1403,13 @@ def get_table_cols_indices(
|
|
1363
1403
|
else doc['column']
|
1364
1404
|
)
|
1365
1405
|
)
|
1366
|
-
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1406
|
+
index_doc = {
|
1407
|
+
'name': doc.get('index', None),
|
1408
|
+
'type': doc.get('index_type', None)
|
1409
|
+
}
|
1410
|
+
if flavor == 'mssql':
|
1411
|
+
index_doc['clustered'] = doc.get('clustered', None)
|
1412
|
+
cols_indices[col].append(index_doc)
|
1372
1413
|
|
1373
1414
|
return dict(cols_indices)
|
1374
1415
|
except Exception as e:
|
@@ -1438,9 +1479,11 @@ def get_update_queries(
|
|
1438
1479
|
-------
|
1439
1480
|
A list of query strings to perform the update operation.
|
1440
1481
|
"""
|
1482
|
+
import textwrap
|
1441
1483
|
from meerschaum.connectors import SQLConnector
|
1442
1484
|
from meerschaum.utils.debug import dprint
|
1443
|
-
from meerschaum.utils.dtypes
|
1485
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
1486
|
+
from meerschaum.utils.dtypes.sql import DB_FLAVORS_CAST_DTYPES, get_pd_type_from_db_type
|
1444
1487
|
flavor = flavor or (connectable.flavor if isinstance(connectable, SQLConnector) else None)
|
1445
1488
|
if not flavor:
|
1446
1489
|
raise ValueError("Provide a flavor if using a SQLAlchemy session.")
|
@@ -1533,21 +1576,35 @@ def get_update_queries(
|
|
1533
1576
|
def sets_subquery(l_prefix: str, r_prefix: str):
|
1534
1577
|
if not value_cols:
|
1535
1578
|
return ''
|
1579
|
+
|
1580
|
+
cast_func_cols = {
|
1581
|
+
c_name: (
|
1582
|
+
('', '', '')
|
1583
|
+
if (
|
1584
|
+
flavor == 'oracle'
|
1585
|
+
and are_dtypes_equal(get_pd_type_from_db_type(c_type), 'bytes')
|
1586
|
+
)
|
1587
|
+
else (
|
1588
|
+
('CAST(', f" AS {c_type.replace('_', ' ')}", ')')
|
1589
|
+
if flavor != 'sqlite'
|
1590
|
+
else ('', '', '')
|
1591
|
+
)
|
1592
|
+
)
|
1593
|
+
for c_name, c_type in value_cols
|
1594
|
+
}
|
1536
1595
|
return 'SET ' + ',\n'.join([
|
1537
1596
|
(
|
1538
1597
|
l_prefix + sql_item_name(c_name, flavor, None)
|
1539
1598
|
+ ' = '
|
1540
|
-
+
|
1541
|
-
+ r_prefix
|
1542
|
-
+
|
1543
|
-
+
|
1544
|
-
+ (c_type.replace('_', ' ') if flavor != 'sqlite' else '')
|
1545
|
-
+ (')' if flavor != 'sqlite' else '')
|
1599
|
+
+ cast_func_cols[c_name][0]
|
1600
|
+
+ r_prefix + sql_item_name(c_name, flavor, None)
|
1601
|
+
+ cast_func_cols[c_name][1]
|
1602
|
+
+ cast_func_cols[c_name][2]
|
1546
1603
|
) for c_name, c_type in value_cols
|
1547
1604
|
])
|
1548
1605
|
|
1549
1606
|
def and_subquery(l_prefix: str, r_prefix: str):
|
1550
|
-
return '\
|
1607
|
+
return '\n AND\n '.join([
|
1551
1608
|
(
|
1552
1609
|
"COALESCE("
|
1553
1610
|
+ l_prefix
|
@@ -1555,7 +1612,7 @@ def get_update_queries(
|
|
1555
1612
|
+ ", "
|
1556
1613
|
+ get_null_replacement(c_type, flavor)
|
1557
1614
|
+ ")"
|
1558
|
-
+ '
|
1615
|
+
+ '\n =\n '
|
1559
1616
|
+ "COALESCE("
|
1560
1617
|
+ r_prefix
|
1561
1618
|
+ sql_item_name(c_name, flavor, None)
|
@@ -1573,15 +1630,13 @@ def get_update_queries(
|
|
1573
1630
|
min_dt_col_name = f"MIN({dt_col_name})" if flavor != 'mssql' else '[Min_dt]'
|
1574
1631
|
max_dt_col_name = f"MAX({dt_col_name})" if flavor != 'mssql' else '[Max_dt]'
|
1575
1632
|
date_bounds_subquery = (
|
1576
|
-
f"""
|
1577
|
-
|
1578
|
-
|
1579
|
-
"""
|
1633
|
+
f"""f.{dt_col_name} >= (SELECT {min_dt_col_name} FROM {date_bounds_table})
|
1634
|
+
AND
|
1635
|
+
f.{dt_col_name} <= (SELECT {max_dt_col_name} FROM {date_bounds_table})"""
|
1580
1636
|
if datetime_col
|
1581
1637
|
else "1 = 1"
|
1582
1638
|
)
|
1583
|
-
with_temp_date_bounds = f"""
|
1584
|
-
WITH [date_bounds] AS (
|
1639
|
+
with_temp_date_bounds = f"""WITH [date_bounds] AS (
|
1585
1640
|
SELECT MIN({dt_col_name}) AS {min_dt_col_name}, MAX({dt_col_name}) AS {max_dt_col_name}
|
1586
1641
|
FROM {patch_table_name}
|
1587
1642
|
)""" if datetime_col else ""
|
@@ -1598,8 +1653,8 @@ def get_update_queries(
|
|
1598
1653
|
|
1599
1654
|
### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
|
1600
1655
|
when_matched_update_sets_subquery_none = "" if not value_cols else (
|
1601
|
-
"WHEN MATCHED THEN"
|
1602
|
-
f"
|
1656
|
+
"\n WHEN MATCHED THEN\n"
|
1657
|
+
f" UPDATE {sets_subquery('', 'p.')}"
|
1603
1658
|
)
|
1604
1659
|
|
1605
1660
|
cols_equal_values = '\n,'.join(
|
@@ -1616,7 +1671,7 @@ def get_update_queries(
|
|
1616
1671
|
ignore = "IGNORE " if not value_cols else ""
|
1617
1672
|
|
1618
1673
|
formatted_queries = [
|
1619
|
-
base_query.format(
|
1674
|
+
textwrap.dedent(base_query.format(
|
1620
1675
|
sets_subquery_none=sets_subquery('', 'p.'),
|
1621
1676
|
sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
|
1622
1677
|
sets_subquery_f=sets_subquery('f.', 'p.'),
|
@@ -1637,7 +1692,7 @@ def get_update_queries(
|
|
1637
1692
|
with_temp_date_bounds=with_temp_date_bounds,
|
1638
1693
|
identity_insert_on=identity_insert_on,
|
1639
1694
|
identity_insert_off=identity_insert_off,
|
1640
|
-
)
|
1695
|
+
)).lstrip().rstrip()
|
1641
1696
|
for base_query in base_queries
|
1642
1697
|
]
|
1643
1698
|
|
@@ -1681,11 +1736,14 @@ def get_null_replacement(typ: str, flavor: str) -> str:
|
|
1681
1736
|
)
|
1682
1737
|
return f'CAST({val_to_cast} AS {bool_typ})'
|
1683
1738
|
if 'time' in typ.lower() or 'date' in typ.lower():
|
1684
|
-
|
1739
|
+
db_type = typ if typ.isupper() else None
|
1740
|
+
return dateadd_str(flavor=flavor, begin='1900-01-01', db_type=db_type)
|
1685
1741
|
if 'float' in typ.lower() or 'double' in typ.lower() or typ.lower() in ('decimal',):
|
1686
1742
|
return '-987654321.0'
|
1687
1743
|
if flavor == 'oracle' and typ.lower().split('(', maxsplit=1)[0] == 'char':
|
1688
1744
|
return "'-987654321'"
|
1745
|
+
if flavor == 'oracle' and typ.lower() in ('blob', 'bytes'):
|
1746
|
+
return '00'
|
1689
1747
|
if typ.lower() in ('uniqueidentifier', 'guid', 'uuid'):
|
1690
1748
|
magic_val = 'DEADBEEF-ABBA-BABE-CAFE-DECAFC0FFEE5'
|
1691
1749
|
if flavor == 'mssql':
|
@@ -1964,7 +2022,6 @@ def _get_create_table_query_from_cte(
|
|
1964
2022
|
Create a new table from a CTE query.
|
1965
2023
|
"""
|
1966
2024
|
import textwrap
|
1967
|
-
from meerschaum.utils.dtypes.sql import AUTO_INCREMENT_COLUMN_FLAVORS
|
1968
2025
|
create_cte = 'create_query'
|
1969
2026
|
create_cte_name = sql_item_name(create_cte, flavor, None)
|
1970
2027
|
new_table_name = sql_item_name(new_table, flavor, schema)
|
@@ -2052,11 +2109,11 @@ def _get_create_table_query_from_cte(
|
|
2052
2109
|
ADD PRIMARY KEY ({primary_key_name})
|
2053
2110
|
"""
|
2054
2111
|
|
2055
|
-
create_table_query = textwrap.dedent(create_table_query)
|
2112
|
+
create_table_query = textwrap.dedent(create_table_query).lstrip().rstrip()
|
2056
2113
|
if not primary_key:
|
2057
2114
|
return [create_table_query]
|
2058
2115
|
|
2059
|
-
alter_type_query = textwrap.dedent(alter_type_query)
|
2116
|
+
alter_type_query = textwrap.dedent(alter_type_query).lstrip().rstrip()
|
2060
2117
|
|
2061
2118
|
return [
|
2062
2119
|
create_table_query,
|
@@ -218,17 +218,22 @@ def is_venv_active(
|
|
218
218
|
|
219
219
|
verified_venvs = set()
|
220
220
|
def verify_venv(
|
221
|
-
|
222
|
-
|
223
|
-
|
221
|
+
venv: str,
|
222
|
+
debug: bool = False,
|
223
|
+
) -> None:
|
224
224
|
"""
|
225
225
|
Verify that the virtual environment matches the expected state.
|
226
226
|
"""
|
227
|
-
import pathlib
|
227
|
+
import pathlib
|
228
|
+
import platform
|
229
|
+
import os
|
230
|
+
import shutil
|
231
|
+
import sys
|
228
232
|
from meerschaum.config._paths import VIRTENV_RESOURCES_PATH
|
229
233
|
from meerschaum.utils.process import run_process
|
230
234
|
from meerschaum.utils.misc import make_symlink, is_symlink
|
231
235
|
from meerschaum.utils.warnings import warn
|
236
|
+
|
232
237
|
venv_path = VIRTENV_RESOURCES_PATH / venv
|
233
238
|
bin_path = venv_path / (
|
234
239
|
'bin' if platform.system() != 'Windows' else "Scripts"
|
@@ -368,16 +373,21 @@ def init_venv(
|
|
368
373
|
return True
|
369
374
|
|
370
375
|
import io
|
371
|
-
from contextlib import redirect_stdout
|
372
|
-
import sys
|
376
|
+
from contextlib import redirect_stdout
|
377
|
+
import sys
|
378
|
+
import platform
|
379
|
+
import os
|
380
|
+
import pathlib
|
381
|
+
import shutil
|
382
|
+
|
373
383
|
from meerschaum.config.static import STATIC_CONFIG
|
374
384
|
from meerschaum.config._paths import VIRTENV_RESOURCES_PATH, VENVS_CACHE_RESOURCES_PATH
|
375
385
|
from meerschaum.utils.packages import is_uv_enabled
|
386
|
+
|
376
387
|
venv_path = VIRTENV_RESOURCES_PATH / venv
|
377
388
|
vtp = venv_target_path(venv=venv, allow_nonexistent=True, debug=debug)
|
378
389
|
docker_home_venv_path = pathlib.Path('/home/meerschaum/venvs/mrsm')
|
379
390
|
|
380
|
-
runtime_env_var = STATIC_CONFIG['environment']['runtime']
|
381
391
|
work_dir_env_var = STATIC_CONFIG['environment']['work_dir']
|
382
392
|
if (
|
383
393
|
not force
|
@@ -404,10 +414,13 @@ def init_venv(
|
|
404
414
|
|
405
415
|
_venv_success = False
|
406
416
|
temp_vtp = VENVS_CACHE_RESOURCES_PATH / str(venv)
|
407
|
-
rename_vtp = vtp.exists()
|
417
|
+
rename_vtp = vtp.exists() and not temp_vtp.exists()
|
408
418
|
|
409
419
|
if rename_vtp:
|
410
|
-
|
420
|
+
try:
|
421
|
+
vtp.rename(temp_vtp)
|
422
|
+
except FileExistsError:
|
423
|
+
pass
|
411
424
|
|
412
425
|
if uv is not None:
|
413
426
|
_venv_success = run_python_package(
|