meerschaum-2.7.0rc1-py3-none-any.whl → meerschaum-2.7.2-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/api/dash/callbacks/dashboard.py +46 -37
- meerschaum/api/dash/connectors.py +7 -9
- meerschaum/api/resources/templates/termpage.html +32 -24
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/api/routes/_webterm.py +4 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +14 -18
- meerschaum/connectors/sql/_create_engine.py +6 -1
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +62 -56
- meerschaum/connectors/sql/_sql.py +37 -7
- meerschaum/core/Pipe/_attributes.py +6 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +1 -13
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/utils/daemon/Daemon.py +2 -2
- meerschaum/utils/dataframe.py +3 -3
- meerschaum/utils/dtypes/__init__.py +48 -2
- meerschaum/utils/dtypes/sql.py +15 -7
- meerschaum/utils/sql.py +114 -57
- meerschaum/utils/venv/__init__.py +22 -9
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/utils/dtypes/__init__.py
CHANGED
@@ -231,11 +231,11 @@ def attempt_cast_to_bytes(value: Any) -> Any:
|
|
231
231
|
"""
|
232
232
|
Given a value, attempt to coerce it into a bytestring.
|
233
233
|
"""
|
234
|
-
if isinstance(value,
|
234
|
+
if isinstance(value, bytes):
|
235
235
|
return value
|
236
236
|
try:
|
237
237
|
return (
|
238
|
-
|
238
|
+
deserialize_bytes_string(str(value))
|
239
239
|
if not value_is_null(value)
|
240
240
|
else None
|
241
241
|
)
|
@@ -382,9 +382,55 @@ def serialize_bytes(data: bytes) -> str:
|
|
382
382
|
return base64.b64encode(data).decode('utf-8')
|
383
383
|
|
384
384
|
|
385
|
+
def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
|
386
|
+
"""
|
387
|
+
Given a serialized ASCII string of bytes data, return the original bytes.
|
388
|
+
The input data may either be base64- or hex-encoded.
|
389
|
+
|
390
|
+
Parameters
|
391
|
+
----------
|
392
|
+
data: str | None
|
393
|
+
The string to be deserialized into bytes.
|
394
|
+
May be base64- or hex-encoded (prefixed with `'\\x'`).
|
395
|
+
|
396
|
+
force_hex: bool = False
|
397
|
+
If `True`, treat the input string as hex-encoded.
|
398
|
+
If `data` does not begin with the prefix `'\\x'`, set `force_hex` to `True`.
|
399
|
+
This will still strip the leading `'\\x'` prefix if present.
|
400
|
+
|
401
|
+
Returns
|
402
|
+
-------
|
403
|
+
The original bytes used to produce the encoded string `data`.
|
404
|
+
"""
|
405
|
+
if not isinstance(data, str) and value_is_null(data):
|
406
|
+
return data
|
407
|
+
|
408
|
+
import binascii
|
409
|
+
import base64
|
410
|
+
|
411
|
+
is_hex = force_hex or data.startswith('\\x')
|
412
|
+
|
413
|
+
if is_hex:
|
414
|
+
if data.startswith('\\x'):
|
415
|
+
data = data[2:]
|
416
|
+
return binascii.unhexlify(data)
|
417
|
+
|
418
|
+
return base64.b64decode(data)
|
419
|
+
|
420
|
+
|
385
421
|
def deserialize_base64(data: str) -> bytes:
|
386
422
|
"""
|
387
423
|
Return the original bytestring from the given base64-encoded string.
|
388
424
|
"""
|
389
425
|
import base64
|
390
426
|
return base64.b64decode(data)
|
427
|
+
|
428
|
+
|
429
|
+
def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None:
|
430
|
+
"""
|
431
|
+
Return the given bytes as a hex string for PostgreSQL's `BYTEA` type.
|
432
|
+
"""
|
433
|
+
import binascii
|
434
|
+
if not isinstance(data, bytes) and value_is_null(data):
|
435
|
+
return data
|
436
|
+
return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -13,9 +13,8 @@ NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
|
13
13
|
'mariadb': (38, 20),
|
14
14
|
'mysql': (38, 20),
|
15
15
|
'mssql': (28, 10),
|
16
|
-
'duckdb': (15, 3),
|
17
|
-
'sqlite': (15, 4),
|
18
16
|
}
|
17
|
+
NUMERIC_AS_TEXT_FLAVORS = {'sqlite', 'duckdb'}
|
19
18
|
TIMEZONE_NAIVE_FLAVORS = {'oracle', 'mysql', 'mariadb'}
|
20
19
|
|
21
20
|
### MySQL doesn't allow for casting as BIGINT, so this is a workaround.
|
@@ -102,6 +101,10 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
102
101
|
'JSONB': 'json',
|
103
102
|
'UUID': 'uuid',
|
104
103
|
'UNIQUEIDENTIFIER': 'uuid',
|
104
|
+
'BYTEA': 'bytes',
|
105
|
+
'BLOB': 'bytes',
|
106
|
+
'VARBINARY': 'bytes',
|
107
|
+
'VARBINARY(MAX)': 'bytes',
|
105
108
|
'substrings': {
|
106
109
|
'CHAR': 'string[pyarrow]',
|
107
110
|
'TIMESTAMP': 'datetime64[ns]',
|
@@ -114,6 +117,9 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
114
117
|
'INT': 'int64[pyarrow]',
|
115
118
|
'BOOL': 'bool[pyarrow]',
|
116
119
|
'JSON': 'json',
|
120
|
+
'BYTE': 'bytes',
|
121
|
+
'LOB': 'bytes',
|
122
|
+
'BINARY': 'bytes',
|
117
123
|
},
|
118
124
|
'default': 'object',
|
119
125
|
}
|
@@ -256,8 +262,8 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
256
262
|
'mysql': f'DECIMAL{NUMERIC_PRECISION_FLAVORS["mysql"]}',
|
257
263
|
'mssql': f'NUMERIC{NUMERIC_PRECISION_FLAVORS["mssql"]}',
|
258
264
|
'oracle': 'NUMBER',
|
259
|
-
'sqlite':
|
260
|
-
'duckdb': '
|
265
|
+
'sqlite': 'TEXT',
|
266
|
+
'duckdb': 'TEXT',
|
261
267
|
'citus': 'NUMERIC',
|
262
268
|
'cockroachdb': 'NUMERIC',
|
263
269
|
'default': 'NUMERIC',
|
@@ -415,7 +421,7 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
415
421
|
'mysql': 'Numeric',
|
416
422
|
'mssql': 'Numeric',
|
417
423
|
'oracle': 'Numeric',
|
418
|
-
'sqlite': '
|
424
|
+
'sqlite': 'UnicodeText',
|
419
425
|
'duckdb': 'Numeric',
|
420
426
|
'citus': 'Numeric',
|
421
427
|
'cockroachdb': 'Numeric',
|
@@ -528,7 +534,7 @@ def get_db_type_from_pd_type(
|
|
528
534
|
"""
|
529
535
|
from meerschaum.utils.warnings import warn
|
530
536
|
from meerschaum.utils.packages import attempt_import
|
531
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
537
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_ALIAS_DTYPES
|
532
538
|
from meerschaum.utils.misc import parse_arguments_str
|
533
539
|
sqlalchemy_types = attempt_import('sqlalchemy.types')
|
534
540
|
|
@@ -538,6 +544,9 @@ def get_db_type_from_pd_type(
|
|
538
544
|
else PD_TO_SQLALCHEMY_DTYPES_FLAVORS
|
539
545
|
)
|
540
546
|
|
547
|
+
if pd_type in MRSM_ALIAS_DTYPES:
|
548
|
+
pd_type = MRSM_ALIAS_DTYPES[pd_type]
|
549
|
+
|
541
550
|
### Check whether we are able to match this type (e.g. pyarrow support).
|
542
551
|
found_db_type = False
|
543
552
|
if pd_type not in types_registry:
|
@@ -594,7 +603,6 @@ def get_db_type_from_pd_type(
|
|
594
603
|
return cls(*cls_args, **cls_kwargs)
|
595
604
|
|
596
605
|
if 'numeric' in db_type.lower():
|
597
|
-
numeric_type_str = PD_TO_DB_DTYPES_FLAVORS['numeric'].get(flavor, 'NUMERIC')
|
598
606
|
if flavor not in NUMERIC_PRECISION_FLAVORS:
|
599
607
|
return sqlalchemy_types.Numeric
|
600
608
|
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
meerschaum/utils/sql.py
CHANGED
@@ -7,6 +7,7 @@ Flavor-specific SQL tools.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
+
|
10
11
|
from datetime import datetime, timezone, timedelta
|
11
12
|
import meerschaum as mrsm
|
12
13
|
from meerschaum.utils.typing import Optional, Dict, Any, Union, List, Iterable, Tuple
|
@@ -50,10 +51,12 @@ update_queries = {
|
|
50
51
|
{sets_subquery_none}
|
51
52
|
FROM {target_table_name} AS t
|
52
53
|
INNER JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
53
|
-
ON
|
54
|
+
ON
|
55
|
+
{and_subquery_t}
|
54
56
|
WHERE
|
55
57
|
{and_subquery_f}
|
56
|
-
AND
|
58
|
+
AND
|
59
|
+
{date_bounds_subquery}
|
57
60
|
""",
|
58
61
|
'timescaledb-upsert': """
|
59
62
|
INSERT INTO {target_table_name} ({patch_cols_str})
|
@@ -82,9 +85,11 @@ update_queries = {
|
|
82
85
|
'mysql': """
|
83
86
|
UPDATE {target_table_name} AS f
|
84
87
|
JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
85
|
-
ON
|
88
|
+
ON
|
89
|
+
{and_subquery_f}
|
86
90
|
{sets_subquery_f}
|
87
|
-
WHERE
|
91
|
+
WHERE
|
92
|
+
{date_bounds_subquery}
|
88
93
|
""",
|
89
94
|
'mysql-upsert': """
|
90
95
|
INSERT {ignore}INTO {target_table_name} ({patch_cols_str})
|
@@ -96,9 +101,11 @@ update_queries = {
|
|
96
101
|
'mariadb': """
|
97
102
|
UPDATE {target_table_name} AS f
|
98
103
|
JOIN (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) AS p
|
99
|
-
ON
|
104
|
+
ON
|
105
|
+
{and_subquery_f}
|
100
106
|
{sets_subquery_f}
|
101
|
-
WHERE
|
107
|
+
WHERE
|
108
|
+
{date_bounds_subquery}
|
102
109
|
""",
|
103
110
|
'mariadb-upsert': """
|
104
111
|
INSERT {ignore}INTO {target_table_name} ({patch_cols_str})
|
@@ -111,8 +118,10 @@ update_queries = {
|
|
111
118
|
{with_temp_date_bounds}
|
112
119
|
MERGE {target_table_name} f
|
113
120
|
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
114
|
-
ON
|
115
|
-
|
121
|
+
ON
|
122
|
+
{and_subquery_f}
|
123
|
+
AND
|
124
|
+
{date_bounds_subquery}
|
116
125
|
WHEN MATCHED THEN
|
117
126
|
UPDATE
|
118
127
|
{sets_subquery_none};
|
@@ -123,9 +132,10 @@ update_queries = {
|
|
123
132
|
{with_temp_date_bounds}
|
124
133
|
MERGE {target_table_name} f
|
125
134
|
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
126
|
-
ON
|
127
|
-
|
128
|
-
|
135
|
+
ON
|
136
|
+
{and_subquery_f}
|
137
|
+
AND
|
138
|
+
{date_bounds_subquery}{when_matched_update_sets_subquery_none}
|
129
139
|
WHEN NOT MATCHED THEN
|
130
140
|
INSERT ({patch_cols_str})
|
131
141
|
VALUES ({patch_cols_prefixed_str});
|
@@ -134,14 +144,27 @@ update_queries = {
|
|
134
144
|
],
|
135
145
|
'oracle': """
|
136
146
|
MERGE INTO {target_table_name} f
|
137
|
-
USING (SELECT
|
147
|
+
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
138
148
|
ON (
|
139
149
|
{and_subquery_f}
|
140
|
-
AND
|
150
|
+
AND
|
151
|
+
{date_bounds_subquery}
|
141
152
|
)
|
142
|
-
|
143
|
-
|
144
|
-
|
153
|
+
WHEN MATCHED THEN
|
154
|
+
UPDATE
|
155
|
+
{sets_subquery_none}
|
156
|
+
""",
|
157
|
+
'oracle-upsert': """
|
158
|
+
MERGE INTO {target_table_name} f
|
159
|
+
USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
|
160
|
+
ON (
|
161
|
+
{and_subquery_f}
|
162
|
+
AND
|
163
|
+
{date_bounds_subquery}
|
164
|
+
){when_matched_update_sets_subquery_none}
|
165
|
+
WHEN NOT MATCHED THEN
|
166
|
+
INSERT ({patch_cols_str})
|
167
|
+
VALUES ({patch_cols_prefixed_str})
|
145
168
|
""",
|
146
169
|
'sqlite-upsert': """
|
147
170
|
INSERT INTO {target_table_name} ({patch_cols_str})
|
@@ -329,7 +352,11 @@ columns_indices_queries = {
|
|
329
352
|
CASE
|
330
353
|
WHEN kc.type = 'PK' THEN 'PRIMARY KEY'
|
331
354
|
ELSE 'INDEX'
|
332
|
-
END AS [index_type]
|
355
|
+
END AS [index_type],
|
356
|
+
CASE
|
357
|
+
WHEN i.type = 1 THEN CAST(1 AS BIT)
|
358
|
+
ELSE CAST(0 AS BIT)
|
359
|
+
END AS [clustered]
|
333
360
|
FROM
|
334
361
|
sys.schemas s
|
335
362
|
INNER JOIN sys.tables t
|
@@ -495,7 +522,8 @@ def dateadd_str(
|
|
495
522
|
flavor: str = 'postgresql',
|
496
523
|
datepart: str = 'day',
|
497
524
|
number: Union[int, float] = 0,
|
498
|
-
begin: Union[str, datetime, int] = 'now'
|
525
|
+
begin: Union[str, datetime, int] = 'now',
|
526
|
+
db_type: Optional[str] = None,
|
499
527
|
) -> str:
|
500
528
|
"""
|
501
529
|
Generate a `DATEADD` clause depending on database flavor.
|
@@ -534,6 +562,10 @@ def dateadd_str(
|
|
534
562
|
begin: Union[str, datetime], default `'now'`
|
535
563
|
Base datetime to which to add dateparts.
|
536
564
|
|
565
|
+
db_type: Optional[str], default None
|
566
|
+
If provided, cast the datetime string as the type.
|
567
|
+
Otherwise, infer this from the input datetime value.
|
568
|
+
|
537
569
|
Returns
|
538
570
|
-------
|
539
571
|
The appropriate `DATEADD` string for the corresponding database flavor.
|
@@ -545,7 +577,7 @@ def dateadd_str(
|
|
545
577
|
... begin = datetime(2022, 1, 1, 0, 0),
|
546
578
|
... number = 1,
|
547
579
|
... )
|
548
|
-
"DATEADD(day, 1, CAST('2022-01-01 00:00:00' AS
|
580
|
+
"DATEADD(day, 1, CAST('2022-01-01 00:00:00' AS DATETIME2))"
|
549
581
|
>>> dateadd_str(
|
550
582
|
... flavor = 'postgresql',
|
551
583
|
... begin = datetime(2022, 1, 1, 0, 0),
|
@@ -588,7 +620,7 @@ def dateadd_str(
|
|
588
620
|
)
|
589
621
|
|
590
622
|
dt_is_utc = begin_time.tzinfo is not None if begin_time is not None else '+' in str(begin)
|
591
|
-
db_type = get_db_type_from_pd_type(
|
623
|
+
db_type = db_type or get_db_type_from_pd_type(
|
592
624
|
('datetime64[ns, UTC]' if dt_is_utc else 'datetime64[ns]'),
|
593
625
|
flavor=flavor,
|
594
626
|
)
|
@@ -713,7 +745,7 @@ def get_distinct_col_count(
|
|
713
745
|
result = connector.value(_meta_query, debug=debug)
|
714
746
|
try:
|
715
747
|
return int(result)
|
716
|
-
except Exception
|
748
|
+
except Exception:
|
717
749
|
return None
|
718
750
|
|
719
751
|
|
@@ -723,12 +755,15 @@ def sql_item_name(item: str, flavor: str, schema: Optional[str] = None) -> str:
|
|
723
755
|
|
724
756
|
Parameters
|
725
757
|
----------
|
726
|
-
item: str
|
758
|
+
item: str
|
727
759
|
The database item (table, view, etc.) in need of quotes.
|
728
760
|
|
729
|
-
flavor: str
|
761
|
+
flavor: str
|
730
762
|
The database flavor (`'postgresql'`, `'mssql'`, `'sqllite'`, etc.).
|
731
763
|
|
764
|
+
schema: Optional[str], default None
|
765
|
+
If provided, prefix the table name with the schema.
|
766
|
+
|
732
767
|
Returns
|
733
768
|
-------
|
734
769
|
A `str` which contains the input `item` wrapped in the corresponding escape characters.
|
@@ -760,6 +795,8 @@ def sql_item_name(item: str, flavor: str, schema: Optional[str] = None) -> str:
|
|
760
795
|
### NOTE: SQLite does not support schemas.
|
761
796
|
if flavor == 'sqlite':
|
762
797
|
schema = None
|
798
|
+
elif flavor == 'mssql' and str(item).startswith('#'):
|
799
|
+
schema = None
|
763
800
|
|
764
801
|
schema_prefix = (
|
765
802
|
(wrappers[0] + schema + wrappers[1] + '.')
|
@@ -1115,6 +1152,7 @@ def get_table_cols_types(
|
|
1115
1152
|
-------
|
1116
1153
|
A dictionary mapping column names to data types.
|
1117
1154
|
"""
|
1155
|
+
import textwrap
|
1118
1156
|
from meerschaum.connectors import SQLConnector
|
1119
1157
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
1120
1158
|
flavor = flavor or getattr(connectable, 'flavor', None)
|
@@ -1140,7 +1178,7 @@ def get_table_cols_types(
|
|
1140
1178
|
)
|
1141
1179
|
|
1142
1180
|
cols_types_query = sqlalchemy.text(
|
1143
|
-
columns_types_queries.get(
|
1181
|
+
textwrap.dedent(columns_types_queries.get(
|
1144
1182
|
flavor,
|
1145
1183
|
columns_types_queries['default']
|
1146
1184
|
).format(
|
@@ -1151,7 +1189,7 @@ def get_table_cols_types(
|
|
1151
1189
|
table_upper=table_upper,
|
1152
1190
|
table_upper_trunc=table_upper_trunc,
|
1153
1191
|
db_prefix=db_prefix,
|
1154
|
-
)
|
1192
|
+
)).lstrip().rstrip()
|
1155
1193
|
)
|
1156
1194
|
|
1157
1195
|
cols = ['database', 'schema', 'table', 'column', 'type']
|
@@ -1265,6 +1303,7 @@ def get_table_cols_indices(
|
|
1265
1303
|
-------
|
1266
1304
|
A dictionary mapping column names to a list of indices.
|
1267
1305
|
"""
|
1306
|
+
import textwrap
|
1268
1307
|
from collections import defaultdict
|
1269
1308
|
from meerschaum.connectors import SQLConnector
|
1270
1309
|
sqlalchemy = mrsm.attempt_import('sqlalchemy')
|
@@ -1291,7 +1330,7 @@ def get_table_cols_indices(
|
|
1291
1330
|
)
|
1292
1331
|
|
1293
1332
|
cols_indices_query = sqlalchemy.text(
|
1294
|
-
columns_indices_queries.get(
|
1333
|
+
textwrap.dedent(columns_indices_queries.get(
|
1295
1334
|
flavor,
|
1296
1335
|
columns_indices_queries['default']
|
1297
1336
|
).format(
|
@@ -1303,10 +1342,12 @@ def get_table_cols_indices(
|
|
1303
1342
|
table_upper_trunc=table_upper_trunc,
|
1304
1343
|
db_prefix=db_prefix,
|
1305
1344
|
schema=schema,
|
1306
|
-
)
|
1345
|
+
)).lstrip().rstrip()
|
1307
1346
|
)
|
1308
1347
|
|
1309
1348
|
cols = ['database', 'schema', 'table', 'column', 'index', 'index_type']
|
1349
|
+
if flavor == 'mssql':
|
1350
|
+
cols.append('clustered')
|
1310
1351
|
result_cols_ix = dict(enumerate(cols))
|
1311
1352
|
|
1312
1353
|
debug_kwargs = {'debug': debug} if isinstance(connectable, SQLConnector) else {}
|
@@ -1347,7 +1388,6 @@ def get_table_cols_indices(
|
|
1347
1388
|
)
|
1348
1389
|
)
|
1349
1390
|
]
|
1350
|
-
|
1351
1391
|
### NOTE: This may return incorrect columns if the schema is not explicitly stated.
|
1352
1392
|
if cols_types_docs and not cols_types_docs_filtered:
|
1353
1393
|
cols_types_docs_filtered = cols_types_docs
|
@@ -1363,12 +1403,13 @@ def get_table_cols_indices(
|
|
1363
1403
|
else doc['column']
|
1364
1404
|
)
|
1365
1405
|
)
|
1366
|
-
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1406
|
+
index_doc = {
|
1407
|
+
'name': doc.get('index', None),
|
1408
|
+
'type': doc.get('index_type', None)
|
1409
|
+
}
|
1410
|
+
if flavor == 'mssql':
|
1411
|
+
index_doc['clustered'] = doc.get('clustered', None)
|
1412
|
+
cols_indices[col].append(index_doc)
|
1372
1413
|
|
1373
1414
|
return dict(cols_indices)
|
1374
1415
|
except Exception as e:
|
@@ -1438,9 +1479,11 @@ def get_update_queries(
|
|
1438
1479
|
-------
|
1439
1480
|
A list of query strings to perform the update operation.
|
1440
1481
|
"""
|
1482
|
+
import textwrap
|
1441
1483
|
from meerschaum.connectors import SQLConnector
|
1442
1484
|
from meerschaum.utils.debug import dprint
|
1443
|
-
from meerschaum.utils.dtypes
|
1485
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
1486
|
+
from meerschaum.utils.dtypes.sql import DB_FLAVORS_CAST_DTYPES, get_pd_type_from_db_type
|
1444
1487
|
flavor = flavor or (connectable.flavor if isinstance(connectable, SQLConnector) else None)
|
1445
1488
|
if not flavor:
|
1446
1489
|
raise ValueError("Provide a flavor if using a SQLAlchemy session.")
|
@@ -1533,21 +1576,35 @@ def get_update_queries(
|
|
1533
1576
|
def sets_subquery(l_prefix: str, r_prefix: str):
|
1534
1577
|
if not value_cols:
|
1535
1578
|
return ''
|
1579
|
+
|
1580
|
+
cast_func_cols = {
|
1581
|
+
c_name: (
|
1582
|
+
('', '', '')
|
1583
|
+
if (
|
1584
|
+
flavor == 'oracle'
|
1585
|
+
and are_dtypes_equal(get_pd_type_from_db_type(c_type), 'bytes')
|
1586
|
+
)
|
1587
|
+
else (
|
1588
|
+
('CAST(', f" AS {c_type.replace('_', ' ')}", ')')
|
1589
|
+
if flavor != 'sqlite'
|
1590
|
+
else ('', '', '')
|
1591
|
+
)
|
1592
|
+
)
|
1593
|
+
for c_name, c_type in value_cols
|
1594
|
+
}
|
1536
1595
|
return 'SET ' + ',\n'.join([
|
1537
1596
|
(
|
1538
1597
|
l_prefix + sql_item_name(c_name, flavor, None)
|
1539
1598
|
+ ' = '
|
1540
|
-
+
|
1541
|
-
+ r_prefix
|
1542
|
-
+
|
1543
|
-
+
|
1544
|
-
+ (c_type.replace('_', ' ') if flavor != 'sqlite' else '')
|
1545
|
-
+ (')' if flavor != 'sqlite' else '')
|
1599
|
+
+ cast_func_cols[c_name][0]
|
1600
|
+
+ r_prefix + sql_item_name(c_name, flavor, None)
|
1601
|
+
+ cast_func_cols[c_name][1]
|
1602
|
+
+ cast_func_cols[c_name][2]
|
1546
1603
|
) for c_name, c_type in value_cols
|
1547
1604
|
])
|
1548
1605
|
|
1549
1606
|
def and_subquery(l_prefix: str, r_prefix: str):
|
1550
|
-
return '\
|
1607
|
+
return '\n AND\n '.join([
|
1551
1608
|
(
|
1552
1609
|
"COALESCE("
|
1553
1610
|
+ l_prefix
|
@@ -1555,7 +1612,7 @@ def get_update_queries(
|
|
1555
1612
|
+ ", "
|
1556
1613
|
+ get_null_replacement(c_type, flavor)
|
1557
1614
|
+ ")"
|
1558
|
-
+ '
|
1615
|
+
+ '\n =\n '
|
1559
1616
|
+ "COALESCE("
|
1560
1617
|
+ r_prefix
|
1561
1618
|
+ sql_item_name(c_name, flavor, None)
|
@@ -1573,15 +1630,13 @@ def get_update_queries(
|
|
1573
1630
|
min_dt_col_name = f"MIN({dt_col_name})" if flavor != 'mssql' else '[Min_dt]'
|
1574
1631
|
max_dt_col_name = f"MAX({dt_col_name})" if flavor != 'mssql' else '[Max_dt]'
|
1575
1632
|
date_bounds_subquery = (
|
1576
|
-
f"""
|
1577
|
-
|
1578
|
-
|
1579
|
-
"""
|
1633
|
+
f"""f.{dt_col_name} >= (SELECT {min_dt_col_name} FROM {date_bounds_table})
|
1634
|
+
AND
|
1635
|
+
f.{dt_col_name} <= (SELECT {max_dt_col_name} FROM {date_bounds_table})"""
|
1580
1636
|
if datetime_col
|
1581
1637
|
else "1 = 1"
|
1582
1638
|
)
|
1583
|
-
with_temp_date_bounds = f"""
|
1584
|
-
WITH [date_bounds] AS (
|
1639
|
+
with_temp_date_bounds = f"""WITH [date_bounds] AS (
|
1585
1640
|
SELECT MIN({dt_col_name}) AS {min_dt_col_name}, MAX({dt_col_name}) AS {max_dt_col_name}
|
1586
1641
|
FROM {patch_table_name}
|
1587
1642
|
)""" if datetime_col else ""
|
@@ -1598,8 +1653,8 @@ def get_update_queries(
|
|
1598
1653
|
|
1599
1654
|
### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
|
1600
1655
|
when_matched_update_sets_subquery_none = "" if not value_cols else (
|
1601
|
-
"WHEN MATCHED THEN"
|
1602
|
-
f"
|
1656
|
+
"\n WHEN MATCHED THEN\n"
|
1657
|
+
f" UPDATE {sets_subquery('', 'p.')}"
|
1603
1658
|
)
|
1604
1659
|
|
1605
1660
|
cols_equal_values = '\n,'.join(
|
@@ -1616,7 +1671,7 @@ def get_update_queries(
|
|
1616
1671
|
ignore = "IGNORE " if not value_cols else ""
|
1617
1672
|
|
1618
1673
|
formatted_queries = [
|
1619
|
-
base_query.format(
|
1674
|
+
textwrap.dedent(base_query.format(
|
1620
1675
|
sets_subquery_none=sets_subquery('', 'p.'),
|
1621
1676
|
sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
|
1622
1677
|
sets_subquery_f=sets_subquery('f.', 'p.'),
|
@@ -1637,7 +1692,7 @@ def get_update_queries(
|
|
1637
1692
|
with_temp_date_bounds=with_temp_date_bounds,
|
1638
1693
|
identity_insert_on=identity_insert_on,
|
1639
1694
|
identity_insert_off=identity_insert_off,
|
1640
|
-
)
|
1695
|
+
)).lstrip().rstrip()
|
1641
1696
|
for base_query in base_queries
|
1642
1697
|
]
|
1643
1698
|
|
@@ -1681,11 +1736,14 @@ def get_null_replacement(typ: str, flavor: str) -> str:
|
|
1681
1736
|
)
|
1682
1737
|
return f'CAST({val_to_cast} AS {bool_typ})'
|
1683
1738
|
if 'time' in typ.lower() or 'date' in typ.lower():
|
1684
|
-
|
1739
|
+
db_type = typ if typ.isupper() else None
|
1740
|
+
return dateadd_str(flavor=flavor, begin='1900-01-01', db_type=db_type)
|
1685
1741
|
if 'float' in typ.lower() or 'double' in typ.lower() or typ.lower() in ('decimal',):
|
1686
1742
|
return '-987654321.0'
|
1687
1743
|
if flavor == 'oracle' and typ.lower().split('(', maxsplit=1)[0] == 'char':
|
1688
1744
|
return "'-987654321'"
|
1745
|
+
if flavor == 'oracle' and typ.lower() in ('blob', 'bytes'):
|
1746
|
+
return '00'
|
1689
1747
|
if typ.lower() in ('uniqueidentifier', 'guid', 'uuid'):
|
1690
1748
|
magic_val = 'DEADBEEF-ABBA-BABE-CAFE-DECAFC0FFEE5'
|
1691
1749
|
if flavor == 'mssql':
|
@@ -1964,7 +2022,6 @@ def _get_create_table_query_from_cte(
|
|
1964
2022
|
Create a new table from a CTE query.
|
1965
2023
|
"""
|
1966
2024
|
import textwrap
|
1967
|
-
from meerschaum.utils.dtypes.sql import AUTO_INCREMENT_COLUMN_FLAVORS
|
1968
2025
|
create_cte = 'create_query'
|
1969
2026
|
create_cte_name = sql_item_name(create_cte, flavor, None)
|
1970
2027
|
new_table_name = sql_item_name(new_table, flavor, schema)
|
@@ -2052,11 +2109,11 @@ def _get_create_table_query_from_cte(
|
|
2052
2109
|
ADD PRIMARY KEY ({primary_key_name})
|
2053
2110
|
"""
|
2054
2111
|
|
2055
|
-
create_table_query = textwrap.dedent(create_table_query)
|
2112
|
+
create_table_query = textwrap.dedent(create_table_query).lstrip().rstrip()
|
2056
2113
|
if not primary_key:
|
2057
2114
|
return [create_table_query]
|
2058
2115
|
|
2059
|
-
alter_type_query = textwrap.dedent(alter_type_query)
|
2116
|
+
alter_type_query = textwrap.dedent(alter_type_query).lstrip().rstrip()
|
2060
2117
|
|
2061
2118
|
return [
|
2062
2119
|
create_table_query,
|
meerschaum/utils/venv/__init__.py
CHANGED
@@ -218,17 +218,22 @@ def is_venv_active(
|
|
218
218
|
|
219
219
|
verified_venvs = set()
|
220
220
|
def verify_venv(
|
221
|
-
|
222
|
-
|
223
|
-
|
221
|
+
venv: str,
|
222
|
+
debug: bool = False,
|
223
|
+
) -> None:
|
224
224
|
"""
|
225
225
|
Verify that the virtual environment matches the expected state.
|
226
226
|
"""
|
227
|
-
import pathlib
|
227
|
+
import pathlib
|
228
|
+
import platform
|
229
|
+
import os
|
230
|
+
import shutil
|
231
|
+
import sys
|
228
232
|
from meerschaum.config._paths import VIRTENV_RESOURCES_PATH
|
229
233
|
from meerschaum.utils.process import run_process
|
230
234
|
from meerschaum.utils.misc import make_symlink, is_symlink
|
231
235
|
from meerschaum.utils.warnings import warn
|
236
|
+
|
232
237
|
venv_path = VIRTENV_RESOURCES_PATH / venv
|
233
238
|
bin_path = venv_path / (
|
234
239
|
'bin' if platform.system() != 'Windows' else "Scripts"
|
@@ -368,16 +373,21 @@ def init_venv(
|
|
368
373
|
return True
|
369
374
|
|
370
375
|
import io
|
371
|
-
from contextlib import redirect_stdout
|
372
|
-
import sys
|
376
|
+
from contextlib import redirect_stdout
|
377
|
+
import sys
|
378
|
+
import platform
|
379
|
+
import os
|
380
|
+
import pathlib
|
381
|
+
import shutil
|
382
|
+
|
373
383
|
from meerschaum.config.static import STATIC_CONFIG
|
374
384
|
from meerschaum.config._paths import VIRTENV_RESOURCES_PATH, VENVS_CACHE_RESOURCES_PATH
|
375
385
|
from meerschaum.utils.packages import is_uv_enabled
|
386
|
+
|
376
387
|
venv_path = VIRTENV_RESOURCES_PATH / venv
|
377
388
|
vtp = venv_target_path(venv=venv, allow_nonexistent=True, debug=debug)
|
378
389
|
docker_home_venv_path = pathlib.Path('/home/meerschaum/venvs/mrsm')
|
379
390
|
|
380
|
-
runtime_env_var = STATIC_CONFIG['environment']['runtime']
|
381
391
|
work_dir_env_var = STATIC_CONFIG['environment']['work_dir']
|
382
392
|
if (
|
383
393
|
not force
|
@@ -404,10 +414,13 @@ def init_venv(
|
|
404
414
|
|
405
415
|
_venv_success = False
|
406
416
|
temp_vtp = VENVS_CACHE_RESOURCES_PATH / str(venv)
|
407
|
-
rename_vtp = vtp.exists()
|
417
|
+
rename_vtp = vtp.exists() and not temp_vtp.exists()
|
408
418
|
|
409
419
|
if rename_vtp:
|
410
|
-
|
420
|
+
try:
|
421
|
+
vtp.rename(temp_vtp)
|
422
|
+
except FileExistsError:
|
423
|
+
pass
|
411
424
|
|
412
425
|
if uv is not None:
|
413
426
|
_venv_success = run_python_package(
|