meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -3
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_pipes.py +84 -38
- meerschaum/connectors/sql/_sql.py +6 -1
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +19 -0
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +61 -21
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +175 -13
- meerschaum/utils/dtypes/__init__.py +103 -14
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +70 -47
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +33 -13
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dtypes/__init__.py
CHANGED
@@ -15,7 +15,19 @@ import meerschaum as mrsm
 from meerschaum.utils.typing import Dict, Union, Any
 from meerschaum.utils.warnings import warn
 
-MRSM_PD_DTYPES: Dict[str, str] = {
+MRSM_ALIAS_DTYPES: Dict[str, str] = {
+    'decimal': 'numeric',
+    'number': 'numeric',
+    'jsonl': 'json',
+    'JSON': 'json',
+    'binary': 'bytes',
+    'blob': 'bytes',
+    'varbinary': 'bytes',
+    'bytea': 'bytes',
+    'guid': 'uuid',
+    'UUID': 'uuid',
+}
+MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
     'json': 'object',
     'numeric': 'object',
     'uuid': 'object',
@@ -27,6 +39,8 @@ MRSM_PD_DTYPES: Dict[str, str] = {
     'int32': 'Int32',
     'int64': 'Int64',
     'str': 'string[python]',
+    'bytes': 'object',
+    None: 'object',
 }
 
 
@@ -38,6 +52,10 @@ def to_pandas_dtype(dtype: str) -> str:
     if known_dtype is not None:
         return known_dtype
 
+    alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
+    if alias_dtype is not None:
+        return MRSM_PD_DTYPES[alias_dtype]
+
     ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
     ### treat it as a SQL db type.
     if dtype.split(' ')[0].isupper():
@@ -95,7 +113,7 @@ def are_dtypes_equal(
     try:
         if ldtype == rdtype:
             return True
-    except Exception
+    except Exception:
         warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
         return False
 
@@ -115,6 +133,10 @@ def are_dtypes_equal(
     if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
         return True
 
+    bytes_dtypes = ('bytes', 'object')
+    if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
+        return True
+
     ldtype_clean = ldtype.split('[', maxsplit=1)[0]
     rdtype_clean = rdtype.split('[', maxsplit=1)[0]
 
@@ -185,7 +207,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
             if not value_is_null(value)
             else Decimal('NaN')
         )
-    except Exception
+    except Exception:
         return value
 
 
@@ -201,7 +223,23 @@ def attempt_cast_to_uuid(value: Any) -> Any:
             if not value_is_null(value)
             else None
         )
-    except Exception
+    except Exception:
+        return value
+
+
+def attempt_cast_to_bytes(value: Any) -> Any:
+    """
+    Given a value, attempt to coerce it into a bytestring.
+    """
+    if isinstance(value, uuid.UUID):
+        return value
+    try:
+        return (
+            deserialize_base64(str(value))
+            if not value_is_null(value)
+            else None
+        )
+    except Exception:
         return value
 
 
@@ -251,7 +289,7 @@ def coerce_timezone(
 ) -> Any:
     """
     Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
-    return a
+    return a UTC timestamp (strip timezone if `strip_utc` is `True`.
     """
     if dt is None:
         return None
@@ -266,9 +304,7 @@ def coerce_timezone(
     dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
 
     if dt_is_series:
-        is_dask = 'dask' in dt.__module__
         pandas = mrsm.attempt_import('pandas', lazy=False)
-        dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
 
         if (
             pandas.api.types.is_datetime64_any_dtype(dt) and (
@@ -279,14 +315,13 @@ def coerce_timezone(
         ):
             return dt
 
-        dt_series = (
-            pandas.to_datetime(dt, utc=True, format='ISO8601')
-            if dd is None
-            else dd.to_datetime(dt, utc=True, format='ISO8601')
-        )
+        dt_series = to_datetime(dt, coerce_utc=False)
         if strip_utc:
-
-
+            try:
+                if dt_series.dt.tz is not None:
+                    dt_series = dt_series.dt.tz_localize(None)
+            except Exception:
+                pass
 
         return dt_series
 
@@ -299,3 +334,57 @@ def coerce_timezone(
     if strip_utc:
         return utc_dt.replace(tzinfo=None)
     return utc_dt
+
+
+def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
+    """
+    Wrap `pd.to_datetime()` and add support for out-of-bounds values.
+    """
+    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
+    is_dask = 'dask' in getattr(dt_val, '__module__', '')
+    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
+    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
+    pd = pandas if dd is None else dd
+
+    try:
+        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
+        if as_pydatetime:
+            return new_dt_val.to_pydatetime()
+        return new_dt_val
+    except (pd.errors.OutOfBoundsDatetime, ValueError):
+        pass
+
+    def parse(x: Any) -> Any:
+        try:
+            return dateutil_parser.parse(x)
+        except Exception:
+            return x
+
+    if dt_is_series:
+        new_series = dt_val.apply(parse)
+        if coerce_utc:
+            return coerce_timezone(new_series)
+        return new_series
+
+    new_dt_val = parse(dt_val)
+    if not coerce_utc:
+        return new_dt_val
+    return coerce_timezone(new_dt_val)
+
+
+def serialize_bytes(data: bytes) -> str:
+    """
+    Return the given bytes as a base64-encoded string.
+    """
+    import base64
+    if not isinstance(data, bytes) and value_is_null(data):
+        return data
+    return base64.b64encode(data).decode('utf-8')
+
+
+def deserialize_base64(data: str) -> bytes:
+    """
+    Return the original bytestring from the given base64-encoded string.
+    """
+    import base64
+    return base64.b64decode(data)
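The notable additions above are the MRSM_ALIAS_DTYPES lookup, the 'bytes' dtype, and the base64 helpers. A minimal sketch of the intended behavior, inferred only from the hunks above (assumes meerschaum 2.7.0rc1 is installed):

    # Aliases now resolve through MRSM_ALIAS_DTYPES before the all-caps SQL fallback.
    from meerschaum.utils.dtypes import (
        to_pandas_dtype,
        serialize_bytes,
        deserialize_base64,
    )

    print(to_pandas_dtype('decimal'))    # 'object' ('decimal' -> 'numeric')
    print(to_pandas_dtype('varbinary'))  # 'object' ('varbinary' -> 'bytes')

    # Bytes round-trip through base64-encoded strings:
    encoded = serialize_bytes(b'hello')             # 'aGVsbG8='
    assert deserialize_base64(encoded) == b'hello'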
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -276,6 +276,19 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'UUID',
         'default': 'TEXT',
     },
+    'bytes': {
+        'timescaledb': 'BYTEA',
+        'postgresql': 'BYTEA',
+        'mariadb': 'BLOB',
+        'mysql': 'BLOB',
+        'mssql': 'VARBINARY(MAX)',
+        'oracle': 'BLOB',
+        'sqlite': 'BLOB',
+        'duckdb': 'BLOB',
+        'citus': 'BYTEA',
+        'cockroachdb': 'BYTEA',
+        'default': 'BLOB',
+    },
 }
 PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
     'int': {
@@ -421,6 +434,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'Uuid',
         'default': 'Uuid',
     },
+    'bytes': {
+        'timescaledb': 'LargeBinary',
+        'postgresql': 'LargeBinary',
+        'mariadb': 'LargeBinary',
+        'mysql': 'LargeBinary',
+        'mssql': 'LargeBinary',
+        'oracle': 'LargeBinary',
+        'sqlite': 'LargeBinary',
+        'duckdb': 'LargeBinary',
+        'citus': 'LargeBinary',
+        'cockroachdb': 'LargeBinary',
+        'default': 'LargeBinary',
+    },
 }
 
 AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
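In practice, the new 'bytes' entries determine which binary column type and SQLAlchemy type each flavor receives. A quick check against the dictionaries shown above (names taken directly from the diff):

    from meerschaum.utils.dtypes.sql import (
        PD_TO_DB_DTYPES_FLAVORS,
        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
    )

    print(PD_TO_DB_DTYPES_FLAVORS['bytes']['postgresql'])       # 'BYTEA'
    print(PD_TO_DB_DTYPES_FLAVORS['bytes']['mssql'])            # 'VARBINARY(MAX)'
    print(PD_TO_SQLALCHEMY_DTYPES_FLAVORS['bytes']['default'])  # 'LargeBinary'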
meerschaum/utils/misc.py
CHANGED
@@ -177,14 +177,14 @@ def string_to_dict(
         keys = _keys[:-1]
         try:
             val = ast.literal_eval(_keys[-1])
-        except Exception
+        except Exception:
             val = str(_keys[-1])
 
         c = params_dict
         for _k in keys[:-1]:
             try:
                 k = ast.literal_eval(_k)
-            except Exception
+            except Exception:
                 k = str(_k)
             if k not in c:
                 c[k] = {}
@@ -196,12 +196,12 @@ def string_to_dict(
 
 
 def parse_config_substitution(
-
-
-
-
-
-
+    value: str,
+    leading_key: str = 'MRSM',
+    begin_key: str = '{',
+    end_key: str = '}',
+    delimeter: str = ':'
+) -> List[Any]:
     """
     Parse Meerschaum substitution syntax
     E.g. MRSM{value1:value2} => ['value1', 'value2']
meerschaum/utils/schedule.py
CHANGED
@@ -132,7 +132,7 @@ def schedule_function(
 
     try:
         loop.run_until_complete(run_scheduler())
-    except (KeyboardInterrupt, SystemExit)
+    except (KeyboardInterrupt, SystemExit):
         loop.run_until_complete(_stop_scheduler())
 
     return True, "Success"
@@ -159,13 +159,13 @@ def parse_schedule(schedule: str, now: Optional[datetime] = None):
     )
 
     starting_ts = parse_start_time(schedule, now=now)
-    schedule = schedule.split(STARTING_KEYWORD)[0].strip()
+    schedule = schedule.split(STARTING_KEYWORD, maxsplit=1)[0].strip()
     for alias_keyword, true_keyword in SCHEDULE_ALIASES.items():
         schedule = schedule.replace(alias_keyword, true_keyword)
 
     ### TODO Allow for combining `and` + `or` logic.
     if '&' in schedule and '|' in schedule:
-        raise ValueError(
+        raise ValueError("Cannot accept both 'and' + 'or' logic in the schedule frequency.")
 
     join_str = '|' if '|' in schedule else '&'
     join_trigger = (
@@ -300,6 +300,11 @@ def parse_start_time(schedule: str, now: Optional[datetime] = None) -> datetime:
     try:
         if starting_str == 'now':
             starting_ts = now
+        elif starting_str.startswith('in '):
+            delta_vals = starting_str.replace('in ', '').split(' ', maxsplit=1)
+            delta_unit = delta_vals[-1].rstrip('s') + 's'
+            delta_num = float(delta_vals[0])
+            starting_ts = now + timedelta(**{delta_unit: delta_num})
         elif 'tomorrow' in starting_str or 'today' in starting_str:
             today = round_time(now, timedelta(days=1))
             tomorrow = today + timedelta(days=1)
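The new elif branch in parse_start_time() adds relative start times of the form "in <n> <unit>". A sketch of how a schedule should now resolve, following the arithmetic in the hunk above (the return path itself sits outside this hunk):

    from datetime import datetime, timedelta, timezone
    from meerschaum.utils.schedule import parse_start_time

    now = datetime(2024, 1, 1, tzinfo=timezone.utc)
    # 'in 30 seconds' -> timedelta(**{'seconds': 30.0})
    starting_ts = parse_start_time('daily starting in 30 seconds', now=now)
    assert starting_ts == now + timedelta(seconds=30)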
meerschaum/utils/sql.py
CHANGED
@@ -108,24 +108,30 @@ update_queries = {
         {cols_equal_values}
     """,
     'mssql': """
+        {with_temp_date_bounds}
         MERGE {target_table_name} f
-        USING (SELECT
+        USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
             ON {and_subquery_f}
             AND {date_bounds_subquery}
         WHEN MATCHED THEN
             UPDATE
             {sets_subquery_none};
     """,
-    'mssql-upsert': """
+    'mssql-upsert': [
+        "{identity_insert_on}",
+        """
+        {with_temp_date_bounds}
         MERGE {target_table_name} f
-        USING (SELECT
+        USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
             ON {and_subquery_f}
             AND {date_bounds_subquery}
         {when_matched_update_sets_subquery_none}
         WHEN NOT MATCHED THEN
             INSERT ({patch_cols_str})
             VALUES ({patch_cols_prefixed_str});
-    """,
+        """,
+        "{identity_insert_off}",
+    ],
     'oracle': """
         MERGE INTO {target_table_name} f
         USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
@@ -425,20 +431,10 @@ reset_autoincrement_queries: Dict[str, Union[str, List[str]]] = {
         SET seq = {val}
         WHERE name = '{table}'
     """,
-    'oracle':
-        ""
-
-
-            current_val NUMBER;
-        BEGIN
-            SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
-
-            WHILE current_val < max_id LOOP
-                SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
-            END LOOP;
-        END;
-        """,
-    ],
+    'oracle': (
+        "ALTER TABLE {table_name} MODIFY {column_name} "
+        "GENERATED BY DEFAULT ON NULL AS IDENTITY (START WITH {val_plus_1})"
+    ),
 }
 table_wrappers = {
     'default' : ('"', '"'),
@@ -1066,7 +1062,7 @@ def get_sqlalchemy_table(
             connector.metadata,
             **table_kwargs
         )
-    except sqlalchemy.exc.NoSuchTableError
+    except sqlalchemy.exc.NoSuchTableError:
         warn(f"Table '{truncated_table_name}' does not exist in '{connector}'.")
         return None
     return tables[truncated_table_name]
@@ -1393,6 +1389,7 @@ get_update_queries(
     datetime_col: Optional[str] = None,
     schema: Optional[str] = None,
     patch_schema: Optional[str] = None,
+    identity_insert: bool = False,
     debug: bool = False,
 ) -> List[str]:
     """
@@ -1430,6 +1427,10 @@ get_update_queries(
         If provided, use this schema when quoting the patch table.
         Defaults to `schema`.
 
+    identity_insert: bool, default False
+        If `True`, include `SET IDENTITY_INSERT` queries before and after the update queries.
+        Only applies for MSSQL upserts.
+
     debug: bool, default False
         Verbosity toggle.
 
@@ -1564,17 +1565,36 @@ get_update_queries(
         ) for c_name, c_type in join_cols_types
     ])
 
+    skip_query_val = ""
     target_table_name = sql_item_name(target, flavor, schema)
     patch_table_name = sql_item_name(patch, flavor, patch_schema)
     dt_col_name = sql_item_name(datetime_col, flavor, None) if datetime_col else None
+    date_bounds_table = patch_table_name if flavor != 'mssql' else '[date_bounds]'
+    min_dt_col_name = f"MIN({dt_col_name})" if flavor != 'mssql' else '[Min_dt]'
+    max_dt_col_name = f"MAX({dt_col_name})" if flavor != 'mssql' else '[Max_dt]'
     date_bounds_subquery = (
         f"""
-        f.{dt_col_name} >= (SELECT
-        AND f.{dt_col_name} <= (SELECT
+        f.{dt_col_name} >= (SELECT {min_dt_col_name} FROM {date_bounds_table})
+        AND f.{dt_col_name} <= (SELECT {max_dt_col_name} FROM {date_bounds_table})
         """
         if datetime_col
        else "1 = 1"
     )
+    with_temp_date_bounds = f"""
+    WITH [date_bounds] AS (
+        SELECT MIN({dt_col_name}) AS {min_dt_col_name}, MAX({dt_col_name}) AS {max_dt_col_name}
+        FROM {patch_table_name}
+    )""" if datetime_col else ""
+    identity_insert_on = (
+        f"SET IDENTITY_INSERT {target_table_name} ON"
+        if identity_insert
+        else skip_query_val
+    )
+    identity_insert_off = (
+        f"SET IDENTITY_INSERT {target_table_name} OFF"
+        if identity_insert
+        else skip_query_val
+    )
 
     ### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
     when_matched_update_sets_subquery_none = "" if not value_cols else (
@@ -1595,7 +1615,7 @@ get_update_queries(
     )
     ignore = "IGNORE " if not value_cols else ""
 
-    return [
+    formatted_queries = [
         base_query.format(
             sets_subquery_none=sets_subquery('', 'p.'),
             sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
@@ -1614,10 +1634,16 @@ get_update_queries(
             cols_equal_values=cols_equal_values,
             on_duplicate_key_update=on_duplicate_key_update,
             ignore=ignore,
+            with_temp_date_bounds=with_temp_date_bounds,
+            identity_insert_on=identity_insert_on,
+            identity_insert_off=identity_insert_off,
         )
         for base_query in base_queries
     ]
 
+    ### NOTE: Allow for skipping some queries.
+    return [query for query in formatted_queries if query]
+
 
 def get_null_replacement(typ: str, flavor: str) -> str:
     """
@@ -1867,7 +1893,17 @@ _get_create_table_query_from_dtypes(
 
     table_name = sql_item_name(new_table, schema=schema, flavor=flavor)
     primary_key_name = sql_item_name(primary_key, flavor) if primary_key else None
+    primary_key_constraint_name = (
+        sql_item_name(f'PK_{new_table}', flavor, None)
+        if primary_key
+        else None
+    )
     datetime_column_name = sql_item_name(datetime_column, flavor) if datetime_column else None
+    primary_key_clustered = (
+        "CLUSTERED"
+        if not datetime_column or datetime_column == primary_key
+        else "NONCLUSTERED"
+    )
     query = f"CREATE TABLE {table_name} ("
     if primary_key:
         col_db_type = cols_types[0][1]
@@ -1887,6 +1923,8 @@ _get_create_table_query_from_dtypes(
             query += f"\n    {col_name} {col_db_type} {auto_increment_str} PRIMARY KEY,"
         elif flavor == 'timescaledb' and datetime_column and datetime_column != primary_key:
             query += f"\n    {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
+        elif flavor == 'mssql':
+            query += f"\n    {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
         else:
             query += f"\n    {col_name} {col_db_type} PRIMARY KEY{auto_increment_str} NOT NULL,"
 
@@ -1902,6 +1940,10 @@ _get_create_table_query_from_dtypes(
         and datetime_column != primary_key
     ):
         query += f"\n    PRIMARY KEY({datetime_column_name}, {primary_key_name}),"
+
+    if flavor == 'mssql' and primary_key:
+        query += f"\n    CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name}),"
+
     query = query[:-1]
     query += "\n)"
 
@@ -1927,7 +1969,7 @@ _get_create_table_query_from_cte(
     create_cte_name = sql_item_name(create_cte, flavor, None)
     new_table_name = sql_item_name(new_table, flavor, schema)
     primary_key_constraint_name = (
-        sql_item_name(f'
+        sql_item_name(f'PK_{new_table}', flavor, None)
         if primary_key
         else None
     )
@@ -1936,6 +1978,7 @@ _get_create_table_query_from_cte(
         if primary_key
         else None
     )
+    primary_key_clustered = "CLUSTERED" if not datetime_column else "NONCLUSTERED"
     datetime_column_name = (
         sql_item_name(datetime_column, flavor)
         if datetime_column
@@ -1943,7 +1986,7 @@ _get_create_table_query_from_cte(
     )
     if flavor in ('mssql',):
         query = query.lstrip()
-        if
+        if query.lower().startswith('with '):
             final_select_ix = query.lower().rfind('select')
             create_table_query = (
                 query[:final_select_ix].rstrip() + ',\n'
@@ -1961,7 +2004,7 @@ _get_create_table_query_from_cte(
 
         alter_type_query = f"""
             ALTER TABLE {new_table_name}
-            ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})
+            ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})
         """
     elif flavor in (None,):
         create_table_query = f"""
@@ -2225,29 +2268,8 @@ get_reset_autoincrement_queries(
     schema = schema or connector.schema
     max_id_name = sql_item_name('max_id', connector.flavor)
     table_name = sql_item_name(table, connector.flavor, schema)
-    table_trunc = truncate_item_name(table, connector.flavor)
     table_seq_name = sql_item_name(table + '_' + column + '_seq', connector.flavor, schema)
     column_name = sql_item_name(column, connector.flavor)
-    if connector.flavor == 'oracle':
-        potential_table_names = set([
-            f"'{table_trunc.upper()}'",
-            f"'{table_trunc}'",
-            f"'{table_name}'",
-            f"'{table_name.upper()}'",
-        ])
-        df = connector.read(
-            """
-            SELECT SEQUENCE_NAME
-            FROM ALL_TAB_IDENTITY_COLS
-            WHERE TABLE_NAME IN ("""
-            + ", ".join([name for name in potential_table_names])
-            + """)
-            """,
-            debug=debug
-        )
-        if len(df) > 0:
-            table_seq_name = df['sequence_name'][0]
-
     max_id = connector.value(
         f"""
         SELECT COALESCE(MAX({column_name}), 0) AS {max_id_name}
@@ -2272,7 +2294,8 @@ get_reset_autoincrement_queries(
             table=table,
             table_name=table_name,
             table_seq_name=table_seq_name,
-            val=
+            val=max_id,
+            val_plus_1=(max_id + 1),
         )
         for query in reset_queries
     ]
meerschaum/utils/venv/_Venv.py
CHANGED
@@ -34,10 +34,10 @@ class Venv:
     """
 
     def __init__(
-
-
-
-
+        self,
+        venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm',
+        debug: bool = False,
+    ) -> None:
         from meerschaum.utils.venv import activate_venv, deactivate_venv, active_venvs
         ### For some weird threading issue,
         ### we can't use `isinstance` here.