meerschaum 2.7.7__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/term/TermPageHandler.py +54 -4
- meerschaum/_internal/term/__init__.py +13 -5
- meerschaum/_internal/term/tools.py +41 -6
- meerschaum/actions/copy.py +1 -0
- meerschaum/actions/start.py +25 -10
- meerschaum/api/dash/callbacks/dashboard.py +43 -2
- meerschaum/api/dash/components.py +13 -6
- meerschaum/api/dash/keys.py +82 -108
- meerschaum/api/dash/pages/dashboard.py +17 -17
- meerschaum/api/dash/sessions.py +1 -0
- meerschaum/api/dash/webterm.py +17 -6
- meerschaum/api/resources/static/js/terminado.js +0 -2
- meerschaum/api/resources/templates/termpage.html +47 -4
- meerschaum/api/routes/_webterm.py +15 -11
- meerschaum/config/_default.py +6 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +2 -2
- meerschaum/connectors/sql/_SQLConnector.py +2 -9
- meerschaum/connectors/sql/_fetch.py +5 -30
- meerschaum/connectors/sql/_pipes.py +7 -4
- meerschaum/connectors/sql/_sql.py +56 -31
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
- meerschaum/core/Pipe/_fetch.py +4 -0
- meerschaum/core/Pipe/_sync.py +22 -15
- meerschaum/core/Pipe/_verify.py +1 -1
- meerschaum/utils/daemon/Daemon.py +24 -11
- meerschaum/utils/daemon/RotatingFile.py +3 -3
- meerschaum/utils/dataframe.py +42 -12
- meerschaum/utils/dtypes/__init__.py +153 -24
- meerschaum/utils/dtypes/sql.py +58 -9
- meerschaum/utils/formatting/__init__.py +2 -2
- meerschaum/utils/formatting/_pprint.py +13 -12
- meerschaum/utils/misc.py +32 -18
- meerschaum/utils/prompt.py +1 -1
- meerschaum/utils/sql.py +26 -8
- meerschaum/utils/venv/__init__.py +10 -14
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/METADATA +1 -1
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/RECORD +44 -44
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.9.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -85,6 +85,7 @@ def filter_unseen_df(
|
|
85
85
|
safe_copy: bool = True,
|
86
86
|
dtypes: Optional[Dict[str, Any]] = None,
|
87
87
|
include_unchanged_columns: bool = False,
|
88
|
+
coerce_mixed_numerics: bool = True,
|
88
89
|
debug: bool = False,
|
89
90
|
) -> 'pd.DataFrame':
|
90
91
|
"""
|
@@ -108,6 +109,10 @@ def filter_unseen_df(
|
|
108
109
|
include_unchanged_columns: bool, default False
|
109
110
|
If `True`, include columns which haven't changed on rows which have changed.
|
110
111
|
|
112
|
+
coerce_mixed_numerics: bool, default True
|
113
|
+
If `True`, cast mixed integer and float columns between the old and new dataframes into
|
114
|
+
numeric values (`decimal.Decimal`).
|
115
|
+
|
111
116
|
debug: bool, default False
|
112
117
|
Verbosity toggle.
|
113
118
|
|
@@ -138,7 +143,6 @@ def filter_unseen_df(
|
|
138
143
|
import json
|
139
144
|
import functools
|
140
145
|
import traceback
|
141
|
-
from decimal import Decimal
|
142
146
|
from meerschaum.utils.warnings import warn
|
143
147
|
from meerschaum.utils.packages import import_pandas, attempt_import
|
144
148
|
from meerschaum.utils.dtypes import (
|
@@ -148,7 +152,9 @@ def filter_unseen_df(
|
|
148
152
|
attempt_cast_to_uuid,
|
149
153
|
attempt_cast_to_bytes,
|
150
154
|
coerce_timezone,
|
155
|
+
serialize_decimal,
|
151
156
|
)
|
157
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
152
158
|
pd = import_pandas(debug=debug)
|
153
159
|
is_dask = 'dask' in new_df.__module__
|
154
160
|
if is_dask:
|
@@ -211,6 +217,12 @@ def filter_unseen_df(
|
|
211
217
|
if col not in dtypes:
|
212
218
|
dtypes[col] = typ
|
213
219
|
|
220
|
+
numeric_cols_precisions_scales = {
|
221
|
+
col: get_numeric_precision_scale(None, typ)
|
222
|
+
for col, typ in dtypes.items()
|
223
|
+
if col and typ and typ.startswith('numeric')
|
224
|
+
}
|
225
|
+
|
214
226
|
dt_dtypes = {
|
215
227
|
col: typ
|
216
228
|
for col, typ in dtypes.items()
|
@@ -259,6 +271,8 @@ def filter_unseen_df(
|
|
259
271
|
old_is_numeric = col in old_numeric_cols
|
260
272
|
|
261
273
|
if (
|
274
|
+
coerce_mixed_numerics
|
275
|
+
and
|
262
276
|
(new_is_float or new_is_int or new_is_numeric)
|
263
277
|
and
|
264
278
|
(old_is_float or old_is_int or old_is_numeric)
|
@@ -300,13 +314,9 @@ def filter_unseen_df(
|
|
300
314
|
new_numeric_cols = get_numeric_cols(new_df)
|
301
315
|
numeric_cols = set(new_numeric_cols + old_numeric_cols)
|
302
316
|
for numeric_col in old_numeric_cols:
|
303
|
-
old_df[numeric_col] = old_df[numeric_col].apply(
|
304
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
305
|
-
)
|
317
|
+
old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
|
306
318
|
for numeric_col in new_numeric_cols:
|
307
|
-
new_df[numeric_col] = new_df[numeric_col].apply(
|
308
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
309
|
-
)
|
319
|
+
new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
|
310
320
|
|
311
321
|
old_dt_cols = [
|
312
322
|
col
|
@@ -361,7 +371,14 @@ def filter_unseen_df(
|
|
361
371
|
if numeric_col not in delta_df.columns:
|
362
372
|
continue
|
363
373
|
try:
|
364
|
-
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
374
|
+
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
375
|
+
functools.partial(
|
376
|
+
attempt_cast_to_numeric,
|
377
|
+
quantize=True,
|
378
|
+
precision=numeric_cols_precisions_scales.get(numeric_col, (None, None)[0]),
|
379
|
+
scale=numeric_cols_precisions_scales.get(numeric_col, (None, None)[1]),
|
380
|
+
)
|
381
|
+
)
|
365
382
|
except Exception:
|
366
383
|
warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
|
367
384
|
|
@@ -882,6 +899,7 @@ def enforce_dtypes(
|
|
882
899
|
The Pandas DataFrame with the types enforced.
|
883
900
|
"""
|
884
901
|
import json
|
902
|
+
import functools
|
885
903
|
from meerschaum.utils.debug import dprint
|
886
904
|
from meerschaum.utils.formatting import pprint
|
887
905
|
from meerschaum.utils.dtypes import (
|
@@ -893,6 +911,7 @@ def enforce_dtypes(
|
|
893
911
|
attempt_cast_to_bytes,
|
894
912
|
coerce_timezone as _coerce_timezone,
|
895
913
|
)
|
914
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
896
915
|
pandas = mrsm.attempt_import('pandas')
|
897
916
|
is_dask = 'dask' in df.__module__
|
898
917
|
if safe_copy:
|
@@ -914,7 +933,7 @@ def enforce_dtypes(
|
|
914
933
|
numeric_cols = [
|
915
934
|
col
|
916
935
|
for col, typ in dtypes.items()
|
917
|
-
if typ
|
936
|
+
if typ.startswith('numeric')
|
918
937
|
]
|
919
938
|
uuid_cols = [
|
920
939
|
col
|
@@ -961,9 +980,17 @@ def enforce_dtypes(
|
|
961
980
|
if debug:
|
962
981
|
dprint(f"Checking for numerics: {numeric_cols}")
|
963
982
|
for col in numeric_cols:
|
983
|
+
precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
|
964
984
|
if col in df.columns:
|
965
985
|
try:
|
966
|
-
df[col] = df[col].apply(
|
986
|
+
df[col] = df[col].apply(
|
987
|
+
functools.partial(
|
988
|
+
attempt_cast_to_numeric,
|
989
|
+
quantize=True,
|
990
|
+
precision=precision,
|
991
|
+
scale=scale,
|
992
|
+
)
|
993
|
+
)
|
967
994
|
except Exception as e:
|
968
995
|
if debug:
|
969
996
|
dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
|
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
|
|
1040
1067
|
previous_typ = common_dtypes[col]
|
1041
1068
|
mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
|
1042
1069
|
explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
|
1043
|
-
explicitly_numeric = dtypes.get(col, 'numeric')
|
1070
|
+
explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
|
1044
1071
|
cast_to_numeric = (
|
1045
1072
|
explicitly_numeric
|
1046
1073
|
or col in df_numeric_cols
|
@@ -1574,16 +1601,19 @@ def to_json(
|
|
1574
1601
|
A JSON string.
|
1575
1602
|
"""
|
1576
1603
|
from meerschaum.utils.packages import import_pandas
|
1577
|
-
from meerschaum.utils.dtypes import serialize_bytes
|
1604
|
+
from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
|
1578
1605
|
pd = import_pandas()
|
1579
1606
|
uuid_cols = get_uuid_cols(df)
|
1580
1607
|
bytes_cols = get_bytes_cols(df)
|
1608
|
+
numeric_cols = get_numeric_cols(df)
|
1581
1609
|
if safe_copy and bool(uuid_cols or bytes_cols):
|
1582
1610
|
df = df.copy()
|
1583
1611
|
for col in uuid_cols:
|
1584
1612
|
df[col] = df[col].astype(str)
|
1585
1613
|
for col in bytes_cols:
|
1586
1614
|
df[col] = df[col].apply(serialize_bytes)
|
1615
|
+
for col in numeric_cols:
|
1616
|
+
df[col] = df[col].apply(serialize_decimal)
|
1587
1617
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1588
1618
|
date_format=date_format,
|
1589
1619
|
date_unit=date_unit,
|
@@ -8,15 +8,16 @@ Utility functions for working with data types.
|
|
8
8
|
|
9
9
|
import traceback
|
10
10
|
import uuid
|
11
|
-
from datetime import timezone
|
12
|
-
from decimal import Decimal, Context, InvalidOperation
|
11
|
+
from datetime import timezone, datetime
|
12
|
+
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any
|
15
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
18
18
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
19
19
|
'decimal': 'numeric',
|
20
|
+
'Decimal': 'numeric',
|
20
21
|
'number': 'numeric',
|
21
22
|
'jsonl': 'json',
|
22
23
|
'JSON': 'json',
|
@@ -56,6 +57,9 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
56
57
|
if alias_dtype is not None:
|
57
58
|
return MRSM_PD_DTYPES[alias_dtype]
|
58
59
|
|
60
|
+
if dtype.startswith('numeric'):
|
61
|
+
return MRSM_PD_DTYPES['numeric']
|
62
|
+
|
59
63
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
60
64
|
### treat it as a SQL db type.
|
61
65
|
if dtype.split(' ')[0].isupper():
|
@@ -118,8 +122,14 @@ def are_dtypes_equal(
|
|
118
122
|
return False
|
119
123
|
|
120
124
|
### Sometimes pandas dtype objects are passed.
|
121
|
-
ldtype = str(ldtype)
|
122
|
-
rdtype = str(rdtype)
|
125
|
+
ldtype = str(ldtype).split('[', maxsplit=1)[0]
|
126
|
+
rdtype = str(rdtype).split('[', maxsplit=1)[0]
|
127
|
+
|
128
|
+
if ldtype in MRSM_ALIAS_DTYPES:
|
129
|
+
ldtype = MRSM_ALIAS_DTYPES[ldtype]
|
130
|
+
|
131
|
+
if rdtype in MRSM_ALIAS_DTYPES:
|
132
|
+
rdtype = MRSM_ALIAS_DTYPES[rdtype]
|
123
133
|
|
124
134
|
json_dtypes = ('json', 'object')
|
125
135
|
if ldtype in json_dtypes and rdtype in json_dtypes:
|
@@ -137,10 +147,7 @@ def are_dtypes_equal(
|
|
137
147
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
138
148
|
return True
|
139
149
|
|
140
|
-
|
141
|
-
rdtype_clean = rdtype.split('[', maxsplit=1)[0]
|
142
|
-
|
143
|
-
if ldtype_clean.lower() == rdtype_clean.lower():
|
150
|
+
if ldtype.lower() == rdtype.lower():
|
144
151
|
return True
|
145
152
|
|
146
153
|
datetime_dtypes = ('datetime', 'timestamp')
|
@@ -153,19 +160,19 @@ def are_dtypes_equal(
|
|
153
160
|
return True
|
154
161
|
|
155
162
|
string_dtypes = ('str', 'string', 'object')
|
156
|
-
if
|
163
|
+
if ldtype in string_dtypes and rdtype in string_dtypes:
|
157
164
|
return True
|
158
165
|
|
159
166
|
int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
|
160
|
-
if
|
167
|
+
if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
|
161
168
|
return True
|
162
169
|
|
163
170
|
float_dtypes = ('float', 'float64', 'float32', 'float16', 'float128', 'double')
|
164
|
-
if
|
171
|
+
if ldtype.lower() in float_dtypes and rdtype.lower() in float_dtypes:
|
165
172
|
return True
|
166
173
|
|
167
174
|
bool_dtypes = ('bool', 'boolean')
|
168
|
-
if
|
175
|
+
if ldtype in bool_dtypes and rdtype in bool_dtypes:
|
169
176
|
return True
|
170
177
|
|
171
178
|
return False
|
@@ -195,18 +202,45 @@ def is_dtype_numeric(dtype: str) -> bool:
|
|
195
202
|
return False
|
196
203
|
|
197
204
|
|
198
|
-
def attempt_cast_to_numeric(
|
205
|
+
def attempt_cast_to_numeric(
|
206
|
+
value: Any,
|
207
|
+
quantize: bool = False,
|
208
|
+
precision: Optional[int] = None,
|
209
|
+
scale: Optional[int] = None,
|
210
|
+
)-> Any:
|
199
211
|
"""
|
200
212
|
Given a value, attempt to coerce it into a numeric (Decimal).
|
213
|
+
|
214
|
+
Parameters
|
215
|
+
----------
|
216
|
+
value: Any
|
217
|
+
The value to be cast to a Decimal.
|
218
|
+
|
219
|
+
quantize: bool, default False
|
220
|
+
If `True`, quantize the decimal to the specified precision and scale.
|
221
|
+
|
222
|
+
precision: Optional[int], default None
|
223
|
+
If `quantize` is `True`, use this precision.
|
224
|
+
|
225
|
+
scale: Optional[int], default None
|
226
|
+
If `quantize` is `True`, use this scale.
|
227
|
+
|
228
|
+
Returns
|
229
|
+
-------
|
230
|
+
A `Decimal` if possible, or `value`.
|
201
231
|
"""
|
202
232
|
if isinstance(value, Decimal):
|
233
|
+
if quantize and precision and scale:
|
234
|
+
return quantize_decimal(value, precision, scale)
|
203
235
|
return value
|
204
236
|
try:
|
205
|
-
|
206
|
-
Decimal(
|
207
|
-
|
208
|
-
|
209
|
-
|
237
|
+
if value_is_null(value):
|
238
|
+
return Decimal('NaN')
|
239
|
+
|
240
|
+
dec = Decimal(str(value))
|
241
|
+
if not quantize or not precision or not scale:
|
242
|
+
return dec
|
243
|
+
return quantize_decimal(dec, precision, scale)
|
210
244
|
except Exception:
|
211
245
|
return value
|
212
246
|
|
@@ -257,7 +291,7 @@ def none_if_null(value: Any) -> Any:
|
|
257
291
|
return (None if value_is_null(value) else value)
|
258
292
|
|
259
293
|
|
260
|
-
def quantize_decimal(x: Decimal,
|
294
|
+
def quantize_decimal(x: Decimal, precision: int, scale: int) -> Decimal:
|
261
295
|
"""
|
262
296
|
Quantize a given `Decimal` to a known scale and precision.
|
263
297
|
|
@@ -266,22 +300,64 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
|
|
266
300
|
x: Decimal
|
267
301
|
The `Decimal` to be quantized.
|
268
302
|
|
269
|
-
|
303
|
+
precision: int
|
270
304
|
The total number of significant digits.
|
271
305
|
|
272
|
-
|
306
|
+
scale: int
|
273
307
|
The number of significant digits after the decimal point.
|
274
308
|
|
275
309
|
Returns
|
276
310
|
-------
|
277
311
|
A `Decimal` quantized to the specified scale and precision.
|
278
312
|
"""
|
279
|
-
precision_decimal = Decimal((
|
313
|
+
precision_decimal = Decimal(('1' * (precision - scale)) + '.' + ('1' * scale))
|
280
314
|
try:
|
281
|
-
return x.quantize(precision_decimal, context=Context(prec=
|
315
|
+
return x.quantize(precision_decimal, context=Context(prec=precision), rounding=ROUND_HALF_UP)
|
282
316
|
except InvalidOperation:
|
317
|
+
pass
|
318
|
+
|
319
|
+
raise ValueError(f"Cannot quantize value '{x}' to {precision=}, {scale=}.")
|
320
|
+
|
321
|
+
|
322
|
+
def serialize_decimal(
|
323
|
+
x: Any,
|
324
|
+
quantize: bool = False,
|
325
|
+
precision: Optional[int] = None,
|
326
|
+
scale: Optional[int] = None,
|
327
|
+
) -> Any:
|
328
|
+
"""
|
329
|
+
Return a quantized string of an input decimal.
|
330
|
+
|
331
|
+
Parameters
|
332
|
+
----------
|
333
|
+
x: Any
|
334
|
+
The potential decimal to be serialized.
|
335
|
+
|
336
|
+
quantize: bool, default False
|
337
|
+
If `True`, quantize the incoming Decimal to the specified scale and precision
|
338
|
+
before serialization.
|
339
|
+
|
340
|
+
precision: Optional[int], default None
|
341
|
+
The precision of the decimal to be quantized.
|
342
|
+
|
343
|
+
scale: Optional[int], default None
|
344
|
+
The scale of the decimal to be quantized.
|
345
|
+
|
346
|
+
Returns
|
347
|
+
-------
|
348
|
+
A string of the input decimal or the input if not a Decimal.
|
349
|
+
"""
|
350
|
+
if not isinstance(x, Decimal):
|
283
351
|
return x
|
284
352
|
|
353
|
+
if value_is_null(x):
|
354
|
+
return None
|
355
|
+
|
356
|
+
if quantize and scale and precision:
|
357
|
+
x = quantize_decimal(x, precision, scale)
|
358
|
+
|
359
|
+
return f"{x:f}"
|
360
|
+
|
285
361
|
|
286
362
|
def coerce_timezone(
|
287
363
|
dt: Any,
|
@@ -434,3 +510,56 @@ def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None:
|
|
434
510
|
if not isinstance(data, bytes) and value_is_null(data):
|
435
511
|
return data
|
436
512
|
return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
|
513
|
+
|
514
|
+
|
515
|
+
def serialize_datetime(dt: datetime) -> Union[str, None]:
|
516
|
+
"""
|
517
|
+
Serialize a datetime object into JSON (ISO format string).
|
518
|
+
|
519
|
+
Examples
|
520
|
+
--------
|
521
|
+
>>> import json
|
522
|
+
>>> from datetime import datetime
|
523
|
+
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
524
|
+
'{"a": "2022-01-01T00:00:00Z"}'
|
525
|
+
|
526
|
+
"""
|
527
|
+
if not isinstance(dt, datetime):
|
528
|
+
return None
|
529
|
+
tz_suffix = 'Z' if dt.tzinfo is None else ''
|
530
|
+
return dt.isoformat() + tz_suffix
|
531
|
+
|
532
|
+
|
533
|
+
def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
534
|
+
"""
|
535
|
+
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
|
536
|
+
|
537
|
+
Parameters
|
538
|
+
----------
|
539
|
+
x: Any
|
540
|
+
The value to serialize.
|
541
|
+
|
542
|
+
default_to_str: bool, default True
|
543
|
+
If `True`, return a string of `x` if x is not a designated type.
|
544
|
+
Otherwise return x.
|
545
|
+
|
546
|
+
Returns
|
547
|
+
-------
|
548
|
+
A serialized version of x, or x.
|
549
|
+
"""
|
550
|
+
if isinstance(x, (mrsm.Pipe, mrsm.connectors.Connector)):
|
551
|
+
return x.meta
|
552
|
+
|
553
|
+
if hasattr(x, 'tzinfo'):
|
554
|
+
return serialize_datetime(x)
|
555
|
+
|
556
|
+
if isinstance(x, bytes):
|
557
|
+
return serialize_bytes(x)
|
558
|
+
|
559
|
+
if isinstance(x, Decimal):
|
560
|
+
return serialize_decimal(x)
|
561
|
+
|
562
|
+
if value_is_null(x):
|
563
|
+
return None
|
564
|
+
|
565
|
+
return str(x) if default_to_str else x
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple, Optional
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
@@ -170,7 +170,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
170
170
|
'mariadb': 'DATETIME',
|
171
171
|
'mysql': 'DATETIME',
|
172
172
|
'mssql': 'DATETIME2',
|
173
|
-
'oracle': 'TIMESTAMP',
|
173
|
+
'oracle': 'TIMESTAMP(9)',
|
174
174
|
'sqlite': 'DATETIME',
|
175
175
|
'duckdb': 'TIMESTAMP',
|
176
176
|
'citus': 'TIMESTAMP',
|
@@ -183,7 +183,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
183
183
|
'mariadb': 'DATETIME',
|
184
184
|
'mysql': 'DATETIME',
|
185
185
|
'mssql': 'DATETIMEOFFSET',
|
186
|
-
'oracle': 'TIMESTAMP',
|
186
|
+
'oracle': 'TIMESTAMP(9)',
|
187
187
|
'sqlite': 'TIMESTAMP',
|
188
188
|
'duckdb': 'TIMESTAMPTZ',
|
189
189
|
'citus': 'TIMESTAMPTZ',
|
@@ -196,7 +196,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
196
196
|
'mariadb': 'DATETIME',
|
197
197
|
'mysql': 'DATETIME',
|
198
198
|
'mssql': 'DATETIMEOFFSET',
|
199
|
-
'oracle': 'TIMESTAMP',
|
199
|
+
'oracle': 'TIMESTAMP(9)',
|
200
200
|
'sqlite': 'TIMESTAMP',
|
201
201
|
'duckdb': 'TIMESTAMPTZ',
|
202
202
|
'citus': 'TIMESTAMPTZ',
|
@@ -470,7 +470,7 @@ AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
|
|
470
470
|
}
|
471
471
|
|
472
472
|
|
473
|
-
def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool =
|
473
|
+
def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool = True) -> str:
|
474
474
|
"""
|
475
475
|
Parse a database type to a pandas data type.
|
476
476
|
|
@@ -486,12 +486,17 @@ def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool = False) ->
|
|
486
486
|
-------
|
487
487
|
The equivalent datatype for a pandas DataFrame.
|
488
488
|
"""
|
489
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
489
490
|
def parse_custom(_pd_type: str, _db_type: str) -> str:
|
490
491
|
if 'json' in _db_type.lower():
|
491
492
|
return 'json'
|
493
|
+
if are_dtypes_equal(_pd_type, 'numeric') and _pd_type != 'object':
|
494
|
+
precision, scale = get_numeric_precision_scale(None, dtype=_db_type.upper())
|
495
|
+
if precision and scale:
|
496
|
+
return f"numeric[{precision},{scale}]"
|
492
497
|
return _pd_type
|
493
498
|
|
494
|
-
pd_type = DB_TO_PD_DTYPES.get(db_type.upper(), None)
|
499
|
+
pd_type = DB_TO_PD_DTYPES.get(db_type.upper().split('(', maxsplit=1)[0].strip(), None)
|
495
500
|
if pd_type is not None:
|
496
501
|
return (
|
497
502
|
parse_custom(pd_type, db_type)
|
@@ -544,17 +549,24 @@ def get_db_type_from_pd_type(
|
|
544
549
|
else PD_TO_SQLALCHEMY_DTYPES_FLAVORS
|
545
550
|
)
|
546
551
|
|
552
|
+
precision, scale = None, None
|
553
|
+
og_pd_type = pd_type
|
547
554
|
if pd_type in MRSM_ALIAS_DTYPES:
|
548
555
|
pd_type = MRSM_ALIAS_DTYPES[pd_type]
|
549
556
|
|
550
557
|
### Check whether we are able to match this type (e.g. pyarrow support).
|
551
558
|
found_db_type = False
|
552
|
-
if pd_type not in types_registry:
|
559
|
+
if pd_type not in types_registry and not pd_type.startswith('numeric['):
|
553
560
|
for mapped_pd_type in types_registry:
|
554
561
|
if are_dtypes_equal(mapped_pd_type, pd_type):
|
555
562
|
pd_type = mapped_pd_type
|
556
563
|
found_db_type = True
|
557
564
|
break
|
565
|
+
elif pd_type.startswith('numeric['):
|
566
|
+
og_pd_type = pd_type
|
567
|
+
pd_type = 'numeric'
|
568
|
+
precision, scale = get_numeric_precision_scale(flavor, og_pd_type)
|
569
|
+
found_db_type = True
|
558
570
|
else:
|
559
571
|
found_db_type = True
|
560
572
|
|
@@ -587,6 +599,9 @@ def get_db_type_from_pd_type(
|
|
587
599
|
warn(f"Unknown flavor '{flavor}'. Falling back to '{default_flavor_type}' (default).")
|
588
600
|
db_type = flavor_types.get(flavor, default_flavor_type)
|
589
601
|
if not as_sqlalchemy:
|
602
|
+
if precision is not None and scale is not None:
|
603
|
+
db_type_bare = db_type.split('(', maxsplit=1)[0]
|
604
|
+
return f"{db_type_bare}({precision},{scale})"
|
590
605
|
return db_type
|
591
606
|
|
592
607
|
if db_type.startswith('sqlalchemy.dialects'):
|
@@ -603,9 +618,8 @@ def get_db_type_from_pd_type(
|
|
603
618
|
return cls(*cls_args, **cls_kwargs)
|
604
619
|
|
605
620
|
if 'numeric' in db_type.lower():
|
606
|
-
if
|
621
|
+
if precision is None or scale is None:
|
607
622
|
return sqlalchemy_types.Numeric
|
608
|
-
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
609
623
|
return sqlalchemy_types.Numeric(precision, scale)
|
610
624
|
|
611
625
|
cls_args, cls_kwargs = None, None
|
@@ -619,3 +633,38 @@ def get_db_type_from_pd_type(
|
|
619
633
|
if cls_args is None:
|
620
634
|
return cls
|
621
635
|
return cls(*cls_args, **cls_kwargs)
|
636
|
+
|
637
|
+
|
638
|
+
def get_numeric_precision_scale(
|
639
|
+
flavor: str,
|
640
|
+
dtype: Optional[str] = None,
|
641
|
+
) -> Union[Tuple[int, int], Tuple[None, None]]:
|
642
|
+
"""
|
643
|
+
Return the precision and scale to use for a numeric column for a given database flavor.
|
644
|
+
|
645
|
+
Parameters
|
646
|
+
----------
|
647
|
+
flavor: str
|
648
|
+
The database flavor for which to return the precision and scale.
|
649
|
+
|
650
|
+
dtype: Optional[str], default None
|
651
|
+
If provided, return the precision and scale provided in the dtype (if applicable).
|
652
|
+
If all caps, treat this as a DB type.
|
653
|
+
|
654
|
+
Returns
|
655
|
+
-------
|
656
|
+
A tuple of ints or a tuple of Nones.
|
657
|
+
"""
|
658
|
+
if not dtype:
|
659
|
+
return None, None
|
660
|
+
|
661
|
+
lbracket = '[' if '[' in dtype else '('
|
662
|
+
rbracket = ']' if lbracket == '[' else ')'
|
663
|
+
if lbracket in dtype and dtype.count(',') == 1 and dtype.endswith(rbracket):
|
664
|
+
try:
|
665
|
+
parts = dtype.split(lbracket, maxsplit=1)[-1].rstrip(rbracket).split(',', maxsplit=1)
|
666
|
+
return int(parts[0].strip()), int(parts[1].strip())
|
667
|
+
except Exception:
|
668
|
+
pass
|
669
|
+
|
670
|
+
return NUMERIC_PRECISION_FLAVORS.get(flavor, (None, None))
|
@@ -217,8 +217,8 @@ def print_tuple(
|
|
217
217
|
tup: mrsm.SuccessTuple,
|
218
218
|
skip_common: bool = True,
|
219
219
|
common_only: bool = False,
|
220
|
-
upper_padding: int =
|
221
|
-
lower_padding: int =
|
220
|
+
upper_padding: int = 1,
|
221
|
+
lower_padding: int = 1,
|
222
222
|
left_padding: int = 1,
|
223
223
|
calm: bool = False,
|
224
224
|
_progress: Optional['rich.progress.Progress'] = None,
|
@@ -7,21 +7,22 @@ Pretty printing wrapper
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
def pprint(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
*args,
|
11
|
+
detect_password: bool = True,
|
12
|
+
nopretty: bool = False,
|
13
|
+
**kw
|
14
|
+
) -> None:
|
15
15
|
"""Pretty print an object according to the configured ANSI and UNICODE settings.
|
16
16
|
If detect_password is True (default), search and replace passwords with '*' characters.
|
17
17
|
Does not mutate objects.
|
18
18
|
"""
|
19
|
+
import copy
|
20
|
+
import json
|
19
21
|
from meerschaum.utils.packages import attempt_import, import_rich
|
20
|
-
from meerschaum.utils.formatting import ANSI,
|
22
|
+
from meerschaum.utils.formatting import ANSI, get_console, print_tuple
|
21
23
|
from meerschaum.utils.warnings import error
|
22
24
|
from meerschaum.utils.misc import replace_password, dict_from_od, filter_keywords
|
23
25
|
from collections import OrderedDict
|
24
|
-
import copy, json
|
25
26
|
|
26
27
|
if (
|
27
28
|
len(args) == 1
|
@@ -34,7 +35,7 @@ def pprint(
|
|
34
35
|
and
|
35
36
|
isinstance(args[0][1], str)
|
36
37
|
):
|
37
|
-
return print_tuple(args[0])
|
38
|
+
return print_tuple(args[0], **filter_keywords(print_tuple, **kw))
|
38
39
|
|
39
40
|
modify = True
|
40
41
|
rich_pprint = None
|
@@ -52,7 +53,7 @@ def pprint(
|
|
52
53
|
pprintpp = attempt_import('pprintpp', warn=False)
|
53
54
|
try:
|
54
55
|
_pprint = pprintpp.pprint
|
55
|
-
except Exception
|
56
|
+
except Exception :
|
56
57
|
import pprint as _pprint_module
|
57
58
|
_pprint = _pprint_module.pprint
|
58
59
|
|
@@ -62,7 +63,7 @@ def pprint(
|
|
62
63
|
|
63
64
|
try:
|
64
65
|
args_copy = copy.deepcopy(args)
|
65
|
-
except Exception
|
66
|
+
except Exception:
|
66
67
|
args_copy = args
|
67
68
|
modify = False
|
68
69
|
_args = []
|
@@ -85,12 +86,12 @@ def pprint(
|
|
85
86
|
try:
|
86
87
|
c = json.dumps(c)
|
87
88
|
is_json = True
|
88
|
-
except Exception
|
89
|
+
except Exception:
|
89
90
|
is_json = False
|
90
91
|
if not is_json:
|
91
92
|
try:
|
92
93
|
c = str(c)
|
93
|
-
except Exception
|
94
|
+
except Exception:
|
94
95
|
pass
|
95
96
|
_args.append(c)
|
96
97
|
|