meerschaum-2.7.7-py3-none-any.whl → meerschaum-2.7.8-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/actions/copy.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_SQLConnector.py +2 -9
- meerschaum/connectors/sql/_fetch.py +5 -30
- meerschaum/connectors/sql/_pipes.py +7 -4
- meerschaum/connectors/sql/_sql.py +56 -31
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
- meerschaum/core/Pipe/_fetch.py +4 -0
- meerschaum/core/Pipe/_sync.py +16 -9
- meerschaum/utils/daemon/Daemon.py +9 -2
- meerschaum/utils/daemon/RotatingFile.py +3 -3
- meerschaum/utils/dataframe.py +42 -12
- meerschaum/utils/dtypes/__init__.py +144 -24
- meerschaum/utils/dtypes/sql.py +50 -7
- meerschaum/utils/formatting/__init__.py +2 -2
- meerschaum/utils/formatting/_pprint.py +12 -11
- meerschaum/utils/misc.py +16 -18
- meerschaum/utils/prompt.py +1 -1
- meerschaum/utils/sql.py +26 -8
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/METADATA +1 -1
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/RECORD +27 -27
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -85,6 +85,7 @@ def filter_unseen_df(
|
|
85
85
|
safe_copy: bool = True,
|
86
86
|
dtypes: Optional[Dict[str, Any]] = None,
|
87
87
|
include_unchanged_columns: bool = False,
|
88
|
+
coerce_mixed_numerics: bool = True,
|
88
89
|
debug: bool = False,
|
89
90
|
) -> 'pd.DataFrame':
|
90
91
|
"""
|
@@ -108,6 +109,10 @@ def filter_unseen_df(
|
|
108
109
|
include_unchanged_columns: bool, default False
|
109
110
|
If `True`, include columns which haven't changed on rows which have changed.
|
110
111
|
|
112
|
+
coerce_mixed_numerics: bool, default True
|
113
|
+
If `True`, cast mixed integer and float columns between the old and new dataframes into
|
114
|
+
numeric values (`decimal.Decimal`).
|
115
|
+
|
111
116
|
debug: bool, default False
|
112
117
|
Verbosity toggle.
|
113
118
|
|
@@ -138,7 +143,6 @@ def filter_unseen_df(
|
|
138
143
|
import json
|
139
144
|
import functools
|
140
145
|
import traceback
|
141
|
-
from decimal import Decimal
|
142
146
|
from meerschaum.utils.warnings import warn
|
143
147
|
from meerschaum.utils.packages import import_pandas, attempt_import
|
144
148
|
from meerschaum.utils.dtypes import (
|
@@ -148,7 +152,9 @@ def filter_unseen_df(
|
|
148
152
|
attempt_cast_to_uuid,
|
149
153
|
attempt_cast_to_bytes,
|
150
154
|
coerce_timezone,
|
155
|
+
serialize_decimal,
|
151
156
|
)
|
157
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
152
158
|
pd = import_pandas(debug=debug)
|
153
159
|
is_dask = 'dask' in new_df.__module__
|
154
160
|
if is_dask:
|
@@ -211,6 +217,12 @@ def filter_unseen_df(
|
|
211
217
|
if col not in dtypes:
|
212
218
|
dtypes[col] = typ
|
213
219
|
|
220
|
+
numeric_cols_precisions_scales = {
|
221
|
+
col: get_numeric_precision_scale(None, typ)
|
222
|
+
for col, typ in dtypes.items()
|
223
|
+
if col and typ and typ.startswith('numeric')
|
224
|
+
}
|
225
|
+
|
214
226
|
dt_dtypes = {
|
215
227
|
col: typ
|
216
228
|
for col, typ in dtypes.items()
|
@@ -259,6 +271,8 @@ def filter_unseen_df(
|
|
259
271
|
old_is_numeric = col in old_numeric_cols
|
260
272
|
|
261
273
|
if (
|
274
|
+
coerce_mixed_numerics
|
275
|
+
and
|
262
276
|
(new_is_float or new_is_int or new_is_numeric)
|
263
277
|
and
|
264
278
|
(old_is_float or old_is_int or old_is_numeric)
|
@@ -300,13 +314,9 @@ def filter_unseen_df(
|
|
300
314
|
new_numeric_cols = get_numeric_cols(new_df)
|
301
315
|
numeric_cols = set(new_numeric_cols + old_numeric_cols)
|
302
316
|
for numeric_col in old_numeric_cols:
|
303
|
-
old_df[numeric_col] = old_df[numeric_col].apply(
|
304
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
305
|
-
)
|
317
|
+
old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
|
306
318
|
for numeric_col in new_numeric_cols:
|
307
|
-
new_df[numeric_col] = new_df[numeric_col].apply(
|
308
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
309
|
-
)
|
319
|
+
new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
|
310
320
|
|
311
321
|
old_dt_cols = [
|
312
322
|
col
|
@@ -361,7 +371,14 @@ def filter_unseen_df(
|
|
361
371
|
if numeric_col not in delta_df.columns:
|
362
372
|
continue
|
363
373
|
try:
|
364
|
-
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
374
|
+
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
375
|
+
functools.partial(
|
376
|
+
attempt_cast_to_numeric,
|
377
|
+
quantize=True,
|
378
|
+
precision=numeric_cols_precisions_scales.get(numeric_col, (None, None)[0]),
|
379
|
+
scale=numeric_cols_precisions_scales.get(numeric_col, (None, None)[1]),
|
380
|
+
)
|
381
|
+
)
|
365
382
|
except Exception:
|
366
383
|
warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
|
367
384
|
|
@@ -882,6 +899,7 @@ def enforce_dtypes(
|
|
882
899
|
The Pandas DataFrame with the types enforced.
|
883
900
|
"""
|
884
901
|
import json
|
902
|
+
import functools
|
885
903
|
from meerschaum.utils.debug import dprint
|
886
904
|
from meerschaum.utils.formatting import pprint
|
887
905
|
from meerschaum.utils.dtypes import (
|
@@ -893,6 +911,7 @@ def enforce_dtypes(
|
|
893
911
|
attempt_cast_to_bytes,
|
894
912
|
coerce_timezone as _coerce_timezone,
|
895
913
|
)
|
914
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
896
915
|
pandas = mrsm.attempt_import('pandas')
|
897
916
|
is_dask = 'dask' in df.__module__
|
898
917
|
if safe_copy:
|
@@ -914,7 +933,7 @@ def enforce_dtypes(
|
|
914
933
|
numeric_cols = [
|
915
934
|
col
|
916
935
|
for col, typ in dtypes.items()
|
917
|
-
if typ
|
936
|
+
if typ.startswith('numeric')
|
918
937
|
]
|
919
938
|
uuid_cols = [
|
920
939
|
col
|
@@ -961,9 +980,17 @@ def enforce_dtypes(
|
|
961
980
|
if debug:
|
962
981
|
dprint(f"Checking for numerics: {numeric_cols}")
|
963
982
|
for col in numeric_cols:
|
983
|
+
precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
|
964
984
|
if col in df.columns:
|
965
985
|
try:
|
966
|
-
df[col] = df[col].apply(
|
986
|
+
df[col] = df[col].apply(
|
987
|
+
functools.partial(
|
988
|
+
attempt_cast_to_numeric,
|
989
|
+
quantize=True,
|
990
|
+
precision=precision,
|
991
|
+
scale=scale,
|
992
|
+
)
|
993
|
+
)
|
967
994
|
except Exception as e:
|
968
995
|
if debug:
|
969
996
|
dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
|
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
|
|
1040
1067
|
previous_typ = common_dtypes[col]
|
1041
1068
|
mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
|
1042
1069
|
explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
|
1043
|
-
explicitly_numeric = dtypes.get(col, 'numeric')
|
1070
|
+
explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
|
1044
1071
|
cast_to_numeric = (
|
1045
1072
|
explicitly_numeric
|
1046
1073
|
or col in df_numeric_cols
|
@@ -1574,16 +1601,19 @@ def to_json(
|
|
1574
1601
|
A JSON string.
|
1575
1602
|
"""
|
1576
1603
|
from meerschaum.utils.packages import import_pandas
|
1577
|
-
from meerschaum.utils.dtypes import serialize_bytes
|
1604
|
+
from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
|
1578
1605
|
pd = import_pandas()
|
1579
1606
|
uuid_cols = get_uuid_cols(df)
|
1580
1607
|
bytes_cols = get_bytes_cols(df)
|
1608
|
+
numeric_cols = get_numeric_cols(df)
|
1581
1609
|
if safe_copy and bool(uuid_cols or bytes_cols):
|
1582
1610
|
df = df.copy()
|
1583
1611
|
for col in uuid_cols:
|
1584
1612
|
df[col] = df[col].astype(str)
|
1585
1613
|
for col in bytes_cols:
|
1586
1614
|
df[col] = df[col].apply(serialize_bytes)
|
1615
|
+
for col in numeric_cols:
|
1616
|
+
df[col] = df[col].apply(serialize_decimal)
|
1587
1617
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1588
1618
|
date_format=date_format,
|
1589
1619
|
date_unit=date_unit,
|
meerschaum/utils/dtypes/__init__.py
CHANGED
@@ -8,15 +8,16 @@ Utility functions for working with data types.
|
|
8
8
|
|
9
9
|
import traceback
|
10
10
|
import uuid
|
11
|
-
from datetime import timezone
|
12
|
-
from decimal import Decimal, Context, InvalidOperation
|
11
|
+
from datetime import timezone, datetime
|
12
|
+
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any
|
15
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
18
18
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
19
19
|
'decimal': 'numeric',
|
20
|
+
'Decimal': 'numeric',
|
20
21
|
'number': 'numeric',
|
21
22
|
'jsonl': 'json',
|
22
23
|
'JSON': 'json',
|
@@ -56,6 +57,9 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
56
57
|
if alias_dtype is not None:
|
57
58
|
return MRSM_PD_DTYPES[alias_dtype]
|
58
59
|
|
60
|
+
if dtype.startswith('numeric'):
|
61
|
+
return MRSM_PD_DTYPES['numeric']
|
62
|
+
|
59
63
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
60
64
|
### treat it as a SQL db type.
|
61
65
|
if dtype.split(' ')[0].isupper():
|
@@ -118,8 +122,14 @@ def are_dtypes_equal(
|
|
118
122
|
return False
|
119
123
|
|
120
124
|
### Sometimes pandas dtype objects are passed.
|
121
|
-
ldtype = str(ldtype)
|
122
|
-
rdtype = str(rdtype)
|
125
|
+
ldtype = str(ldtype).split('[', maxsplit=1)[0]
|
126
|
+
rdtype = str(rdtype).split('[', maxsplit=1)[0]
|
127
|
+
|
128
|
+
if ldtype in MRSM_ALIAS_DTYPES:
|
129
|
+
ldtype = MRSM_ALIAS_DTYPES[ldtype]
|
130
|
+
|
131
|
+
if rdtype in MRSM_ALIAS_DTYPES:
|
132
|
+
rdtype = MRSM_ALIAS_DTYPES[rdtype]
|
123
133
|
|
124
134
|
json_dtypes = ('json', 'object')
|
125
135
|
if ldtype in json_dtypes and rdtype in json_dtypes:
|
@@ -137,10 +147,7 @@ def are_dtypes_equal(
|
|
137
147
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
138
148
|
return True
|
139
149
|
|
140
|
-
|
141
|
-
rdtype_clean = rdtype.split('[', maxsplit=1)[0]
|
142
|
-
|
143
|
-
if ldtype_clean.lower() == rdtype_clean.lower():
|
150
|
+
if ldtype.lower() == rdtype.lower():
|
144
151
|
return True
|
145
152
|
|
146
153
|
datetime_dtypes = ('datetime', 'timestamp')
|
@@ -153,19 +160,19 @@ def are_dtypes_equal(
|
|
153
160
|
return True
|
154
161
|
|
155
162
|
string_dtypes = ('str', 'string', 'object')
|
156
|
-
if
|
163
|
+
if ldtype in string_dtypes and rdtype in string_dtypes:
|
157
164
|
return True
|
158
165
|
|
159
166
|
int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
|
160
|
-
if
|
167
|
+
if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
|
161
168
|
return True
|
162
169
|
|
163
170
|
float_dtypes = ('float', 'float64', 'float32', 'float16', 'float128', 'double')
|
164
|
-
if
|
171
|
+
if ldtype.lower() in float_dtypes and rdtype.lower() in float_dtypes:
|
165
172
|
return True
|
166
173
|
|
167
174
|
bool_dtypes = ('bool', 'boolean')
|
168
|
-
if
|
175
|
+
if ldtype in bool_dtypes and rdtype in bool_dtypes:
|
169
176
|
return True
|
170
177
|
|
171
178
|
return False
|
@@ -195,18 +202,45 @@ def is_dtype_numeric(dtype: str) -> bool:
|
|
195
202
|
return False
|
196
203
|
|
197
204
|
|
198
|
-
def attempt_cast_to_numeric(
|
205
|
+
def attempt_cast_to_numeric(
|
206
|
+
value: Any,
|
207
|
+
quantize: bool = False,
|
208
|
+
precision: Optional[int] = None,
|
209
|
+
scale: Optional[int] = None,
|
210
|
+
)-> Any:
|
199
211
|
"""
|
200
212
|
Given a value, attempt to coerce it into a numeric (Decimal).
|
213
|
+
|
214
|
+
Parameters
|
215
|
+
----------
|
216
|
+
value: Any
|
217
|
+
The value to be cast to a Decimal.
|
218
|
+
|
219
|
+
quantize: bool, default False
|
220
|
+
If `True`, quantize the decimal to the specified precision and scale.
|
221
|
+
|
222
|
+
precision: Optional[int], default None
|
223
|
+
If `quantize` is `True`, use this precision.
|
224
|
+
|
225
|
+
scale: Optional[int], default None
|
226
|
+
If `quantize` is `True`, use this scale.
|
227
|
+
|
228
|
+
Returns
|
229
|
+
-------
|
230
|
+
A `Decimal` if possible, or `value`.
|
201
231
|
"""
|
202
232
|
if isinstance(value, Decimal):
|
233
|
+
if quantize and precision and scale:
|
234
|
+
return quantize_decimal(value, precision, scale)
|
203
235
|
return value
|
204
236
|
try:
|
205
|
-
|
206
|
-
Decimal(
|
207
|
-
|
208
|
-
|
209
|
-
|
237
|
+
if value_is_null(value):
|
238
|
+
return Decimal('NaN')
|
239
|
+
|
240
|
+
dec = Decimal(str(value))
|
241
|
+
if not quantize or not precision or not scale:
|
242
|
+
return dec
|
243
|
+
return quantize_decimal(dec, precision, scale)
|
210
244
|
except Exception:
|
211
245
|
return value
|
212
246
|
|
@@ -257,7 +291,7 @@ def none_if_null(value: Any) -> Any:
|
|
257
291
|
return (None if value_is_null(value) else value)
|
258
292
|
|
259
293
|
|
260
|
-
def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
|
294
|
+
def quantize_decimal(x: Decimal, precision: int, scale: int) -> Decimal:
|
261
295
|
"""
|
262
296
|
Quantize a given `Decimal` to a known scale and precision.
|
263
297
|
|
@@ -266,22 +300,61 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
|
|
266
300
|
x: Decimal
|
267
301
|
The `Decimal` to be quantized.
|
268
302
|
|
269
|
-
|
303
|
+
precision: int
|
270
304
|
The total number of significant digits.
|
271
305
|
|
272
|
-
|
306
|
+
scale: int
|
273
307
|
The number of significant digits after the decimal point.
|
274
308
|
|
275
309
|
Returns
|
276
310
|
-------
|
277
311
|
A `Decimal` quantized to the specified scale and precision.
|
278
312
|
"""
|
279
|
-
precision_decimal = Decimal((
|
313
|
+
precision_decimal = Decimal(('1' * (precision - scale)) + '.' + ('1' * scale))
|
280
314
|
try:
|
281
|
-
return x.quantize(precision_decimal, context=Context(prec=
|
315
|
+
return x.quantize(precision_decimal, context=Context(prec=precision), rounding=ROUND_HALF_UP)
|
282
316
|
except InvalidOperation:
|
317
|
+
pass
|
318
|
+
|
319
|
+
raise ValueError(f"Cannot quantize value '{x}' to {precision=}, {scale=}.")
|
320
|
+
|
321
|
+
|
322
|
+
def serialize_decimal(
|
323
|
+
x: Any,
|
324
|
+
quantize: bool = False,
|
325
|
+
precision: Optional[int] = None,
|
326
|
+
scale: Optional[int] = None,
|
327
|
+
) -> Any:
|
328
|
+
"""
|
329
|
+
Return a quantized string of an input decimal.
|
330
|
+
|
331
|
+
Parameters
|
332
|
+
----------
|
333
|
+
x: Any
|
334
|
+
The potential decimal to be serialized.
|
335
|
+
|
336
|
+
quantize: bool, default False
|
337
|
+
If `True`, quantize the incoming Decimal to the specified scale and precision
|
338
|
+
before serialization.
|
339
|
+
|
340
|
+
precision: Optional[int], default None
|
341
|
+
The precision of the decimal to be quantized.
|
342
|
+
|
343
|
+
scale: Optional[int], default None
|
344
|
+
The scale of the decimal to be quantized.
|
345
|
+
|
346
|
+
Returns
|
347
|
+
-------
|
348
|
+
A string of the input decimal or the input if not a Decimal.
|
349
|
+
"""
|
350
|
+
if not isinstance(x, Decimal):
|
283
351
|
return x
|
284
352
|
|
353
|
+
if quantize and scale and precision:
|
354
|
+
x = quantize_decimal(x, precision, scale)
|
355
|
+
|
356
|
+
return f"{x:f}"
|
357
|
+
|
285
358
|
|
286
359
|
def coerce_timezone(
|
287
360
|
dt: Any,
|
@@ -434,3 +507,50 @@ def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None:
|
|
434
507
|
if not isinstance(data, bytes) and value_is_null(data):
|
435
508
|
return data
|
436
509
|
return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
|
510
|
+
|
511
|
+
|
512
|
+
def serialize_datetime(dt: datetime) -> Union[str, None]:
|
513
|
+
"""
|
514
|
+
Serialize a datetime object into JSON (ISO format string).
|
515
|
+
|
516
|
+
Examples
|
517
|
+
--------
|
518
|
+
>>> import json
|
519
|
+
>>> from datetime import datetime
|
520
|
+
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
521
|
+
'{"a": "2022-01-01T00:00:00Z"}'
|
522
|
+
|
523
|
+
"""
|
524
|
+
if not isinstance(dt, datetime):
|
525
|
+
return None
|
526
|
+
tz_suffix = 'Z' if dt.tzinfo is None else ''
|
527
|
+
return dt.isoformat() + tz_suffix
|
528
|
+
|
529
|
+
|
530
|
+
def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
531
|
+
"""
|
532
|
+
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
|
533
|
+
|
534
|
+
Parameters
|
535
|
+
----------
|
536
|
+
x: Any
|
537
|
+
The value to serialize.
|
538
|
+
|
539
|
+
default_to_str: bool, default True
|
540
|
+
If `True`, return a string of `x` if x is not a designated type.
|
541
|
+
Otherwise return x.
|
542
|
+
|
543
|
+
Returns
|
544
|
+
-------
|
545
|
+
A serialized version of x, or x.
|
546
|
+
"""
|
547
|
+
if hasattr(x, 'tzinfo'):
|
548
|
+
return serialize_datetime(x)
|
549
|
+
|
550
|
+
if isinstance(x, bytes):
|
551
|
+
return serialize_bytes(x)
|
552
|
+
|
553
|
+
if isinstance(x, Decimal):
|
554
|
+
return serialize_decimal(x)
|
555
|
+
|
556
|
+
return str(x) if default_to_str else x
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple, Optional
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
@@ -170,7 +170,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
170
170
|
'mariadb': 'DATETIME',
|
171
171
|
'mysql': 'DATETIME',
|
172
172
|
'mssql': 'DATETIME2',
|
173
|
-
'oracle': 'TIMESTAMP',
|
173
|
+
'oracle': 'TIMESTAMP(9)',
|
174
174
|
'sqlite': 'DATETIME',
|
175
175
|
'duckdb': 'TIMESTAMP',
|
176
176
|
'citus': 'TIMESTAMP',
|
@@ -183,7 +183,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
183
183
|
'mariadb': 'DATETIME',
|
184
184
|
'mysql': 'DATETIME',
|
185
185
|
'mssql': 'DATETIMEOFFSET',
|
186
|
-
'oracle': 'TIMESTAMP',
|
186
|
+
'oracle': 'TIMESTAMP(9)',
|
187
187
|
'sqlite': 'TIMESTAMP',
|
188
188
|
'duckdb': 'TIMESTAMPTZ',
|
189
189
|
'citus': 'TIMESTAMPTZ',
|
@@ -196,7 +196,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
196
196
|
'mariadb': 'DATETIME',
|
197
197
|
'mysql': 'DATETIME',
|
198
198
|
'mssql': 'DATETIMEOFFSET',
|
199
|
-
'oracle': 'TIMESTAMP',
|
199
|
+
'oracle': 'TIMESTAMP(9)',
|
200
200
|
'sqlite': 'TIMESTAMP',
|
201
201
|
'duckdb': 'TIMESTAMPTZ',
|
202
202
|
'citus': 'TIMESTAMPTZ',
|
@@ -544,17 +544,24 @@ def get_db_type_from_pd_type(
|
|
544
544
|
else PD_TO_SQLALCHEMY_DTYPES_FLAVORS
|
545
545
|
)
|
546
546
|
|
547
|
+
precision, scale = None, None
|
548
|
+
og_pd_type = pd_type
|
547
549
|
if pd_type in MRSM_ALIAS_DTYPES:
|
548
550
|
pd_type = MRSM_ALIAS_DTYPES[pd_type]
|
549
551
|
|
550
552
|
### Check whether we are able to match this type (e.g. pyarrow support).
|
551
553
|
found_db_type = False
|
552
|
-
if pd_type not in types_registry:
|
554
|
+
if pd_type not in types_registry and not pd_type.startswith('numeric['):
|
553
555
|
for mapped_pd_type in types_registry:
|
554
556
|
if are_dtypes_equal(mapped_pd_type, pd_type):
|
555
557
|
pd_type = mapped_pd_type
|
556
558
|
found_db_type = True
|
557
559
|
break
|
560
|
+
elif pd_type.startswith('numeric['):
|
561
|
+
og_pd_type = pd_type
|
562
|
+
pd_type = 'numeric'
|
563
|
+
precision, scale = get_numeric_precision_scale(flavor, og_pd_type)
|
564
|
+
found_db_type = True
|
558
565
|
else:
|
559
566
|
found_db_type = True
|
560
567
|
|
@@ -587,6 +594,9 @@ def get_db_type_from_pd_type(
|
|
587
594
|
warn(f"Unknown flavor '{flavor}'. Falling back to '{default_flavor_type}' (default).")
|
588
595
|
db_type = flavor_types.get(flavor, default_flavor_type)
|
589
596
|
if not as_sqlalchemy:
|
597
|
+
if precision is not None and scale is not None:
|
598
|
+
db_type_bare = db_type.split('(', maxsplit=1)[0]
|
599
|
+
return f"{db_type_bare}({precision},{scale})"
|
590
600
|
return db_type
|
591
601
|
|
592
602
|
if db_type.startswith('sqlalchemy.dialects'):
|
@@ -603,9 +613,8 @@ def get_db_type_from_pd_type(
|
|
603
613
|
return cls(*cls_args, **cls_kwargs)
|
604
614
|
|
605
615
|
if 'numeric' in db_type.lower():
|
606
|
-
if
|
616
|
+
if precision is None or scale is None:
|
607
617
|
return sqlalchemy_types.Numeric
|
608
|
-
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
609
618
|
return sqlalchemy_types.Numeric(precision, scale)
|
610
619
|
|
611
620
|
cls_args, cls_kwargs = None, None
|
@@ -619,3 +628,37 @@ def get_db_type_from_pd_type(
|
|
619
628
|
if cls_args is None:
|
620
629
|
return cls
|
621
630
|
return cls(*cls_args, **cls_kwargs)
|
631
|
+
|
632
|
+
|
633
|
+
def get_numeric_precision_scale(
|
634
|
+
flavor: str,
|
635
|
+
dtype: Optional[str] = None,
|
636
|
+
) -> Union[Tuple[int, int], Tuple[None, None]]:
|
637
|
+
"""
|
638
|
+
Return the precision and scale to use for a numeric column for a given database flavor.
|
639
|
+
|
640
|
+
Parameters
|
641
|
+
----------
|
642
|
+
flavor: str
|
643
|
+
The database flavor for which to return the precision and scale.
|
644
|
+
|
645
|
+
dtype: Optional[str], default None
|
646
|
+
If provided, return the precision and scale provided in the dtype (if applicable).
|
647
|
+
|
648
|
+
Returns
|
649
|
+
-------
|
650
|
+
A tuple of ints or a tuple of Nones.
|
651
|
+
"""
|
652
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
653
|
+
if dtype and are_dtypes_equal(dtype, 'numeric'):
|
654
|
+
if '[' in dtype and ',' in dtype:
|
655
|
+
try:
|
656
|
+
parts = dtype.split('[', maxsplit=1)[-1].rstrip(']').split(',', maxsplit=1)
|
657
|
+
return int(parts[0].strip()), int(parts[1].strip())
|
658
|
+
except Exception:
|
659
|
+
pass
|
660
|
+
|
661
|
+
if flavor not in NUMERIC_PRECISION_FLAVORS:
|
662
|
+
return None, None
|
663
|
+
|
664
|
+
return NUMERIC_PRECISION_FLAVORS[flavor]
|
meerschaum/utils/formatting/__init__.py
CHANGED
@@ -217,8 +217,8 @@ def print_tuple(
|
|
217
217
|
tup: mrsm.SuccessTuple,
|
218
218
|
skip_common: bool = True,
|
219
219
|
common_only: bool = False,
|
220
|
-
upper_padding: int =
|
221
|
-
lower_padding: int =
|
220
|
+
upper_padding: int = 1,
|
221
|
+
lower_padding: int = 1,
|
222
222
|
left_padding: int = 1,
|
223
223
|
calm: bool = False,
|
224
224
|
_progress: Optional['rich.progress.Progress'] = None,
|
meerschaum/utils/formatting/_pprint.py
CHANGED
@@ -7,21 +7,22 @@ Pretty printing wrapper
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
def pprint(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
*args,
|
11
|
+
detect_password: bool = True,
|
12
|
+
nopretty: bool = False,
|
13
|
+
**kw
|
14
|
+
) -> None:
|
15
15
|
"""Pretty print an object according to the configured ANSI and UNICODE settings.
|
16
16
|
If detect_password is True (default), search and replace passwords with '*' characters.
|
17
17
|
Does not mutate objects.
|
18
18
|
"""
|
19
|
+
import copy
|
20
|
+
import json
|
19
21
|
from meerschaum.utils.packages import attempt_import, import_rich
|
20
|
-
from meerschaum.utils.formatting import ANSI,
|
22
|
+
from meerschaum.utils.formatting import ANSI, get_console, print_tuple
|
21
23
|
from meerschaum.utils.warnings import error
|
22
24
|
from meerschaum.utils.misc import replace_password, dict_from_od, filter_keywords
|
23
25
|
from collections import OrderedDict
|
24
|
-
import copy, json
|
25
26
|
|
26
27
|
if (
|
27
28
|
len(args) == 1
|
@@ -52,7 +53,7 @@ def pprint(
|
|
52
53
|
pprintpp = attempt_import('pprintpp', warn=False)
|
53
54
|
try:
|
54
55
|
_pprint = pprintpp.pprint
|
55
|
-
except Exception
|
56
|
+
except Exception :
|
56
57
|
import pprint as _pprint_module
|
57
58
|
_pprint = _pprint_module.pprint
|
58
59
|
|
@@ -62,7 +63,7 @@ def pprint(
|
|
62
63
|
|
63
64
|
try:
|
64
65
|
args_copy = copy.deepcopy(args)
|
65
|
-
except Exception
|
66
|
+
except Exception:
|
66
67
|
args_copy = args
|
67
68
|
modify = False
|
68
69
|
_args = []
|
@@ -85,12 +86,12 @@ def pprint(
|
|
85
86
|
try:
|
86
87
|
c = json.dumps(c)
|
87
88
|
is_json = True
|
88
|
-
except Exception
|
89
|
+
except Exception:
|
89
90
|
is_json = False
|
90
91
|
if not is_json:
|
91
92
|
try:
|
92
93
|
c = str(c)
|
93
|
-
except Exception
|
94
|
+
except Exception:
|
94
95
|
pass
|
95
96
|
_args.append(c)
|
96
97
|
|
meerschaum/utils/misc.py
CHANGED
@@ -957,24 +957,6 @@ def get_connector_labels(
|
|
957
957
|
return sorted(possibilities)
|
958
958
|
|
959
959
|
|
960
|
-
def json_serialize_datetime(dt: datetime) -> Union[str, None]:
|
961
|
-
"""
|
962
|
-
Serialize a datetime object into JSON (ISO format string).
|
963
|
-
|
964
|
-
Examples
|
965
|
-
--------
|
966
|
-
>>> import json
|
967
|
-
>>> from datetime import datetime
|
968
|
-
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
969
|
-
'{"a": "2022-01-01T00:00:00Z"}'
|
970
|
-
|
971
|
-
"""
|
972
|
-
if not isinstance(dt, datetime):
|
973
|
-
return None
|
974
|
-
tz_suffix = 'Z' if dt.tzinfo is None else ''
|
975
|
-
return dt.isoformat() + tz_suffix
|
976
|
-
|
977
|
-
|
978
960
|
def wget(
|
979
961
|
url: str,
|
980
962
|
dest: Optional[Union[str, 'pathlib.Path']] = None,
|
@@ -1705,6 +1687,22 @@ def _get_subaction_names(*args, **kwargs) -> Any:
|
|
1705
1687
|
return real_function(*args, **kwargs)
|
1706
1688
|
|
1707
1689
|
|
1690
|
+
def json_serialize_datetime(dt: datetime) -> Union[str, None]:
|
1691
|
+
"""
|
1692
|
+
Serialize a datetime object into JSON (ISO format string).
|
1693
|
+
|
1694
|
+
Examples
|
1695
|
+
--------
|
1696
|
+
>>> import json
|
1697
|
+
>>> from datetime import datetime
|
1698
|
+
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
1699
|
+
'{"a": "2022-01-01T00:00:00Z"}'
|
1700
|
+
|
1701
|
+
"""
|
1702
|
+
from meerschaum.utils.dtypes import serialize_datetime
|
1703
|
+
return serialize_datetime(dt)
|
1704
|
+
|
1705
|
+
|
1708
1706
|
_current_module = sys.modules[__name__]
|
1709
1707
|
__all__ = tuple(
|
1710
1708
|
name
|
meerschaum/utils/prompt.py
CHANGED
@@ -585,7 +585,7 @@ def get_connectors_completer(*types: str):
|
|
585
585
|
|
586
586
|
class ConnectorCompleter(Completer):
|
587
587
|
def get_completions(self, document, complete_event):
|
588
|
-
for label in get_connector_labels(*types):
|
588
|
+
for label in get_connector_labels(*types, search_term=document.text):
|
589
589
|
yield Completion(label, start_position=(-1 * len(document.text)))
|
590
590
|
|
591
591
|
return ConnectorCompleter()
|