meerschaum 2.7.7__py3-none-any.whl → 2.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/actions/copy.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_SQLConnector.py +2 -9
- meerschaum/connectors/sql/_fetch.py +5 -30
- meerschaum/connectors/sql/_pipes.py +7 -4
- meerschaum/connectors/sql/_sql.py +56 -31
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
- meerschaum/core/Pipe/_fetch.py +4 -0
- meerschaum/core/Pipe/_sync.py +16 -9
- meerschaum/utils/daemon/Daemon.py +9 -2
- meerschaum/utils/daemon/RotatingFile.py +3 -3
- meerschaum/utils/dataframe.py +42 -12
- meerschaum/utils/dtypes/__init__.py +144 -24
- meerschaum/utils/dtypes/sql.py +50 -7
- meerschaum/utils/formatting/__init__.py +2 -2
- meerschaum/utils/formatting/_pprint.py +12 -11
- meerschaum/utils/misc.py +16 -18
- meerschaum/utils/prompt.py +1 -1
- meerschaum/utils/sql.py +26 -8
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/METADATA +1 -1
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/RECORD +27 -27
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.7.dist-info → meerschaum-2.7.8.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -85,6 +85,7 @@ def filter_unseen_df(
|
|
85
85
|
safe_copy: bool = True,
|
86
86
|
dtypes: Optional[Dict[str, Any]] = None,
|
87
87
|
include_unchanged_columns: bool = False,
|
88
|
+
coerce_mixed_numerics: bool = True,
|
88
89
|
debug: bool = False,
|
89
90
|
) -> 'pd.DataFrame':
|
90
91
|
"""
|
@@ -108,6 +109,10 @@ def filter_unseen_df(
|
|
108
109
|
include_unchanged_columns: bool, default False
|
109
110
|
If `True`, include columns which haven't changed on rows which have changed.
|
110
111
|
|
112
|
+
coerce_mixed_numerics: bool, default True
|
113
|
+
If `True`, cast mixed integer and float columns between the old and new dataframes into
|
114
|
+
numeric values (`decimal.Decimal`).
|
115
|
+
|
111
116
|
debug: bool, default False
|
112
117
|
Verbosity toggle.
|
113
118
|
|
@@ -138,7 +143,6 @@ def filter_unseen_df(
|
|
138
143
|
import json
|
139
144
|
import functools
|
140
145
|
import traceback
|
141
|
-
from decimal import Decimal
|
142
146
|
from meerschaum.utils.warnings import warn
|
143
147
|
from meerschaum.utils.packages import import_pandas, attempt_import
|
144
148
|
from meerschaum.utils.dtypes import (
|
@@ -148,7 +152,9 @@ def filter_unseen_df(
|
|
148
152
|
attempt_cast_to_uuid,
|
149
153
|
attempt_cast_to_bytes,
|
150
154
|
coerce_timezone,
|
155
|
+
serialize_decimal,
|
151
156
|
)
|
157
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
152
158
|
pd = import_pandas(debug=debug)
|
153
159
|
is_dask = 'dask' in new_df.__module__
|
154
160
|
if is_dask:
|
@@ -211,6 +217,12 @@ def filter_unseen_df(
|
|
211
217
|
if col not in dtypes:
|
212
218
|
dtypes[col] = typ
|
213
219
|
|
220
|
+
numeric_cols_precisions_scales = {
|
221
|
+
col: get_numeric_precision_scale(None, typ)
|
222
|
+
for col, typ in dtypes.items()
|
223
|
+
if col and typ and typ.startswith('numeric')
|
224
|
+
}
|
225
|
+
|
214
226
|
dt_dtypes = {
|
215
227
|
col: typ
|
216
228
|
for col, typ in dtypes.items()
|
@@ -259,6 +271,8 @@ def filter_unseen_df(
|
|
259
271
|
old_is_numeric = col in old_numeric_cols
|
260
272
|
|
261
273
|
if (
|
274
|
+
coerce_mixed_numerics
|
275
|
+
and
|
262
276
|
(new_is_float or new_is_int or new_is_numeric)
|
263
277
|
and
|
264
278
|
(old_is_float or old_is_int or old_is_numeric)
|
@@ -300,13 +314,9 @@ def filter_unseen_df(
|
|
300
314
|
new_numeric_cols = get_numeric_cols(new_df)
|
301
315
|
numeric_cols = set(new_numeric_cols + old_numeric_cols)
|
302
316
|
for numeric_col in old_numeric_cols:
|
303
|
-
old_df[numeric_col] = old_df[numeric_col].apply(
|
304
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
305
|
-
)
|
317
|
+
old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
|
306
318
|
for numeric_col in new_numeric_cols:
|
307
|
-
new_df[numeric_col] = new_df[numeric_col].apply(
|
308
|
-
lambda x: f'{x:f}' if isinstance(x, Decimal) else x
|
309
|
-
)
|
319
|
+
new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
|
310
320
|
|
311
321
|
old_dt_cols = [
|
312
322
|
col
|
@@ -361,7 +371,14 @@ def filter_unseen_df(
|
|
361
371
|
if numeric_col not in delta_df.columns:
|
362
372
|
continue
|
363
373
|
try:
|
364
|
-
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
374
|
+
delta_df[numeric_col] = delta_df[numeric_col].apply(
|
375
|
+
functools.partial(
|
376
|
+
attempt_cast_to_numeric,
|
377
|
+
quantize=True,
|
378
|
+
precision=numeric_cols_precisions_scales.get(numeric_col, (None, None)[0]),
|
379
|
+
scale=numeric_cols_precisions_scales.get(numeric_col, (None, None)[1]),
|
380
|
+
)
|
381
|
+
)
|
365
382
|
except Exception:
|
366
383
|
warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
|
367
384
|
|
@@ -882,6 +899,7 @@ def enforce_dtypes(
|
|
882
899
|
The Pandas DataFrame with the types enforced.
|
883
900
|
"""
|
884
901
|
import json
|
902
|
+
import functools
|
885
903
|
from meerschaum.utils.debug import dprint
|
886
904
|
from meerschaum.utils.formatting import pprint
|
887
905
|
from meerschaum.utils.dtypes import (
|
@@ -893,6 +911,7 @@ def enforce_dtypes(
|
|
893
911
|
attempt_cast_to_bytes,
|
894
912
|
coerce_timezone as _coerce_timezone,
|
895
913
|
)
|
914
|
+
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
896
915
|
pandas = mrsm.attempt_import('pandas')
|
897
916
|
is_dask = 'dask' in df.__module__
|
898
917
|
if safe_copy:
|
@@ -914,7 +933,7 @@ def enforce_dtypes(
|
|
914
933
|
numeric_cols = [
|
915
934
|
col
|
916
935
|
for col, typ in dtypes.items()
|
917
|
-
if typ
|
936
|
+
if typ.startswith('numeric')
|
918
937
|
]
|
919
938
|
uuid_cols = [
|
920
939
|
col
|
@@ -961,9 +980,17 @@ def enforce_dtypes(
|
|
961
980
|
if debug:
|
962
981
|
dprint(f"Checking for numerics: {numeric_cols}")
|
963
982
|
for col in numeric_cols:
|
983
|
+
precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
|
964
984
|
if col in df.columns:
|
965
985
|
try:
|
966
|
-
df[col] = df[col].apply(
|
986
|
+
df[col] = df[col].apply(
|
987
|
+
functools.partial(
|
988
|
+
attempt_cast_to_numeric,
|
989
|
+
quantize=True,
|
990
|
+
precision=precision,
|
991
|
+
scale=scale,
|
992
|
+
)
|
993
|
+
)
|
967
994
|
except Exception as e:
|
968
995
|
if debug:
|
969
996
|
dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
|
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
|
|
1040
1067
|
previous_typ = common_dtypes[col]
|
1041
1068
|
mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
|
1042
1069
|
explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
|
1043
|
-
explicitly_numeric = dtypes.get(col, 'numeric')
|
1070
|
+
explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
|
1044
1071
|
cast_to_numeric = (
|
1045
1072
|
explicitly_numeric
|
1046
1073
|
or col in df_numeric_cols
|
@@ -1574,16 +1601,19 @@ def to_json(
|
|
1574
1601
|
A JSON string.
|
1575
1602
|
"""
|
1576
1603
|
from meerschaum.utils.packages import import_pandas
|
1577
|
-
from meerschaum.utils.dtypes import serialize_bytes
|
1604
|
+
from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
|
1578
1605
|
pd = import_pandas()
|
1579
1606
|
uuid_cols = get_uuid_cols(df)
|
1580
1607
|
bytes_cols = get_bytes_cols(df)
|
1608
|
+
numeric_cols = get_numeric_cols(df)
|
1581
1609
|
if safe_copy and bool(uuid_cols or bytes_cols):
|
1582
1610
|
df = df.copy()
|
1583
1611
|
for col in uuid_cols:
|
1584
1612
|
df[col] = df[col].astype(str)
|
1585
1613
|
for col in bytes_cols:
|
1586
1614
|
df[col] = df[col].apply(serialize_bytes)
|
1615
|
+
for col in numeric_cols:
|
1616
|
+
df[col] = df[col].apply(serialize_decimal)
|
1587
1617
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1588
1618
|
date_format=date_format,
|
1589
1619
|
date_unit=date_unit,
|
@@ -8,15 +8,16 @@ Utility functions for working with data types.
|
|
8
8
|
|
9
9
|
import traceback
|
10
10
|
import uuid
|
11
|
-
from datetime import timezone
|
12
|
-
from decimal import Decimal, Context, InvalidOperation
|
11
|
+
from datetime import timezone, datetime
|
12
|
+
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any
|
15
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
18
18
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
19
19
|
'decimal': 'numeric',
|
20
|
+
'Decimal': 'numeric',
|
20
21
|
'number': 'numeric',
|
21
22
|
'jsonl': 'json',
|
22
23
|
'JSON': 'json',
|
@@ -56,6 +57,9 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
56
57
|
if alias_dtype is not None:
|
57
58
|
return MRSM_PD_DTYPES[alias_dtype]
|
58
59
|
|
60
|
+
if dtype.startswith('numeric'):
|
61
|
+
return MRSM_PD_DTYPES['numeric']
|
62
|
+
|
59
63
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
60
64
|
### treat it as a SQL db type.
|
61
65
|
if dtype.split(' ')[0].isupper():
|
@@ -118,8 +122,14 @@ def are_dtypes_equal(
|
|
118
122
|
return False
|
119
123
|
|
120
124
|
### Sometimes pandas dtype objects are passed.
|
121
|
-
ldtype = str(ldtype)
|
122
|
-
rdtype = str(rdtype)
|
125
|
+
ldtype = str(ldtype).split('[', maxsplit=1)[0]
|
126
|
+
rdtype = str(rdtype).split('[', maxsplit=1)[0]
|
127
|
+
|
128
|
+
if ldtype in MRSM_ALIAS_DTYPES:
|
129
|
+
ldtype = MRSM_ALIAS_DTYPES[ldtype]
|
130
|
+
|
131
|
+
if rdtype in MRSM_ALIAS_DTYPES:
|
132
|
+
rdtype = MRSM_ALIAS_DTYPES[rdtype]
|
123
133
|
|
124
134
|
json_dtypes = ('json', 'object')
|
125
135
|
if ldtype in json_dtypes and rdtype in json_dtypes:
|
@@ -137,10 +147,7 @@ def are_dtypes_equal(
|
|
137
147
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
138
148
|
return True
|
139
149
|
|
140
|
-
|
141
|
-
rdtype_clean = rdtype.split('[', maxsplit=1)[0]
|
142
|
-
|
143
|
-
if ldtype_clean.lower() == rdtype_clean.lower():
|
150
|
+
if ldtype.lower() == rdtype.lower():
|
144
151
|
return True
|
145
152
|
|
146
153
|
datetime_dtypes = ('datetime', 'timestamp')
|
@@ -153,19 +160,19 @@ def are_dtypes_equal(
|
|
153
160
|
return True
|
154
161
|
|
155
162
|
string_dtypes = ('str', 'string', 'object')
|
156
|
-
if
|
163
|
+
if ldtype in string_dtypes and rdtype in string_dtypes:
|
157
164
|
return True
|
158
165
|
|
159
166
|
int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
|
160
|
-
if
|
167
|
+
if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
|
161
168
|
return True
|
162
169
|
|
163
170
|
float_dtypes = ('float', 'float64', 'float32', 'float16', 'float128', 'double')
|
164
|
-
if
|
171
|
+
if ldtype.lower() in float_dtypes and rdtype.lower() in float_dtypes:
|
165
172
|
return True
|
166
173
|
|
167
174
|
bool_dtypes = ('bool', 'boolean')
|
168
|
-
if
|
175
|
+
if ldtype in bool_dtypes and rdtype in bool_dtypes:
|
169
176
|
return True
|
170
177
|
|
171
178
|
return False
|
@@ -195,18 +202,45 @@ def is_dtype_numeric(dtype: str) -> bool:
|
|
195
202
|
return False
|
196
203
|
|
197
204
|
|
198
|
-
def attempt_cast_to_numeric(
|
205
|
+
def attempt_cast_to_numeric(
|
206
|
+
value: Any,
|
207
|
+
quantize: bool = False,
|
208
|
+
precision: Optional[int] = None,
|
209
|
+
scale: Optional[int] = None,
|
210
|
+
)-> Any:
|
199
211
|
"""
|
200
212
|
Given a value, attempt to coerce it into a numeric (Decimal).
|
213
|
+
|
214
|
+
Parameters
|
215
|
+
----------
|
216
|
+
value: Any
|
217
|
+
The value to be cast to a Decimal.
|
218
|
+
|
219
|
+
quantize: bool, default False
|
220
|
+
If `True`, quantize the decimal to the specified precision and scale.
|
221
|
+
|
222
|
+
precision: Optional[int], default None
|
223
|
+
If `quantize` is `True`, use this precision.
|
224
|
+
|
225
|
+
scale: Optional[int], default None
|
226
|
+
If `quantize` is `True`, use this scale.
|
227
|
+
|
228
|
+
Returns
|
229
|
+
-------
|
230
|
+
A `Decimal` if possible, or `value`.
|
201
231
|
"""
|
202
232
|
if isinstance(value, Decimal):
|
233
|
+
if quantize and precision and scale:
|
234
|
+
return quantize_decimal(value, precision, scale)
|
203
235
|
return value
|
204
236
|
try:
|
205
|
-
|
206
|
-
Decimal(
|
207
|
-
|
208
|
-
|
209
|
-
|
237
|
+
if value_is_null(value):
|
238
|
+
return Decimal('NaN')
|
239
|
+
|
240
|
+
dec = Decimal(str(value))
|
241
|
+
if not quantize or not precision or not scale:
|
242
|
+
return dec
|
243
|
+
return quantize_decimal(dec, precision, scale)
|
210
244
|
except Exception:
|
211
245
|
return value
|
212
246
|
|
@@ -257,7 +291,7 @@ def none_if_null(value: Any) -> Any:
|
|
257
291
|
return (None if value_is_null(value) else value)
|
258
292
|
|
259
293
|
|
260
|
-
def quantize_decimal(x: Decimal,
|
294
|
+
def quantize_decimal(x: Decimal, precision: int, scale: int) -> Decimal:
|
261
295
|
"""
|
262
296
|
Quantize a given `Decimal` to a known scale and precision.
|
263
297
|
|
@@ -266,22 +300,61 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
|
|
266
300
|
x: Decimal
|
267
301
|
The `Decimal` to be quantized.
|
268
302
|
|
269
|
-
|
303
|
+
precision: int
|
270
304
|
The total number of significant digits.
|
271
305
|
|
272
|
-
|
306
|
+
scale: int
|
273
307
|
The number of significant digits after the decimal point.
|
274
308
|
|
275
309
|
Returns
|
276
310
|
-------
|
277
311
|
A `Decimal` quantized to the specified scale and precision.
|
278
312
|
"""
|
279
|
-
precision_decimal = Decimal((
|
313
|
+
precision_decimal = Decimal(('1' * (precision - scale)) + '.' + ('1' * scale))
|
280
314
|
try:
|
281
|
-
return x.quantize(precision_decimal, context=Context(prec=
|
315
|
+
return x.quantize(precision_decimal, context=Context(prec=precision), rounding=ROUND_HALF_UP)
|
282
316
|
except InvalidOperation:
|
317
|
+
pass
|
318
|
+
|
319
|
+
raise ValueError(f"Cannot quantize value '{x}' to {precision=}, {scale=}.")
|
320
|
+
|
321
|
+
|
322
|
+
def serialize_decimal(
|
323
|
+
x: Any,
|
324
|
+
quantize: bool = False,
|
325
|
+
precision: Optional[int] = None,
|
326
|
+
scale: Optional[int] = None,
|
327
|
+
) -> Any:
|
328
|
+
"""
|
329
|
+
Return a quantized string of an input decimal.
|
330
|
+
|
331
|
+
Parameters
|
332
|
+
----------
|
333
|
+
x: Any
|
334
|
+
The potential decimal to be serialized.
|
335
|
+
|
336
|
+
quantize: bool, default False
|
337
|
+
If `True`, quantize the incoming Decimal to the specified scale and precision
|
338
|
+
before serialization.
|
339
|
+
|
340
|
+
precision: Optional[int], default None
|
341
|
+
The precision of the decimal to be quantized.
|
342
|
+
|
343
|
+
scale: Optional[int], default None
|
344
|
+
The scale of the decimal to be quantized.
|
345
|
+
|
346
|
+
Returns
|
347
|
+
-------
|
348
|
+
A string of the input decimal or the input if not a Decimal.
|
349
|
+
"""
|
350
|
+
if not isinstance(x, Decimal):
|
283
351
|
return x
|
284
352
|
|
353
|
+
if quantize and scale and precision:
|
354
|
+
x = quantize_decimal(x, precision, scale)
|
355
|
+
|
356
|
+
return f"{x:f}"
|
357
|
+
|
285
358
|
|
286
359
|
def coerce_timezone(
|
287
360
|
dt: Any,
|
@@ -434,3 +507,50 @@ def encode_bytes_for_bytea(data: bytes, with_prefix: bool = True) -> str | None:
|
|
434
507
|
if not isinstance(data, bytes) and value_is_null(data):
|
435
508
|
return data
|
436
509
|
return ('\\x' if with_prefix else '') + binascii.hexlify(data).decode('utf-8')
|
510
|
+
|
511
|
+
|
512
|
+
def serialize_datetime(dt: datetime) -> Union[str, None]:
|
513
|
+
"""
|
514
|
+
Serialize a datetime object into JSON (ISO format string).
|
515
|
+
|
516
|
+
Examples
|
517
|
+
--------
|
518
|
+
>>> import json
|
519
|
+
>>> from datetime import datetime
|
520
|
+
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
521
|
+
'{"a": "2022-01-01T00:00:00Z"}'
|
522
|
+
|
523
|
+
"""
|
524
|
+
if not isinstance(dt, datetime):
|
525
|
+
return None
|
526
|
+
tz_suffix = 'Z' if dt.tzinfo is None else ''
|
527
|
+
return dt.isoformat() + tz_suffix
|
528
|
+
|
529
|
+
|
530
|
+
def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
531
|
+
"""
|
532
|
+
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
|
533
|
+
|
534
|
+
Parameters
|
535
|
+
----------
|
536
|
+
x: Any
|
537
|
+
The value to serialize.
|
538
|
+
|
539
|
+
default_to_str: bool, default True
|
540
|
+
If `True`, return a string of `x` if x is not a designated type.
|
541
|
+
Otherwise return x.
|
542
|
+
|
543
|
+
Returns
|
544
|
+
-------
|
545
|
+
A serialized version of x, or x.
|
546
|
+
"""
|
547
|
+
if hasattr(x, 'tzinfo'):
|
548
|
+
return serialize_datetime(x)
|
549
|
+
|
550
|
+
if isinstance(x, bytes):
|
551
|
+
return serialize_bytes(x)
|
552
|
+
|
553
|
+
if isinstance(x, Decimal):
|
554
|
+
return serialize_decimal(x)
|
555
|
+
|
556
|
+
return str(x) if default_to_str else x
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple, Optional
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
@@ -170,7 +170,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
170
170
|
'mariadb': 'DATETIME',
|
171
171
|
'mysql': 'DATETIME',
|
172
172
|
'mssql': 'DATETIME2',
|
173
|
-
'oracle': 'TIMESTAMP',
|
173
|
+
'oracle': 'TIMESTAMP(9)',
|
174
174
|
'sqlite': 'DATETIME',
|
175
175
|
'duckdb': 'TIMESTAMP',
|
176
176
|
'citus': 'TIMESTAMP',
|
@@ -183,7 +183,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
183
183
|
'mariadb': 'DATETIME',
|
184
184
|
'mysql': 'DATETIME',
|
185
185
|
'mssql': 'DATETIMEOFFSET',
|
186
|
-
'oracle': 'TIMESTAMP',
|
186
|
+
'oracle': 'TIMESTAMP(9)',
|
187
187
|
'sqlite': 'TIMESTAMP',
|
188
188
|
'duckdb': 'TIMESTAMPTZ',
|
189
189
|
'citus': 'TIMESTAMPTZ',
|
@@ -196,7 +196,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
196
196
|
'mariadb': 'DATETIME',
|
197
197
|
'mysql': 'DATETIME',
|
198
198
|
'mssql': 'DATETIMEOFFSET',
|
199
|
-
'oracle': 'TIMESTAMP',
|
199
|
+
'oracle': 'TIMESTAMP(9)',
|
200
200
|
'sqlite': 'TIMESTAMP',
|
201
201
|
'duckdb': 'TIMESTAMPTZ',
|
202
202
|
'citus': 'TIMESTAMPTZ',
|
@@ -544,17 +544,24 @@ def get_db_type_from_pd_type(
|
|
544
544
|
else PD_TO_SQLALCHEMY_DTYPES_FLAVORS
|
545
545
|
)
|
546
546
|
|
547
|
+
precision, scale = None, None
|
548
|
+
og_pd_type = pd_type
|
547
549
|
if pd_type in MRSM_ALIAS_DTYPES:
|
548
550
|
pd_type = MRSM_ALIAS_DTYPES[pd_type]
|
549
551
|
|
550
552
|
### Check whether we are able to match this type (e.g. pyarrow support).
|
551
553
|
found_db_type = False
|
552
|
-
if pd_type not in types_registry:
|
554
|
+
if pd_type not in types_registry and not pd_type.startswith('numeric['):
|
553
555
|
for mapped_pd_type in types_registry:
|
554
556
|
if are_dtypes_equal(mapped_pd_type, pd_type):
|
555
557
|
pd_type = mapped_pd_type
|
556
558
|
found_db_type = True
|
557
559
|
break
|
560
|
+
elif pd_type.startswith('numeric['):
|
561
|
+
og_pd_type = pd_type
|
562
|
+
pd_type = 'numeric'
|
563
|
+
precision, scale = get_numeric_precision_scale(flavor, og_pd_type)
|
564
|
+
found_db_type = True
|
558
565
|
else:
|
559
566
|
found_db_type = True
|
560
567
|
|
@@ -587,6 +594,9 @@ def get_db_type_from_pd_type(
|
|
587
594
|
warn(f"Unknown flavor '{flavor}'. Falling back to '{default_flavor_type}' (default).")
|
588
595
|
db_type = flavor_types.get(flavor, default_flavor_type)
|
589
596
|
if not as_sqlalchemy:
|
597
|
+
if precision is not None and scale is not None:
|
598
|
+
db_type_bare = db_type.split('(', maxsplit=1)[0]
|
599
|
+
return f"{db_type_bare}({precision},{scale})"
|
590
600
|
return db_type
|
591
601
|
|
592
602
|
if db_type.startswith('sqlalchemy.dialects'):
|
@@ -603,9 +613,8 @@ def get_db_type_from_pd_type(
|
|
603
613
|
return cls(*cls_args, **cls_kwargs)
|
604
614
|
|
605
615
|
if 'numeric' in db_type.lower():
|
606
|
-
if
|
616
|
+
if precision is None or scale is None:
|
607
617
|
return sqlalchemy_types.Numeric
|
608
|
-
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
609
618
|
return sqlalchemy_types.Numeric(precision, scale)
|
610
619
|
|
611
620
|
cls_args, cls_kwargs = None, None
|
@@ -619,3 +628,37 @@ def get_db_type_from_pd_type(
|
|
619
628
|
if cls_args is None:
|
620
629
|
return cls
|
621
630
|
return cls(*cls_args, **cls_kwargs)
|
631
|
+
|
632
|
+
|
633
|
+
def get_numeric_precision_scale(
|
634
|
+
flavor: str,
|
635
|
+
dtype: Optional[str] = None,
|
636
|
+
) -> Union[Tuple[int, int], Tuple[None, None]]:
|
637
|
+
"""
|
638
|
+
Return the precision and scale to use for a numeric column for a given database flavor.
|
639
|
+
|
640
|
+
Parameters
|
641
|
+
----------
|
642
|
+
flavor: str
|
643
|
+
The database flavor for which to return the precision and scale.
|
644
|
+
|
645
|
+
dtype: Optional[str], default None
|
646
|
+
If provided, return the precision and scale provided in the dtype (if applicable).
|
647
|
+
|
648
|
+
Returns
|
649
|
+
-------
|
650
|
+
A tuple of ints or a tuple of Nones.
|
651
|
+
"""
|
652
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
653
|
+
if dtype and are_dtypes_equal(dtype, 'numeric'):
|
654
|
+
if '[' in dtype and ',' in dtype:
|
655
|
+
try:
|
656
|
+
parts = dtype.split('[', maxsplit=1)[-1].rstrip(']').split(',', maxsplit=1)
|
657
|
+
return int(parts[0].strip()), int(parts[1].strip())
|
658
|
+
except Exception:
|
659
|
+
pass
|
660
|
+
|
661
|
+
if flavor not in NUMERIC_PRECISION_FLAVORS:
|
662
|
+
return None, None
|
663
|
+
|
664
|
+
return NUMERIC_PRECISION_FLAVORS[flavor]
|
@@ -217,8 +217,8 @@ def print_tuple(
|
|
217
217
|
tup: mrsm.SuccessTuple,
|
218
218
|
skip_common: bool = True,
|
219
219
|
common_only: bool = False,
|
220
|
-
upper_padding: int =
|
221
|
-
lower_padding: int =
|
220
|
+
upper_padding: int = 1,
|
221
|
+
lower_padding: int = 1,
|
222
222
|
left_padding: int = 1,
|
223
223
|
calm: bool = False,
|
224
224
|
_progress: Optional['rich.progress.Progress'] = None,
|
@@ -7,21 +7,22 @@ Pretty printing wrapper
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
def pprint(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
*args,
|
11
|
+
detect_password: bool = True,
|
12
|
+
nopretty: bool = False,
|
13
|
+
**kw
|
14
|
+
) -> None:
|
15
15
|
"""Pretty print an object according to the configured ANSI and UNICODE settings.
|
16
16
|
If detect_password is True (default), search and replace passwords with '*' characters.
|
17
17
|
Does not mutate objects.
|
18
18
|
"""
|
19
|
+
import copy
|
20
|
+
import json
|
19
21
|
from meerschaum.utils.packages import attempt_import, import_rich
|
20
|
-
from meerschaum.utils.formatting import ANSI,
|
22
|
+
from meerschaum.utils.formatting import ANSI, get_console, print_tuple
|
21
23
|
from meerschaum.utils.warnings import error
|
22
24
|
from meerschaum.utils.misc import replace_password, dict_from_od, filter_keywords
|
23
25
|
from collections import OrderedDict
|
24
|
-
import copy, json
|
25
26
|
|
26
27
|
if (
|
27
28
|
len(args) == 1
|
@@ -52,7 +53,7 @@ def pprint(
|
|
52
53
|
pprintpp = attempt_import('pprintpp', warn=False)
|
53
54
|
try:
|
54
55
|
_pprint = pprintpp.pprint
|
55
|
-
except Exception
|
56
|
+
except Exception :
|
56
57
|
import pprint as _pprint_module
|
57
58
|
_pprint = _pprint_module.pprint
|
58
59
|
|
@@ -62,7 +63,7 @@ def pprint(
|
|
62
63
|
|
63
64
|
try:
|
64
65
|
args_copy = copy.deepcopy(args)
|
65
|
-
except Exception
|
66
|
+
except Exception:
|
66
67
|
args_copy = args
|
67
68
|
modify = False
|
68
69
|
_args = []
|
@@ -85,12 +86,12 @@ def pprint(
|
|
85
86
|
try:
|
86
87
|
c = json.dumps(c)
|
87
88
|
is_json = True
|
88
|
-
except Exception
|
89
|
+
except Exception:
|
89
90
|
is_json = False
|
90
91
|
if not is_json:
|
91
92
|
try:
|
92
93
|
c = str(c)
|
93
|
-
except Exception
|
94
|
+
except Exception:
|
94
95
|
pass
|
95
96
|
_args.append(c)
|
96
97
|
|
meerschaum/utils/misc.py
CHANGED
@@ -957,24 +957,6 @@ def get_connector_labels(
|
|
957
957
|
return sorted(possibilities)
|
958
958
|
|
959
959
|
|
960
|
-
def json_serialize_datetime(dt: datetime) -> Union[str, None]:
|
961
|
-
"""
|
962
|
-
Serialize a datetime object into JSON (ISO format string).
|
963
|
-
|
964
|
-
Examples
|
965
|
-
--------
|
966
|
-
>>> import json
|
967
|
-
>>> from datetime import datetime
|
968
|
-
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
969
|
-
'{"a": "2022-01-01T00:00:00Z"}'
|
970
|
-
|
971
|
-
"""
|
972
|
-
if not isinstance(dt, datetime):
|
973
|
-
return None
|
974
|
-
tz_suffix = 'Z' if dt.tzinfo is None else ''
|
975
|
-
return dt.isoformat() + tz_suffix
|
976
|
-
|
977
|
-
|
978
960
|
def wget(
|
979
961
|
url: str,
|
980
962
|
dest: Optional[Union[str, 'pathlib.Path']] = None,
|
@@ -1705,6 +1687,22 @@ def _get_subaction_names(*args, **kwargs) -> Any:
|
|
1705
1687
|
return real_function(*args, **kwargs)
|
1706
1688
|
|
1707
1689
|
|
1690
|
+
def json_serialize_datetime(dt: datetime) -> Union[str, None]:
|
1691
|
+
"""
|
1692
|
+
Serialize a datetime object into JSON (ISO format string).
|
1693
|
+
|
1694
|
+
Examples
|
1695
|
+
--------
|
1696
|
+
>>> import json
|
1697
|
+
>>> from datetime import datetime
|
1698
|
+
>>> json.dumps({'a': datetime(2022, 1, 1)}, default=json_serialize_datetime)
|
1699
|
+
'{"a": "2022-01-01T00:00:00Z"}'
|
1700
|
+
|
1701
|
+
"""
|
1702
|
+
from meerschaum.utils.dtypes import serialize_datetime
|
1703
|
+
return serialize_datetime(dt)
|
1704
|
+
|
1705
|
+
|
1708
1706
|
_current_module = sys.modules[__name__]
|
1709
1707
|
__all__ = tuple(
|
1710
1708
|
name
|
meerschaum/utils/prompt.py
CHANGED
@@ -585,7 +585,7 @@ def get_connectors_completer(*types: str):
|
|
585
585
|
|
586
586
|
class ConnectorCompleter(Completer):
|
587
587
|
def get_completions(self, document, complete_event):
|
588
|
-
for label in get_connector_labels(*types):
|
588
|
+
for label in get_connector_labels(*types, search_term=document.text):
|
589
589
|
yield Completion(label, start_position=(-1 * len(document.text)))
|
590
590
|
|
591
591
|
return ConnectorCompleter()
|