meerschaum 2.6.17__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/install.py +1 -2
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -3
- meerschaum/connectors/sql/_pipes.py +63 -25
- meerschaum/connectors/sql/_sql.py +6 -1
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +19 -0
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +61 -21
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +18 -11
- meerschaum/utils/dataframe.py +175 -13
- meerschaum/utils/dtypes/__init__.py +103 -14
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +64 -11
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +33 -13
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +1 -1
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +31 -31
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -139,7 +139,6 @@ def filter_unseen_df(
     import functools
     import traceback
     from decimal import Decimal
-    from uuid import UUID
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.packages import import_pandas, attempt_import
     from meerschaum.utils.dtypes import (
@@ -147,6 +146,7 @@ def filter_unseen_df(
         are_dtypes_equal,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         coerce_timezone,
     )
     pd = import_pandas(debug=debug)
@@ -333,6 +333,11 @@ def filter_unseen_df(
     old_uuid_cols = get_uuid_cols(old_df)
     new_uuid_cols = get_uuid_cols(new_df)
     uuid_cols = set(new_uuid_cols + old_uuid_cols)
+
+    old_bytes_cols = get_bytes_cols(old_df)
+    new_bytes_cols = get_bytes_cols(new_df)
+    bytes_cols = set(new_bytes_cols + old_bytes_cols)
+
     joined_df = merge(
         new_df.infer_objects(copy=False).fillna(NA),
         old_df.infer_objects(copy=False).fillna(NA),
@@ -368,6 +373,14 @@ def filter_unseen_df(
         except Exception:
             warn(f"Unable to parse numeric column '{uuid_col}':\n{traceback.format_exc()}")
 
+    for bytes_col in bytes_cols:
+        if bytes_col not in delta_df.columns:
+            continue
+        try:
+            delta_df[bytes_col] = delta_df[bytes_col].apply(attempt_cast_to_bytes)
+        except Exception:
+            warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
+
     return delta_df
 
 
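With `attempt_cast_to_bytes` imported, `filter_unseen_df()` now carries bytes columns through the delta computation. A minimal sketch of the new behavior (assuming meerschaum 2.7.0rc1 and pandas are installed; the sample frames are hypothetical):

```python
import pandas as pd
from meerschaum.utils.dataframe import filter_unseen_df

old_df = pd.DataFrame({'id': [1, 2], 'payload': [b'abc', b'def']})
new_df = pd.DataFrame({'id': [1, 2, 3], 'payload': [b'abc', b'def', b'ghi']})

# Rows already present in old_df are filtered out; the bytes column survives the diff.
delta_df = filter_unseen_df(old_df, new_df)
print(delta_df)  # expect only the id=3 row
```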
@@ -429,6 +442,7 @@ def parse_df_datetimes(
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.misc import items_str
+    from meerschaum.utils.dtypes import to_datetime
     import traceback
     pd = import_pandas()
     pandas = attempt_import('pandas')
@@ -494,7 +508,7 @@ def parse_df_datetimes(
 
     if len(cols_to_inspect) == 0:
         if debug:
-            dprint(f"All columns are ignored, skipping datetime detection...")
+            dprint("All columns are ignored, skipping datetime detection...")
         return df.fillna(pandas.NA)
 
     ### apply regex to columns to determine which are ISO datetimes
@@ -515,14 +529,10 @@ def parse_df_datetimes(
 
     try:
         if not using_dask:
-            df[datetime_cols] = df[datetime_cols].apply(
-                pd.to_datetime,
-                utc=True,
-                format='ISO8601',
-            )
+            df[datetime_cols] = df[datetime_cols].apply(to_datetime)
         else:
             df[datetime_cols] = df[datetime_cols].apply(
-                pd.to_datetime,
+                to_datetime,
                 utc=True,
                 axis=1,
                 meta={
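With detection now routed through the new `to_datetime` wrapper, ISO-looking object columns are parsed without raising on out-of-range values. A sketch (assuming meerschaum 2.7.0rc1; the sample frame is hypothetical):

```python
import pandas as pd
from meerschaum.utils.dataframe import parse_df_datetimes

df = pd.DataFrame({'dt': ['2024-01-01T00:00:00Z'], 'x': [1]})
df = parse_df_datetimes(df)
print(df.dtypes)  # dt becomes a UTC datetime64 column; x stays numeric
```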
@@ -665,7 +675,7 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
 
     Returns
     -------
-    A list of columns to treat as
+    A list of columns to treat as UUIDs.
     """
     if df is None:
         return []
@@ -692,6 +702,135 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
     ]
 
 
+def get_datetime_cols(
+    df: 'pd.DataFrame',
+    timezone_aware: bool = True,
+    timezone_naive: bool = True,
+) -> List[str]:
+    """
+    Get the columns which contain `datetime` or `Timestamp` objects from a Pandas DataFrame.
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        The DataFrame which may contain `datetime` or `Timestamp` objects.
+
+    timezone_aware: bool, default True
+        If `True`, include timezone-aware datetime columns.
+
+    timezone_naive: bool, default True
+        If `True`, include timezone-naive datetime columns.
+
+    Returns
+    -------
+    A list of columns to treat as datetimes.
+    """
+    if not timezone_aware and not timezone_naive:
+        raise ValueError("`timezone_aware` and `timezone_naive` cannot both be `False`.")
+
+    if df is None:
+        return []
+
+    from datetime import datetime
+    from meerschaum.utils.dtypes import are_dtypes_equal
+    is_dask = 'dask' in df.__module__
+    if is_dask:
+        df = get_first_valid_dask_partition(df)
+
+    known_dt_cols = [
+        col
+        for col, typ in df.dtypes.items()
+        if are_dtypes_equal('datetime', str(typ))
+    ]
+
+    if len(df) == 0:
+        return known_dt_cols
+
+    cols_indices = {
+        col: df[col].first_valid_index()
+        for col in df.columns
+        if col not in known_dt_cols
+    }
+    pydt_cols = [
+        col
+        for col, ix in cols_indices.items()
+        if (
+            ix is not None
+            and
+            isinstance(df.loc[ix][col], datetime)
+        )
+    ]
+    dt_cols_set = set(known_dt_cols + pydt_cols)
+    all_dt_cols = [
+        col
+        for col in df.columns
+        if col in dt_cols_set
+    ]
+    if timezone_aware and timezone_naive:
+        return all_dt_cols
+
+    known_timezone_aware_dt_cols = [
+        col
+        for col in known_dt_cols
+        if getattr(df[col], 'tz', None) is not None
+    ]
+    timezone_aware_pydt_cols = [
+        col
+        for col in pydt_cols
+        if df.loc[cols_indices[col]][col].tzinfo is not None
+    ]
+    timezone_aware_dt_cols_set = set(known_timezone_aware_dt_cols + timezone_aware_pydt_cols)
+    if timezone_aware:
+        return [
+            col
+            for col in all_dt_cols
+            if col in timezone_aware_pydt_cols
+        ]
+
+    return [
+        col
+        for col in all_dt_cols
+        if col not in timezone_aware_dt_cols_set
+    ]
+
+
+def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
+    """
+    Get the columns which contain bytes strings from a Pandas DataFrame.
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        The DataFrame which may contain bytes strings.
+
+    Returns
+    -------
+    A list of columns to treat as bytes.
+    """
+    if df is None:
+        return []
+    is_dask = 'dask' in df.__module__
+    if is_dask:
+        df = get_first_valid_dask_partition(df)
+
+    if len(df) == 0:
+        return []
+
+    cols_indices = {
+        col: df[col].first_valid_index()
+        for col in df.columns
+    }
+    return [
+        col
+        for col, ix in cols_indices.items()
+        if (
+            ix is not None
+            and
+            isinstance(df.loc[ix][col], bytes)
+        )
+    ]
+
+
 def enforce_dtypes(
     df: 'pd.DataFrame',
     dtypes: Dict[str, str],
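The two new helpers mirror `get_uuid_cols()`: they check declared dtypes first, then fall back to inspecting the first valid value of each column. A minimal usage sketch (assuming meerschaum 2.7.0rc1; the sample frame is hypothetical):

```python
from datetime import datetime, timezone

import pandas as pd
from meerschaum.utils.dataframe import get_datetime_cols, get_bytes_cols

df = pd.DataFrame({
    'ts_aware': [datetime(2024, 1, 1, tzinfo=timezone.utc)],
    'ts_naive': [datetime(2024, 1, 1)],
    'blob': [b'\x00\x01'],
    'label': ['a'],
})

print(get_datetime_cols(df))  # ['ts_aware', 'ts_naive']
print(get_bytes_cols(df))     # ['blob']
```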
@@ -743,6 +882,7 @@ def enforce_dtypes(
         is_dtype_numeric,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         coerce_timezone as _coerce_timezone,
     )
     pandas = mrsm.attempt_import('pandas')
@@ -773,6 +913,11 @@ def enforce_dtypes(
         for col, typ in dtypes.items()
         if typ == 'uuid'
     ]
+    bytes_cols = [
+        col
+        for col, typ in dtypes.items()
+        if typ == 'bytes'
+    ]
     datetime_cols = [
         col
         for col, typ in dtypes.items()
@@ -826,6 +971,17 @@ def enforce_dtypes(
                     if debug:
                         dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
 
+    if bytes_cols:
+        if debug:
+            dprint(f"Checking for bytes: {bytes_cols}")
+        for col in bytes_cols:
+            if col in df.columns:
+                try:
+                    df[col] = df[col].apply(attempt_cast_to_bytes)
+                except Exception as e:
+                    if debug:
+                        dprint(f"Unable to parse column '{col}' as bytes:\n{e}")
+
     if datetime_cols and coerce_timezone:
         if debug:
             dprint(f"Checking for datetime conversion: {datetime_cols}")
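With `'bytes'` now a recognized dtype, `enforce_dtypes()` deserializes base64 strings back into bytestrings via `attempt_cast_to_bytes()`. A sketch (assuming meerschaum 2.7.0rc1):

```python
import pandas as pd
from meerschaum.utils.dataframe import enforce_dtypes

# 'AAEC' is the base64 encoding of b'\x00\x01\x02'.
df = pd.DataFrame({'payload': ['AAEC']})
df = enforce_dtypes(df, {'payload': 'bytes'})
print(df['payload'][0])  # b'\x00\x01\x02'
```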
@@ -931,6 +1087,8 @@ def get_datetime_bound_from_df(
     -------
     The minimum or maximum datetime value in the dataframe, or `None`.
     """
+    from meerschaum.utils.dtypes import to_datetime, value_is_null
+
     if df is None:
         return None
     if not datetime_column:
@@ -982,9 +1140,9 @@ def get_datetime_bound_from_df(
             dt_val = dt_val.compute()
 
         return (
-
+            to_datetime(dt_val, as_pydatetime=True)
             if are_dtypes_equal(str(type(dt_val)), 'datetime')
-            else (dt_val if
+            else (dt_val if not value_is_null(dt_val) else None)
         )
 
     return None
@@ -1127,7 +1285,7 @@ def get_first_valid_dask_partition(ddf: 'dask.dataframe.DataFrame') -> Union['pd
     for partition in ddf.partitions:
         try:
             pdf = partition.compute()
-        except Exception as e:
+        except Exception:
             continue
         if len(pdf) > 0:
            return pdf
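The bound helper now returns `None` for null bounds (via `value_is_null`) and converts datetime-typed bounds with `to_datetime(..., as_pydatetime=True)`. A sketch (assuming meerschaum 2.7.0rc1 and the function's existing `minimum` flag):

```python
import pandas as pd
from meerschaum.utils.dataframe import get_datetime_bound_from_df

df = pd.DataFrame({'dt': pd.to_datetime(['2024-01-01', '2024-06-01'], utc=True)})
print(get_datetime_bound_from_df(df, 'dt'))                 # earliest timestamp
print(get_datetime_bound_from_df(df, 'dt', minimum=False))  # latest timestamp
```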
@@ -1408,12 +1566,16 @@ def to_json(
     A JSON string.
     """
     from meerschaum.utils.packages import import_pandas
+    from meerschaum.utils.dtypes import serialize_bytes
     pd = import_pandas()
     uuid_cols = get_uuid_cols(df)
-
+    bytes_cols = get_bytes_cols(df)
+    if safe_copy and bool(uuid_cols or bytes_cols):
         df = df.copy()
     for col in uuid_cols:
         df[col] = df[col].astype(str)
+    for col in bytes_cols:
+        df[col] = df[col].apply(serialize_bytes)
     return df.infer_objects(copy=False).fillna(pd.NA).to_json(
         date_format=date_format,
         date_unit=date_unit,
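`to_json()` now base64-encodes bytes columns instead of failing on non-serializable values. A sketch (assuming meerschaum 2.7.0rc1; the exact JSON layout depends on `to_json()`'s default orient):

```python
import pandas as pd
from meerschaum.utils.dataframe import to_json

df = pd.DataFrame({'payload': [b'\x00\x01\x02']})
print(to_json(df))  # e.g. '[{"payload":"AAEC"}]'
```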
meerschaum/utils/dtypes/__init__.py
CHANGED
@@ -15,7 +15,19 @@ import meerschaum as mrsm
 from meerschaum.utils.typing import Dict, Union, Any
 from meerschaum.utils.warnings import warn
 
-MRSM_PD_DTYPES: Dict[str, str] = {
+MRSM_ALIAS_DTYPES: Dict[str, str] = {
+    'decimal': 'numeric',
+    'number': 'numeric',
+    'jsonl': 'json',
+    'JSON': 'json',
+    'binary': 'bytes',
+    'blob': 'bytes',
+    'varbinary': 'bytes',
+    'bytea': 'bytes',
+    'guid': 'uuid',
+    'UUID': 'uuid',
+}
+MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
     'json': 'object',
     'numeric': 'object',
     'uuid': 'object',
@@ -27,6 +39,8 @@ MRSM_PD_DTYPES: Dict[str, str] = {
     'int32': 'Int32',
     'int64': 'Int64',
     'str': 'string[python]',
+    'bytes': 'object',
+    None: 'object',
 }
 
 
@@ -38,6 +52,10 @@ def to_pandas_dtype(dtype: str) -> str:
     if known_dtype is not None:
         return known_dtype
 
+    alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
+    if alias_dtype is not None:
+        return MRSM_PD_DTYPES[alias_dtype]
+
     ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
     ### treat it as a SQL db type.
     if dtype.split(' ')[0].isupper():
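The alias table lets `to_pandas_dtype()` accept common synonyms before falling through to the SQL-type heuristic. A sketch (assuming meerschaum 2.7.0rc1):

```python
from meerschaum.utils.dtypes import to_pandas_dtype

print(to_pandas_dtype('decimal'))  # 'object' (alias of 'numeric')
print(to_pandas_dtype('blob'))     # 'object' (alias of 'bytes')
print(to_pandas_dtype('guid'))     # 'object' (alias of 'uuid')
```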
@@ -95,7 +113,7 @@ def are_dtypes_equal(
     try:
         if ldtype == rdtype:
             return True
-    except Exception as e:
+    except Exception:
         warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
         return False
 
@@ -115,6 +133,10 @@ def are_dtypes_equal(
     if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
         return True
 
+    bytes_dtypes = ('bytes', 'object')
+    if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
+        return True
+
     ldtype_clean = ldtype.split('[', maxsplit=1)[0]
     rdtype_clean = rdtype.split('[', maxsplit=1)[0]
 
@@ -185,7 +207,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
             if not value_is_null(value)
             else Decimal('NaN')
         )
-    except Exception as e:
+    except Exception:
         return value
 
 
@@ -201,7 +223,23 @@ def attempt_cast_to_uuid(value: Any) -> Any:
             if not value_is_null(value)
             else None
         )
-    except Exception as e:
+    except Exception:
+        return value
+
+
+def attempt_cast_to_bytes(value: Any) -> Any:
+    """
+    Given a value, attempt to coerce it into a bytestring.
+    """
+    if isinstance(value, uuid.UUID):
+        return value
+    try:
+        return (
+            deserialize_base64(str(value))
+            if not value_is_null(value)
+            else None
+        )
+    except Exception:
         return value
 
 
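`attempt_cast_to_bytes()` treats its input as a base64 string and decodes it with `deserialize_base64()` (added further down in this diff). A round-trip sketch (assuming meerschaum 2.7.0rc1):

```python
from meerschaum.utils.dtypes import (
    attempt_cast_to_bytes,
    deserialize_base64,
    serialize_bytes,
)

encoded = serialize_bytes(b'hello')  # 'aGVsbG8='
assert deserialize_base64(encoded) == b'hello'
assert attempt_cast_to_bytes(encoded) == b'hello'
assert attempt_cast_to_bytes(None) is None  # nulls pass through as None
```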
@@ -251,7 +289,7 @@ def coerce_timezone(
 ) -> Any:
     """
     Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
-    return a
+    return a UTC timestamp (strip timezone if `strip_utc` is `True`.
     """
     if dt is None:
         return None
@@ -266,9 +304,7 @@ def coerce_timezone(
     dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
 
     if dt_is_series:
-        is_dask = 'dask' in dt.__module__
         pandas = mrsm.attempt_import('pandas', lazy=False)
-        dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
 
         if (
             pandas.api.types.is_datetime64_any_dtype(dt) and (
@@ -279,14 +315,13 @@ def coerce_timezone(
         ):
             return dt
 
-        dt_series = (
-            pandas.to_datetime(dt, utc=True, format='ISO8601')
-            if dd is None
-            else dd.to_datetime(dt, utc=True, format='ISO8601')
-        )
+        dt_series = to_datetime(dt, coerce_utc=False)
         if strip_utc:
-
-
+            try:
+                if dt_series.dt.tz is not None:
+                    dt_series = dt_series.dt.tz_localize(None)
+            except Exception:
+                pass
 
         return dt_series
 
@@ -299,3 +334,57 @@ def coerce_timezone(
     if strip_utc:
         return utc_dt.replace(tzinfo=None)
     return utc_dt
+
+
+def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
+    """
+    Wrap `pd.to_datetime()` and add support for out-of-bounds values.
+    """
+    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
+    is_dask = 'dask' in getattr(dt_val, '__module__', '')
+    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
+    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
+    pd = pandas if dd is None else dd
+
+    try:
+        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
+        if as_pydatetime:
+            return new_dt_val.to_pydatetime()
+        return new_dt_val
+    except (pd.errors.OutOfBoundsDatetime, ValueError):
+        pass
+
+    def parse(x: Any) -> Any:
+        try:
+            return dateutil_parser.parse(x)
+        except Exception:
+            return x
+
+    if dt_is_series:
+        new_series = dt_val.apply(parse)
+        if coerce_utc:
+            return coerce_timezone(new_series)
+        return new_series
+
+    new_dt_val = parse(dt_val)
+    if not coerce_utc:
+        return new_dt_val
+    return coerce_timezone(new_dt_val)
+
+
+def serialize_bytes(data: bytes) -> str:
+    """
+    Return the given bytes as a base64-encoded string.
+    """
+    import base64
+    if not isinstance(data, bytes) and value_is_null(data):
+        return data
+    return base64.b64encode(data).decode('utf-8')
+
+
+def deserialize_base64(data: str) -> bytes:
+    """
+    Return the original bytestring from the given base64-encoded string.
+    """
+    import base64
+    return base64.b64decode(data)
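`to_datetime()` wraps `pd.to_datetime()` and falls back to `dateutil` when a value is outside pandas' nanosecond `Timestamp` range, rather than raising `OutOfBoundsDatetime`. A sketch (assuming meerschaum 2.7.0rc1):

```python
from meerschaum.utils.dtypes import to_datetime

print(to_datetime('2024-01-01T00:00:00Z'))
# Timestamp('2024-01-01 00:00:00+0000', tz='UTC')

print(to_datetime('0001-01-01T00:00:00Z'))
# A plain datetime parsed by dateutil (year 1 is out of bounds for ns precision).
```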
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -276,6 +276,19 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'UUID',
         'default': 'TEXT',
     },
+    'bytes': {
+        'timescaledb': 'BYTEA',
+        'postgresql': 'BYTEA',
+        'mariadb': 'BLOB',
+        'mysql': 'BLOB',
+        'mssql': 'VARBINARY(MAX)',
+        'oracle': 'BLOB',
+        'sqlite': 'BLOB',
+        'duckdb': 'BLOB',
+        'citus': 'BYTEA',
+        'cockroachdb': 'BYTEA',
+        'default': 'BLOB',
+    },
 }
 PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
     'int': {
@@ -421,6 +434,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'Uuid',
         'default': 'Uuid',
     },
+    'bytes': {
+        'timescaledb': 'LargeBinary',
+        'postgresql': 'LargeBinary',
+        'mariadb': 'LargeBinary',
+        'mysql': 'LargeBinary',
+        'mssql': 'LargeBinary',
+        'oracle': 'LargeBinary',
+        'sqlite': 'LargeBinary',
+        'duckdb': 'LargeBinary',
+        'citus': 'LargeBinary',
+        'cockroachdb': 'LargeBinary',
+        'default': 'LargeBinary',
+    },
 }
 
 AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
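These entries feed the flavor-to-type lookups in this module, e.g. through `get_db_type_from_pd_type()`. A sketch (assuming meerschaum 2.7.0rc1 and that function's existing signature):

```python
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

print(get_db_type_from_pd_type('bytes', 'postgresql'))  # 'BYTEA'
print(get_db_type_from_pd_type('bytes', 'mssql'))       # 'VARBINARY(MAX)'
print(get_db_type_from_pd_type('bytes', 'oracle'))      # 'BLOB'
```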
meerschaum/utils/misc.py
CHANGED
@@ -177,14 +177,14 @@ def string_to_dict(
         keys = _keys[:-1]
         try:
             val = ast.literal_eval(_keys[-1])
-        except Exception as e:
+        except Exception:
             val = str(_keys[-1])
 
         c = params_dict
         for _k in keys[:-1]:
             try:
                 k = ast.literal_eval(_k)
-            except Exception as e:
+            except Exception:
                 k = str(_k)
             if k not in c:
                 c[k] = {}
@@ -196,12 +196,12 @@
 
 
 def parse_config_substitution(
-
-
-
-
-
-
+    value: str,
+    leading_key: str = 'MRSM',
+    begin_key: str = '{',
+    end_key: str = '}',
+    delimeter: str = ':'
+) -> List[Any]:
     """
     Parse Meerschaum substitution syntax
     E.g. MRSM{value1:value2} => ['value1', 'value2']
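A usage sketch based on the restored signature and its docstring (assuming meerschaum 2.7.0rc1):

```python
from meerschaum.utils.misc import parse_config_substitution

# Per the docstring: MRSM{value1:value2} => ['value1', 'value2']
print(parse_config_substitution('MRSM{value1:value2}'))
```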