meerschaum 2.6.17__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/install.py +1 -2
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -3
- meerschaum/connectors/sql/_pipes.py +63 -25
- meerschaum/connectors/sql/_sql.py +6 -1
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +19 -0
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +61 -21
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +18 -11
- meerschaum/utils/dataframe.py +175 -13
- meerschaum/utils/dtypes/__init__.py +103 -14
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +64 -11
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +33 -13
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +1 -1
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +31 -31
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -139,7 +139,6 @@ def filter_unseen_df(
|
|
139
139
|
import functools
|
140
140
|
import traceback
|
141
141
|
from decimal import Decimal
|
142
|
-
from uuid import UUID
|
143
142
|
from meerschaum.utils.warnings import warn
|
144
143
|
from meerschaum.utils.packages import import_pandas, attempt_import
|
145
144
|
from meerschaum.utils.dtypes import (
|
@@ -147,6 +146,7 @@ def filter_unseen_df(
|
|
147
146
|
are_dtypes_equal,
|
148
147
|
attempt_cast_to_numeric,
|
149
148
|
attempt_cast_to_uuid,
|
149
|
+
attempt_cast_to_bytes,
|
150
150
|
coerce_timezone,
|
151
151
|
)
|
152
152
|
pd = import_pandas(debug=debug)
|
@@ -333,6 +333,11 @@ def filter_unseen_df(
|
|
333
333
|
old_uuid_cols = get_uuid_cols(old_df)
|
334
334
|
new_uuid_cols = get_uuid_cols(new_df)
|
335
335
|
uuid_cols = set(new_uuid_cols + old_uuid_cols)
|
336
|
+
|
337
|
+
old_bytes_cols = get_bytes_cols(old_df)
|
338
|
+
new_bytes_cols = get_bytes_cols(new_df)
|
339
|
+
bytes_cols = set(new_bytes_cols + old_bytes_cols)
|
340
|
+
|
336
341
|
joined_df = merge(
|
337
342
|
new_df.infer_objects(copy=False).fillna(NA),
|
338
343
|
old_df.infer_objects(copy=False).fillna(NA),
|
@@ -368,6 +373,14 @@ def filter_unseen_df(
|
|
368
373
|
except Exception:
|
369
374
|
warn(f"Unable to parse numeric column '{uuid_col}':\n{traceback.format_exc()}")
|
370
375
|
|
376
|
+
for bytes_col in bytes_cols:
|
377
|
+
if bytes_col not in delta_df.columns:
|
378
|
+
continue
|
379
|
+
try:
|
380
|
+
delta_df[bytes_col] = delta_df[bytes_col].apply(attempt_cast_to_bytes)
|
381
|
+
except Exception:
|
382
|
+
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
383
|
+
|
371
384
|
return delta_df
|
372
385
|
|
373
386
|
|
@@ -429,6 +442,7 @@ def parse_df_datetimes(
|
|
429
442
|
from meerschaum.utils.debug import dprint
|
430
443
|
from meerschaum.utils.warnings import warn
|
431
444
|
from meerschaum.utils.misc import items_str
|
445
|
+
from meerschaum.utils.dtypes import to_datetime
|
432
446
|
import traceback
|
433
447
|
pd = import_pandas()
|
434
448
|
pandas = attempt_import('pandas')
|
@@ -494,7 +508,7 @@ def parse_df_datetimes(
|
|
494
508
|
|
495
509
|
if len(cols_to_inspect) == 0:
|
496
510
|
if debug:
|
497
|
-
dprint(
|
511
|
+
dprint("All columns are ignored, skipping datetime detection...")
|
498
512
|
return df.fillna(pandas.NA)
|
499
513
|
|
500
514
|
### apply regex to columns to determine which are ISO datetimes
|
@@ -515,14 +529,10 @@ def parse_df_datetimes(
|
|
515
529
|
|
516
530
|
try:
|
517
531
|
if not using_dask:
|
518
|
-
df[datetime_cols] = df[datetime_cols].apply(
|
519
|
-
pd.to_datetime,
|
520
|
-
utc=True,
|
521
|
-
format='ISO8601',
|
522
|
-
)
|
532
|
+
df[datetime_cols] = df[datetime_cols].apply(to_datetime)
|
523
533
|
else:
|
524
534
|
df[datetime_cols] = df[datetime_cols].apply(
|
525
|
-
|
535
|
+
to_datetime,
|
526
536
|
utc=True,
|
527
537
|
axis=1,
|
528
538
|
meta={
|
@@ -665,7 +675,7 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
|
|
665
675
|
|
666
676
|
Returns
|
667
677
|
-------
|
668
|
-
A list of columns to treat as
|
678
|
+
A list of columns to treat as UUIDs.
|
669
679
|
"""
|
670
680
|
if df is None:
|
671
681
|
return []
|
@@ -692,6 +702,135 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
|
|
692
702
|
]
|
693
703
|
|
694
704
|
|
705
|
+
def get_datetime_cols(
    df: 'pd.DataFrame',
    timezone_aware: bool = True,
    timezone_naive: bool = True,
) -> List[str]:
    """
    Get the columns which contain `datetime` or `Timestamp` objects from a Pandas DataFrame.

    Columns are detected in two ways: by a datetime dtype, or (for other columns)
    by checking whether the first valid value is a `datetime` instance.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain `datetime` or `Timestamp` objects.
        For a Dask DataFrame, the first valid partition is inspected.

    timezone_aware: bool, default True
        If `True`, include timezone-aware datetime columns.

    timezone_naive: bool, default True
        If `True`, include timezone-naive datetime columns.

    Returns
    -------
    A list of columns to treat as datetimes, in the DataFrame's column order.

    Raises
    ------
    ValueError
        If both `timezone_aware` and `timezone_naive` are `False`.
    """
    if not timezone_aware and not timezone_naive:
        raise ValueError("`timezone_aware` and `timezone_naive` cannot both be `False`.")

    if df is None:
        return []

    from datetime import datetime
    from meerschaum.utils.dtypes import are_dtypes_equal
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    ### Keep the dtype next to the column name so the timezone can be read from it below.
    known_dt_dtypes = [
        (col, typ)
        for col, typ in df.dtypes.items()
        if are_dtypes_equal('datetime', str(typ))
    ]
    known_dt_cols = [col for col, _ in known_dt_dtypes]

    ### With no rows there are no values to inspect, so only dtype-declared columns
    ### can be detected (they are still filtered by the timezone flags below).
    cols_indices = (
        {
            col: df[col].first_valid_index()
            for col in df.columns
            if col not in known_dt_cols
        }
        if len(df) > 0
        else {}
    )
    pydt_cols = [
        col
        for col, ix in cols_indices.items()
        if ix is not None and isinstance(df.loc[ix][col], datetime)
    ]
    dt_cols_set = set(known_dt_cols + pydt_cols)
    all_dt_cols = [
        col
        for col in df.columns
        if col in dt_cols_set
    ]
    if timezone_aware and timezone_naive:
        return all_dt_cols

    ### NOTE: The timezone lives on the dtype (e.g. `datetime64[ns, UTC]`);
    ### a Series itself has no `tz` attribute.
    known_timezone_aware_dt_cols = [
        col
        for col, typ in known_dt_dtypes
        if getattr(typ, 'tz', None) is not None
    ]
    timezone_aware_pydt_cols = [
        col
        for col in pydt_cols
        if df.loc[cols_indices[col]][col].tzinfo is not None
    ]
    timezone_aware_dt_cols_set = set(known_timezone_aware_dt_cols + timezone_aware_pydt_cols)
    if timezone_aware:
        ### Check the combined set so dtype-declared aware columns are included.
        return [
            col
            for col in all_dt_cols
            if col in timezone_aware_dt_cols_set
        ]

    return [
        col
        for col in all_dt_cols
        if col not in timezone_aware_dt_cols_set
    ]
|
795
|
+
|
796
|
+
|
797
|
+
def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
    """
    Get the columns which contain bytes strings from a Pandas DataFrame.

    A column is reported when its first valid (non-null) value is a `bytes` instance.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain bytes strings.
        For a Dask DataFrame, the first valid partition is inspected.

    Returns
    -------
    A list of columns to treat as bytes.
    """
    if df is None:
        return []

    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)
        ### NOTE(review): assumes the helper may return `None` when no partition
        ### could be computed (its return annotation is a `Union`) -- confirm.
        if df is None:
            return []

    if len(df) == 0:
        return []

    bytes_cols = []
    for col in df.columns:
        ### Look the value up on the column itself rather than via `df.loc[ix][col]`,
        ### which builds an entire row for every column inspected.
        series = df[col]
        ix = series.first_valid_index()
        if ix is not None and isinstance(series.loc[ix], bytes):
            bytes_cols.append(col)
    return bytes_cols
|
832
|
+
|
833
|
+
|
695
834
|
def enforce_dtypes(
|
696
835
|
df: 'pd.DataFrame',
|
697
836
|
dtypes: Dict[str, str],
|
@@ -743,6 +882,7 @@ def enforce_dtypes(
|
|
743
882
|
is_dtype_numeric,
|
744
883
|
attempt_cast_to_numeric,
|
745
884
|
attempt_cast_to_uuid,
|
885
|
+
attempt_cast_to_bytes,
|
746
886
|
coerce_timezone as _coerce_timezone,
|
747
887
|
)
|
748
888
|
pandas = mrsm.attempt_import('pandas')
|
@@ -773,6 +913,11 @@ def enforce_dtypes(
|
|
773
913
|
for col, typ in dtypes.items()
|
774
914
|
if typ == 'uuid'
|
775
915
|
]
|
916
|
+
bytes_cols = [
|
917
|
+
col
|
918
|
+
for col, typ in dtypes.items()
|
919
|
+
if typ == 'bytes'
|
920
|
+
]
|
776
921
|
datetime_cols = [
|
777
922
|
col
|
778
923
|
for col, typ in dtypes.items()
|
@@ -826,6 +971,17 @@ def enforce_dtypes(
|
|
826
971
|
if debug:
|
827
972
|
dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
|
828
973
|
|
974
|
+
if bytes_cols:
|
975
|
+
if debug:
|
976
|
+
dprint(f"Checking for bytes: {bytes_cols}")
|
977
|
+
for col in bytes_cols:
|
978
|
+
if col in df.columns:
|
979
|
+
try:
|
980
|
+
df[col] = df[col].apply(attempt_cast_to_bytes)
|
981
|
+
except Exception as e:
|
982
|
+
if debug:
|
983
|
+
dprint(f"Unable to parse column '{col}' as bytes:\n{e}")
|
984
|
+
|
829
985
|
if datetime_cols and coerce_timezone:
|
830
986
|
if debug:
|
831
987
|
dprint(f"Checking for datetime conversion: {datetime_cols}")
|
@@ -931,6 +1087,8 @@ def get_datetime_bound_from_df(
|
|
931
1087
|
-------
|
932
1088
|
The minimum or maximum datetime value in the dataframe, or `None`.
|
933
1089
|
"""
|
1090
|
+
from meerschaum.utils.dtypes import to_datetime, value_is_null
|
1091
|
+
|
934
1092
|
if df is None:
|
935
1093
|
return None
|
936
1094
|
if not datetime_column:
|
@@ -982,9 +1140,9 @@ def get_datetime_bound_from_df(
|
|
982
1140
|
dt_val = dt_val.compute()
|
983
1141
|
|
984
1142
|
return (
|
985
|
-
|
1143
|
+
to_datetime(dt_val, as_pydatetime=True)
|
986
1144
|
if are_dtypes_equal(str(type(dt_val)), 'datetime')
|
987
|
-
else (dt_val if
|
1145
|
+
else (dt_val if not value_is_null(dt_val) else None)
|
988
1146
|
)
|
989
1147
|
|
990
1148
|
return None
|
@@ -1127,7 +1285,7 @@ def get_first_valid_dask_partition(ddf: 'dask.dataframe.DataFrame') -> Union['pd
|
|
1127
1285
|
for partition in ddf.partitions:
|
1128
1286
|
try:
|
1129
1287
|
pdf = partition.compute()
|
1130
|
-
except Exception
|
1288
|
+
except Exception:
|
1131
1289
|
continue
|
1132
1290
|
if len(pdf) > 0:
|
1133
1291
|
return pdf
|
@@ -1408,12 +1566,16 @@ def to_json(
|
|
1408
1566
|
A JSON string.
|
1409
1567
|
"""
|
1410
1568
|
from meerschaum.utils.packages import import_pandas
|
1569
|
+
from meerschaum.utils.dtypes import serialize_bytes
|
1411
1570
|
pd = import_pandas()
|
1412
1571
|
uuid_cols = get_uuid_cols(df)
|
1413
|
-
|
1572
|
+
bytes_cols = get_bytes_cols(df)
|
1573
|
+
if safe_copy and bool(uuid_cols or bytes_cols):
|
1414
1574
|
df = df.copy()
|
1415
1575
|
for col in uuid_cols:
|
1416
1576
|
df[col] = df[col].astype(str)
|
1577
|
+
for col in bytes_cols:
|
1578
|
+
df[col] = df[col].apply(serialize_bytes)
|
1417
1579
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1418
1580
|
date_format=date_format,
|
1419
1581
|
date_unit=date_unit,
|
@@ -15,7 +15,19 @@ import meerschaum as mrsm
|
|
15
15
|
from meerschaum.utils.typing import Dict, Union, Any
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
18
|
-
|
18
|
+
### Alternate spellings of dtypes, keyed by alias and mapped to the canonical name.
### Built from (canonical, aliases) groups so each family stays together.
MRSM_ALIAS_DTYPES: Dict[str, str] = {
    alias: canonical
    for canonical, aliases in (
        ('numeric', ('decimal', 'number')),
        ('json', ('jsonl', 'JSON')),
        ('bytes', ('binary', 'blob', 'varbinary', 'bytea')),
        ('uuid', ('guid', 'UUID')),
    )
    for alias in aliases
}
|
30
|
+
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
19
31
|
'json': 'object',
|
20
32
|
'numeric': 'object',
|
21
33
|
'uuid': 'object',
|
@@ -27,6 +39,8 @@ MRSM_PD_DTYPES: Dict[str, str] = {
|
|
27
39
|
'int32': 'Int32',
|
28
40
|
'int64': 'Int64',
|
29
41
|
'str': 'string[python]',
|
42
|
+
'bytes': 'object',
|
43
|
+
None: 'object',
|
30
44
|
}
|
31
45
|
|
32
46
|
|
@@ -38,6 +52,10 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
38
52
|
if known_dtype is not None:
|
39
53
|
return known_dtype
|
40
54
|
|
55
|
+
alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
|
56
|
+
if alias_dtype is not None:
|
57
|
+
return MRSM_PD_DTYPES[alias_dtype]
|
58
|
+
|
41
59
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
42
60
|
### treat it as a SQL db type.
|
43
61
|
if dtype.split(' ')[0].isupper():
|
@@ -95,7 +113,7 @@ def are_dtypes_equal(
|
|
95
113
|
try:
|
96
114
|
if ldtype == rdtype:
|
97
115
|
return True
|
98
|
-
except Exception
|
116
|
+
except Exception:
|
99
117
|
warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
|
100
118
|
return False
|
101
119
|
|
@@ -115,6 +133,10 @@ def are_dtypes_equal(
|
|
115
133
|
if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
|
116
134
|
return True
|
117
135
|
|
136
|
+
bytes_dtypes = ('bytes', 'object')
|
137
|
+
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
138
|
+
return True
|
139
|
+
|
118
140
|
ldtype_clean = ldtype.split('[', maxsplit=1)[0]
|
119
141
|
rdtype_clean = rdtype.split('[', maxsplit=1)[0]
|
120
142
|
|
@@ -185,7 +207,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
|
|
185
207
|
if not value_is_null(value)
|
186
208
|
else Decimal('NaN')
|
187
209
|
)
|
188
|
-
except Exception
|
210
|
+
except Exception:
|
189
211
|
return value
|
190
212
|
|
191
213
|
|
@@ -201,7 +223,23 @@ def attempt_cast_to_uuid(value: Any) -> Any:
|
|
201
223
|
if not value_is_null(value)
|
202
224
|
else None
|
203
225
|
)
|
204
|
-
except Exception
|
226
|
+
except Exception:
|
227
|
+
return value
|
228
|
+
|
229
|
+
|
230
|
+
def attempt_cast_to_bytes(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a bytestring.

    Parameters
    ----------
    value: Any
        The value to coerce. Anything other than `bytes` or a `UUID` is stringified
        and decoded as base64.

    Returns
    -------
    - The value itself if it is already `bytes` (or a `UUID`, which is left untouched).
    - `None` if the value is null.
    - The decoded `bytes` if the stringified value could be base64-decoded.
    - The original value if decoding raised an exception.
    """
    ### Existing bytes must be returned as-is: stringifying them (`"b'...'"`) and
    ### decoding that as base64 can silently produce different bytes.
    if isinstance(value, (bytes, uuid.UUID)):
        return value
    try:
        return (
            deserialize_base64(str(value))
            if not value_is_null(value)
            else None
        )
    except Exception:
        ### Best-effort cast: leave values which cannot be decoded unchanged.
        return value
|
206
244
|
|
207
245
|
|
@@ -251,7 +289,7 @@ def coerce_timezone(
|
|
251
289
|
) -> Any:
|
252
290
|
"""
|
253
291
|
Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
|
254
|
-
return a
|
292
|
+
return a UTC timestamp (strip timezone if `strip_utc` is `True`).
|
255
293
|
"""
|
256
294
|
if dt is None:
|
257
295
|
return None
|
@@ -266,9 +304,7 @@ def coerce_timezone(
|
|
266
304
|
dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
|
267
305
|
|
268
306
|
if dt_is_series:
|
269
|
-
is_dask = 'dask' in dt.__module__
|
270
307
|
pandas = mrsm.attempt_import('pandas', lazy=False)
|
271
|
-
dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
|
272
308
|
|
273
309
|
if (
|
274
310
|
pandas.api.types.is_datetime64_any_dtype(dt) and (
|
@@ -279,14 +315,13 @@ def coerce_timezone(
|
|
279
315
|
):
|
280
316
|
return dt
|
281
317
|
|
282
|
-
dt_series = (
|
283
|
-
pandas.to_datetime(dt, utc=True, format='ISO8601')
|
284
|
-
if dd is None
|
285
|
-
else dd.to_datetime(dt, utc=True, format='ISO8601')
|
286
|
-
)
|
318
|
+
dt_series = to_datetime(dt, coerce_utc=False)
|
287
319
|
if strip_utc:
|
288
|
-
|
289
|
-
|
320
|
+
try:
|
321
|
+
if dt_series.dt.tz is not None:
|
322
|
+
dt_series = dt_series.dt.tz_localize(None)
|
323
|
+
except Exception:
|
324
|
+
pass
|
290
325
|
|
291
326
|
return dt_series
|
292
327
|
|
@@ -299,3 +334,57 @@ def coerce_timezone(
|
|
299
334
|
if strip_utc:
|
300
335
|
return utc_dt.replace(tzinfo=None)
|
301
336
|
return utc_dt
|
337
|
+
|
338
|
+
|
339
|
+
def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
    """
    Wrap `pd.to_datetime()` and add support for out-of-bounds values.

    Parameters
    ----------
    dt_val: Any
        The scalar or series to parse. Dask objects are parsed with `dask.dataframe`.

    as_pydatetime: bool, default False
        If `True` and `to_datetime()` succeeds, return `.to_pydatetime()` of the result.
        Not applied on the fallback path.

    coerce_utc: bool, default True
        On the fallback path, pass the parsed result through `coerce_timezone()`.
        The `to_datetime()` call itself always uses `utc=True`.

    Returns
    -------
    The parsed value from `to_datetime()`. If that raises an out-of-bounds or value
    error, each value is parsed with `dateutil` instead (values which cannot be
    parsed are returned unchanged).
    """
    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
    is_dask = 'dask' in getattr(dt_val, '__module__', '')
    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
    pd = pandas if dd is None else dd

    try:
        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
        if as_pydatetime:
            return new_dt_val.to_pydatetime()
        return new_dt_val
    ### NOTE: Resolve the exception class from `pandas` itself: `pd` may be
    ### `dask.dataframe` here, and looking up `errors` on it inside the `except`
    ### clause could itself raise instead of falling back.
    except (pandas.errors.OutOfBoundsDatetime, ValueError):
        pass

    def parse(x: Any) -> Any:
        """Parse a single value with `dateutil`, returning it unchanged on failure."""
        try:
            return dateutil_parser.parse(x)
        except Exception:
            return x

    if dt_is_series:
        new_series = dt_val.apply(parse)
        if coerce_utc:
            return coerce_timezone(new_series)
        return new_series

    new_dt_val = parse(dt_val)
    if not coerce_utc:
        return new_dt_val
    return coerce_timezone(new_dt_val)
|
373
|
+
|
374
|
+
|
375
|
+
def serialize_bytes(data: bytes) -> str:
    """
    Encode a bytestring as base64 text.

    Null values which are not `bytes` are passed through unchanged;
    everything else is base64-encoded and decoded as UTF-8.
    """
    from base64 import b64encode

    is_null = (not isinstance(data, bytes)) and value_is_null(data)
    if is_null:
        return data

    encoded = b64encode(data)
    return encoded.decode('utf-8')
|
383
|
+
|
384
|
+
|
385
|
+
def deserialize_base64(data: str) -> bytes:
    """
    Decode a base64-encoded string back into its bytestring.
    """
    from base64 import b64decode

    decoded = b64decode(data)
    return decoded
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -276,6 +276,19 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
276
276
|
'cockroachdb': 'UUID',
|
277
277
|
'default': 'TEXT',
|
278
278
|
},
|
279
|
+
'bytes': {
|
280
|
+
'timescaledb': 'BYTEA',
|
281
|
+
'postgresql': 'BYTEA',
|
282
|
+
'mariadb': 'BLOB',
|
283
|
+
'mysql': 'BLOB',
|
284
|
+
'mssql': 'VARBINARY(MAX)',
|
285
|
+
'oracle': 'BLOB',
|
286
|
+
'sqlite': 'BLOB',
|
287
|
+
'duckdb': 'BLOB',
|
288
|
+
'citus': 'BYTEA',
|
289
|
+
'cockroachdb': 'BYTEA',
|
290
|
+
'default': 'BLOB',
|
291
|
+
},
|
279
292
|
}
|
280
293
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
281
294
|
'int': {
|
@@ -421,6 +434,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
421
434
|
'cockroachdb': 'Uuid',
|
422
435
|
'default': 'Uuid',
|
423
436
|
},
|
437
|
+
'bytes': {
|
438
|
+
'timescaledb': 'LargeBinary',
|
439
|
+
'postgresql': 'LargeBinary',
|
440
|
+
'mariadb': 'LargeBinary',
|
441
|
+
'mysql': 'LargeBinary',
|
442
|
+
'mssql': 'LargeBinary',
|
443
|
+
'oracle': 'LargeBinary',
|
444
|
+
'sqlite': 'LargeBinary',
|
445
|
+
'duckdb': 'LargeBinary',
|
446
|
+
'citus': 'LargeBinary',
|
447
|
+
'cockroachdb': 'LargeBinary',
|
448
|
+
'default': 'LargeBinary',
|
449
|
+
},
|
424
450
|
}
|
425
451
|
|
426
452
|
AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
|
meerschaum/utils/misc.py
CHANGED
@@ -177,14 +177,14 @@ def string_to_dict(
|
|
177
177
|
keys = _keys[:-1]
|
178
178
|
try:
|
179
179
|
val = ast.literal_eval(_keys[-1])
|
180
|
-
except Exception
|
180
|
+
except Exception:
|
181
181
|
val = str(_keys[-1])
|
182
182
|
|
183
183
|
c = params_dict
|
184
184
|
for _k in keys[:-1]:
|
185
185
|
try:
|
186
186
|
k = ast.literal_eval(_k)
|
187
|
-
except Exception
|
187
|
+
except Exception:
|
188
188
|
k = str(_k)
|
189
189
|
if k not in c:
|
190
190
|
c[k] = {}
|
@@ -196,12 +196,12 @@ def string_to_dict(
|
|
196
196
|
|
197
197
|
|
198
198
|
def parse_config_substitution(
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
199
|
+
value: str,
|
200
|
+
leading_key: str = 'MRSM',
|
201
|
+
begin_key: str = '{',
|
202
|
+
end_key: str = '}',
|
203
|
+
delimeter: str = ':'
|
204
|
+
) -> List[Any]:
|
205
205
|
"""
|
206
206
|
Parse Meerschaum substitution syntax
|
207
207
|
E.g. MRSM{value1:value2} => ['value1', 'value2']
|