meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +6 -1
- meerschaum/_internal/entry.py +16 -5
- meerschaum/actions/edit.py +6 -6
- meerschaum/actions/sql.py +12 -11
- meerschaum/api/dash/pages/login.py +17 -17
- meerschaum/api/dash/pipes.py +104 -13
- meerschaum/api/routes/_pipes.py +58 -40
- meerschaum/api/routes/_webterm.py +1 -0
- meerschaum/config/_edit.py +46 -19
- meerschaum/config/_read_config.py +20 -9
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +1 -1
- meerschaum/config/static/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +1 -0
- meerschaum/connectors/api/_pipes.py +39 -8
- meerschaum/connectors/sql/_SQLConnector.py +4 -3
- meerschaum/connectors/sql/_pipes.py +511 -118
- meerschaum/connectors/sql/_sql.py +55 -15
- meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
- meerschaum/connectors/valkey/_pipes.py +11 -5
- meerschaum/core/Pipe/__init__.py +27 -9
- meerschaum/core/Pipe/_attributes.py +181 -18
- meerschaum/core/Pipe/_clear.py +10 -8
- meerschaum/core/Pipe/_copy.py +2 -0
- meerschaum/core/Pipe/_data.py +65 -17
- meerschaum/core/Pipe/_deduplicate.py +30 -28
- meerschaum/core/Pipe/_dtypes.py +4 -4
- meerschaum/core/Pipe/_fetch.py +12 -10
- meerschaum/core/Pipe/_sync.py +28 -11
- meerschaum/core/Pipe/_verify.py +52 -49
- meerschaum/utils/dataframe.py +64 -34
- meerschaum/utils/dtypes/__init__.py +25 -6
- meerschaum/utils/dtypes/sql.py +76 -33
- meerschaum/utils/misc.py +57 -24
- meerschaum/utils/packages/_packages.py +2 -1
- meerschaum/utils/schedule.py +7 -5
- meerschaum/utils/sql.py +697 -44
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -94,14 +94,14 @@ def filter_unseen_df(
|
|
94
94
|
----------
|
95
95
|
old_df: 'pd.DataFrame'
|
96
96
|
The original (target) dataframe. Acts as a filter on the `new_df`.
|
97
|
-
|
97
|
+
|
98
98
|
new_df: 'pd.DataFrame'
|
99
99
|
The fetched (source) dataframe. Rows that are contained in `old_df` are removed.
|
100
100
|
|
101
101
|
safe_copy: bool, default True
|
102
102
|
If `True`, create a copy before comparing and modifying the dataframes.
|
103
103
|
Setting to `False` may mutate the DataFrames.
|
104
|
-
|
104
|
+
|
105
105
|
dtypes: Optional[Dict[str, Any]], default None
|
106
106
|
Optionally specify the datatypes of the dataframe.
|
107
107
|
|
@@ -234,8 +234,10 @@ def filter_unseen_df(
|
|
234
234
|
cast_dt_cols = True
|
235
235
|
try:
|
236
236
|
for col, typ in dt_dtypes.items():
|
237
|
-
|
238
|
-
|
237
|
+
if col in old_df.columns:
|
238
|
+
old_df[col] = coerce_timezone(old_df[col])
|
239
|
+
if col in new_df.columns:
|
240
|
+
new_df[col] = coerce_timezone(new_df[col])
|
239
241
|
cast_dt_cols = False
|
240
242
|
except Exception as e:
|
241
243
|
warn(f"Could not cast datetime columns:\n{e}")
|
@@ -363,6 +365,7 @@ def filter_unseen_df(
|
|
363
365
|
def parse_df_datetimes(
|
364
366
|
df: 'pd.DataFrame',
|
365
367
|
ignore_cols: Optional[Iterable[str]] = None,
|
368
|
+
strip_timezone: bool = False,
|
366
369
|
chunksize: Optional[int] = None,
|
367
370
|
dtype_backend: str = 'numpy_nullable',
|
368
371
|
debug: bool = False,
|
@@ -378,6 +381,9 @@ def parse_df_datetimes(
|
|
378
381
|
ignore_cols: Optional[Iterable[str]], default None
|
379
382
|
If provided, do not attempt to coerce these columns as datetimes.
|
380
383
|
|
384
|
+
strip_timezone: bool, default False
|
385
|
+
If `True`, remove the UTC `tzinfo` property.
|
386
|
+
|
381
387
|
chunksize: Optional[int], default None
|
382
388
|
If the pandas implementation is `'dask'`, use this chunksize for the distributed dataframe.
|
383
389
|
|
@@ -385,7 +391,7 @@ def parse_df_datetimes(
|
|
385
391
|
If `df` is not a DataFrame and new one needs to be constructed,
|
386
392
|
use this as the datatypes backend.
|
387
393
|
Accepted values are 'numpy_nullable' and 'pyarrow'.
|
388
|
-
|
394
|
+
|
389
395
|
debug: bool, default False
|
390
396
|
Verbosity toggle.
|
391
397
|
|
@@ -447,7 +453,7 @@ def parse_df_datetimes(
|
|
447
453
|
for doc in df
|
448
454
|
] for k in keys
|
449
455
|
},
|
450
|
-
npartitions
|
456
|
+
npartitions=npartitions,
|
451
457
|
)
|
452
458
|
elif isinstance(df, dict):
|
453
459
|
df = pd.DataFrame.from_dict(df, npartitions=npartitions)
|
@@ -480,7 +486,7 @@ def parse_df_datetimes(
|
|
480
486
|
if len(cols_to_inspect) == 0:
|
481
487
|
if debug:
|
482
488
|
dprint(f"All columns are ignored, skipping datetime detection...")
|
483
|
-
return df
|
489
|
+
return df.fillna(pandas.NA)
|
484
490
|
|
485
491
|
### apply regex to columns to determine which are ISO datetimes
|
486
492
|
iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
|
@@ -493,21 +499,25 @@ def parse_df_datetimes(
|
|
493
499
|
if not datetime_cols:
|
494
500
|
if debug:
|
495
501
|
dprint("No columns detected as datetimes, returning...")
|
496
|
-
return df
|
502
|
+
return df.fillna(pandas.NA)
|
497
503
|
|
498
504
|
if debug:
|
499
505
|
dprint("Converting columns to datetimes: " + str(datetime_cols))
|
500
506
|
|
501
507
|
try:
|
502
508
|
if not using_dask:
|
503
|
-
df[datetime_cols] = df[datetime_cols].apply(
|
509
|
+
df[datetime_cols] = df[datetime_cols].apply(
|
510
|
+
pd.to_datetime,
|
511
|
+
utc=True,
|
512
|
+
format='ISO8601',
|
513
|
+
)
|
504
514
|
else:
|
505
515
|
df[datetime_cols] = df[datetime_cols].apply(
|
506
516
|
pd.to_datetime,
|
507
517
|
utc=True,
|
508
518
|
axis=1,
|
509
519
|
meta={
|
510
|
-
col: 'datetime64[ns]'
|
520
|
+
col: 'datetime64[ns, UTC]'
|
511
521
|
for col in datetime_cols
|
512
522
|
}
|
513
523
|
)
|
@@ -517,13 +527,17 @@ def parse_df_datetimes(
|
|
517
527
|
+ f"{traceback.format_exc()}"
|
518
528
|
)
|
519
529
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
530
|
+
if strip_timezone:
|
531
|
+
for dt in datetime_cols:
|
532
|
+
try:
|
533
|
+
df[dt] = df[dt].dt.tz_localize(None)
|
534
|
+
except Exception:
|
535
|
+
warn(
|
536
|
+
f"Unable to convert column '{dt}' to naive datetime:\n"
|
537
|
+
+ f"{traceback.format_exc()}"
|
538
|
+
)
|
525
539
|
|
526
|
-
return df
|
540
|
+
return df.fillna(pandas.NA)
|
527
541
|
|
528
542
|
|
529
543
|
def get_unhashable_cols(df: 'pd.DataFrame') -> List[str]:
|
@@ -674,6 +688,7 @@ def enforce_dtypes(
|
|
674
688
|
dtypes: Dict[str, str],
|
675
689
|
safe_copy: bool = True,
|
676
690
|
coerce_numeric: bool = True,
|
691
|
+
coerce_timezone: bool = True,
|
677
692
|
debug: bool = False,
|
678
693
|
) -> 'pd.DataFrame':
|
679
694
|
"""
|
@@ -695,6 +710,9 @@ def enforce_dtypes(
|
|
695
710
|
coerce_numeric: bool, default True
|
696
711
|
If `True`, convert float and int collisions to numeric.
|
697
712
|
|
713
|
+
coerce_timezone: bool, default True
|
714
|
+
If `True`, convert datetimes to UTC.
|
715
|
+
|
698
716
|
debug: bool, default False
|
699
717
|
Verbosity toggle.
|
700
718
|
|
@@ -703,21 +721,18 @@ def enforce_dtypes(
|
|
703
721
|
The Pandas DataFrame with the types enforced.
|
704
722
|
"""
|
705
723
|
import json
|
706
|
-
import traceback
|
707
|
-
from decimal import Decimal
|
708
724
|
from meerschaum.utils.debug import dprint
|
709
|
-
from meerschaum.utils.warnings import warn
|
710
725
|
from meerschaum.utils.formatting import pprint
|
711
|
-
from meerschaum.config.static import STATIC_CONFIG
|
712
|
-
from meerschaum.utils.packages import import_pandas
|
713
726
|
from meerschaum.utils.dtypes import (
|
714
727
|
are_dtypes_equal,
|
715
728
|
to_pandas_dtype,
|
716
729
|
is_dtype_numeric,
|
717
730
|
attempt_cast_to_numeric,
|
718
731
|
attempt_cast_to_uuid,
|
719
|
-
coerce_timezone,
|
732
|
+
coerce_timezone as _coerce_timezone,
|
720
733
|
)
|
734
|
+
pandas = mrsm.attempt_import('pandas')
|
735
|
+
is_dask = 'dask' in df.__module__
|
721
736
|
if safe_copy:
|
722
737
|
df = df.copy()
|
723
738
|
if len(df.columns) == 0:
|
@@ -744,6 +759,11 @@ def enforce_dtypes(
|
|
744
759
|
for col, typ in dtypes.items()
|
745
760
|
if typ == 'uuid'
|
746
761
|
]
|
762
|
+
datetime_cols = [
|
763
|
+
col
|
764
|
+
for col, typ in dtypes.items()
|
765
|
+
if are_dtypes_equal(typ, 'datetime')
|
766
|
+
]
|
747
767
|
df_numeric_cols = get_numeric_cols(df)
|
748
768
|
if debug:
|
749
769
|
dprint("Desired data types:")
|
@@ -792,6 +812,13 @@ def enforce_dtypes(
|
|
792
812
|
if debug:
|
793
813
|
dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
|
794
814
|
|
815
|
+
if datetime_cols and coerce_timezone:
|
816
|
+
if debug:
|
817
|
+
dprint(f"Checking for datetime conversion: {datetime_cols}")
|
818
|
+
for col in datetime_cols:
|
819
|
+
if col in df.columns:
|
820
|
+
df[col] = _coerce_timezone(df[col])
|
821
|
+
|
795
822
|
df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
|
796
823
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
797
824
|
if debug:
|
@@ -826,8 +853,7 @@ def enforce_dtypes(
|
|
826
853
|
if debug:
|
827
854
|
dprint(
|
828
855
|
"The incoming DataFrame has mostly the same types, skipping enforcement."
|
829
|
-
+ "The only detected difference was in the following datetime columns
|
830
|
-
+ " Timezone information may be stripped."
|
856
|
+
+ "The only detected difference was in the following datetime columns."
|
831
857
|
)
|
832
858
|
pprint(detected_dt_cols)
|
833
859
|
return df
|
@@ -930,11 +956,15 @@ def get_datetime_bound_from_df(
|
|
930
956
|
if datetime_column not in df.columns:
|
931
957
|
return None
|
932
958
|
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
959
|
+
try:
|
960
|
+
dt_val = (
|
961
|
+
df[datetime_column].min(skipna=True)
|
962
|
+
if minimum
|
963
|
+
else df[datetime_column].max(skipna=True)
|
964
|
+
)
|
965
|
+
except Exception:
|
966
|
+
dt_val = pandas.NA
|
967
|
+
if is_dask and dt_val is not None and dt_val is not pandas.NA:
|
938
968
|
dt_val = dt_val.compute()
|
939
969
|
|
940
970
|
return (
|
@@ -1194,9 +1224,9 @@ def query_df(
|
|
1194
1224
|
dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
|
1195
1225
|
|
1196
1226
|
if inplace:
|
1197
|
-
df.
|
1227
|
+
df.fillna(NA, inplace=True)
|
1198
1228
|
else:
|
1199
|
-
df = df.infer_objects(
|
1229
|
+
df = df.infer_objects().fillna(NA)
|
1200
1230
|
|
1201
1231
|
if isinstance(begin, str):
|
1202
1232
|
begin = dateutil_parser.parse(begin)
|
@@ -1243,12 +1273,12 @@ def query_df(
|
|
1243
1273
|
end_tz = end.tzinfo if end is not None else None
|
1244
1274
|
|
1245
1275
|
if begin_tz is not None or end_tz is not None or df_tz is not None:
|
1246
|
-
begin = coerce_timezone(begin)
|
1247
|
-
end = coerce_timezone(end)
|
1276
|
+
begin = coerce_timezone(begin, strip_utc=False)
|
1277
|
+
end = coerce_timezone(end, strip_utc=False)
|
1248
1278
|
if df_tz is not None:
|
1249
1279
|
if debug:
|
1250
1280
|
dprint(f"Casting column '{datetime_column}' to UTC...")
|
1251
|
-
df[datetime_column] = coerce_timezone(df[datetime_column])
|
1281
|
+
df[datetime_column] = coerce_timezone(df[datetime_column], strip_utc=False)
|
1252
1282
|
dprint(f"Using datetime bounds:\n{begin=}\n{end=}")
|
1253
1283
|
|
1254
1284
|
in_ex_params = get_in_ex_params(params)
|
@@ -19,7 +19,7 @@ MRSM_PD_DTYPES: Dict[str, str] = {
|
|
19
19
|
'json': 'object',
|
20
20
|
'numeric': 'object',
|
21
21
|
'uuid': 'object',
|
22
|
-
'datetime': 'datetime64[ns]',
|
22
|
+
'datetime': 'datetime64[ns, UTC]',
|
23
23
|
'bool': 'bool[pyarrow]',
|
24
24
|
'int': 'Int64',
|
25
25
|
'int8': 'Int8',
|
@@ -245,7 +245,10 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
|
|
245
245
|
return x
|
246
246
|
|
247
247
|
|
248
|
-
def coerce_timezone(
|
248
|
+
def coerce_timezone(
|
249
|
+
dt: Any,
|
250
|
+
strip_utc: bool = False,
|
251
|
+
) -> Any:
|
249
252
|
"""
|
250
253
|
Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
|
251
254
|
return a naive datetime in terms of UTC.
|
@@ -256,13 +259,29 @@ def coerce_timezone(dt: Any) -> Any:
|
|
256
259
|
if isinstance(dt, int):
|
257
260
|
return dt
|
258
261
|
|
259
|
-
|
262
|
+
if isinstance(dt, str):
|
263
|
+
dateutil_parser = mrsm.attempt_import('dateutil.parser')
|
264
|
+
dt = dateutil_parser.parse(dt)
|
265
|
+
|
266
|
+
dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
|
260
267
|
|
261
268
|
if dt_is_series:
|
269
|
+
is_dask = 'dask' in dt.__module__
|
262
270
|
pandas = mrsm.attempt_import('pandas')
|
263
|
-
|
271
|
+
dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
|
272
|
+
dt_series = (
|
273
|
+
pandas.to_datetime(dt, utc=True)
|
274
|
+
if dd is None
|
275
|
+
else dd.to_datetime(dt, utc=True)
|
276
|
+
)
|
277
|
+
if strip_utc:
|
278
|
+
dt_series = dt_series.apply(lambda x: x.replace(tzinfo=None))
|
279
|
+
|
280
|
+
return dt_series
|
264
281
|
|
265
282
|
if dt.tzinfo is None:
|
266
|
-
|
283
|
+
if strip_utc:
|
284
|
+
return dt
|
285
|
+
return dt.replace(tzinfo=timezone.utc)
|
267
286
|
|
268
|
-
return dt.astimezone(timezone.utc)
|
287
|
+
return dt.astimezone(timezone.utc)
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,15 +7,16 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple, List
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
14
14
|
'mysql': (38, 20),
|
15
15
|
'mssql': (28, 10),
|
16
|
-
'duckdb': (15,
|
16
|
+
'duckdb': (15, 3),
|
17
17
|
'sqlite': (15, 4),
|
18
18
|
}
|
19
|
+
TIMEZONE_NAIVE_FLAVORS = {'oracle', 'mysql', 'mariadb'}
|
19
20
|
|
20
21
|
### MySQL doesn't allow for casting as BIGINT, so this is a workaround.
|
21
22
|
DB_FLAVORS_CAST_DTYPES = {
|
@@ -49,6 +50,7 @@ DB_FLAVORS_CAST_DTYPES = {
|
|
49
50
|
'NVARCHAR(2000)': 'NVARCHAR2(2000)',
|
50
51
|
'NVARCHAR': 'NVARCHAR2(2000)',
|
51
52
|
'NVARCHAR2': 'NVARCHAR2(2000)',
|
53
|
+
'CHAR': 'CHAR(36)', # UUID columns
|
52
54
|
},
|
53
55
|
'mssql': {
|
54
56
|
'NVARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)',
|
@@ -78,7 +80,9 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
|
|
78
80
|
'NUMBER': 'numeric',
|
79
81
|
'NUMERIC': 'numeric',
|
80
82
|
'TIMESTAMP': 'datetime64[ns]',
|
83
|
+
'TIMESTAMP WITHOUT TIMEZONE': 'datetime64[ns]',
|
81
84
|
'TIMESTAMP WITH TIMEZONE': 'datetime64[ns, UTC]',
|
85
|
+
'TIMESTAMP WITH TIME ZONE': 'datetime64[ns, UTC]',
|
82
86
|
'TIMESTAMPTZ': 'datetime64[ns, UTC]',
|
83
87
|
'DATE': 'datetime64[ns]',
|
84
88
|
'DATETIME': 'datetime64[ns]',
|
@@ -160,7 +164,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
160
164
|
'mariadb': 'DATETIME',
|
161
165
|
'mysql': 'DATETIME',
|
162
166
|
'mssql': 'DATETIME2',
|
163
|
-
'oracle': '
|
167
|
+
'oracle': 'TIMESTAMP',
|
164
168
|
'sqlite': 'DATETIME',
|
165
169
|
'duckdb': 'TIMESTAMP',
|
166
170
|
'citus': 'TIMESTAMP',
|
@@ -168,24 +172,37 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
168
172
|
'default': 'DATETIME',
|
169
173
|
},
|
170
174
|
'datetime64[ns, UTC]': {
|
171
|
-
'timescaledb': '
|
172
|
-
'postgresql': '
|
173
|
-
'mariadb': '
|
174
|
-
'mysql': '
|
175
|
+
'timescaledb': 'TIMESTAMPTZ',
|
176
|
+
'postgresql': 'TIMESTAMPTZ',
|
177
|
+
'mariadb': 'DATETIME',
|
178
|
+
'mysql': 'DATETIME',
|
175
179
|
'mssql': 'DATETIMEOFFSET',
|
176
180
|
'oracle': 'TIMESTAMP',
|
177
181
|
'sqlite': 'TIMESTAMP',
|
178
|
-
'duckdb': '
|
179
|
-
'citus': '
|
180
|
-
'cockroachdb': '
|
181
|
-
'default': '
|
182
|
+
'duckdb': 'TIMESTAMPTZ',
|
183
|
+
'citus': 'TIMESTAMPTZ',
|
184
|
+
'cockroachdb': 'TIMESTAMPTZ',
|
185
|
+
'default': 'TIMESTAMPTZ',
|
186
|
+
},
|
187
|
+
'datetime': {
|
188
|
+
'timescaledb': 'TIMESTAMPTZ',
|
189
|
+
'postgresql': 'TIMESTAMPTZ',
|
190
|
+
'mariadb': 'DATETIME',
|
191
|
+
'mysql': 'DATETIME',
|
192
|
+
'mssql': 'DATETIMEOFFSET',
|
193
|
+
'oracle': 'TIMESTAMP',
|
194
|
+
'sqlite': 'TIMESTAMP',
|
195
|
+
'duckdb': 'TIMESTAMPTZ',
|
196
|
+
'citus': 'TIMESTAMPTZ',
|
197
|
+
'cockroachdb': 'TIMESTAMPTZ',
|
198
|
+
'default': 'TIMESTAMPTZ',
|
182
199
|
},
|
183
200
|
'bool': {
|
184
201
|
'timescaledb': 'BOOLEAN',
|
185
202
|
'postgresql': 'BOOLEAN',
|
186
203
|
'mariadb': 'BOOLEAN',
|
187
204
|
'mysql': 'BOOLEAN',
|
188
|
-
'mssql': '
|
205
|
+
'mssql': 'BIT',
|
189
206
|
'oracle': 'INTEGER',
|
190
207
|
'sqlite': 'FLOAT',
|
191
208
|
'duckdb': 'BOOLEAN',
|
@@ -252,7 +269,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
252
269
|
'mysql': 'CHAR(36)',
|
253
270
|
'mssql': 'UNIQUEIDENTIFIER',
|
254
271
|
### I know this is too much space, but erring on the side of caution.
|
255
|
-
'oracle': '
|
272
|
+
'oracle': 'CHAR(36)',
|
256
273
|
'sqlite': 'TEXT',
|
257
274
|
'duckdb': 'VARCHAR',
|
258
275
|
'citus': 'UUID',
|
@@ -301,24 +318,24 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
301
318
|
'default': 'DateTime',
|
302
319
|
},
|
303
320
|
'datetime64[ns, UTC]': {
|
304
|
-
'timescaledb': 'DateTime',
|
305
|
-
'postgresql': 'DateTime',
|
306
|
-
'mariadb': 'DateTime',
|
307
|
-
'mysql': 'DateTime',
|
321
|
+
'timescaledb': 'DateTime(timezone=True)',
|
322
|
+
'postgresql': 'DateTime(timezone=True)',
|
323
|
+
'mariadb': 'DateTime(timezone=True)',
|
324
|
+
'mysql': 'DateTime(timezone=True)',
|
308
325
|
'mssql': 'sqlalchemy.dialects.mssql.DATETIMEOFFSET',
|
309
|
-
'oracle': '
|
310
|
-
'sqlite': 'DateTime',
|
311
|
-
'duckdb': 'DateTime',
|
312
|
-
'citus': 'DateTime',
|
313
|
-
'cockroachdb': 'DateTime',
|
314
|
-
'default': 'DateTime',
|
326
|
+
'oracle': 'sqlalchemy.dialects.oracle.TIMESTAMP(timezone=True)',
|
327
|
+
'sqlite': 'DateTime(timezone=True)',
|
328
|
+
'duckdb': 'DateTime(timezone=True)',
|
329
|
+
'citus': 'DateTime(timezone=True)',
|
330
|
+
'cockroachdb': 'DateTime(timezone=True)',
|
331
|
+
'default': 'DateTime(timezone=True)',
|
315
332
|
},
|
316
333
|
'bool': {
|
317
334
|
'timescaledb': 'Boolean',
|
318
335
|
'postgresql': 'Boolean',
|
319
336
|
'mariadb': 'Integer',
|
320
337
|
'mysql': 'Integer',
|
321
|
-
'mssql': '
|
338
|
+
'mssql': 'sqlalchemy.dialects.mssql.BIT',
|
322
339
|
'oracle': 'Integer',
|
323
340
|
'sqlite': 'Float',
|
324
341
|
'duckdb': 'Boolean',
|
@@ -384,7 +401,7 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
384
401
|
'mariadb': 'sqlalchemy.dialects.mysql.CHAR(36)',
|
385
402
|
'mysql': 'sqlalchemy.dialects.mysql.CHAR(36)',
|
386
403
|
'mssql': 'Uuid',
|
387
|
-
'oracle': '
|
404
|
+
'oracle': 'sqlalchemy.dialects.oracle.CHAR(36)',
|
388
405
|
'sqlite': 'UnicodeText',
|
389
406
|
'duckdb': 'UnicodeText',
|
390
407
|
'citus': 'Uuid',
|
@@ -393,6 +410,20 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
|
|
393
410
|
},
|
394
411
|
}
|
395
412
|
|
413
|
+
AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
|
414
|
+
'timescaledb': 'GENERATED BY DEFAULT AS IDENTITY',
|
415
|
+
'postgresql': 'GENERATED BY DEFAULT AS IDENTITY',
|
416
|
+
'mariadb': 'AUTO_INCREMENT',
|
417
|
+
'mysql': 'AUTO_INCREMENT',
|
418
|
+
'mssql': 'IDENTITY(1,1)',
|
419
|
+
'oracle': 'GENERATED BY DEFAULT ON NULL AS IDENTITY',
|
420
|
+
'sqlite': 'AUTOINCREMENT',
|
421
|
+
'duckdb': 'GENERATED BY DEFAULT',
|
422
|
+
'citus': 'GENERATED BY DEFAULT',
|
423
|
+
'cockroachdb': 'GENERATED BY DEFAULT AS IDENTITY',
|
424
|
+
'default': 'GENERATED BY DEFAULT AS IDENTITY',
|
425
|
+
}
|
426
|
+
|
396
427
|
|
397
428
|
def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool = False) -> str:
|
398
429
|
"""
|
@@ -456,10 +487,10 @@ def get_db_type_from_pd_type(
|
|
456
487
|
The database data type for the incoming Pandas data type.
|
457
488
|
If nothing can be found, a warning will be thrown and 'TEXT' will be returned.
|
458
489
|
"""
|
459
|
-
import ast
|
460
490
|
from meerschaum.utils.warnings import warn
|
461
491
|
from meerschaum.utils.packages import attempt_import
|
462
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
492
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
|
493
|
+
from meerschaum.utils.misc import parse_arguments_str
|
463
494
|
sqlalchemy_types = attempt_import('sqlalchemy.types')
|
464
495
|
|
465
496
|
types_registry = (
|
@@ -512,15 +543,16 @@ def get_db_type_from_pd_type(
|
|
512
543
|
|
513
544
|
if db_type.startswith('sqlalchemy.dialects'):
|
514
545
|
dialect, typ_class_name = db_type.replace('sqlalchemy.dialects.', '').split('.', maxsplit=2)
|
515
|
-
|
546
|
+
cls_args, cls_kwargs = None, None
|
516
547
|
if '(' in typ_class_name:
|
517
|
-
typ_class_name,
|
518
|
-
|
548
|
+
typ_class_name, args_str = typ_class_name.split('(', maxsplit=1)
|
549
|
+
args_str = args_str.rstrip(')')
|
550
|
+
cls_args, cls_kwargs = parse_arguments_str(args_str)
|
519
551
|
sqlalchemy_dialects_flavor_module = attempt_import(f'sqlalchemy.dialects.{dialect}')
|
520
552
|
cls = getattr(sqlalchemy_dialects_flavor_module, typ_class_name)
|
521
|
-
if
|
553
|
+
if cls_args is None:
|
522
554
|
return cls
|
523
|
-
return cls(
|
555
|
+
return cls(*cls_args, **cls_kwargs)
|
524
556
|
|
525
557
|
if 'numeric' in db_type.lower():
|
526
558
|
numeric_type_str = PD_TO_DB_DTYPES_FLAVORS['numeric'].get(flavor, 'NUMERIC')
|
@@ -528,4 +560,15 @@ def get_db_type_from_pd_type(
|
|
528
560
|
return sqlalchemy_types.Numeric
|
529
561
|
precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
|
530
562
|
return sqlalchemy_types.Numeric(precision, scale)
|
531
|
-
|
563
|
+
|
564
|
+
cls_args, cls_kwargs = None, None
|
565
|
+
typ_class_name = db_type
|
566
|
+
if '(' in db_type:
|
567
|
+
typ_class_name, args_str = db_type.split('(', maxsplit=1)
|
568
|
+
args_str = args_str.rstrip(')')
|
569
|
+
cls_args, cls_kwargs = parse_arguments_str(args_str)
|
570
|
+
|
571
|
+
cls = getattr(sqlalchemy_types, typ_class_name)
|
572
|
+
if cls_args is None:
|
573
|
+
return cls
|
574
|
+
return cls(*cls_args, **cls_kwargs)
|
meerschaum/utils/misc.py
CHANGED
@@ -214,20 +214,20 @@ def parse_config_substitution(
|
|
214
214
|
|
215
215
|
|
216
216
|
def edit_file(
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
217
|
+
path: Union['pathlib.Path', str],
|
218
|
+
default_editor: str = 'pyvim',
|
219
|
+
debug: bool = False
|
220
|
+
) -> bool:
|
221
221
|
"""
|
222
222
|
Open a file for editing.
|
223
|
-
|
223
|
+
|
224
224
|
Attempt to launch the user's defined `$EDITOR`, otherwise use `pyvim`.
|
225
225
|
|
226
226
|
Parameters
|
227
227
|
----------
|
228
228
|
path: Union[pathlib.Path, str]
|
229
229
|
The path to the file to be edited.
|
230
|
-
|
230
|
+
|
231
231
|
default_editor: str, default 'pyvim'
|
232
232
|
If `$EDITOR` is not set, use this instead.
|
233
233
|
If `pyvim` is not installed, it will install it from PyPI.
|
@@ -250,7 +250,7 @@ def edit_file(
|
|
250
250
|
rc = call([EDITOR, path])
|
251
251
|
except Exception as e: ### can't open with default editors
|
252
252
|
if debug:
|
253
|
-
dprint(e)
|
253
|
+
dprint(str(e))
|
254
254
|
dprint('Failed to open file with system editor. Falling back to pyvim...')
|
255
255
|
pyvim = attempt_import('pyvim', lazy=False)
|
256
256
|
rc = run_python_package('pyvim', [path], venv=package_venv(pyvim), debug=debug)
|
@@ -258,10 +258,10 @@ def edit_file(
|
|
258
258
|
|
259
259
|
|
260
260
|
def is_pipe_registered(
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
261
|
+
pipe: mrsm.Pipe,
|
262
|
+
pipes: PipesDict,
|
263
|
+
debug: bool = False
|
264
|
+
) -> bool:
|
265
265
|
"""
|
266
266
|
Check if a Pipe is inside the pipes dictionary.
|
267
267
|
|
@@ -269,10 +269,10 @@ def is_pipe_registered(
|
|
269
269
|
----------
|
270
270
|
pipe: meerschaum.Pipe
|
271
271
|
The pipe to see if it's in the dictionary.
|
272
|
-
|
272
|
+
|
273
273
|
pipes: PipesDict
|
274
274
|
The dictionary to search inside.
|
275
|
-
|
275
|
+
|
276
276
|
debug: bool, default False
|
277
277
|
Verbosity toggle.
|
278
278
|
|
@@ -975,13 +975,13 @@ def json_serialize_datetime(dt: datetime) -> Union[str, None]:
|
|
975
975
|
|
976
976
|
|
977
977
|
def wget(
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
978
|
+
url: str,
|
979
|
+
dest: Optional[Union[str, 'pathlib.Path']] = None,
|
980
|
+
headers: Optional[Dict[str, Any]] = None,
|
981
|
+
color: bool = True,
|
982
|
+
debug: bool = False,
|
983
|
+
**kw: Any
|
984
|
+
) -> 'pathlib.Path':
|
985
985
|
"""
|
986
986
|
Mimic `wget` with `requests`.
|
987
987
|
|
@@ -989,7 +989,7 @@ def wget(
|
|
989
989
|
----------
|
990
990
|
url: str
|
991
991
|
The URL to the resource to be downloaded.
|
992
|
-
|
992
|
+
|
993
993
|
dest: Optional[Union[str, pathlib.Path]], default None
|
994
994
|
The destination path of the downloaded file.
|
995
995
|
If `None`, save to the current directory.
|
@@ -1426,7 +1426,40 @@ def flatten_list(list_: List[Any]) -> List[Any]:
|
|
1426
1426
|
yield item
|
1427
1427
|
|
1428
1428
|
|
1429
|
-
def
|
1429
|
+
def parse_arguments_str(args_str: str) -> Tuple[Tuple[Any], Dict[str, Any]]:
|
1430
|
+
"""
|
1431
|
+
Parse a string containing the text to be passed into a function
|
1432
|
+
and return a tuple of args, kwargs.
|
1433
|
+
|
1434
|
+
Parameters
|
1435
|
+
----------
|
1436
|
+
args_str: str
|
1437
|
+
The contents of the function parameter (as a string).
|
1438
|
+
|
1439
|
+
Returns
|
1440
|
+
-------
|
1441
|
+
A tuple of args (tuple) and kwargs (dict[str, Any]).
|
1442
|
+
|
1443
|
+
Examples
|
1444
|
+
--------
|
1445
|
+
>>> parse_arguments_str('123, 456, foo=789, bar="baz"')
|
1446
|
+
(123, 456), {'foo': 789, 'bar': 'baz'}
|
1447
|
+
"""
|
1448
|
+
import ast
|
1449
|
+
args = []
|
1450
|
+
kwargs = {}
|
1451
|
+
|
1452
|
+
for part in args_str.split(','):
|
1453
|
+
if '=' in part:
|
1454
|
+
key, val = part.split('=', 1)
|
1455
|
+
kwargs[key.strip()] = ast.literal_eval(val)
|
1456
|
+
else:
|
1457
|
+
args.append(ast.literal_eval(part.strip()))
|
1458
|
+
|
1459
|
+
return tuple(args), kwargs
|
1460
|
+
|
1461
|
+
|
1462
|
+
def make_symlink(src_path: 'pathlib.Path', dest_path: 'pathlib.Path') -> SuccessTuple:
|
1430
1463
|
"""
|
1431
1464
|
Wrap around `pathlib.Path.symlink_to`, but add support for Windows.
|
1432
1465
|
|
@@ -1452,7 +1485,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
|
|
1452
1485
|
msg = str(e)
|
1453
1486
|
if success:
|
1454
1487
|
return success, "Success"
|
1455
|
-
|
1488
|
+
|
1456
1489
|
### Failed to create a symlink.
|
1457
1490
|
### If we're not on Windows, return an error.
|
1458
1491
|
import platform
|
@@ -1477,7 +1510,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
|
|
1477
1510
|
shutil.copy(src_path, dest_path)
|
1478
1511
|
except Exception as e:
|
1479
1512
|
return False, str(e)
|
1480
|
-
|
1513
|
+
|
1481
1514
|
return True, "Success"
|
1482
1515
|
|
1483
1516
|
|
@@ -163,7 +163,8 @@ packages['api'] = {
|
|
163
163
|
'fastapi' : 'fastapi>=0.111.0',
|
164
164
|
'fastapi_login' : 'fastapi-login>=1.7.2',
|
165
165
|
'multipart' : 'python-multipart>=0.0.9',
|
166
|
-
'httpx' : 'httpx>=0.
|
166
|
+
'httpx' : 'httpx>=0.27.2',
|
167
|
+
'httpcore' : 'httpcore>=1.0.6',
|
167
168
|
'valkey' : 'valkey>=6.0.0',
|
168
169
|
}
|
169
170
|
packages['api'].update(packages['sql'])
|