meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +2 -1
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/shell/Shell.py +5 -4
- meerschaum/_internal/static.py +8 -24
- meerschaum/actions/bootstrap.py +1 -1
- meerschaum/actions/edit.py +6 -3
- meerschaum/actions/start.py +1 -1
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +2 -1
- meerschaum/api/dash/__init__.py +0 -2
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/dashboard.py +20 -19
- meerschaum/api/dash/callbacks/jobs.py +11 -5
- meerschaum/api/dash/callbacks/pipes.py +106 -5
- meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
- meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
- meerschaum/api/dash/jobs.py +1 -1
- meerschaum/api/dash/pages/__init__.py +2 -1
- meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
- meerschaum/api/dash/pages/pipes.py +4 -3
- meerschaum/api/dash/pages/settings/__init__.py +0 -1
- meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
- meerschaum/api/dash/pipes.py +131 -0
- meerschaum/api/dash/tokens.py +28 -31
- meerschaum/api/routes/_pipes.py +47 -37
- meerschaum/config/_default.py +13 -2
- meerschaum/config/_paths.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +9 -8
- meerschaum/connectors/api/_pipes.py +2 -18
- meerschaum/connectors/api/_tokens.py +2 -2
- meerschaum/connectors/instance/_tokens.py +10 -6
- meerschaum/connectors/sql/_SQLConnector.py +14 -0
- meerschaum/connectors/sql/_create_engine.py +3 -14
- meerschaum/connectors/sql/_pipes.py +175 -185
- meerschaum/connectors/sql/_sql.py +38 -20
- meerschaum/connectors/sql/tables/__init__.py +237 -122
- meerschaum/connectors/valkey/_pipes.py +44 -16
- meerschaum/core/Pipe/__init__.py +28 -5
- meerschaum/core/Pipe/_attributes.py +273 -46
- meerschaum/core/Pipe/_data.py +55 -17
- meerschaum/core/Pipe/_dtypes.py +19 -4
- meerschaum/core/Pipe/_edit.py +2 -0
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_sync.py +90 -160
- meerschaum/core/Pipe/_verify.py +3 -3
- meerschaum/core/Token/_Token.py +4 -5
- meerschaum/plugins/bootstrap.py +508 -3
- meerschaum/utils/_get_pipes.py +1 -1
- meerschaum/utils/dataframe.py +385 -68
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +387 -22
- meerschaum/utils/dtypes/sql.py +327 -31
- meerschaum/utils/misc.py +9 -68
- meerschaum/utils/packages/__init__.py +7 -21
- meerschaum/utils/packages/_packages.py +7 -2
- meerschaum/utils/schedule.py +1 -1
- meerschaum/utils/sql.py +8 -8
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
- meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_data.py
CHANGED
@@ -29,6 +29,7 @@ def get_data(
|
|
29
29
|
as_iterator: bool = False,
|
30
30
|
as_chunks: bool = False,
|
31
31
|
as_dask: bool = False,
|
32
|
+
add_missing_columns: bool = False,
|
32
33
|
chunk_interval: Union[timedelta, int, None] = None,
|
33
34
|
order: Optional[str] = 'asc',
|
34
35
|
limit: Optional[int] = None,
|
@@ -72,6 +73,9 @@ def get_data(
|
|
72
73
|
If `True`, return a `dask.DataFrame`
|
73
74
|
(which may be loaded into a Pandas DataFrame with `df.compute()`).
|
74
75
|
|
76
|
+
add_missing_columns: bool, default False
|
77
|
+
If `True`, add any missing columns from `Pipe.dtypes` to the dataframe.
|
78
|
+
|
75
79
|
chunk_interval: Union[timedelta, int, None], default None
|
76
80
|
If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
|
77
81
|
This may be set under `pipe.parameters['chunk_minutes']`.
|
@@ -103,12 +107,13 @@ def get_data(
|
|
103
107
|
from meerschaum.utils.warnings import warn
|
104
108
|
from meerschaum.utils.venv import Venv
|
105
109
|
from meerschaum.connectors import get_connector_plugin
|
106
|
-
from meerschaum.utils.
|
107
|
-
from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
|
110
|
+
from meerschaum.utils.dtypes import to_pandas_dtype
|
108
111
|
from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
|
109
112
|
from meerschaum.utils.packages import attempt_import
|
113
|
+
from meerschaum.utils.warnings import dprint
|
110
114
|
dd = attempt_import('dask.dataframe') if as_dask else None
|
111
115
|
dask = attempt_import('dask') if as_dask else None
|
116
|
+
_ = attempt_import('partd', lazy=False) if as_dask else None
|
112
117
|
|
113
118
|
if select_columns == '*':
|
114
119
|
select_columns = None
|
@@ -187,14 +192,17 @@ def get_data(
|
|
187
192
|
order=order,
|
188
193
|
limit=limit,
|
189
194
|
fresh=fresh,
|
195
|
+
add_missing_columns=True,
|
190
196
|
debug=debug,
|
191
197
|
)
|
192
198
|
for (chunk_begin, chunk_end) in bounds
|
193
199
|
]
|
194
200
|
dask_meta = {
|
195
201
|
col: to_pandas_dtype(typ)
|
196
|
-
for col, typ in self.
|
202
|
+
for col, typ in self.get_dtypes(refresh=True, infer=True, debug=debug).items()
|
197
203
|
}
|
204
|
+
if debug:
|
205
|
+
dprint(f"Dask meta:\n{dask_meta}")
|
198
206
|
return _sort_df(dd.from_delayed(dask_chunks, meta=dask_meta))
|
199
207
|
|
200
208
|
if not self.exists(debug=debug):
|
@@ -248,6 +256,7 @@ def get_data(
|
|
248
256
|
if not select_columns:
|
249
257
|
select_columns = [col for col in df.columns]
|
250
258
|
|
259
|
+
pipe_dtypes = self.get_dtypes(refresh=False, debug=debug)
|
251
260
|
cols_to_omit = [
|
252
261
|
col
|
253
262
|
for col in df.columns
|
@@ -261,7 +270,11 @@ def get_data(
|
|
261
270
|
col
|
262
271
|
for col in select_columns
|
263
272
|
if col not in df.columns
|
264
|
-
]
|
273
|
+
] + ([
|
274
|
+
col
|
275
|
+
for col in pipe_dtypes
|
276
|
+
if col not in df.columns
|
277
|
+
] if add_missing_columns else [])
|
265
278
|
if cols_to_omit:
|
266
279
|
warn(
|
267
280
|
(
|
@@ -277,16 +290,26 @@ def get_data(
|
|
277
290
|
df = df[_cols_to_select]
|
278
291
|
|
279
292
|
if cols_to_add:
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
293
|
+
if not add_missing_columns:
|
294
|
+
from meerschaum.utils.misc import items_str
|
295
|
+
warn(
|
296
|
+
f"Will add columns {items_str(cols_to_add)} as nulls to dataframe.",
|
297
|
+
stack=False,
|
298
|
+
)
|
299
|
+
|
300
|
+
df = add_missing_cols_to_df(
|
301
|
+
df,
|
302
|
+
{
|
303
|
+
col: pipe_dtypes.get(col, 'string')
|
304
|
+
for col in cols_to_add
|
305
|
+
},
|
286
306
|
)
|
287
|
-
df = add_missing_cols_to_df(df, {col: 'string' for col in cols_to_add})
|
288
307
|
|
289
|
-
enforced_df = self.enforce_dtypes(
|
308
|
+
enforced_df = self.enforce_dtypes(
|
309
|
+
df,
|
310
|
+
dtypes=pipe_dtypes,
|
311
|
+
debug=debug,
|
312
|
+
)
|
290
313
|
|
291
314
|
if order:
|
292
315
|
return _sort_df(enforced_df)
|
@@ -310,7 +333,7 @@ def _get_data_as_iterator(
|
|
310
333
|
"""
|
311
334
|
Return a pipe's data as a generator.
|
312
335
|
"""
|
313
|
-
from meerschaum.utils.
|
336
|
+
from meerschaum.utils.dtypes import round_time
|
314
337
|
begin, end = self.parse_date_bounds(begin, end)
|
315
338
|
if not self.exists(debug=debug):
|
316
339
|
return
|
@@ -623,7 +646,7 @@ def get_chunk_interval(
|
|
623
646
|
if dt_col is None:
|
624
647
|
return timedelta(minutes=chunk_minutes)
|
625
648
|
|
626
|
-
dt_dtype = self.dtypes.get(dt_col, '
|
649
|
+
dt_dtype = self.dtypes.get(dt_col, 'datetime')
|
627
650
|
if 'int' in dt_dtype.lower():
|
628
651
|
return chunk_minutes
|
629
652
|
return timedelta(minutes=chunk_minutes)
|
@@ -687,11 +710,26 @@ def get_chunk_bounds(
|
|
687
710
|
elif are_dtypes_equal(str(type(end)), 'int'):
|
688
711
|
end += 1
|
689
712
|
consolidate_end_chunk = True
|
713
|
+
|
690
714
|
if begin is None and end is None:
|
691
715
|
return [(None, None)]
|
692
716
|
|
693
717
|
begin, end = self.parse_date_bounds(begin, end)
|
694
718
|
|
719
|
+
if begin and end:
|
720
|
+
if begin >= end:
|
721
|
+
return (
|
722
|
+
[(begin, begin)]
|
723
|
+
if bounded
|
724
|
+
else [(begin, None)]
|
725
|
+
)
|
726
|
+
if end <= begin:
|
727
|
+
return (
|
728
|
+
[(end, end)]
|
729
|
+
if bounded
|
730
|
+
else [(None, begin)]
|
731
|
+
)
|
732
|
+
|
695
733
|
### Set the chunk interval under `pipe.parameters['verify']['chunk_minutes']`.
|
696
734
|
chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
|
697
735
|
|
@@ -798,7 +836,7 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
|
|
798
836
|
Given a date bound (begin, end), coerce a timezone if necessary.
|
799
837
|
"""
|
800
838
|
from meerschaum.utils.misc import is_int
|
801
|
-
from meerschaum.utils.dtypes import coerce_timezone
|
839
|
+
from meerschaum.utils.dtypes import coerce_timezone, MRSM_PD_DTYPES
|
802
840
|
from meerschaum.utils.warnings import warn
|
803
841
|
dateutil_parser = mrsm.attempt_import('dateutil.parser')
|
804
842
|
|
@@ -823,9 +861,9 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
|
|
823
861
|
return None
|
824
862
|
|
825
863
|
dt_col = self.columns.get('datetime', None)
|
826
|
-
dt_typ = str(self.dtypes.get(dt_col, '
|
864
|
+
dt_typ = str(self.dtypes.get(dt_col, 'datetime'))
|
827
865
|
if dt_typ == 'datetime':
|
828
|
-
dt_typ = '
|
866
|
+
dt_typ = MRSM_PD_DTYPES['datetime']
|
829
867
|
return coerce_timezone(dt_val, strip_utc=('utc' not in dt_typ.lower()))
|
830
868
|
|
831
869
|
bounds = tuple(_parse_date_bound(dt_val) for dt_val in dt_vals)
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -22,6 +22,7 @@ def enforce_dtypes(
|
|
22
22
|
chunksize: Optional[int] = -1,
|
23
23
|
enforce: bool = True,
|
24
24
|
safe_copy: bool = True,
|
25
|
+
dtypes: Optional[Dict[str, str]] = None,
|
25
26
|
debug: bool = False,
|
26
27
|
) -> 'pd.DataFrame':
|
27
28
|
"""
|
@@ -50,7 +51,8 @@ def enforce_dtypes(
|
|
50
51
|
if not self.enforce:
|
51
52
|
enforce = False
|
52
53
|
|
53
|
-
|
54
|
+
explicit_dtypes = self.get_dtypes(infer=False, debug=debug) if enforce else {}
|
55
|
+
pipe_dtypes = self.get_dtypes(infer=True, debug=debug) if not dtypes else dtypes
|
54
56
|
|
55
57
|
try:
|
56
58
|
if isinstance(df, str):
|
@@ -103,14 +105,21 @@ def enforce_dtypes(
|
|
103
105
|
return _enforce_dtypes(
|
104
106
|
df,
|
105
107
|
pipe_dtypes,
|
108
|
+
explicit_dtypes=explicit_dtypes,
|
106
109
|
safe_copy=safe_copy,
|
107
110
|
strip_timezone=(self.tzinfo is None),
|
111
|
+
coerce_numeric=self.mixed_numerics,
|
108
112
|
coerce_timezone=enforce,
|
109
113
|
debug=debug,
|
110
114
|
)
|
111
115
|
|
112
116
|
|
113
|
-
def infer_dtypes(
|
117
|
+
def infer_dtypes(
|
118
|
+
self,
|
119
|
+
persist: bool = False,
|
120
|
+
refresh: bool = False,
|
121
|
+
debug: bool = False,
|
122
|
+
) -> Dict[str, Any]:
|
114
123
|
"""
|
115
124
|
If `dtypes` is not set in `meerschaum.Pipe.parameters`,
|
116
125
|
infer the data types from the underlying table if it exists.
|
@@ -119,6 +128,11 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
119
128
|
----------
|
120
129
|
persist: bool, default False
|
121
130
|
If `True`, persist the inferred data types to `meerschaum.Pipe.parameters`.
|
131
|
+
NOTE: Use with caution! Generally `dtypes` is meant to be user-configurable only.
|
132
|
+
|
133
|
+
refresh: bool, default False
|
134
|
+
If `True`, retrieve the latest columns-types for the pipe.
|
135
|
+
See `Pipe.get_columns_types()`.
|
122
136
|
|
123
137
|
Returns
|
124
138
|
-------
|
@@ -132,7 +146,7 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
132
146
|
|
133
147
|
### NOTE: get_columns_types() may return either the types as
|
134
148
|
### PostgreSQL- or Pandas-style.
|
135
|
-
columns_types = self.get_columns_types(debug=debug)
|
149
|
+
columns_types = self.get_columns_types(refresh=refresh, debug=debug)
|
136
150
|
|
137
151
|
remote_pd_dtypes = {
|
138
152
|
c: (
|
@@ -145,7 +159,8 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
145
159
|
if not persist:
|
146
160
|
return remote_pd_dtypes
|
147
161
|
|
148
|
-
|
162
|
+
parameters = self.get_parameters(refresh=refresh, debug=debug)
|
163
|
+
dtypes = parameters.get('dtypes', {})
|
149
164
|
dtypes.update({
|
150
165
|
col: typ
|
151
166
|
for col, typ in remote_pd_dtypes.items()
|
meerschaum/core/Pipe/_edit.py
CHANGED
@@ -47,6 +47,8 @@ def edit(
|
|
47
47
|
if self.temporary:
|
48
48
|
return False, "Cannot edit pipes created with `temporary=True` (read-only)."
|
49
49
|
|
50
|
+
self._invalidate_cache(hard=True, debug=debug)
|
51
|
+
|
50
52
|
if hasattr(self, '_symlinks'):
|
51
53
|
from meerschaum.utils.misc import get_val_from_dict_path, set_val_in_dict_path
|
52
54
|
for path, vals in self._symlinks.items():
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -127,7 +127,7 @@ def get_backtrack_interval(
|
|
127
127
|
if dt_col is None:
|
128
128
|
return backtrack_interval
|
129
129
|
|
130
|
-
dt_dtype = self.dtypes.get(dt_col, '
|
130
|
+
dt_dtype = self.dtypes.get(dt_col, 'datetime')
|
131
131
|
if 'int' in dt_dtype.lower():
|
132
132
|
return backtrack_minutes
|
133
133
|
|