meerschaum 2.9.5__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +19 -2
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/entry.py +6 -6
- meerschaum/_internal/shell/Shell.py +1 -1
- meerschaum/_internal/static.py +356 -0
- meerschaum/actions/api.py +12 -2
- meerschaum/actions/bootstrap.py +7 -7
- meerschaum/actions/edit.py +142 -18
- meerschaum/actions/register.py +137 -6
- meerschaum/actions/show.py +117 -29
- meerschaum/actions/stop.py +4 -1
- meerschaum/actions/sync.py +1 -1
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +11 -3
- meerschaum/api/_events.py +39 -2
- meerschaum/api/_oauth2.py +118 -8
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/custom.py +2 -2
- meerschaum/api/dash/callbacks/dashboard.py +103 -19
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +1 -1
- meerschaum/api/dash/callbacks/settings/__init__.py +1 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/settings/tokens.py +388 -0
- meerschaum/api/dash/components.py +30 -8
- meerschaum/api/dash/keys.py +19 -93
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/settings/__init__.py +1 -0
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/settings/tokens.py +55 -0
- meerschaum/api/dash/pipes.py +94 -59
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +606 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +4 -0
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +85 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/templates/termpage.html +12 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +14 -35
- meerschaum/api/routes/_login.py +49 -12
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +173 -140
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/config/__init__.py +43 -20
- meerschaum/config/_default.py +43 -6
- meerschaum/config/_edit.py +28 -24
- meerschaum/config/_environment.py +1 -1
- meerschaum/config/_patch.py +6 -6
- meerschaum/config/_paths.py +5 -1
- meerschaum/config/_read_config.py +65 -34
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +31 -11
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +10 -4
- meerschaum/connectors/__init__.py +4 -20
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +1 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +16 -31
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +151 -0
- meerschaum/connectors/instance/_tokens.py +296 -0
- meerschaum/connectors/instance/_users.py +181 -0
- meerschaum/connectors/parse.py +4 -1
- meerschaum/connectors/sql/_SQLConnector.py +8 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +9 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +156 -190
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +29 -2
- meerschaum/connectors/sql/tables/__init__.py +1 -1
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -4
- meerschaum/connectors/valkey/_pipes.py +53 -26
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +59 -19
- meerschaum/core/Pipe/_attributes.py +412 -90
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_data.py +96 -18
- meerschaum/core/Pipe/_dtypes.py +48 -18
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +118 -193
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +9 -11
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +34 -8
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Job.py +3 -2
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +1 -1
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +22 -7
- meerschaum/plugins/bootstrap.py +2 -1
- meerschaum/utils/_get_pipes.py +68 -27
- meerschaum/utils/daemon/Daemon.py +2 -1
- meerschaum/utils/daemon/__init__.py +30 -2
- meerschaum/utils/dataframe.py +473 -81
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +473 -34
- meerschaum/utils/dtypes/sql.py +368 -28
- meerschaum/utils/formatting/__init__.py +1 -1
- meerschaum/utils/formatting/_pipes.py +5 -4
- meerschaum/utils/formatting/_shell.py +11 -9
- meerschaum/utils/misc.py +246 -148
- meerschaum/utils/packages/__init__.py +10 -27
- meerschaum/utils/packages/_packages.py +41 -34
- meerschaum/utils/pipes.py +181 -0
- meerschaum/utils/process.py +1 -1
- meerschaum/utils/prompt.py +3 -1
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +121 -44
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +5 -7
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +92 -96
- meerschaum-3.0.0rc2.dist-info/RECORD +283 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
@@ -7,6 +7,8 @@ Attempt to create a pipe's requirements in one method.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
+
|
11
|
+
import meerschaum as mrsm
|
10
12
|
from meerschaum.utils.typing import SuccessTuple, Dict, Any
|
11
13
|
|
12
14
|
|
@@ -207,28 +209,56 @@ def _ask_for_columns(pipe, debug: bool=False) -> Dict[str, str]:
|
|
207
209
|
"""
|
208
210
|
Prompt the user for the column names.
|
209
211
|
"""
|
210
|
-
|
211
|
-
from meerschaum.utils.
|
212
|
+
import json
|
213
|
+
from meerschaum.utils.warnings import info
|
214
|
+
from meerschaum.utils.prompt import prompt, yes_no
|
215
|
+
from meerschaum.utils.formatting import get_console
|
216
|
+
from meerschaum.utils.formatting._shell import clear_screen
|
217
|
+
from meerschaum.utils.misc import to_snake_case
|
218
|
+
from meerschaum.config import get_config
|
219
|
+
rich_json = mrsm.attempt_import('rich.json')
|
212
220
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
'
|
233
|
-
|
234
|
-
|
221
|
+
do_clear = get_config('shell', 'clear_screen')
|
222
|
+
|
223
|
+
cols = {}
|
224
|
+
|
225
|
+
info(f"Please enter index columns for {pipe}:")
|
226
|
+
try:
|
227
|
+
datetime_name = prompt("Datetime column (empty to omit):", icon=False)
|
228
|
+
except KeyboardInterrupt:
|
229
|
+
datetime_name = None
|
230
|
+
|
231
|
+
if datetime_name:
|
232
|
+
cols['datetime'] = datetime_name
|
233
|
+
|
234
|
+
try:
|
235
|
+
id_name = prompt("ID column (empty to omit):", icon=False)
|
236
|
+
except KeyboardInterrupt:
|
237
|
+
id_name = None
|
238
|
+
|
239
|
+
if id_name:
|
240
|
+
cols['id'] = id_name
|
241
|
+
|
242
|
+
if yes_no("Add more columns?"):
|
243
|
+
while True:
|
244
|
+
if do_clear:
|
245
|
+
clear_screen(debug=debug)
|
246
|
+
|
247
|
+
cols_text = json.dumps(cols, indent=4)
|
248
|
+
info("Current index columns:")
|
249
|
+
get_console().print(rich_json.JSON(cols_text))
|
250
|
+
|
251
|
+
col_name = prompt("Enter index column (empty to stop):")
|
252
|
+
if not col_name:
|
253
|
+
break
|
254
|
+
|
255
|
+
if col_name in cols.values():
|
256
|
+
continue
|
257
|
+
|
258
|
+
col_ix = to_snake_case(col_name)
|
259
|
+
if col_ix in cols:
|
260
|
+
col_ix = col_ix + '_'
|
261
|
+
|
262
|
+
cols[col_ix] = col_name
|
263
|
+
|
264
|
+
return cols
|
meerschaum/core/Pipe/_data.py
CHANGED
@@ -29,6 +29,7 @@ def get_data(
|
|
29
29
|
as_iterator: bool = False,
|
30
30
|
as_chunks: bool = False,
|
31
31
|
as_dask: bool = False,
|
32
|
+
add_missing_columns: bool = False,
|
32
33
|
chunk_interval: Union[timedelta, int, None] = None,
|
33
34
|
order: Optional[str] = 'asc',
|
34
35
|
limit: Optional[int] = None,
|
@@ -72,6 +73,9 @@ def get_data(
|
|
72
73
|
If `True`, return a `dask.DataFrame`
|
73
74
|
(which may be loaded into a Pandas DataFrame with `df.compute()`).
|
74
75
|
|
76
|
+
add_missing_columns: bool, default False
|
77
|
+
If `True`, add any missing columns from `Pipe.dtypes` to the dataframe.
|
78
|
+
|
75
79
|
chunk_interval: Union[timedelta, int, None], default None
|
76
80
|
If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
|
77
81
|
This may be set under `pipe.parameters['chunk_minutes']`.
|
@@ -103,13 +107,13 @@ def get_data(
|
|
103
107
|
from meerschaum.utils.warnings import warn
|
104
108
|
from meerschaum.utils.venv import Venv
|
105
109
|
from meerschaum.connectors import get_connector_plugin
|
106
|
-
from meerschaum.utils.
|
107
|
-
from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
|
110
|
+
from meerschaum.utils.dtypes import to_pandas_dtype
|
108
111
|
from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
|
109
112
|
from meerschaum.utils.packages import attempt_import
|
113
|
+
from meerschaum.utils.warnings import dprint
|
110
114
|
dd = attempt_import('dask.dataframe') if as_dask else None
|
111
115
|
dask = attempt_import('dask') if as_dask else None
|
112
|
-
|
116
|
+
_ = attempt_import('partd', lazy=False) if as_dask else None
|
113
117
|
|
114
118
|
if select_columns == '*':
|
115
119
|
select_columns = None
|
@@ -188,14 +192,17 @@ def get_data(
|
|
188
192
|
order=order,
|
189
193
|
limit=limit,
|
190
194
|
fresh=fresh,
|
195
|
+
add_missing_columns=True,
|
191
196
|
debug=debug,
|
192
197
|
)
|
193
198
|
for (chunk_begin, chunk_end) in bounds
|
194
199
|
]
|
195
200
|
dask_meta = {
|
196
201
|
col: to_pandas_dtype(typ)
|
197
|
-
for col, typ in self.
|
202
|
+
for col, typ in self.get_dtypes(refresh=True, infer=True, debug=debug).items()
|
198
203
|
}
|
204
|
+
if debug:
|
205
|
+
dprint(f"Dask meta:\n{dask_meta}")
|
199
206
|
return _sort_df(dd.from_delayed(dask_chunks, meta=dask_meta))
|
200
207
|
|
201
208
|
if not self.exists(debug=debug):
|
@@ -249,6 +256,7 @@ def get_data(
|
|
249
256
|
if not select_columns:
|
250
257
|
select_columns = [col for col in df.columns]
|
251
258
|
|
259
|
+
pipe_dtypes = self.get_dtypes(refresh=False, debug=debug)
|
252
260
|
cols_to_omit = [
|
253
261
|
col
|
254
262
|
for col in df.columns
|
@@ -262,7 +270,11 @@ def get_data(
|
|
262
270
|
col
|
263
271
|
for col in select_columns
|
264
272
|
if col not in df.columns
|
265
|
-
]
|
273
|
+
] + ([
|
274
|
+
col
|
275
|
+
for col in pipe_dtypes
|
276
|
+
if col not in df.columns
|
277
|
+
] if add_missing_columns else [])
|
266
278
|
if cols_to_omit:
|
267
279
|
warn(
|
268
280
|
(
|
@@ -278,16 +290,26 @@ def get_data(
|
|
278
290
|
df = df[_cols_to_select]
|
279
291
|
|
280
292
|
if cols_to_add:
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
293
|
+
if not add_missing_columns:
|
294
|
+
from meerschaum.utils.misc import items_str
|
295
|
+
warn(
|
296
|
+
f"Will add columns {items_str(cols_to_add)} as nulls to dataframe.",
|
297
|
+
stack=False,
|
298
|
+
)
|
299
|
+
|
300
|
+
df = add_missing_cols_to_df(
|
301
|
+
df,
|
302
|
+
{
|
303
|
+
col: pipe_dtypes.get(col, 'string')
|
304
|
+
for col in cols_to_add
|
305
|
+
},
|
287
306
|
)
|
288
|
-
df = add_missing_cols_to_df(df, {col: 'string' for col in cols_to_add})
|
289
307
|
|
290
|
-
enforced_df = self.enforce_dtypes(
|
308
|
+
enforced_df = self.enforce_dtypes(
|
309
|
+
df,
|
310
|
+
dtypes=pipe_dtypes,
|
311
|
+
debug=debug,
|
312
|
+
)
|
291
313
|
|
292
314
|
if order:
|
293
315
|
return _sort_df(enforced_df)
|
@@ -311,7 +333,7 @@ def _get_data_as_iterator(
|
|
311
333
|
"""
|
312
334
|
Return a pipe's data as a generator.
|
313
335
|
"""
|
314
|
-
from meerschaum.utils.
|
336
|
+
from meerschaum.utils.dtypes import round_time
|
315
337
|
begin, end = self.parse_date_bounds(begin, end)
|
316
338
|
if not self.exists(debug=debug):
|
317
339
|
return
|
@@ -624,7 +646,7 @@ def get_chunk_interval(
|
|
624
646
|
if dt_col is None:
|
625
647
|
return timedelta(minutes=chunk_minutes)
|
626
648
|
|
627
|
-
dt_dtype = self.dtypes.get(dt_col, '
|
649
|
+
dt_dtype = self.dtypes.get(dt_col, 'datetime')
|
628
650
|
if 'int' in dt_dtype.lower():
|
629
651
|
return chunk_minutes
|
630
652
|
return timedelta(minutes=chunk_minutes)
|
@@ -688,11 +710,26 @@ def get_chunk_bounds(
|
|
688
710
|
elif are_dtypes_equal(str(type(end)), 'int'):
|
689
711
|
end += 1
|
690
712
|
consolidate_end_chunk = True
|
713
|
+
|
691
714
|
if begin is None and end is None:
|
692
715
|
return [(None, None)]
|
693
716
|
|
694
717
|
begin, end = self.parse_date_bounds(begin, end)
|
695
718
|
|
719
|
+
if begin and end:
|
720
|
+
if begin >= end:
|
721
|
+
return (
|
722
|
+
[(begin, begin)]
|
723
|
+
if bounded
|
724
|
+
else [(begin, None)]
|
725
|
+
)
|
726
|
+
if end <= begin:
|
727
|
+
return (
|
728
|
+
[(end, end)]
|
729
|
+
if bounded
|
730
|
+
else [(None, begin)]
|
731
|
+
)
|
732
|
+
|
696
733
|
### Set the chunk interval under `pipe.parameters['verify']['chunk_minutes']`.
|
697
734
|
chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
|
698
735
|
|
@@ -799,7 +836,7 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
|
|
799
836
|
Given a date bound (begin, end), coerce a timezone if necessary.
|
800
837
|
"""
|
801
838
|
from meerschaum.utils.misc import is_int
|
802
|
-
from meerschaum.utils.dtypes import coerce_timezone
|
839
|
+
from meerschaum.utils.dtypes import coerce_timezone, MRSM_PD_DTYPES
|
803
840
|
from meerschaum.utils.warnings import warn
|
804
841
|
dateutil_parser = mrsm.attempt_import('dateutil.parser')
|
805
842
|
|
@@ -824,12 +861,53 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
|
|
824
861
|
return None
|
825
862
|
|
826
863
|
dt_col = self.columns.get('datetime', None)
|
827
|
-
dt_typ = str(self.dtypes.get(dt_col, '
|
864
|
+
dt_typ = str(self.dtypes.get(dt_col, 'datetime'))
|
828
865
|
if dt_typ == 'datetime':
|
829
|
-
dt_typ = '
|
866
|
+
dt_typ = MRSM_PD_DTYPES['datetime']
|
830
867
|
return coerce_timezone(dt_val, strip_utc=('utc' not in dt_typ.lower()))
|
831
868
|
|
832
869
|
bounds = tuple(_parse_date_bound(dt_val) for dt_val in dt_vals)
|
833
870
|
if len(bounds) == 1:
|
834
871
|
return bounds[0]
|
835
872
|
return bounds
|
873
|
+
|
874
|
+
|
875
|
+
def get_doc(self, **kwargs) -> Union[Dict[str, Any], None]:
|
876
|
+
"""
|
877
|
+
Convenience function to return a single row as a dictionary (or `None`) from `Pipe.get_data()`.
|
878
|
+
Keyword arguments are passed to `Pipe.get_data()`.
|
879
|
+
"""
|
880
|
+
from meerschaum.utils.warnings import warn
|
881
|
+
kwargs['limit'] = 1
|
882
|
+
try:
|
883
|
+
result_df = self.get_data(**kwargs)
|
884
|
+
if result_df is None or len(result_df) == 0:
|
885
|
+
return None
|
886
|
+
return result_df.reset_index(drop=True).iloc[0].to_dict()
|
887
|
+
except Exception as e:
|
888
|
+
warn(f"Failed to read value from {self}:\n{e}", stack=False)
|
889
|
+
return None
|
890
|
+
|
891
|
+
def get_value(
|
892
|
+
self,
|
893
|
+
column: str,
|
894
|
+
params: Optional[Dict[str, Any]] = None,
|
895
|
+
**kwargs: Any
|
896
|
+
) -> Any:
|
897
|
+
"""
|
898
|
+
Convenience function to return a single value (or `None`) from `Pipe.get_data()`.
|
899
|
+
Keyword arguments are passed to `Pipe.get_data()`.
|
900
|
+
"""
|
901
|
+
from meerschaum.utils.warnings import warn
|
902
|
+
kwargs['select_columns'] = [column]
|
903
|
+
kwargs['limit'] = 1
|
904
|
+
try:
|
905
|
+
result_df = self.get_data(params=params, **kwargs)
|
906
|
+
if result_df is None or len(result_df) == 0:
|
907
|
+
return None
|
908
|
+
if column not in result_df.columns:
|
909
|
+
raise ValueError(f"Column '{column}' was not included in the result set.")
|
910
|
+
return result_df[column][0]
|
911
|
+
except Exception as e:
|
912
|
+
warn(f"Failed to read value from {self}:\n{e}", stack=False)
|
913
|
+
return None
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -22,6 +22,7 @@ def enforce_dtypes(
|
|
22
22
|
chunksize: Optional[int] = -1,
|
23
23
|
enforce: bool = True,
|
24
24
|
safe_copy: bool = True,
|
25
|
+
dtypes: Optional[Dict[str, str]] = None,
|
25
26
|
debug: bool = False,
|
26
27
|
) -> 'pd.DataFrame':
|
27
28
|
"""
|
@@ -31,7 +32,11 @@ def enforce_dtypes(
|
|
31
32
|
import traceback
|
32
33
|
from meerschaum.utils.warnings import warn
|
33
34
|
from meerschaum.utils.debug import dprint
|
34
|
-
from meerschaum.utils.dataframe import
|
35
|
+
from meerschaum.utils.dataframe import (
|
36
|
+
parse_df_datetimes,
|
37
|
+
enforce_dtypes as _enforce_dtypes,
|
38
|
+
parse_simple_lines,
|
39
|
+
)
|
35
40
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
36
41
|
from meerschaum.utils.packages import import_pandas
|
37
42
|
pd = import_pandas(debug=debug)
|
@@ -45,23 +50,35 @@ def enforce_dtypes(
|
|
45
50
|
|
46
51
|
if not self.enforce:
|
47
52
|
enforce = False
|
48
|
-
|
53
|
+
|
54
|
+
explicit_dtypes = self.get_dtypes(infer=False, debug=debug) if enforce else {}
|
55
|
+
pipe_dtypes = self.get_dtypes(infer=True, debug=debug) if not dtypes else dtypes
|
49
56
|
|
50
57
|
try:
|
51
58
|
if isinstance(df, str):
|
52
|
-
df
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
59
|
+
if df.strip() and df.strip()[0] not in ('{', '['):
|
60
|
+
df = parse_df_datetimes(
|
61
|
+
parse_simple_lines(df),
|
62
|
+
ignore_cols=[
|
63
|
+
col
|
64
|
+
for col, dtype in pipe_dtypes.items()
|
65
|
+
if (not enforce or not are_dtypes_equal(dtype, 'datetime'))
|
66
|
+
],
|
67
|
+
)
|
68
|
+
else:
|
69
|
+
df = parse_df_datetimes(
|
70
|
+
pd.read_json(StringIO(df)),
|
71
|
+
ignore_cols=[
|
72
|
+
col
|
73
|
+
for col, dtype in pipe_dtypes.items()
|
74
|
+
if (not enforce or not are_dtypes_equal(dtype, 'datetime'))
|
75
|
+
],
|
76
|
+
ignore_all=(not enforce),
|
77
|
+
strip_timezone=(self.tzinfo is None),
|
78
|
+
chunksize=chunksize,
|
79
|
+
debug=debug,
|
80
|
+
)
|
81
|
+
elif isinstance(df, (dict, list, tuple)):
|
65
82
|
df = parse_df_datetimes(
|
66
83
|
df,
|
67
84
|
ignore_cols=[
|
@@ -88,14 +105,21 @@ def enforce_dtypes(
|
|
88
105
|
return _enforce_dtypes(
|
89
106
|
df,
|
90
107
|
pipe_dtypes,
|
108
|
+
explicit_dtypes=explicit_dtypes,
|
91
109
|
safe_copy=safe_copy,
|
92
110
|
strip_timezone=(self.tzinfo is None),
|
111
|
+
coerce_numeric=self.mixed_numerics,
|
93
112
|
coerce_timezone=enforce,
|
94
113
|
debug=debug,
|
95
114
|
)
|
96
115
|
|
97
116
|
|
98
|
-
def infer_dtypes(
|
117
|
+
def infer_dtypes(
|
118
|
+
self,
|
119
|
+
persist: bool = False,
|
120
|
+
refresh: bool = False,
|
121
|
+
debug: bool = False,
|
122
|
+
) -> Dict[str, Any]:
|
99
123
|
"""
|
100
124
|
If `dtypes` is not set in `meerschaum.Pipe.parameters`,
|
101
125
|
infer the data types from the underlying table if it exists.
|
@@ -104,6 +128,11 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
104
128
|
----------
|
105
129
|
persist: bool, default False
|
106
130
|
If `True`, persist the inferred data types to `meerschaum.Pipe.parameters`.
|
131
|
+
NOTE: Use with caution! Generally `dtypes` is meant to be user-configurable only.
|
132
|
+
|
133
|
+
refresh: bool, default False
|
134
|
+
If `True`, retrieve the latest columns-types for the pipe.
|
135
|
+
See `Pipe.get_columns_types()`.
|
107
136
|
|
108
137
|
Returns
|
109
138
|
-------
|
@@ -117,7 +146,7 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
117
146
|
|
118
147
|
### NOTE: get_columns_types() may return either the types as
|
119
148
|
### PostgreSQL- or Pandas-style.
|
120
|
-
columns_types = self.get_columns_types(debug=debug)
|
149
|
+
columns_types = self.get_columns_types(refresh=refresh, debug=debug)
|
121
150
|
|
122
151
|
remote_pd_dtypes = {
|
123
152
|
c: (
|
@@ -130,7 +159,8 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
130
159
|
if not persist:
|
131
160
|
return remote_pd_dtypes
|
132
161
|
|
133
|
-
|
162
|
+
parameters = self.get_parameters(refresh=refresh, debug=debug)
|
163
|
+
dtypes = parameters.get('dtypes', {})
|
134
164
|
dtypes.update({
|
135
165
|
col: typ
|
136
166
|
for col, typ in remote_pd_dtypes.items()
|
meerschaum/core/Pipe/_edit.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# vim:fenc=utf-8
|
4
4
|
|
5
5
|
"""
|
6
|
-
Edit a Pipe's parameters
|
6
|
+
Edit a Pipe's parameters.
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
@@ -47,6 +47,15 @@ def edit(
|
|
47
47
|
if self.temporary:
|
48
48
|
return False, "Cannot edit pipes created with `temporary=True` (read-only)."
|
49
49
|
|
50
|
+
self._invalidate_cache(hard=True, debug=debug)
|
51
|
+
|
52
|
+
if hasattr(self, '_symlinks'):
|
53
|
+
from meerschaum.utils.misc import get_val_from_dict_path, set_val_in_dict_path
|
54
|
+
for path, vals in self._symlinks.items():
|
55
|
+
current_val = get_val_from_dict_path(self.parameters, path)
|
56
|
+
if current_val == vals['substituted']:
|
57
|
+
set_val_in_dict_path(self.parameters, path, vals['original'])
|
58
|
+
|
50
59
|
if not interactive:
|
51
60
|
with Venv(get_connector_plugin(self.instance_connector)):
|
52
61
|
return self.instance_connector.edit_pipe(self, patch=patch, debug=debug, **kw)
|
@@ -65,7 +74,8 @@ def edit(
|
|
65
74
|
from meerschaum.config import get_config
|
66
75
|
parameters = dict(get_config('pipes', 'parameters', patch=True))
|
67
76
|
from meerschaum.config._patch import apply_patch_to_config
|
68
|
-
|
77
|
+
raw_parameters = self.attributes.get('parameters', {})
|
78
|
+
parameters = apply_patch_to_config(parameters, raw_parameters)
|
69
79
|
|
70
80
|
### write parameters to yaml file
|
71
81
|
with open(parameters_path, 'w+') as f:
|
@@ -194,7 +204,7 @@ def edit_definition(
|
|
194
204
|
return True, "Success"
|
195
205
|
|
196
206
|
def _edit_sql():
|
197
|
-
import
|
207
|
+
import textwrap
|
198
208
|
from meerschaum.config._paths import PIPES_CACHE_RESOURCES_PATH
|
199
209
|
from meerschaum.utils.misc import edit_file
|
200
210
|
definition_filename = str(self) + '.sql'
|
@@ -214,7 +224,7 @@ def edit_definition(
|
|
214
224
|
|
215
225
|
edit_file(definition_path)
|
216
226
|
try:
|
217
|
-
with open(definition_path, 'r') as f:
|
227
|
+
with open(definition_path, 'r', encoding='utf-8') as f:
|
218
228
|
file_definition = f.read()
|
219
229
|
except Exception as e:
|
220
230
|
return False, f"Failed reading file '{definition_path}':\n" + str(e)
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -127,7 +127,7 @@ def get_backtrack_interval(
|
|
127
127
|
if dt_col is None:
|
128
128
|
return backtrack_interval
|
129
129
|
|
130
|
-
dt_dtype = self.dtypes.get(dt_col, '
|
130
|
+
dt_dtype = self.dtypes.get(dt_col, 'datetime')
|
131
131
|
if 'int' in dt_dtype.lower():
|
132
132
|
return backtrack_minutes
|
133
133
|
|
meerschaum/core/Pipe/_show.py
CHANGED
@@ -9,11 +9,11 @@ Show information about a Pipe
|
|
9
9
|
from meerschaum.utils.typing import SuccessTuple
|
10
10
|
|
11
11
|
def show(
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
self,
|
13
|
+
nopretty: bool = False,
|
14
|
+
debug: bool = False,
|
15
|
+
**kw
|
16
|
+
) -> SuccessTuple:
|
17
17
|
"""
|
18
18
|
Show attributes of a Pipe.
|
19
19
|
|