meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +2 -1
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/shell/Shell.py +5 -4
- meerschaum/_internal/static.py +8 -24
- meerschaum/actions/bootstrap.py +1 -1
- meerschaum/actions/edit.py +6 -3
- meerschaum/actions/start.py +1 -1
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +2 -1
- meerschaum/api/dash/__init__.py +0 -2
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/dashboard.py +20 -19
- meerschaum/api/dash/callbacks/jobs.py +11 -5
- meerschaum/api/dash/callbacks/pipes.py +106 -5
- meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
- meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
- meerschaum/api/dash/jobs.py +1 -1
- meerschaum/api/dash/pages/__init__.py +2 -1
- meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
- meerschaum/api/dash/pages/pipes.py +4 -3
- meerschaum/api/dash/pages/settings/__init__.py +0 -1
- meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
- meerschaum/api/dash/pipes.py +131 -0
- meerschaum/api/dash/tokens.py +28 -31
- meerschaum/api/routes/_pipes.py +47 -37
- meerschaum/config/_default.py +13 -2
- meerschaum/config/_paths.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +9 -8
- meerschaum/connectors/api/_pipes.py +2 -18
- meerschaum/connectors/api/_tokens.py +2 -2
- meerschaum/connectors/instance/_tokens.py +10 -6
- meerschaum/connectors/sql/_SQLConnector.py +14 -0
- meerschaum/connectors/sql/_create_engine.py +3 -14
- meerschaum/connectors/sql/_pipes.py +175 -185
- meerschaum/connectors/sql/_sql.py +38 -20
- meerschaum/connectors/sql/tables/__init__.py +237 -122
- meerschaum/connectors/valkey/_pipes.py +44 -16
- meerschaum/core/Pipe/__init__.py +28 -5
- meerschaum/core/Pipe/_attributes.py +273 -46
- meerschaum/core/Pipe/_data.py +55 -17
- meerschaum/core/Pipe/_dtypes.py +19 -4
- meerschaum/core/Pipe/_edit.py +2 -0
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_sync.py +90 -160
- meerschaum/core/Pipe/_verify.py +3 -3
- meerschaum/core/Token/_Token.py +4 -5
- meerschaum/plugins/bootstrap.py +508 -3
- meerschaum/utils/_get_pipes.py +1 -1
- meerschaum/utils/dataframe.py +385 -68
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +387 -22
- meerschaum/utils/dtypes/sql.py +327 -31
- meerschaum/utils/misc.py +9 -68
- meerschaum/utils/packages/__init__.py +7 -21
- meerschaum/utils/packages/_packages.py +7 -2
- meerschaum/utils/schedule.py +1 -1
- meerschaum/utils/sql.py +8 -8
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
- meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
@@ -321,14 +321,40 @@ def drop_pipe(
|
|
321
321
|
-------
|
322
322
|
A `SuccessTuple` indicating success.
|
323
323
|
"""
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
324
|
+
if not pipe.exists(debug=debug):
|
325
|
+
return True, f"{pipe} does not exist, so it was not dropped."
|
326
|
+
|
327
|
+
table_name = self.quote_table(pipe.target)
|
328
|
+
dt_col = pipe.columns.get('datetime', None)
|
329
|
+
|
330
|
+
try:
|
331
|
+
members = (
|
332
|
+
self.client.zrange(table_name, 0, -1)
|
333
|
+
if dt_col
|
334
|
+
else self.client.smembers(table_name)
|
329
335
|
)
|
330
|
-
|
331
|
-
|
336
|
+
|
337
|
+
keys_to_delete = []
|
338
|
+
for member_bytes in members:
|
339
|
+
member_str = member_bytes.decode('utf-8')
|
340
|
+
member_doc = json.loads(member_str)
|
341
|
+
ix_str = member_doc.get('ix')
|
342
|
+
if not ix_str:
|
343
|
+
continue
|
344
|
+
|
345
|
+
ix_doc = string_to_dict(ix_str.replace(COLON, ':'))
|
346
|
+
doc_key = self.get_document_key(ix_doc, list(ix_doc.keys()), table_name)
|
347
|
+
keys_to_delete.append(doc_key)
|
348
|
+
|
349
|
+
if keys_to_delete:
|
350
|
+
batch_size = 1000
|
351
|
+
for i in range(0, len(keys_to_delete), batch_size):
|
352
|
+
batch = keys_to_delete[i:i+batch_size]
|
353
|
+
self.client.delete(*batch)
|
354
|
+
|
355
|
+
except Exception as e:
|
356
|
+
return False, f"Failed to delete documents for {pipe}:\n{e}"
|
357
|
+
|
332
358
|
try:
|
333
359
|
self.drop_table(pipe.target, debug=debug)
|
334
360
|
except Exception as e:
|
@@ -558,11 +584,7 @@ def sync_pipe(
|
|
558
584
|
|
559
585
|
valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
|
560
586
|
new_dtypes = {
|
561
|
-
str(key): (
|
562
|
-
str(val)
|
563
|
-
if not are_dtypes_equal(str(val), 'datetime')
|
564
|
-
else 'datetime64[ns, UTC]'
|
565
|
-
)
|
587
|
+
str(key): str(val)
|
566
588
|
for key, val in df.dtypes.items()
|
567
589
|
if str(key) not in valkey_dtypes
|
568
590
|
}
|
@@ -571,6 +593,8 @@ def sync_pipe(
|
|
571
593
|
try:
|
572
594
|
df[col] = df[col].astype(typ)
|
573
595
|
except Exception:
|
596
|
+
import traceback
|
597
|
+
traceback.print_exc()
|
574
598
|
valkey_dtypes[col] = 'string'
|
575
599
|
new_dtypes[col] = 'string'
|
576
600
|
df[col] = df[col].astype('string')
|
@@ -780,7 +804,7 @@ def get_sync_time(
|
|
780
804
|
"""
|
781
805
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
782
806
|
dt_col = pipe.columns.get('datetime', None)
|
783
|
-
dt_typ = pipe.dtypes.get(dt_col, '
|
807
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime')
|
784
808
|
if not dt_col:
|
785
809
|
return None
|
786
810
|
|
@@ -788,14 +812,18 @@ def get_sync_time(
|
|
788
812
|
table_name = self.quote_table(pipe.target)
|
789
813
|
try:
|
790
814
|
vals = (
|
791
|
-
self.client.zrevrange(table_name, 0, 0)
|
815
|
+
self.client.zrevrange(table_name, 0, 0, withscores=True)
|
792
816
|
if newest
|
793
|
-
else self.client.zrange(table_name, 0, 0)
|
817
|
+
else self.client.zrange(table_name, 0, 0, withscores=True)
|
794
818
|
)
|
795
819
|
if not vals:
|
796
820
|
return None
|
797
|
-
val = vals[0]
|
821
|
+
val = vals[0][0]
|
822
|
+
if isinstance(val, bytes):
|
823
|
+
val = val.decode('utf-8')
|
798
824
|
except Exception:
|
825
|
+
import traceback
|
826
|
+
traceback.print_exc()
|
799
827
|
return None
|
800
828
|
|
801
829
|
doc = json.loads(val)
|
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -117,11 +117,13 @@ class Pipe:
|
|
117
117
|
tzinfo,
|
118
118
|
enforce,
|
119
119
|
null_indices,
|
120
|
+
mixed_numerics,
|
120
121
|
get_columns,
|
121
122
|
get_columns_types,
|
122
123
|
get_columns_indices,
|
123
124
|
get_indices,
|
124
125
|
get_parameters,
|
126
|
+
get_dtypes,
|
125
127
|
update_parameters,
|
126
128
|
tags,
|
127
129
|
get_id,
|
@@ -133,6 +135,9 @@ class Pipe:
|
|
133
135
|
target,
|
134
136
|
_target_legacy,
|
135
137
|
guess_datetime,
|
138
|
+
precision,
|
139
|
+
get_precision,
|
140
|
+
_invalidate_cache,
|
136
141
|
)
|
137
142
|
from ._show import show
|
138
143
|
from ._edit import edit, edit_definition, update
|
@@ -143,11 +148,7 @@ class Pipe:
|
|
143
148
|
filter_existing,
|
144
149
|
_get_chunk_label,
|
145
150
|
get_num_workers,
|
146
|
-
|
147
|
-
_persist_new_numeric_columns,
|
148
|
-
_persist_new_uuid_columns,
|
149
|
-
_persist_new_bytes_columns,
|
150
|
-
_persist_new_geometry_columns,
|
151
|
+
_persist_new_special_columns,
|
151
152
|
)
|
152
153
|
from ._verify import (
|
153
154
|
verify,
|
@@ -179,9 +180,11 @@ class Pipe:
|
|
179
180
|
upsert: Optional[bool] = None,
|
180
181
|
autoincrement: Optional[bool] = None,
|
181
182
|
autotime: Optional[bool] = None,
|
183
|
+
precision: Union[str, Dict[str, Union[str, int]], None] = None,
|
182
184
|
static: Optional[bool] = None,
|
183
185
|
enforce: Optional[bool] = None,
|
184
186
|
null_indices: Optional[bool] = None,
|
187
|
+
mixed_numerics: Optional[bool] = None,
|
185
188
|
mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
|
186
189
|
cache: bool = False,
|
187
190
|
debug: bool = False,
|
@@ -240,6 +243,13 @@ class Pipe:
|
|
240
243
|
autotime: Optional[bool], default None
|
241
244
|
If `True`, set `autotime` in the parameters.
|
242
245
|
|
246
|
+
precision: Union[str, Dict[str, Union[str, int]], None], default None
|
247
|
+
If provided, set `precision` in the parameters.
|
248
|
+
This may be either a string (the precision unit) or a dictionary of in the form
|
249
|
+
`{'unit': <unit>, 'interval': <interval>}`.
|
250
|
+
Default is determined by the `datetime` column dtype
|
251
|
+
(e.g. `datetime64[us]` is `microsecond` precision).
|
252
|
+
|
243
253
|
static: Optional[bool], default None
|
244
254
|
If `True`, set `static` in the parameters.
|
245
255
|
|
@@ -251,6 +261,11 @@ class Pipe:
|
|
251
261
|
Set to `False` if there will be no null values in the index columns.
|
252
262
|
Defaults to `True`.
|
253
263
|
|
264
|
+
mixed_numerics: bool, default None
|
265
|
+
If `True`, integer columns will be converted to `numeric` when floats are synced.
|
266
|
+
Set to `False` to disable this behavior.
|
267
|
+
Defaults to `True`.
|
268
|
+
|
254
269
|
temporary: bool, default False
|
255
270
|
If `True`, prevent instance tables (pipes, users, plugins) from being created.
|
256
271
|
|
@@ -353,6 +368,11 @@ class Pipe:
|
|
353
368
|
if isinstance(autotime, bool):
|
354
369
|
self._attributes['parameters']['autotime'] = autotime
|
355
370
|
|
371
|
+
if isinstance(precision, dict):
|
372
|
+
self._attributes['parameters']['precision'] = precision
|
373
|
+
elif isinstance(precision, str):
|
374
|
+
self._attributes['parameters']['precision'] = {'unit': precision}
|
375
|
+
|
356
376
|
if isinstance(static, bool):
|
357
377
|
self._attributes['parameters']['static'] = static
|
358
378
|
|
@@ -362,6 +382,9 @@ class Pipe:
|
|
362
382
|
if isinstance(null_indices, bool):
|
363
383
|
self._attributes['parameters']['null_indices'] = null_indices
|
364
384
|
|
385
|
+
if isinstance(mixed_numerics, bool):
|
386
|
+
self._attributes['parameters']['mixed_numerics'] = mixed_numerics
|
387
|
+
|
365
388
|
### NOTE: The parameters dictionary is {} by default.
|
366
389
|
### A Pipe may be registered without parameters, then edited,
|
367
390
|
### or a Pipe may be registered with parameters set in-memory first.
|
@@ -13,7 +13,7 @@ from datetime import timezone
|
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
15
|
from meerschaum.utils.typing import Tuple, Dict, Any, Union, Optional, List
|
16
|
-
from meerschaum.utils.warnings import warn
|
16
|
+
from meerschaum.utils.warnings import warn, dprint
|
17
17
|
|
18
18
|
|
19
19
|
@property
|
@@ -52,18 +52,34 @@ def attributes(self) -> Dict[str, Any]:
|
|
52
52
|
def get_parameters(
|
53
53
|
self,
|
54
54
|
apply_symlinks: bool = True,
|
55
|
+
refresh: bool = False,
|
56
|
+
debug: bool = False,
|
55
57
|
_visited: 'Optional[set[mrsm.Pipe]]' = None,
|
56
58
|
) -> Dict[str, Any]:
|
57
59
|
"""
|
58
60
|
Return the `parameters` dictionary of the pipe.
|
61
|
+
|
62
|
+
Parameters
|
63
|
+
----------
|
64
|
+
apply_symlinks: bool, default True
|
65
|
+
If `True`, resolve references to parameters from other pipes.
|
66
|
+
|
67
|
+
refresh: bool, default False
|
68
|
+
If `True`, pull the latest attributes for the pipe.
|
69
|
+
|
70
|
+
Returns
|
71
|
+
-------
|
72
|
+
The pipe's parameters dictionary.
|
59
73
|
"""
|
60
74
|
from meerschaum.config._patch import apply_patch_to_config
|
61
|
-
from meerschaum.utils.warnings import warn
|
62
75
|
from meerschaum.config._read_config import search_and_substitute_config
|
63
76
|
|
64
77
|
if _visited is None:
|
65
78
|
_visited = {self}
|
66
79
|
|
80
|
+
if refresh:
|
81
|
+
self._invalidate_cache(hard=True)
|
82
|
+
|
67
83
|
raw_parameters = self.attributes.get('parameters', {})
|
68
84
|
ref_keys = raw_parameters.get('reference')
|
69
85
|
if not apply_symlinks:
|
@@ -71,13 +87,15 @@ def get_parameters(
|
|
71
87
|
|
72
88
|
if ref_keys:
|
73
89
|
try:
|
90
|
+
if debug:
|
91
|
+
dprint(f"Building reference pipe from keys: {ref_keys}")
|
74
92
|
ref_pipe = mrsm.Pipe(**ref_keys)
|
75
93
|
if ref_pipe in _visited:
|
76
94
|
warn(f"Circular reference detected in {self}: chain involves {ref_pipe}.")
|
77
95
|
return search_and_substitute_config(raw_parameters)
|
78
96
|
|
79
97
|
_visited.add(ref_pipe)
|
80
|
-
base_params = ref_pipe.get_parameters(_visited=_visited)
|
98
|
+
base_params = ref_pipe.get_parameters(_visited=_visited, debug=debug)
|
81
99
|
except Exception as e:
|
82
100
|
warn(f"Failed to resolve reference pipe for {self}: {e}")
|
83
101
|
base_params = {}
|
@@ -113,10 +131,7 @@ def parameters(self) -> Optional[Dict[str, Any]]:
|
|
113
131
|
"""
|
114
132
|
Return the parameters dictionary of the pipe.
|
115
133
|
"""
|
116
|
-
|
117
|
-
return _parameters
|
118
|
-
self._parameters = self.get_parameters()
|
119
|
-
return self._parameters
|
134
|
+
return self.get_parameters()
|
120
135
|
|
121
136
|
|
122
137
|
@parameters.setter
|
@@ -165,14 +180,12 @@ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
|
|
165
180
|
if 'indexes' in self.parameters
|
166
181
|
else 'indices'
|
167
182
|
)
|
168
|
-
|
169
|
-
|
170
|
-
_indices = self.parameters[indices_key]
|
183
|
+
|
184
|
+
_indices = self.parameters.get(indices_key, {})
|
171
185
|
_columns = self.columns
|
172
186
|
dt_col = _columns.get('datetime', None)
|
173
187
|
if not isinstance(_indices, dict):
|
174
188
|
_indices = {}
|
175
|
-
self.parameters[indices_key] = _indices
|
176
189
|
unique_cols = list(set((
|
177
190
|
[dt_col]
|
178
191
|
if dt_col
|
@@ -246,17 +259,65 @@ def tags(self, _tags: List[str]) -> None:
|
|
246
259
|
|
247
260
|
|
248
261
|
@property
|
249
|
-
def dtypes(self) ->
|
262
|
+
def dtypes(self) -> Dict[str, Any]:
|
250
263
|
"""
|
251
264
|
If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
|
252
265
|
"""
|
266
|
+
return self.get_dtypes(refresh=False)
|
267
|
+
|
268
|
+
|
269
|
+
@dtypes.setter
|
270
|
+
def dtypes(self, _dtypes: Dict[str, Any]) -> None:
|
271
|
+
"""
|
272
|
+
Override the dtypes dictionary of the in-memory pipe.
|
273
|
+
Call `meerschaum.Pipe.edit()` to persist changes.
|
274
|
+
"""
|
275
|
+
self.update_parameters({'dtypes': _dtypes}, persist=False)
|
276
|
+
_ = self.__dict__.pop('_remote_dtypes', None)
|
277
|
+
_ = self.__dict__.pop('_remote_dtypes_timestamp', None)
|
278
|
+
|
279
|
+
|
280
|
+
def get_dtypes(
|
281
|
+
self,
|
282
|
+
infer: bool = True,
|
283
|
+
refresh: bool = False,
|
284
|
+
debug: bool = False,
|
285
|
+
) -> Dict[str, Any]:
|
286
|
+
"""
|
287
|
+
If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
|
288
|
+
|
289
|
+
Parameters
|
290
|
+
----------
|
291
|
+
infer: bool, default True
|
292
|
+
If `True`, include the implicit existing dtypes.
|
293
|
+
Else only return the explicitly configured dtypes (e.g. `Pipe.parameters['dtypes']`).
|
294
|
+
|
295
|
+
refresh: bool, default False
|
296
|
+
If `True`, invalidate any cache and return the latest known dtypes.
|
297
|
+
|
298
|
+
Returns
|
299
|
+
-------
|
300
|
+
A dictionary mapping column names to dtypes.
|
301
|
+
"""
|
302
|
+
import time
|
253
303
|
from meerschaum.config._patch import apply_patch_to_config
|
254
304
|
from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
305
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
306
|
+
parameters = self.get_parameters(refresh=refresh, debug=debug)
|
307
|
+
configured_dtypes = parameters.get('dtypes', {})
|
308
|
+
if debug:
|
309
|
+
dprint(f"Configured dtypes for {self}:")
|
310
|
+
mrsm.pprint(configured_dtypes)
|
311
|
+
|
312
|
+
remote_dtypes = (
|
313
|
+
self.infer_dtypes(persist=False, refresh=refresh, debug=debug)
|
314
|
+
if infer
|
315
|
+
else {}
|
316
|
+
)
|
317
|
+
patched_dtypes = apply_patch_to_config((remote_dtypes or {}), (configured_dtypes or {}))
|
318
|
+
|
319
|
+
dt_col = parameters.get('columns', {}).get('datetime', None)
|
320
|
+
primary_col = parameters.get('columns', {}).get('primary', None)
|
260
321
|
_dtypes = {
|
261
322
|
col: MRSM_ALIAS_DTYPES.get(typ, typ)
|
262
323
|
for col, typ in patched_dtypes.items()
|
@@ -264,18 +325,10 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
|
|
264
325
|
}
|
265
326
|
if dt_col and dt_col not in configured_dtypes:
|
266
327
|
_dtypes[dt_col] = 'datetime'
|
267
|
-
if primary_col and
|
328
|
+
if primary_col and parameters.get('autoincrement', False) and primary_col not in _dtypes:
|
268
329
|
_dtypes[primary_col] = 'int'
|
269
|
-
return _dtypes
|
270
|
-
|
271
330
|
|
272
|
-
|
273
|
-
def dtypes(self, _dtypes: Dict[str, Any]) -> None:
|
274
|
-
"""
|
275
|
-
Override the dtypes dictionary of the in-memory pipe.
|
276
|
-
Call `meerschaum.Pipe.edit()` to persist changes.
|
277
|
-
"""
|
278
|
-
self.update_parameters({'dtypes': _dtypes}, persist=False)
|
331
|
+
return _dtypes
|
279
332
|
|
280
333
|
|
281
334
|
@property
|
@@ -347,18 +400,22 @@ def tzinfo(self) -> Union[None, timezone]:
|
|
347
400
|
"""
|
348
401
|
Return `timezone.utc` if the pipe is timezone-aware.
|
349
402
|
"""
|
350
|
-
|
351
|
-
|
352
|
-
return None
|
403
|
+
if '_tzinfo' in self.__dict__:
|
404
|
+
return self.__dict__['_tzinfo']
|
353
405
|
|
354
|
-
|
355
|
-
|
356
|
-
|
406
|
+
_tzinfo = None
|
407
|
+
dt_col = self.columns.get('datetime', None)
|
408
|
+
dt_typ = str(self.dtypes.get(dt_col, 'datetime')) if dt_col else None
|
409
|
+
if self.autotime:
|
410
|
+
ts_col = mrsm.get_config('pipes', 'autotime', 'column_name_if_datetime_missing')
|
411
|
+
ts_typ = self.dtypes.get(ts_col, 'datetime')
|
412
|
+
dt_typ = ts_typ
|
357
413
|
|
358
|
-
if dt_typ == '
|
359
|
-
|
414
|
+
if dt_typ and 'utc' in dt_typ.lower() or dt_typ == 'datetime':
|
415
|
+
_tzinfo = timezone.utc
|
360
416
|
|
361
|
-
|
417
|
+
self._tzinfo = _tzinfo
|
418
|
+
return _tzinfo
|
362
419
|
|
363
420
|
|
364
421
|
@property
|
@@ -393,6 +450,22 @@ def null_indices(self, _null_indices: bool) -> None:
|
|
393
450
|
self.update_parameters({'null_indices': _null_indices}, persist=False)
|
394
451
|
|
395
452
|
|
453
|
+
@property
|
454
|
+
def mixed_numerics(self) -> bool:
|
455
|
+
"""
|
456
|
+
Return the `mixed_numerics` parameter for the pipe.
|
457
|
+
"""
|
458
|
+
return self.parameters.get('mixed_numerics', True)
|
459
|
+
|
460
|
+
|
461
|
+
@mixed_numerics.setter
|
462
|
+
def mixed_numerics(self, _mixed_numerics: bool) -> None:
|
463
|
+
"""
|
464
|
+
Set the `mixed_numerics` parameter for the pipe.
|
465
|
+
"""
|
466
|
+
self.update_parameters({'mixed_numerics': _mixed_numerics}, persist=False)
|
467
|
+
|
468
|
+
|
396
469
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
397
470
|
"""
|
398
471
|
Check if the requested columns are defined.
|
@@ -471,15 +544,17 @@ def get_columns_types(
|
|
471
544
|
import time
|
472
545
|
from meerschaum.connectors import get_connector_plugin
|
473
546
|
from meerschaum._internal.static import STATIC_CONFIG
|
474
|
-
from meerschaum.utils.warnings import dprint
|
475
547
|
|
476
548
|
now = time.perf_counter()
|
477
|
-
cache_seconds =
|
478
|
-
|
479
|
-
|
549
|
+
cache_seconds = (
|
550
|
+
mrsm.get_config('pipes', 'static', 'static_schema_cache_seconds')
|
551
|
+
if self.static
|
552
|
+
else mrsm.get_config('pipes', 'dtypes', 'columns_types_cache_seconds')
|
553
|
+
)
|
480
554
|
if refresh:
|
481
555
|
_ = self.__dict__.pop('_columns_types_timestamp', None)
|
482
556
|
_ = self.__dict__.pop('_columns_types', None)
|
557
|
+
|
483
558
|
_columns_types = self.__dict__.get('_columns_types', None)
|
484
559
|
if _columns_types:
|
485
560
|
columns_types_timestamp = self.__dict__.get('_columns_types_timestamp', None)
|
@@ -516,13 +591,12 @@ def get_columns_indices(
|
|
516
591
|
import time
|
517
592
|
from meerschaum.connectors import get_connector_plugin
|
518
593
|
from meerschaum._internal.static import STATIC_CONFIG
|
519
|
-
from meerschaum.utils.warnings import dprint
|
520
594
|
|
521
595
|
now = time.perf_counter()
|
522
596
|
cache_seconds = (
|
523
|
-
|
597
|
+
mrsm.get_config('pipes', 'static', 'static_schema_cache_seconds')
|
524
598
|
if self.static
|
525
|
-
else
|
599
|
+
else mrsm.get_config('pipes', 'dtypes', 'columns_types_cache_seconds')
|
526
600
|
)
|
527
601
|
if refresh:
|
528
602
|
_ = self.__dict__.pop('_columns_indices_timestamp', None)
|
@@ -595,7 +669,6 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:
|
|
595
669
|
-------
|
596
670
|
Either a string or `None`.
|
597
671
|
"""
|
598
|
-
from meerschaum.utils.debug import dprint
|
599
672
|
if debug:
|
600
673
|
dprint('Attempting to determine the value column...')
|
601
674
|
try:
|
@@ -657,6 +730,7 @@ def parents(self) -> List[mrsm.Pipe]:
|
|
657
730
|
"""
|
658
731
|
if 'parents' not in self.parameters:
|
659
732
|
return []
|
733
|
+
|
660
734
|
from meerschaum.utils.warnings import warn
|
661
735
|
_parents_keys = self.parameters['parents']
|
662
736
|
if not isinstance(_parents_keys, list):
|
@@ -695,6 +769,7 @@ def children(self) -> List[mrsm.Pipe]:
|
|
695
769
|
"""
|
696
770
|
if 'children' not in self.parameters:
|
697
771
|
return []
|
772
|
+
|
698
773
|
from meerschaum.utils.warnings import warn
|
699
774
|
_children_keys = self.parameters['children']
|
700
775
|
if not isinstance(_children_keys, list):
|
@@ -836,11 +911,12 @@ def update_parameters(
|
|
836
911
|
If `True`, call `Pipe.edit()` to persist the new parameters.
|
837
912
|
"""
|
838
913
|
from meerschaum.config import apply_patch_to_config
|
839
|
-
if '_parameters' in self.__dict__:
|
840
|
-
del self.__dict__['_parameters']
|
841
914
|
if 'parameters' not in self._attributes:
|
842
915
|
self._attributes['parameters'] = {}
|
843
916
|
|
917
|
+
if '_parameters' not in self.__dict__:
|
918
|
+
self._parameters = {}
|
919
|
+
|
844
920
|
self._attributes['parameters'] = apply_patch_to_config(
|
845
921
|
self._attributes['parameters'],
|
846
922
|
parameters_patch,
|
@@ -853,3 +929,154 @@ def update_parameters(
|
|
853
929
|
return True, "Success"
|
854
930
|
|
855
931
|
return self.edit(debug=debug)
|
932
|
+
|
933
|
+
|
934
|
+
def get_precision(self, debug: bool = False) -> Dict[str, Union[str, int]]:
|
935
|
+
"""
|
936
|
+
Return the timestamp precision unit and interval for the `datetime` axis.
|
937
|
+
"""
|
938
|
+
from meerschaum.utils.dtypes import (
|
939
|
+
MRSM_PRECISION_UNITS_SCALARS,
|
940
|
+
MRSM_PRECISION_UNITS_ALIASES,
|
941
|
+
MRSM_PD_DTYPES,
|
942
|
+
are_dtypes_equal,
|
943
|
+
)
|
944
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
945
|
+
|
946
|
+
if self.__dict__.get('_precision', None):
|
947
|
+
if debug:
|
948
|
+
dprint(f"Returning cached precision: {self._precision}")
|
949
|
+
return self._precision
|
950
|
+
|
951
|
+
parameters = self.parameters
|
952
|
+
_precision = parameters.get('precision', {})
|
953
|
+
if isinstance(_precision, str):
|
954
|
+
_precision = {'unit': _precision}
|
955
|
+
default_precision_unit = STATIC_CONFIG['dtypes']['datetime']['default_precision_unit']
|
956
|
+
|
957
|
+
if not _precision:
|
958
|
+
|
959
|
+
dt_col = parameters.get('columns', {}).get('datetime', None)
|
960
|
+
if not dt_col and self.autotime:
|
961
|
+
dt_col = mrsm.get_config('pipes', 'autotime', 'column_name_if_datetime_missing')
|
962
|
+
if not dt_col:
|
963
|
+
if debug:
|
964
|
+
dprint(f"No datetime axis, returning default precision '{default_precision_unit}'.")
|
965
|
+
return {'unit': default_precision_unit}
|
966
|
+
|
967
|
+
dt_typ = self.dtypes.get(dt_col, 'datetime')
|
968
|
+
if are_dtypes_equal(dt_typ, 'datetime'):
|
969
|
+
if dt_typ == 'datetime':
|
970
|
+
dt_typ = MRSM_PD_DTYPES['datetime']
|
971
|
+
if debug:
|
972
|
+
dprint(f"Datetime type is `datetime`, assuming {dt_typ} precision.")
|
973
|
+
|
974
|
+
_precision = {
|
975
|
+
'unit': (
|
976
|
+
dt_typ
|
977
|
+
.split('[', maxsplit=1)[-1]
|
978
|
+
.split(',', maxsplit=1)[0]
|
979
|
+
.split(' ', maxsplit=1)[0]
|
980
|
+
).rstrip(']')
|
981
|
+
}
|
982
|
+
|
983
|
+
if debug:
|
984
|
+
dprint(f"Extracted precision '{_precision['unit']}' from type '{dt_typ}'.")
|
985
|
+
|
986
|
+
elif are_dtypes_equal(dt_typ, 'int'):
|
987
|
+
_precision = {
|
988
|
+
'unit': (
|
989
|
+
'second'
|
990
|
+
if '32' in dt_typ
|
991
|
+
else default_precision_unit
|
992
|
+
)
|
993
|
+
}
|
994
|
+
elif are_dtypes_equal(dt_typ, 'date'):
|
995
|
+
if debug:
|
996
|
+
dprint("Datetime axis is 'date', falling back to 'day' precision.")
|
997
|
+
_precision = {'unit': 'day'}
|
998
|
+
|
999
|
+
precision_unit = _precision.get('unit', default_precision_unit)
|
1000
|
+
precision_interval = _precision.get('interval', None)
|
1001
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
1002
|
+
if true_precision_unit is None:
|
1003
|
+
if debug:
|
1004
|
+
dprint(f"No precision could be determined, falling back to '{default_precision_unit}'.")
|
1005
|
+
true_precision_unit = default_precision_unit
|
1006
|
+
|
1007
|
+
if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
|
1008
|
+
from meerschaum.utils.misc import items_str
|
1009
|
+
raise ValueError(
|
1010
|
+
f"Invalid precision unit '{true_precision_unit}'.\n"
|
1011
|
+
"Accepted values are "
|
1012
|
+
f"{items_str(list(MRSM_PRECISION_UNITS_SCALARS) + list(MRSM_PRECISION_UNITS_ALIASES))}."
|
1013
|
+
)
|
1014
|
+
|
1015
|
+
self._precision = {'unit': true_precision_unit}
|
1016
|
+
if precision_interval:
|
1017
|
+
self._precision['interval'] = precision_interval
|
1018
|
+
return self._precision
|
1019
|
+
|
1020
|
+
|
1021
|
+
@property
|
1022
|
+
def precision(self) -> Dict[str, Union[str, int]]:
|
1023
|
+
"""
|
1024
|
+
Return the configured or detected precision.
|
1025
|
+
"""
|
1026
|
+
return self.get_precision()
|
1027
|
+
|
1028
|
+
|
1029
|
+
@precision.setter
|
1030
|
+
def precision(self, _precision: Union[str, Dict[str, Union[str, int]]]) -> None:
|
1031
|
+
"""
|
1032
|
+
Update the `precision` parameter.
|
1033
|
+
"""
|
1034
|
+
existing_precision = self._attributes.get('parameters', {}).get('precision', None)
|
1035
|
+
if isinstance(existing_precision, str):
|
1036
|
+
existing_precision = {'unit': existing_precision}
|
1037
|
+
|
1038
|
+
true_precision = (
|
1039
|
+
_precision
|
1040
|
+
if isinstance(_precision, dict)
|
1041
|
+
else {
|
1042
|
+
'unit': _precision,
|
1043
|
+
**(
|
1044
|
+
{
|
1045
|
+
'interval': existing_precision['interval'],
|
1046
|
+
} if existing_precision else {}
|
1047
|
+
)
|
1048
|
+
}
|
1049
|
+
)
|
1050
|
+
|
1051
|
+
self.update_parameters({'precision': true_precision}, persist=False)
|
1052
|
+
_ = self.__dict__.pop('_precision', None)
|
1053
|
+
|
1054
|
+
|
1055
|
+
def _invalidate_cache(
|
1056
|
+
self,
|
1057
|
+
hard: bool = False,
|
1058
|
+
debug: bool = False,
|
1059
|
+
) -> None:
|
1060
|
+
"""
|
1061
|
+
Invalidate temporary metadata cache.
|
1062
|
+
|
1063
|
+
Parameters
|
1064
|
+
----------
|
1065
|
+
hard: bool, default False
|
1066
|
+
If `True`, clear all temporary cache.
|
1067
|
+
Otherwise only clear soft cache.
|
1068
|
+
"""
|
1069
|
+
if debug:
|
1070
|
+
dprint(f"Invalidating {'some' if not hard else 'all'} cache for {self}.")
|
1071
|
+
|
1072
|
+
self._exists = None
|
1073
|
+
self._sync_ts = None
|
1074
|
+
|
1075
|
+
if not hard:
|
1076
|
+
return
|
1077
|
+
|
1078
|
+
_ = self.__dict__.pop('_parameters', None)
|
1079
|
+
_ = self.__dict__.pop('_precision', None)
|
1080
|
+
self._columns_types_timestamp = None
|
1081
|
+
self._columns_types = None
|
1082
|
+
self._attributes_sync_time = None
|