meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +2 -1
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/static.py +8 -24
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +2 -1
- meerschaum/api/dash/__init__.py +0 -2
- meerschaum/api/dash/callbacks/dashboard.py +1 -1
- meerschaum/api/dash/tokens.py +2 -2
- meerschaum/api/routes/_pipes.py +47 -37
- meerschaum/config/_default.py +11 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +9 -8
- meerschaum/connectors/api/_pipes.py +2 -18
- meerschaum/connectors/api/_tokens.py +2 -2
- meerschaum/connectors/instance/_tokens.py +4 -4
- meerschaum/connectors/sql/_create_engine.py +3 -14
- meerschaum/connectors/sql/_pipes.py +118 -163
- meerschaum/connectors/sql/_sql.py +38 -20
- meerschaum/connectors/valkey/_pipes.py +44 -16
- meerschaum/core/Pipe/__init__.py +28 -5
- meerschaum/core/Pipe/_attributes.py +270 -46
- meerschaum/core/Pipe/_data.py +55 -17
- meerschaum/core/Pipe/_dtypes.py +19 -4
- meerschaum/core/Pipe/_edit.py +2 -0
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_sync.py +90 -160
- meerschaum/core/Pipe/_verify.py +3 -3
- meerschaum/core/Token/_Token.py +3 -4
- meerschaum/utils/dataframe.py +379 -68
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +388 -22
- meerschaum/utils/dtypes/sql.py +326 -30
- meerschaum/utils/misc.py +9 -68
- meerschaum/utils/packages/__init__.py +7 -21
- meerschaum/utils/packages/_packages.py +7 -2
- meerschaum/utils/schedule.py +1 -1
- meerschaum/utils/sql.py +7 -7
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +5 -17
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/RECORD +45 -44
- meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
@@ -321,14 +321,40 @@ def drop_pipe(
|
|
321
321
|
-------
|
322
322
|
A `SuccessTuple` indicating success.
|
323
323
|
"""
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
324
|
+
if not pipe.exists(debug=debug):
|
325
|
+
return True, f"{pipe} does not exist, so it was not dropped."
|
326
|
+
|
327
|
+
table_name = self.quote_table(pipe.target)
|
328
|
+
dt_col = pipe.columns.get('datetime', None)
|
329
|
+
|
330
|
+
try:
|
331
|
+
members = (
|
332
|
+
self.client.zrange(table_name, 0, -1)
|
333
|
+
if dt_col
|
334
|
+
else self.client.smembers(table_name)
|
329
335
|
)
|
330
|
-
|
331
|
-
|
336
|
+
|
337
|
+
keys_to_delete = []
|
338
|
+
for member_bytes in members:
|
339
|
+
member_str = member_bytes.decode('utf-8')
|
340
|
+
member_doc = json.loads(member_str)
|
341
|
+
ix_str = member_doc.get('ix')
|
342
|
+
if not ix_str:
|
343
|
+
continue
|
344
|
+
|
345
|
+
ix_doc = string_to_dict(ix_str.replace(COLON, ':'))
|
346
|
+
doc_key = self.get_document_key(ix_doc, list(ix_doc.keys()), table_name)
|
347
|
+
keys_to_delete.append(doc_key)
|
348
|
+
|
349
|
+
if keys_to_delete:
|
350
|
+
batch_size = 1000
|
351
|
+
for i in range(0, len(keys_to_delete), batch_size):
|
352
|
+
batch = keys_to_delete[i:i+batch_size]
|
353
|
+
self.client.delete(*batch)
|
354
|
+
|
355
|
+
except Exception as e:
|
356
|
+
return False, f"Failed to delete documents for {pipe}:\n{e}"
|
357
|
+
|
332
358
|
try:
|
333
359
|
self.drop_table(pipe.target, debug=debug)
|
334
360
|
except Exception as e:
|
@@ -558,11 +584,7 @@ def sync_pipe(
|
|
558
584
|
|
559
585
|
valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
|
560
586
|
new_dtypes = {
|
561
|
-
str(key): (
|
562
|
-
str(val)
|
563
|
-
if not are_dtypes_equal(str(val), 'datetime')
|
564
|
-
else 'datetime64[ns, UTC]'
|
565
|
-
)
|
587
|
+
str(key): str(val)
|
566
588
|
for key, val in df.dtypes.items()
|
567
589
|
if str(key) not in valkey_dtypes
|
568
590
|
}
|
@@ -571,6 +593,8 @@ def sync_pipe(
|
|
571
593
|
try:
|
572
594
|
df[col] = df[col].astype(typ)
|
573
595
|
except Exception:
|
596
|
+
import traceback
|
597
|
+
traceback.print_exc()
|
574
598
|
valkey_dtypes[col] = 'string'
|
575
599
|
new_dtypes[col] = 'string'
|
576
600
|
df[col] = df[col].astype('string')
|
@@ -780,7 +804,7 @@ def get_sync_time(
|
|
780
804
|
"""
|
781
805
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
782
806
|
dt_col = pipe.columns.get('datetime', None)
|
783
|
-
dt_typ = pipe.dtypes.get(dt_col, '
|
807
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime')
|
784
808
|
if not dt_col:
|
785
809
|
return None
|
786
810
|
|
@@ -788,14 +812,18 @@ def get_sync_time(
|
|
788
812
|
table_name = self.quote_table(pipe.target)
|
789
813
|
try:
|
790
814
|
vals = (
|
791
|
-
self.client.zrevrange(table_name, 0, 0)
|
815
|
+
self.client.zrevrange(table_name, 0, 0, withscores=True)
|
792
816
|
if newest
|
793
|
-
else self.client.zrange(table_name, 0, 0)
|
817
|
+
else self.client.zrange(table_name, 0, 0, withscores=True)
|
794
818
|
)
|
795
819
|
if not vals:
|
796
820
|
return None
|
797
|
-
val = vals[0]
|
821
|
+
val = vals[0][0]
|
822
|
+
if isinstance(val, bytes):
|
823
|
+
val = val.decode('utf-8')
|
798
824
|
except Exception:
|
825
|
+
import traceback
|
826
|
+
traceback.print_exc()
|
799
827
|
return None
|
800
828
|
|
801
829
|
doc = json.loads(val)
|
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -117,11 +117,13 @@ class Pipe:
|
|
117
117
|
tzinfo,
|
118
118
|
enforce,
|
119
119
|
null_indices,
|
120
|
+
mixed_numerics,
|
120
121
|
get_columns,
|
121
122
|
get_columns_types,
|
122
123
|
get_columns_indices,
|
123
124
|
get_indices,
|
124
125
|
get_parameters,
|
126
|
+
get_dtypes,
|
125
127
|
update_parameters,
|
126
128
|
tags,
|
127
129
|
get_id,
|
@@ -133,6 +135,9 @@ class Pipe:
|
|
133
135
|
target,
|
134
136
|
_target_legacy,
|
135
137
|
guess_datetime,
|
138
|
+
precision,
|
139
|
+
get_precision,
|
140
|
+
_invalidate_cache,
|
136
141
|
)
|
137
142
|
from ._show import show
|
138
143
|
from ._edit import edit, edit_definition, update
|
@@ -143,11 +148,7 @@ class Pipe:
|
|
143
148
|
filter_existing,
|
144
149
|
_get_chunk_label,
|
145
150
|
get_num_workers,
|
146
|
-
|
147
|
-
_persist_new_numeric_columns,
|
148
|
-
_persist_new_uuid_columns,
|
149
|
-
_persist_new_bytes_columns,
|
150
|
-
_persist_new_geometry_columns,
|
151
|
+
_persist_new_special_columns,
|
151
152
|
)
|
152
153
|
from ._verify import (
|
153
154
|
verify,
|
@@ -179,9 +180,11 @@ class Pipe:
|
|
179
180
|
upsert: Optional[bool] = None,
|
180
181
|
autoincrement: Optional[bool] = None,
|
181
182
|
autotime: Optional[bool] = None,
|
183
|
+
precision: Union[str, Dict[str, Union[str, int]], None] = None,
|
182
184
|
static: Optional[bool] = None,
|
183
185
|
enforce: Optional[bool] = None,
|
184
186
|
null_indices: Optional[bool] = None,
|
187
|
+
mixed_numerics: Optional[bool] = None,
|
185
188
|
mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
|
186
189
|
cache: bool = False,
|
187
190
|
debug: bool = False,
|
@@ -240,6 +243,13 @@ class Pipe:
|
|
240
243
|
autotime: Optional[bool], default None
|
241
244
|
If `True`, set `autotime` in the parameters.
|
242
245
|
|
246
|
+
precision: Union[str, Dict[str, Union[str, int]], None], default None
|
247
|
+
If provided, set `precision` in the parameters.
|
248
|
+
This may be either a string (the precision unit) or a dictionary in the form
|
249
|
+
`{'unit': <unit>, 'interval': <interval>}`.
|
250
|
+
Default is determined by the `datetime` column dtype
|
251
|
+
(e.g. `datetime64[us]` is `microsecond` precision).
|
252
|
+
|
243
253
|
static: Optional[bool], default None
|
244
254
|
If `True`, set `static` in the parameters.
|
245
255
|
|
@@ -251,6 +261,11 @@ class Pipe:
|
|
251
261
|
Set to `False` if there will be no null values in the index columns.
|
252
262
|
Defaults to `True`.
|
253
263
|
|
264
|
+
mixed_numerics: Optional[bool], default None
|
265
|
+
If `True`, integer columns will be converted to `numeric` when floats are synced.
|
266
|
+
Set to `False` to disable this behavior.
|
267
|
+
Defaults to `True`.
|
268
|
+
|
254
269
|
temporary: bool, default False
|
255
270
|
If `True`, prevent instance tables (pipes, users, plugins) from being created.
|
256
271
|
|
@@ -353,6 +368,11 @@ class Pipe:
|
|
353
368
|
if isinstance(autotime, bool):
|
354
369
|
self._attributes['parameters']['autotime'] = autotime
|
355
370
|
|
371
|
+
if isinstance(precision, dict):
|
372
|
+
self._attributes['parameters']['precision'] = precision
|
373
|
+
elif isinstance(precision, str):
|
374
|
+
self._attributes['parameters']['precision'] = {'unit': precision}
|
375
|
+
|
356
376
|
if isinstance(static, bool):
|
357
377
|
self._attributes['parameters']['static'] = static
|
358
378
|
|
@@ -362,6 +382,9 @@ class Pipe:
|
|
362
382
|
if isinstance(null_indices, bool):
|
363
383
|
self._attributes['parameters']['null_indices'] = null_indices
|
364
384
|
|
385
|
+
if isinstance(mixed_numerics, bool):
|
386
|
+
self._attributes['parameters']['mixed_numerics'] = mixed_numerics
|
387
|
+
|
365
388
|
### NOTE: The parameters dictionary is {} by default.
|
366
389
|
### A Pipe may be registered without parameters, then edited,
|
367
390
|
### or a Pipe may be registered with parameters set in-memory first.
|
@@ -13,7 +13,7 @@ from datetime import timezone
|
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
15
|
from meerschaum.utils.typing import Tuple, Dict, Any, Union, Optional, List
|
16
|
-
from meerschaum.utils.warnings import warn
|
16
|
+
from meerschaum.utils.warnings import warn, dprint
|
17
17
|
|
18
18
|
|
19
19
|
@property
|
@@ -52,18 +52,34 @@ def attributes(self) -> Dict[str, Any]:
|
|
52
52
|
def get_parameters(
|
53
53
|
self,
|
54
54
|
apply_symlinks: bool = True,
|
55
|
+
refresh: bool = False,
|
56
|
+
debug: bool = False,
|
55
57
|
_visited: 'Optional[set[mrsm.Pipe]]' = None,
|
56
58
|
) -> Dict[str, Any]:
|
57
59
|
"""
|
58
60
|
Return the `parameters` dictionary of the pipe.
|
61
|
+
|
62
|
+
Parameters
|
63
|
+
----------
|
64
|
+
apply_symlinks: bool, default True
|
65
|
+
If `True`, resolve references to parameters from other pipes.
|
66
|
+
|
67
|
+
refresh: bool, default False
|
68
|
+
If `True`, pull the latest attributes for the pipe.
|
69
|
+
|
70
|
+
Returns
|
71
|
+
-------
|
72
|
+
The pipe's parameters dictionary.
|
59
73
|
"""
|
60
74
|
from meerschaum.config._patch import apply_patch_to_config
|
61
|
-
from meerschaum.utils.warnings import warn
|
62
75
|
from meerschaum.config._read_config import search_and_substitute_config
|
63
76
|
|
64
77
|
if _visited is None:
|
65
78
|
_visited = {self}
|
66
79
|
|
80
|
+
if refresh:
|
81
|
+
self._invalidate_cache(hard=True)
|
82
|
+
|
67
83
|
raw_parameters = self.attributes.get('parameters', {})
|
68
84
|
ref_keys = raw_parameters.get('reference')
|
69
85
|
if not apply_symlinks:
|
@@ -71,13 +87,15 @@ def get_parameters(
|
|
71
87
|
|
72
88
|
if ref_keys:
|
73
89
|
try:
|
90
|
+
if debug:
|
91
|
+
dprint(f"Building reference pipe from keys: {ref_keys}")
|
74
92
|
ref_pipe = mrsm.Pipe(**ref_keys)
|
75
93
|
if ref_pipe in _visited:
|
76
94
|
warn(f"Circular reference detected in {self}: chain involves {ref_pipe}.")
|
77
95
|
return search_and_substitute_config(raw_parameters)
|
78
96
|
|
79
97
|
_visited.add(ref_pipe)
|
80
|
-
base_params = ref_pipe.get_parameters(_visited=_visited)
|
98
|
+
base_params = ref_pipe.get_parameters(_visited=_visited, debug=debug)
|
81
99
|
except Exception as e:
|
82
100
|
warn(f"Failed to resolve reference pipe for {self}: {e}")
|
83
101
|
base_params = {}
|
@@ -113,10 +131,7 @@ def parameters(self) -> Optional[Dict[str, Any]]:
|
|
113
131
|
"""
|
114
132
|
Return the parameters dictionary of the pipe.
|
115
133
|
"""
|
116
|
-
|
117
|
-
return _parameters
|
118
|
-
self._parameters = self.get_parameters()
|
119
|
-
return self._parameters
|
134
|
+
return self.get_parameters()
|
120
135
|
|
121
136
|
|
122
137
|
@parameters.setter
|
@@ -165,14 +180,12 @@ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
|
|
165
180
|
if 'indexes' in self.parameters
|
166
181
|
else 'indices'
|
167
182
|
)
|
168
|
-
|
169
|
-
|
170
|
-
_indices = self.parameters[indices_key]
|
183
|
+
|
184
|
+
_indices = self.parameters.get(indices_key, {})
|
171
185
|
_columns = self.columns
|
172
186
|
dt_col = _columns.get('datetime', None)
|
173
187
|
if not isinstance(_indices, dict):
|
174
188
|
_indices = {}
|
175
|
-
self.parameters[indices_key] = _indices
|
176
189
|
unique_cols = list(set((
|
177
190
|
[dt_col]
|
178
191
|
if dt_col
|
@@ -246,17 +259,62 @@ def tags(self, _tags: List[str]) -> None:
|
|
246
259
|
|
247
260
|
|
248
261
|
@property
|
249
|
-
def dtypes(self) ->
|
262
|
+
def dtypes(self) -> Dict[str, Any]:
|
250
263
|
"""
|
251
264
|
If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
|
252
265
|
"""
|
266
|
+
return self.get_dtypes(refresh=False)
|
267
|
+
|
268
|
+
|
269
|
+
@dtypes.setter
|
270
|
+
def dtypes(self, _dtypes: Dict[str, Any]) -> None:
|
271
|
+
"""
|
272
|
+
Override the dtypes dictionary of the in-memory pipe.
|
273
|
+
Call `meerschaum.Pipe.edit()` to persist changes.
|
274
|
+
"""
|
275
|
+
self.update_parameters({'dtypes': _dtypes}, persist=False)
|
276
|
+
_ = self.__dict__.pop('_remote_dtypes', None)
|
277
|
+
_ = self.__dict__.pop('_remote_dtypes_timestamp', None)
|
278
|
+
|
279
|
+
|
280
|
+
def get_dtypes(
|
281
|
+
self,
|
282
|
+
infer: bool = True,
|
283
|
+
refresh: bool = False,
|
284
|
+
debug: bool = False,
|
285
|
+
) -> Dict[str, Any]:
|
286
|
+
"""
|
287
|
+
If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
|
288
|
+
|
289
|
+
|
290
|
+
Parameters
|
291
|
+
----------
|
292
|
+
infer: bool, default True
|
293
|
+
If `True`, include the implicit existing dtypes.
|
294
|
+
Else only return the explicitly configured dtypes (e.g. `Pipe.parameters['dtypes']`).
|
295
|
+
|
296
|
+
refresh: bool, default False
|
297
|
+
If `True`, invalidate any cache and return the latest known dtypes.
|
298
|
+
|
299
|
+
Returns
|
300
|
+
-------
|
301
|
+
A dictionary mapping column names to dtypes.
|
302
|
+
"""
|
303
|
+
import time
|
253
304
|
from meerschaum.config._patch import apply_patch_to_config
|
254
305
|
from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
306
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
307
|
+
parameters = self.get_parameters(refresh=refresh, debug=debug)
|
308
|
+
configured_dtypes = parameters.get('dtypes', {})
|
309
|
+
if debug:
|
310
|
+
dprint(f"Configured dtypes for {self}:")
|
311
|
+
mrsm.pprint(configured_dtypes)
|
312
|
+
|
313
|
+
remote_dtypes = self.infer_dtypes(persist=False, refresh=refresh, debug=debug)
|
314
|
+
patched_dtypes = apply_patch_to_config((remote_dtypes or {}), (configured_dtypes or {}))
|
315
|
+
|
316
|
+
dt_col = parameters.get('columns', {}).get('datetime', None)
|
317
|
+
primary_col = parameters.get('columns', {}).get('primary', None)
|
260
318
|
_dtypes = {
|
261
319
|
col: MRSM_ALIAS_DTYPES.get(typ, typ)
|
262
320
|
for col, typ in patched_dtypes.items()
|
@@ -264,18 +322,10 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
|
|
264
322
|
}
|
265
323
|
if dt_col and dt_col not in configured_dtypes:
|
266
324
|
_dtypes[dt_col] = 'datetime'
|
267
|
-
if primary_col and
|
325
|
+
if primary_col and parameters.get('autoincrement', False) and primary_col not in _dtypes:
|
268
326
|
_dtypes[primary_col] = 'int'
|
269
|
-
return _dtypes
|
270
|
-
|
271
327
|
|
272
|
-
|
273
|
-
def dtypes(self, _dtypes: Dict[str, Any]) -> None:
|
274
|
-
"""
|
275
|
-
Override the dtypes dictionary of the in-memory pipe.
|
276
|
-
Call `meerschaum.Pipe.edit()` to persist changes.
|
277
|
-
"""
|
278
|
-
self.update_parameters({'dtypes': _dtypes}, persist=False)
|
328
|
+
return _dtypes
|
279
329
|
|
280
330
|
|
281
331
|
@property
|
@@ -347,18 +397,22 @@ def tzinfo(self) -> Union[None, timezone]:
|
|
347
397
|
"""
|
348
398
|
Return `timezone.utc` if the pipe is timezone-aware.
|
349
399
|
"""
|
350
|
-
|
351
|
-
|
352
|
-
return None
|
400
|
+
if '_tzinfo' in self.__dict__:
|
401
|
+
return self.__dict__['_tzinfo']
|
353
402
|
|
354
|
-
|
355
|
-
|
356
|
-
|
403
|
+
_tzinfo = None
|
404
|
+
dt_col = self.columns.get('datetime', None)
|
405
|
+
dt_typ = str(self.dtypes.get(dt_col, 'datetime')) if dt_col else None
|
406
|
+
if self.autotime:
|
407
|
+
ts_col = mrsm.get_config('pipes', 'autotime', 'column_name_if_datetime_missing')
|
408
|
+
ts_typ = self.dtypes.get(ts_col, 'datetime')
|
409
|
+
dt_typ = ts_typ
|
357
410
|
|
358
|
-
if dt_typ == '
|
359
|
-
|
411
|
+
if dt_typ and 'utc' in dt_typ.lower() or dt_typ == 'datetime':
|
412
|
+
_tzinfo = timezone.utc
|
360
413
|
|
361
|
-
|
414
|
+
self._tzinfo = _tzinfo
|
415
|
+
return _tzinfo
|
362
416
|
|
363
417
|
|
364
418
|
@property
|
@@ -393,6 +447,22 @@ def null_indices(self, _null_indices: bool) -> None:
|
|
393
447
|
self.update_parameters({'null_indices': _null_indices}, persist=False)
|
394
448
|
|
395
449
|
|
450
|
+
@property
|
451
|
+
def mixed_numerics(self) -> bool:
|
452
|
+
"""
|
453
|
+
Return the `mixed_numerics` parameter for the pipe.
|
454
|
+
"""
|
455
|
+
return self.parameters.get('mixed_numerics', True)
|
456
|
+
|
457
|
+
|
458
|
+
@mixed_numerics.setter
|
459
|
+
def mixed_numerics(self, _mixed_numerics: bool) -> None:
|
460
|
+
"""
|
461
|
+
Set the `mixed_numerics` parameter for the pipe.
|
462
|
+
"""
|
463
|
+
self.update_parameters({'mixed_numerics': _mixed_numerics}, persist=False)
|
464
|
+
|
465
|
+
|
396
466
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
397
467
|
"""
|
398
468
|
Check if the requested columns are defined.
|
@@ -471,15 +541,17 @@ def get_columns_types(
|
|
471
541
|
import time
|
472
542
|
from meerschaum.connectors import get_connector_plugin
|
473
543
|
from meerschaum._internal.static import STATIC_CONFIG
|
474
|
-
from meerschaum.utils.warnings import dprint
|
475
544
|
|
476
545
|
now = time.perf_counter()
|
477
|
-
cache_seconds =
|
478
|
-
|
479
|
-
|
546
|
+
cache_seconds = (
|
547
|
+
mrsm.get_config('pipes', 'static', 'static_schema_cache_seconds')
|
548
|
+
if self.static
|
549
|
+
else mrsm.get_config('pipes', 'dtypes', 'columns_types_cache_seconds')
|
550
|
+
)
|
480
551
|
if refresh:
|
481
552
|
_ = self.__dict__.pop('_columns_types_timestamp', None)
|
482
553
|
_ = self.__dict__.pop('_columns_types', None)
|
554
|
+
|
483
555
|
_columns_types = self.__dict__.get('_columns_types', None)
|
484
556
|
if _columns_types:
|
485
557
|
columns_types_timestamp = self.__dict__.get('_columns_types_timestamp', None)
|
@@ -516,13 +588,12 @@ def get_columns_indices(
|
|
516
588
|
import time
|
517
589
|
from meerschaum.connectors import get_connector_plugin
|
518
590
|
from meerschaum._internal.static import STATIC_CONFIG
|
519
|
-
from meerschaum.utils.warnings import dprint
|
520
591
|
|
521
592
|
now = time.perf_counter()
|
522
593
|
cache_seconds = (
|
523
|
-
|
594
|
+
mrsm.get_config('pipes', 'static', 'static_schema_cache_seconds')
|
524
595
|
if self.static
|
525
|
-
else
|
596
|
+
else mrsm.get_config('pipes', 'dtypes', 'columns_types_cache_seconds')
|
526
597
|
)
|
527
598
|
if refresh:
|
528
599
|
_ = self.__dict__.pop('_columns_indices_timestamp', None)
|
@@ -595,7 +666,6 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:
|
|
595
666
|
-------
|
596
667
|
Either a string or `None`.
|
597
668
|
"""
|
598
|
-
from meerschaum.utils.debug import dprint
|
599
669
|
if debug:
|
600
670
|
dprint('Attempting to determine the value column...')
|
601
671
|
try:
|
@@ -657,6 +727,7 @@ def parents(self) -> List[mrsm.Pipe]:
|
|
657
727
|
"""
|
658
728
|
if 'parents' not in self.parameters:
|
659
729
|
return []
|
730
|
+
|
660
731
|
from meerschaum.utils.warnings import warn
|
661
732
|
_parents_keys = self.parameters['parents']
|
662
733
|
if not isinstance(_parents_keys, list):
|
@@ -695,6 +766,7 @@ def children(self) -> List[mrsm.Pipe]:
|
|
695
766
|
"""
|
696
767
|
if 'children' not in self.parameters:
|
697
768
|
return []
|
769
|
+
|
698
770
|
from meerschaum.utils.warnings import warn
|
699
771
|
_children_keys = self.parameters['children']
|
700
772
|
if not isinstance(_children_keys, list):
|
@@ -836,11 +908,12 @@ def update_parameters(
|
|
836
908
|
If `True`, call `Pipe.edit()` to persist the new parameters.
|
837
909
|
"""
|
838
910
|
from meerschaum.config import apply_patch_to_config
|
839
|
-
if '_parameters' in self.__dict__:
|
840
|
-
del self.__dict__['_parameters']
|
841
911
|
if 'parameters' not in self._attributes:
|
842
912
|
self._attributes['parameters'] = {}
|
843
913
|
|
914
|
+
if '_parameters' not in self.__dict__:
|
915
|
+
self._parameters = {}
|
916
|
+
|
844
917
|
self._attributes['parameters'] = apply_patch_to_config(
|
845
918
|
self._attributes['parameters'],
|
846
919
|
parameters_patch,
|
@@ -853,3 +926,154 @@ def update_parameters(
|
|
853
926
|
return True, "Success"
|
854
927
|
|
855
928
|
return self.edit(debug=debug)
|
929
|
+
|
930
|
+
|
931
|
+
def get_precision(self, debug: bool = False) -> Dict[str, Union[str, int]]:
|
932
|
+
"""
|
933
|
+
Return the timestamp precision unit and interval for the `datetime` axis.
|
934
|
+
"""
|
935
|
+
from meerschaum.utils.dtypes import (
|
936
|
+
MRSM_PRECISION_UNITS_SCALARS,
|
937
|
+
MRSM_PRECISION_UNITS_ALIASES,
|
938
|
+
MRSM_PD_DTYPES,
|
939
|
+
are_dtypes_equal,
|
940
|
+
)
|
941
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
942
|
+
|
943
|
+
if self.__dict__.get('_precision', None):
|
944
|
+
if debug:
|
945
|
+
dprint(f"Returning cached precision: {self._precision}")
|
946
|
+
return self._precision
|
947
|
+
|
948
|
+
parameters = self.parameters
|
949
|
+
_precision = parameters.get('precision', {})
|
950
|
+
if isinstance(_precision, str):
|
951
|
+
_precision = {'unit': _precision}
|
952
|
+
default_precision_unit = STATIC_CONFIG['dtypes']['datetime']['default_precision_unit']
|
953
|
+
|
954
|
+
if not _precision:
|
955
|
+
|
956
|
+
dt_col = parameters.get('columns', {}).get('datetime', None)
|
957
|
+
if not dt_col and self.autotime:
|
958
|
+
dt_col = mrsm.get_config('pipes', 'autotime', 'column_name_if_datetime_missing')
|
959
|
+
if not dt_col:
|
960
|
+
if debug:
|
961
|
+
dprint(f"No datetime axis, returning default precision '{default_precision_unit}'.")
|
962
|
+
return {'unit': default_precision_unit}
|
963
|
+
|
964
|
+
dt_typ = self.dtypes.get(dt_col, 'datetime')
|
965
|
+
if are_dtypes_equal(dt_typ, 'datetime'):
|
966
|
+
if dt_typ == 'datetime':
|
967
|
+
dt_typ = MRSM_PD_DTYPES['datetime']
|
968
|
+
if debug:
|
969
|
+
dprint(f"Datetime type is `datetime`, assuming {dt_typ} precision.")
|
970
|
+
|
971
|
+
_precision = {
|
972
|
+
'unit': (
|
973
|
+
dt_typ
|
974
|
+
.split('[', maxsplit=1)[-1]
|
975
|
+
.split(',', maxsplit=1)[0]
|
976
|
+
.split(' ', maxsplit=1)[0]
|
977
|
+
).rstrip(']')
|
978
|
+
}
|
979
|
+
|
980
|
+
if debug:
|
981
|
+
dprint(f"Extracted precision '{_precision['unit']}' from type '{dt_typ}'.")
|
982
|
+
|
983
|
+
elif are_dtypes_equal(dt_typ, 'int'):
|
984
|
+
_precision = {
|
985
|
+
'unit': (
|
986
|
+
'second'
|
987
|
+
if '32' in dt_typ
|
988
|
+
else default_precision_unit
|
989
|
+
)
|
990
|
+
}
|
991
|
+
elif are_dtypes_equal(dt_typ, 'date'):
|
992
|
+
if debug:
|
993
|
+
dprint("Datetime axis is 'date', falling back to 'day' precision.")
|
994
|
+
_precision = {'unit': 'day'}
|
995
|
+
|
996
|
+
precision_unit = _precision.get('unit', default_precision_unit)
|
997
|
+
precision_interval = _precision.get('interval', None)
|
998
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
999
|
+
if true_precision_unit is None:
|
1000
|
+
if debug:
|
1001
|
+
dprint(f"No precision could be determined, falling back to '{default_precision_unit}'.")
|
1002
|
+
true_precision_unit = default_precision_unit
|
1003
|
+
|
1004
|
+
if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
|
1005
|
+
from meerschaum.utils.misc import items_str
|
1006
|
+
raise ValueError(
|
1007
|
+
f"Invalid precision unit '{true_precision_unit}'.\n"
|
1008
|
+
"Accepted values are "
|
1009
|
+
f"{items_str(list(MRSM_PRECISION_UNITS_SCALARS) + list(MRSM_PRECISION_UNITS_ALIASES))}."
|
1010
|
+
)
|
1011
|
+
|
1012
|
+
self._precision = {'unit': true_precision_unit}
|
1013
|
+
if precision_interval:
|
1014
|
+
self._precision['interval'] = precision_interval
|
1015
|
+
return self._precision
|
1016
|
+
|
1017
|
+
|
1018
|
+
@property
|
1019
|
+
def precision(self) -> Dict[str, Union[str, int]]:
|
1020
|
+
"""
|
1021
|
+
Return the configured or detected precision.
|
1022
|
+
"""
|
1023
|
+
return self.get_precision()
|
1024
|
+
|
1025
|
+
|
1026
|
+
@precision.setter
|
1027
|
+
def precision(self, _precision: Union[str, Dict[str, Union[str, int]]]) -> None:
|
1028
|
+
"""
|
1029
|
+
Update the `precision` parameter.
|
1030
|
+
"""
|
1031
|
+
existing_precision = self._attributes.get('parameters', {}).get('precision', None)
|
1032
|
+
if isinstance(existing_precision, str):
|
1033
|
+
existing_precision = {'unit': existing_precision}
|
1034
|
+
|
1035
|
+
true_precision = (
|
1036
|
+
_precision
|
1037
|
+
if isinstance(_precision, dict)
|
1038
|
+
else {
|
1039
|
+
'unit': _precision,
|
1040
|
+
**(
|
1041
|
+
{
|
1042
|
+
'interval': existing_precision['interval'],
|
1043
|
+
} if existing_precision else {}
|
1044
|
+
)
|
1045
|
+
}
|
1046
|
+
)
|
1047
|
+
|
1048
|
+
self.update_parameters({'precision': true_precision}, persist=False)
|
1049
|
+
_ = self.__dict__.pop('_precision', None)
|
1050
|
+
|
1051
|
+
|
1052
|
+
def _invalidate_cache(
|
1053
|
+
self,
|
1054
|
+
hard: bool = False,
|
1055
|
+
debug: bool = False,
|
1056
|
+
) -> None:
|
1057
|
+
"""
|
1058
|
+
Invalidate temporary metadata cache.
|
1059
|
+
|
1060
|
+
Parameters
|
1061
|
+
----------
|
1062
|
+
hard: bool, default False
|
1063
|
+
If `True`, clear all temporary cache.
|
1064
|
+
Otherwise only clear soft cache.
|
1065
|
+
"""
|
1066
|
+
if debug:
|
1067
|
+
dprint(f"Invalidating {'some' if not hard else 'all'} cache for {self}.")
|
1068
|
+
|
1069
|
+
self._exists = None
|
1070
|
+
self._sync_ts = None
|
1071
|
+
|
1072
|
+
if not hard:
|
1073
|
+
return
|
1074
|
+
|
1075
|
+
_ = self.__dict__.pop('_parameters', None)
|
1076
|
+
_ = self.__dict__.pop('_precision', None)
|
1077
|
+
self._columns_types_timestamp = None
|
1078
|
+
self._columns_types = None
|
1079
|
+
self._attributes_sync_time = None
|