meerschaum 2.3.6__py3-none-any.whl → 2.4.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/actions/bootstrap.py +36 -10
- meerschaum/actions/copy.py +3 -3
- meerschaum/actions/start.py +13 -14
- meerschaum/api/dash/__init__.py +7 -6
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/dashboard.py +7 -5
- meerschaum/api/dash/callbacks/pipes.py +42 -0
- meerschaum/api/dash/pages/__init__.py +1 -0
- meerschaum/api/dash/pages/pipes.py +16 -0
- meerschaum/api/dash/pipes.py +79 -47
- meerschaum/api/dash/users.py +19 -6
- meerschaum/api/routes/_login.py +4 -4
- meerschaum/api/routes/_pipes.py +3 -3
- meerschaum/config/_default.py +9 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +59 -16
- meerschaum/connectors/Connector.py +19 -13
- meerschaum/connectors/__init__.py +9 -5
- meerschaum/connectors/poll.py +30 -24
- meerschaum/connectors/sql/_pipes.py +126 -154
- meerschaum/connectors/sql/_plugins.py +45 -43
- meerschaum/connectors/sql/_users.py +46 -38
- meerschaum/connectors/valkey/ValkeyConnector.py +535 -0
- meerschaum/connectors/valkey/__init__.py +8 -0
- meerschaum/connectors/valkey/_fetch.py +75 -0
- meerschaum/connectors/valkey/_pipes.py +839 -0
- meerschaum/connectors/valkey/_plugins.py +265 -0
- meerschaum/connectors/valkey/_users.py +305 -0
- meerschaum/core/Pipe/__init__.py +3 -0
- meerschaum/core/Pipe/_attributes.py +1 -2
- meerschaum/core/Pipe/_clear.py +16 -13
- meerschaum/core/Pipe/_copy.py +106 -0
- meerschaum/core/Pipe/_drop.py +4 -4
- meerschaum/core/Pipe/_dtypes.py +14 -14
- meerschaum/core/Pipe/_edit.py +15 -14
- meerschaum/core/Pipe/_sync.py +134 -51
- meerschaum/core/Pipe/_verify.py +11 -11
- meerschaum/core/User/_User.py +14 -12
- meerschaum/plugins/_Plugin.py +17 -13
- meerschaum/utils/_get_pipes.py +14 -20
- meerschaum/utils/dataframe.py +288 -101
- meerschaum/utils/dtypes/__init__.py +31 -6
- meerschaum/utils/dtypes/sql.py +4 -4
- meerschaum/utils/misc.py +3 -3
- meerschaum/utils/packages/_packages.py +1 -0
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/METADATA +3 -1
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/RECORD +53 -44
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/WHEEL +1 -1
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/LICENSE +0 -0
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/NOTICE +0 -0
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/zip-safe +0 -0
@@ -0,0 +1,106 @@
|
|
1
|
+
#! /usr/bin/env python3
|
2
|
+
# vim:fenc=utf-8
|
3
|
+
|
4
|
+
"""
|
5
|
+
Define methods for copying pipes.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from datetime import datetime, timedelta
|
9
|
+
|
10
|
+
import meerschaum as mrsm
|
11
|
+
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Dict, Union
|
12
|
+
|
13
|
+
|
14
|
+
def copy_to(
    self,
    instance_keys: str,
    sync: bool = True,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    chunk_interval: Union[timedelta, int, None] = None,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Copy a pipe to another instance.

    The pipe's registration (keys and parameters) is always copied first:
    the target pipe is edited if it is already registered on the target
    instance and registered otherwise. The data are then streamed in chunks
    from this pipe into the new pipe.

    Parameters
    ----------
    instance_keys: str
        The instance to which to copy this pipe.
        Must differ from this pipe's own `instance_keys`.

    sync: bool, default True
        If `True`, sync the source pipe's documents into the new pipe.
        If `False`, only the registration is copied and no data are read.

    begin: Union[datetime, int, None], default None
        Beginning datetime value to pass to `Pipe.get_data()`.

    end: Union[datetime, int, None], default None
        End datetime value to pass to `Pipe.get_data()`.

    params: Optional[Dict[str, Any]], default None
        Parameters filter to pass to `Pipe.get_data()`.

    chunk_interval: Union[timedelta, int, None], default None
        The size of chunks to retrieve from `Pipe.get_data()` for syncing.

    kwargs: Any
        Additional flags to pass to `Pipe.get_data()` and `Pipe.sync()`, e.g. `workers`.

    Returns
    -------
    A SuccessTuple indicating success.
    """
    from copy import deepcopy

    if str(instance_keys) == self.instance_keys:
        return False, f"Cannot copy {self} to instance '{instance_keys}'."

    new_pipe = mrsm.Pipe(
        self.connector_keys,
        self.metric_key,
        self.location_key,
        ### Deep-copy so nested dictionaries (e.g. `dtypes`) are not shared
        ### between the source pipe and the new pipe.
        parameters=deepcopy(self.parameters),
        instance=instance_keys,
    )

    new_pipe_is_registered = new_pipe.get_id() is not None

    metadata_method = new_pipe.edit if new_pipe_is_registered else new_pipe.register
    metadata_success, metadata_msg = metadata_method(debug=debug)

    ### Stop here if the registration failed or if only the metadata were requested.
    if not metadata_success or not sync:
        return metadata_success, metadata_msg

    if not self.exists(debug=debug):
        return True, f"{self} does not exist; nothing to sync."

    ### Always read the source data lazily, regardless of the caller's `as_iterator`.
    get_data_kwargs = dict(kwargs)
    get_data_kwargs['as_iterator'] = True

    ### Forward the caller's own `as_iterator` to `sync()` only if it was set.
    sync_kwargs = dict(kwargs)
    if sync_kwargs.get('as_iterator', None) is None:
        sync_kwargs.pop('as_iterator', None)

    chunk_generator = self.get_data(
        begin=begin,
        end=end,
        params=params,
        chunk_interval=chunk_interval,
        debug=debug,
        **get_data_kwargs
    )

    sync_success, sync_msg = new_pipe.sync(
        chunk_generator,
        begin=begin,
        end=end,
        params=params,
        debug=debug,
        **sync_kwargs
    )
    msg = (
        f"Successfully synced {new_pipe}:\n{sync_msg}"
        if sync_success
        else f"Failed to sync {new_pipe}:\n{sync_msg}"
    )
    return sync_success, msg
|
meerschaum/core/Pipe/_drop.py
CHANGED
@@ -10,10 +10,10 @@ from __future__ import annotations
|
|
10
10
|
from meerschaum.utils.typing import SuccessTuple, Any
|
11
11
|
|
12
12
|
def drop(
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
self,
|
14
|
+
debug: bool = False,
|
15
|
+
**kw: Any
|
16
|
+
) -> SuccessTuple:
|
17
17
|
"""
|
18
18
|
Call the Pipe's instance connector's `drop_pipe()` method.
|
19
19
|
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -16,12 +16,12 @@ if TYPE_CHECKING:
|
|
16
16
|
pd = mrsm.attempt_import('pandas')
|
17
17
|
|
18
18
|
def enforce_dtypes(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
self,
|
20
|
+
df: 'pd.DataFrame',
|
21
|
+
chunksize: Optional[int] = -1,
|
22
|
+
safe_copy: bool = True,
|
23
|
+
debug: bool = False,
|
24
|
+
) -> 'pd.DataFrame':
|
25
25
|
"""
|
26
26
|
Cast the input dataframe to the pipe's registered data types.
|
27
27
|
If the pipe does not exist and dtypes are not set, return the dataframe.
|
@@ -35,7 +35,7 @@ def enforce_dtypes(
|
|
35
35
|
if df is None:
|
36
36
|
if debug:
|
37
37
|
dprint(
|
38
|
-
|
38
|
+
"Received None instead of a DataFrame.\n"
|
39
39
|
+ " Skipping dtype enforcement..."
|
40
40
|
)
|
41
41
|
return df
|
@@ -46,24 +46,24 @@ def enforce_dtypes(
|
|
46
46
|
if isinstance(df, str):
|
47
47
|
df = parse_df_datetimes(
|
48
48
|
pd.read_json(StringIO(df)),
|
49
|
-
ignore_cols
|
49
|
+
ignore_cols=[
|
50
50
|
col
|
51
51
|
for col, dtype in pipe_dtypes.items()
|
52
52
|
if 'datetime' not in str(dtype)
|
53
53
|
],
|
54
|
-
chunksize
|
55
|
-
debug
|
54
|
+
chunksize=chunksize,
|
55
|
+
debug=debug,
|
56
56
|
)
|
57
57
|
else:
|
58
58
|
df = parse_df_datetimes(
|
59
59
|
df,
|
60
|
-
ignore_cols
|
60
|
+
ignore_cols=[
|
61
61
|
col
|
62
62
|
for col, dtype in pipe_dtypes.items()
|
63
63
|
if 'datetime' not in str(dtype)
|
64
64
|
],
|
65
|
-
chunksize
|
66
|
-
debug
|
65
|
+
chunksize=chunksize,
|
66
|
+
debug=debug,
|
67
67
|
)
|
68
68
|
except Exception as e:
|
69
69
|
warn(f"Unable to cast incoming data as a DataFrame...:\n{e}\n\n{traceback.format_exc()}")
|
@@ -80,7 +80,7 @@ def enforce_dtypes(
|
|
80
80
|
return _enforce_dtypes(df, pipe_dtypes, safe_copy=safe_copy, debug=debug)
|
81
81
|
|
82
82
|
|
83
|
-
def infer_dtypes(self, persist: bool=False, debug: bool=False) -> Dict[str, Any]:
|
83
|
+
def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
|
84
84
|
"""
|
85
85
|
If `dtypes` is not set in `meerschaum.Pipe.parameters`,
|
86
86
|
infer the data types from the underlying table if it exists.
|
meerschaum/core/Pipe/_edit.py
CHANGED
@@ -18,12 +18,12 @@ def update(self, *args, **kw) -> SuccessTuple:
|
|
18
18
|
|
19
19
|
|
20
20
|
def edit(
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
self,
|
22
|
+
patch: bool = False,
|
23
|
+
interactive: bool = False,
|
24
|
+
debug: bool = False,
|
25
|
+
**kw: Any
|
26
|
+
) -> SuccessTuple:
|
27
27
|
"""
|
28
28
|
Edit a Pipe's configuration.
|
29
29
|
|
@@ -50,11 +50,12 @@ def edit(
|
|
50
50
|
if not interactive:
|
51
51
|
with Venv(get_connector_plugin(self.instance_connector)):
|
52
52
|
return self.instance_connector.edit_pipe(self, patch=patch, debug=debug, **kw)
|
53
|
+
|
53
54
|
from meerschaum.config._paths import PIPES_CACHE_RESOURCES_PATH
|
54
55
|
from meerschaum.utils.misc import edit_file
|
55
56
|
parameters_filename = str(self) + '.yaml'
|
56
57
|
parameters_path = PIPES_CACHE_RESOURCES_PATH / parameters_filename
|
57
|
-
|
58
|
+
|
58
59
|
from meerschaum.utils.yaml import yaml
|
59
60
|
|
60
61
|
edit_text = f"Edit the parameters for {self}"
|
@@ -96,13 +97,13 @@ def edit(
|
|
96
97
|
|
97
98
|
|
98
99
|
def edit_definition(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
100
|
+
self,
|
101
|
+
yes: bool = False,
|
102
|
+
noask: bool = False,
|
103
|
+
force: bool = False,
|
104
|
+
debug : bool = False,
|
105
|
+
**kw : Any
|
106
|
+
) -> SuccessTuple:
|
106
107
|
"""
|
107
108
|
Edit a pipe's definition file and update its configuration.
|
108
109
|
**NOTE:** This function is interactive and should not be used in automated scripts!
|
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -266,7 +266,6 @@ def sync(
|
|
266
266
|
**kw
|
267
267
|
)
|
268
268
|
)
|
269
|
-
|
270
269
|
except Exception as e:
|
271
270
|
get_console().print_exception(
|
272
271
|
suppress=[
|
@@ -369,6 +368,11 @@ def sync(
|
|
369
368
|
|
370
369
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
371
370
|
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
371
|
+
|
372
|
+
### Capture `numeric` and `json` columns.
|
373
|
+
self._persist_new_json_columns(df, debug=debug)
|
374
|
+
self._persist_new_numeric_columns(df, debug=debug)
|
375
|
+
|
372
376
|
if debug:
|
373
377
|
dprint(
|
374
378
|
"DataFrame to sync:\n"
|
@@ -554,14 +558,15 @@ def exists(
|
|
554
558
|
|
555
559
|
|
556
560
|
def filter_existing(
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
561
|
+
self,
|
562
|
+
df: 'pd.DataFrame',
|
563
|
+
safe_copy: bool = True,
|
564
|
+
date_bound_only: bool = False,
|
565
|
+
include_unchanged_columns: bool = False,
|
566
|
+
chunksize: Optional[int] = -1,
|
567
|
+
debug: bool = False,
|
568
|
+
**kw
|
569
|
+
) -> Tuple['pd.DataFrame', 'pd.DataFrame', 'pd.DataFrame']:
|
565
570
|
"""
|
566
571
|
Inspect a dataframe and filter out rows which already exist in the pipe.
|
567
572
|
|
@@ -569,7 +574,7 @@ def filter_existing(
|
|
569
574
|
----------
|
570
575
|
df: 'pd.DataFrame'
|
571
576
|
The dataframe to inspect and filter.
|
572
|
-
|
577
|
+
|
573
578
|
safe_copy: bool, default True
|
574
579
|
If `True`, create a copy before comparing and modifying the dataframes.
|
575
580
|
Setting to `False` may mutate the DataFrames.
|
@@ -578,6 +583,10 @@ def filter_existing(
|
|
578
583
|
date_bound_only: bool, default False
|
579
584
|
If `True`, only use the datetime index to fetch the sample dataframe.
|
580
585
|
|
586
|
+
include_unchanged_columns: bool, default False
|
587
|
+
If `True`, include the backtrack columns which haven't changed in the update dataframe.
|
588
|
+
This is useful if you can't update individual keys.
|
589
|
+
|
581
590
|
chunksize: Optional[int], default -1
|
582
591
|
The `chunksize` used when fetching existing data.
|
583
592
|
|
@@ -605,7 +614,7 @@ def filter_existing(
|
|
605
614
|
from meerschaum.config import get_config
|
606
615
|
pd = import_pandas()
|
607
616
|
pandas = attempt_import('pandas')
|
608
|
-
if
|
617
|
+
if 'dataframe' not in str(type(df)).lower():
|
609
618
|
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
610
619
|
is_dask = 'dask' in df.__module__
|
611
620
|
if is_dask:
|
@@ -615,8 +624,21 @@ def filter_existing(
|
|
615
624
|
else:
|
616
625
|
merge = pd.merge
|
617
626
|
NA = pd.NA
|
627
|
+
|
628
|
+
def get_empty_df():
|
629
|
+
empty_df = pd.DataFrame([])
|
630
|
+
dtypes = dict(df.dtypes) if df is not None else {}
|
631
|
+
dtypes.update(self.dtypes)
|
632
|
+
pd_dtypes = {
|
633
|
+
col: to_pandas_dtype(str(typ))
|
634
|
+
for col, typ in dtypes.items()
|
635
|
+
}
|
636
|
+
return add_missing_cols_to_df(empty_df, pd_dtypes)
|
637
|
+
|
618
638
|
if df is None:
|
619
|
-
|
639
|
+
empty_df = get_empty_df()
|
640
|
+
return empty_df, empty_df, empty_df
|
641
|
+
|
620
642
|
if (df.empty if not is_dask else len(df) == 0):
|
621
643
|
return df, df, df
|
622
644
|
|
@@ -633,7 +655,7 @@ def filter_existing(
|
|
633
655
|
if min_dt_val is not None and 'datetime' in str(dt_type)
|
634
656
|
else min_dt_val
|
635
657
|
)
|
636
|
-
except Exception
|
658
|
+
except Exception:
|
637
659
|
min_dt = None
|
638
660
|
if not ('datetime' in str(type(min_dt))) or str(min_dt) == 'NaT':
|
639
661
|
if 'int' not in str(type(min_dt)).lower():
|
@@ -643,7 +665,7 @@ def filter_existing(
|
|
643
665
|
begin = (
|
644
666
|
round_time(
|
645
667
|
min_dt,
|
646
|
-
to
|
668
|
+
to='down'
|
647
669
|
) - timedelta(minutes=1)
|
648
670
|
)
|
649
671
|
elif dt_type and 'int' in dt_type.lower():
|
@@ -661,7 +683,7 @@ def filter_existing(
|
|
661
683
|
if max_dt_val is not None and 'datetime' in str(dt_type)
|
662
684
|
else max_dt_val
|
663
685
|
)
|
664
|
-
except Exception
|
686
|
+
except Exception:
|
665
687
|
import traceback
|
666
688
|
traceback.print_exc()
|
667
689
|
max_dt = None
|
@@ -674,14 +696,14 @@ def filter_existing(
|
|
674
696
|
end = (
|
675
697
|
round_time(
|
676
698
|
max_dt,
|
677
|
-
to
|
699
|
+
to='down'
|
678
700
|
) + timedelta(minutes=1)
|
679
701
|
)
|
680
702
|
elif dt_type and 'int' in dt_type.lower():
|
681
703
|
end = max_dt + 1
|
682
704
|
|
683
705
|
if max_dt is not None and min_dt is not None and min_dt > max_dt:
|
684
|
-
warn(
|
706
|
+
warn("Detected minimum datetime greater than maximum datetime.")
|
685
707
|
|
686
708
|
if begin is not None and end is not None and begin > end:
|
687
709
|
if isinstance(begin, datetime):
|
@@ -710,13 +732,18 @@ def filter_existing(
|
|
710
732
|
dprint(f"Looking at data between '{begin}' and '{end}':", **kw)
|
711
733
|
|
712
734
|
backtrack_df = self.get_data(
|
713
|
-
begin
|
714
|
-
end
|
715
|
-
chunksize
|
716
|
-
params
|
717
|
-
debug
|
735
|
+
begin=begin,
|
736
|
+
end=end,
|
737
|
+
chunksize=chunksize,
|
738
|
+
params=params,
|
739
|
+
debug=debug,
|
718
740
|
**kw
|
719
741
|
)
|
742
|
+
if backtrack_df is None:
|
743
|
+
if debug:
|
744
|
+
dprint(f"No backtrack data was found for {self}.")
|
745
|
+
return df, get_empty_df(), df
|
746
|
+
|
720
747
|
if debug:
|
721
748
|
dprint(f"Existing data for {self}:\n" + str(backtrack_df), **kw)
|
722
749
|
dprint(f"Existing dtypes for {self}:\n" + str(backtrack_df.dtypes))
|
@@ -743,18 +770,19 @@ def filter_existing(
|
|
743
770
|
filter_unseen_df(
|
744
771
|
backtrack_df,
|
745
772
|
df,
|
746
|
-
dtypes
|
773
|
+
dtypes={
|
747
774
|
col: to_pandas_dtype(typ)
|
748
775
|
for col, typ in self_dtypes.items()
|
749
776
|
},
|
750
|
-
safe_copy
|
751
|
-
debug
|
777
|
+
safe_copy=safe_copy,
|
778
|
+
debug=debug
|
752
779
|
),
|
753
780
|
on_cols_dtypes,
|
754
781
|
)
|
755
782
|
|
756
783
|
### Cast dicts or lists to strings so we can merge.
|
757
784
|
serializer = functools.partial(json.dumps, sort_keys=True, separators=(',', ':'), default=str)
|
785
|
+
|
758
786
|
def deserializer(x):
|
759
787
|
return json.loads(x) if isinstance(x, str) else x
|
760
788
|
|
@@ -767,12 +795,12 @@ def filter_existing(
|
|
767
795
|
casted_cols = set(unhashable_delta_cols + unhashable_backtrack_cols)
|
768
796
|
|
769
797
|
joined_df = merge(
|
770
|
-
delta_df.fillna(NA),
|
771
|
-
backtrack_df.fillna(NA),
|
772
|
-
how
|
773
|
-
on
|
774
|
-
indicator
|
775
|
-
suffixes
|
798
|
+
delta_df.infer_objects(copy=False).fillna(NA),
|
799
|
+
backtrack_df.infer_objects(copy=False).fillna(NA),
|
800
|
+
how='left',
|
801
|
+
on=on_cols,
|
802
|
+
indicator=True,
|
803
|
+
suffixes=('', '_old'),
|
776
804
|
) if on_cols else delta_df
|
777
805
|
for col in casted_cols:
|
778
806
|
if col in joined_df.columns:
|
@@ -782,20 +810,13 @@ def filter_existing(
|
|
782
810
|
|
783
811
|
### Determine which rows are completely new.
|
784
812
|
new_rows_mask = (joined_df['_merge'] == 'left_only') if on_cols else None
|
785
|
-
cols = list(
|
813
|
+
cols = list(delta_df.columns)
|
786
814
|
|
787
815
|
unseen_df = (
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
.reset_index(drop=True)
|
793
|
-
) if not is_dask else (
|
794
|
-
joined_df
|
795
|
-
.where(new_rows_mask)
|
796
|
-
.dropna(how='all')[cols]
|
797
|
-
.reset_index(drop=True)
|
798
|
-
)
|
816
|
+
joined_df
|
817
|
+
.where(new_rows_mask)
|
818
|
+
.dropna(how='all')[cols]
|
819
|
+
.reset_index(drop=True)
|
799
820
|
) if on_cols else delta_df
|
800
821
|
|
801
822
|
### Rows that have already been inserted but values have changed.
|
@@ -804,20 +825,33 @@ def filter_existing(
|
|
804
825
|
.where(~new_rows_mask)
|
805
826
|
.dropna(how='all')[cols]
|
806
827
|
.reset_index(drop=True)
|
807
|
-
) if on_cols else
|
828
|
+
) if on_cols else get_empty_df()
|
829
|
+
|
830
|
+
if include_unchanged_columns and on_cols:
|
831
|
+
unchanged_backtrack_cols = [
|
832
|
+
col
|
833
|
+
for col in backtrack_df.columns
|
834
|
+
if col in on_cols or col not in update_df.columns
|
835
|
+
]
|
836
|
+
update_df = merge(
|
837
|
+
backtrack_df[unchanged_backtrack_cols],
|
838
|
+
update_df,
|
839
|
+
how='inner',
|
840
|
+
on=on_cols,
|
841
|
+
)
|
808
842
|
|
809
843
|
return unseen_df, update_df, delta_df
|
810
844
|
|
811
845
|
|
812
846
|
@staticmethod
|
813
847
|
def _get_chunk_label(
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
848
|
+
chunk: Union[
|
849
|
+
'pd.DataFrame',
|
850
|
+
List[Dict[str, Any]],
|
851
|
+
Dict[str, List[Any]]
|
852
|
+
],
|
853
|
+
dt_col: str,
|
854
|
+
) -> str:
|
821
855
|
"""
|
822
856
|
Return the min - max label for the chunk.
|
823
857
|
"""
|
@@ -870,3 +904,52 @@ def get_num_workers(self, workers: Optional[int] = None) -> int:
|
|
870
904
|
(desired_workers - current_num_connections),
|
871
905
|
1,
|
872
906
|
)
|
907
|
+
|
908
|
+
|
909
|
+
def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
    """
    Record newly detected `numeric` columns in this pipe's `dtypes` parameter.

    Parameters
    ----------
    df:
        The dataframe to inspect with `get_numeric_cols()`.

    debug: bool, default False
        Verbosity toggle, passed through to `Pipe.edit()`.

    Returns
    -------
    A SuccessTuple: `(True, "Success")` when there is nothing new to record
    or the pipe is temporary, otherwise the result of `Pipe.edit()`.
    """
    from meerschaum.utils.dataframe import get_numeric_cols

    detected_cols = get_numeric_cols(df)
    known_cols = [
        col
        for col, typ in self.dtypes.items()
        if typ == 'numeric'
    ]
    ### Nothing to do if every detected column is already registered as numeric.
    if all(col in known_cols for col in detected_cols):
        return True, "Success"

    pipe_dtypes = self.parameters.get('dtypes', {})
    for col in detected_cols:
        pipe_dtypes[col] = 'numeric'
    self.parameters['dtypes'] = pipe_dtypes

    ### Temporary pipes only keep the change in memory.
    if self.temporary:
        return True, "Success"

    edit_success, edit_msg = self.edit(interactive=False, debug=debug)
    if not edit_success:
        warn(f"Unable to update NUMERIC dtypes for {self}:\n{edit_msg}")

    return edit_success, edit_msg
|
931
|
+
|
932
|
+
|
933
|
+
def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
    """
    Record newly detected `json` columns in this pipe's `dtypes` parameter.

    Parameters
    ----------
    df:
        The dataframe to inspect with `get_json_cols()`.

    debug: bool, default False
        Verbosity toggle, passed through to `Pipe.edit()`.

    Returns
    -------
    A SuccessTuple: `(True, "Success")` when there is nothing new to record
    or the pipe is temporary, otherwise the result of `Pipe.edit()`.
    """
    from meerschaum.utils.dataframe import get_json_cols

    detected_cols = get_json_cols(df)
    known_cols = [
        col
        for col, typ in self.dtypes.items()
        if typ == 'json'
    ]
    ### Nothing to do if every detected column is already registered as JSON.
    if all(col in known_cols for col in detected_cols):
        return True, "Success"

    pipe_dtypes = self.parameters.get('dtypes', {})
    for col in detected_cols:
        pipe_dtypes[col] = 'json'
    self.parameters['dtypes'] = pipe_dtypes

    ### Temporary pipes only keep the change in memory.
    if self.temporary:
        return True, "Success"

    edit_success, edit_msg = self.edit(interactive=False, debug=debug)
    if not edit_success:
        warn(f"Unable to update JSON dtypes for {self}:\n{edit_msg}")

    return edit_success, edit_msg
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -12,17 +12,17 @@ from meerschaum.utils.warnings import warn, info
|
|
12
12
|
from meerschaum.utils.debug import dprint
|
13
13
|
|
14
14
|
def verify(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
15
|
+
self,
|
16
|
+
begin: Union[datetime, int, None] = None,
|
17
|
+
end: Union[datetime, int, None] = None,
|
18
|
+
params: Optional[Dict[str, Any]] = None,
|
19
|
+
chunk_interval: Union[timedelta, int, None] = None,
|
20
|
+
bounded: Optional[bool] = None,
|
21
|
+
deduplicate: bool = False,
|
22
|
+
workers: Optional[int] = None,
|
23
|
+
debug: bool = False,
|
24
|
+
**kwargs: Any
|
25
|
+
) -> SuccessTuple:
|
26
26
|
"""
|
27
27
|
Verify the contents of the pipe by resyncing its interval.
|
28
28
|
|
meerschaum/core/User/_User.py
CHANGED
@@ -11,7 +11,7 @@ import os
|
|
11
11
|
import hashlib
|
12
12
|
import hmac
|
13
13
|
from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
|
14
|
-
from meerschaum.utils.typing import Optional, Dict, Any,
|
14
|
+
from meerschaum.utils.typing import Optional, Dict, Any, Union
|
15
15
|
from meerschaum.config.static import STATIC_CONFIG
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
@@ -19,10 +19,10 @@ from meerschaum.utils.warnings import warn
|
|
19
19
|
__all__ = ('hash_password', 'verify_password', 'User')
|
20
20
|
|
21
21
|
def hash_password(
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
password: str,
|
23
|
+
salt: Optional[bytes] = None,
|
24
|
+
rounds: Optional[int] = None,
|
25
|
+
) -> str:
|
26
26
|
"""
|
27
27
|
Return an encoded hash string from the given password.
|
28
28
|
|
@@ -68,9 +68,9 @@ def hash_password(
|
|
68
68
|
|
69
69
|
|
70
70
|
def verify_password(
|
71
|
-
|
72
|
-
|
73
|
-
|
71
|
+
password: str,
|
72
|
+
password_hash: str,
|
73
|
+
) -> bool:
|
74
74
|
"""
|
75
75
|
Return `True` if the password matches the provided hash.
|
76
76
|
|
@@ -197,26 +197,28 @@ class User:
|
|
197
197
|
return self._attributes
|
198
198
|
|
199
199
|
@property
|
200
|
-
def instance_connector(self) ->
|
201
|
-
""" """
|
200
|
+
def instance_connector(self) -> 'mrsm.connectors.Connector':
|
202
201
|
from meerschaum.connectors.parse import parse_instance_keys
|
203
202
|
if '_instance_connector' not in self.__dict__:
|
204
203
|
self._instance_connector = parse_instance_keys(self._instance_keys)
|
205
204
|
return self._instance_connector
|
206
205
|
|
207
206
|
@property
|
208
|
-
def user_id(self) -> int:
|
207
|
+
def user_id(self) -> Union[int, str, None]:
|
209
208
|
"""NOTE: This causes recursion with the API,
|
210
209
|
so don't try to get fancy with read-only attributes.
|
211
210
|
"""
|
212
211
|
return self._user_id
|
213
212
|
|
214
213
|
@user_id.setter
|
215
|
-
def user_id(self, user_id):
|
214
|
+
def user_id(self, user_id: Union[int, str, None]):
|
216
215
|
self._user_id = user_id
|
217
216
|
|
218
217
|
@property
|
219
218
|
def password_hash(self):
|
219
|
+
"""
|
220
|
+
Return the hash of the user's password.
|
221
|
+
"""
|
220
222
|
_password_hash = self.__dict__.get('_password_hash', None)
|
221
223
|
if _password_hash is not None:
|
222
224
|
return _password_hash
|