meerschaum 2.6.17__py3-none-any.whl → 2.7.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/install.py +1 -2
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +122 -78
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +174 -64
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/METADATA +1 -1
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/RECORD +34 -34
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -106,6 +106,7 @@ class Pipe:
|
|
106
106
|
upsert,
|
107
107
|
static,
|
108
108
|
tzinfo,
|
109
|
+
enforce,
|
109
110
|
get_columns,
|
110
111
|
get_columns_types,
|
111
112
|
get_columns_indices,
|
@@ -132,6 +133,7 @@ class Pipe:
|
|
132
133
|
_persist_new_json_columns,
|
133
134
|
_persist_new_numeric_columns,
|
134
135
|
_persist_new_uuid_columns,
|
136
|
+
_persist_new_bytes_columns,
|
135
137
|
)
|
136
138
|
from ._verify import (
|
137
139
|
verify,
|
@@ -162,12 +164,14 @@ class Pipe:
|
|
162
164
|
upsert: Optional[bool] = None,
|
163
165
|
autoincrement: Optional[bool] = None,
|
164
166
|
static: Optional[bool] = None,
|
167
|
+
enforce: Optional[bool] = None,
|
165
168
|
mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
|
166
169
|
cache: bool = False,
|
167
170
|
debug: bool = False,
|
168
171
|
connector_keys: Optional[str] = None,
|
169
172
|
metric_key: Optional[str] = None,
|
170
173
|
location_key: Optional[str] = None,
|
174
|
+
instance_keys: Optional[str] = None,
|
171
175
|
indexes: Union[Dict[str, str], List[str], None] = None,
|
172
176
|
):
|
173
177
|
"""
|
@@ -219,6 +223,10 @@ class Pipe:
|
|
219
223
|
static: Optional[bool], default None
|
220
224
|
If `True`, set `static` in the parameters.
|
221
225
|
|
226
|
+
enforce: Optional[bool], default None
|
227
|
+
If `False`, skip data type enforcement.
|
228
|
+
Default behavior is `True`.
|
229
|
+
|
222
230
|
temporary: bool, default False
|
223
231
|
If `True`, prevent instance tables (pipes, users, plugins) from being created.
|
224
232
|
|
@@ -319,11 +327,13 @@ class Pipe:
|
|
319
327
|
if isinstance(static, bool):
|
320
328
|
self._attributes['parameters']['static'] = static
|
321
329
|
|
330
|
+
if isinstance(enforce, bool):
|
331
|
+
self._attributes['parameters']['enforce'] = enforce
|
332
|
+
|
322
333
|
### NOTE: The parameters dictionary is {} by default.
|
323
334
|
### A Pipe may be registered without parameters, then edited,
|
324
335
|
### or a Pipe may be registered with parameters set in-memory first.
|
325
|
-
|
326
|
-
_mrsm_instance = mrsm_instance if mrsm_instance is not None else instance
|
336
|
+
_mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
|
327
337
|
if _mrsm_instance is None:
|
328
338
|
_mrsm_instance = get_config('meerschaum', 'instance', patch=True)
|
329
339
|
|
@@ -341,10 +351,10 @@ class Pipe:
|
|
341
351
|
Return the four keys needed to reconstruct this pipe.
|
342
352
|
"""
|
343
353
|
return {
|
344
|
-
'
|
345
|
-
'
|
346
|
-
'
|
347
|
-
'
|
354
|
+
'connector_keys': self.connector_keys,
|
355
|
+
'metric_key': self.metric_key,
|
356
|
+
'location_key': self.location_key,
|
357
|
+
'instance_keys': self.instance_keys,
|
348
358
|
}
|
349
359
|
|
350
360
|
def keys(self) -> List[str]:
|
@@ -385,7 +395,7 @@ class Pipe:
|
|
385
395
|
warnings.simplefilter('ignore')
|
386
396
|
try:
|
387
397
|
conn = parse_instance_keys(self.connector_keys)
|
388
|
-
except Exception
|
398
|
+
except Exception:
|
389
399
|
conn = None
|
390
400
|
if conn:
|
391
401
|
self._connector = conn
|
@@ -429,7 +439,7 @@ class Pipe:
|
|
429
439
|
_fetch_patch = {
|
430
440
|
'fetch': ({
|
431
441
|
'definition': (
|
432
|
-
|
442
|
+
"SELECT * FROM "
|
433
443
|
+ sql_item_name(
|
434
444
|
str(self.target),
|
435
445
|
self.instance_connector.flavor,
|
@@ -467,7 +477,7 @@ class Pipe:
|
|
467
477
|
and self.location_key == other.location_key
|
468
478
|
and self.instance_keys == other.instance_keys
|
469
479
|
)
|
470
|
-
except Exception
|
480
|
+
except Exception:
|
471
481
|
return False
|
472
482
|
|
473
483
|
def __hash__(self):
|
@@ -496,11 +506,11 @@ class Pipe:
|
|
496
506
|
Define the state dictionary (pickling).
|
497
507
|
"""
|
498
508
|
return {
|
499
|
-
'
|
500
|
-
'
|
501
|
-
'
|
509
|
+
'connector_keys': self.connector_keys,
|
510
|
+
'metric_key': self.metric_key,
|
511
|
+
'location_key': self.location_key,
|
502
512
|
'parameters': self.parameters,
|
503
|
-
'
|
513
|
+
'instance_keys': self.instance_keys,
|
504
514
|
}
|
505
515
|
|
506
516
|
def __setstate__(self, _state: Dict[str, Any]):
|
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
|
|
200
200
|
If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
|
201
201
|
"""
|
202
202
|
from meerschaum.config._patch import apply_patch_to_config
|
203
|
+
from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
|
203
204
|
configured_dtypes = self.parameters.get('dtypes', {})
|
204
205
|
remote_dtypes = self.infer_dtypes(persist=False)
|
205
206
|
patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
|
206
|
-
return
|
207
|
+
return {
|
208
|
+
col: MRSM_ALIAS_DTYPES.get(typ, typ)
|
209
|
+
for col, typ in patched_dtypes.items()
|
210
|
+
if col and typ
|
211
|
+
}
|
207
212
|
|
208
213
|
|
209
214
|
@dtypes.setter
|
@@ -289,6 +294,25 @@ def tzinfo(self) -> Union[None, timezone]:
|
|
289
294
|
return None
|
290
295
|
|
291
296
|
|
297
|
+
@property
|
298
|
+
def enforce(self) -> bool:
|
299
|
+
"""
|
300
|
+
Return the `enforce` parameter for the pipe.
|
301
|
+
"""
|
302
|
+
if 'enforce' not in self.parameters:
|
303
|
+
self.parameters['enforce'] = True
|
304
|
+
|
305
|
+
return self.parameters['enforce']
|
306
|
+
|
307
|
+
|
308
|
+
@enforce.setter
|
309
|
+
def enforce(self, _enforce: bool) -> None:
|
310
|
+
"""
|
311
|
+
Set the `enforce` parameter for the pipe.
|
312
|
+
"""
|
313
|
+
self.parameters['_enforce'] = _enforce
|
314
|
+
|
315
|
+
|
292
316
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
293
317
|
"""
|
294
318
|
Check if the requested columns are defined.
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
pd = mrsm.attempt_import('pandas')
|
17
17
|
|
18
|
+
|
18
19
|
def enforce_dtypes(
|
19
20
|
self,
|
20
21
|
df: 'pd.DataFrame',
|
@@ -30,7 +31,7 @@ def enforce_dtypes(
|
|
30
31
|
from meerschaum.utils.warnings import warn
|
31
32
|
from meerschaum.utils.debug import dprint
|
32
33
|
from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
|
33
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
34
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
|
34
35
|
from meerschaum.utils.packages import import_pandas
|
35
36
|
pd = import_pandas(debug=debug)
|
36
37
|
if df is None:
|
@@ -41,7 +42,11 @@ def enforce_dtypes(
|
|
41
42
|
)
|
42
43
|
return df
|
43
44
|
|
44
|
-
pipe_dtypes = self.dtypes
|
45
|
+
pipe_dtypes = self.dtypes if self.enforce else {
|
46
|
+
col: typ
|
47
|
+
for col, typ in self.dtypes.items()
|
48
|
+
if typ in MRSM_PD_DTYPES
|
49
|
+
}
|
45
50
|
|
46
51
|
try:
|
47
52
|
if isinstance(df, str):
|
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
105
110
|
A dictionary of strings containing the pandas data types for this Pipe.
|
106
111
|
"""
|
107
112
|
if not self.exists(debug=debug):
|
108
|
-
|
109
|
-
if not self.columns:
|
110
|
-
return {}
|
111
|
-
dt_col = self.columns.get('datetime', None)
|
112
|
-
if dt_col:
|
113
|
-
if not self.parameters.get('dtypes', {}).get(dt_col, None):
|
114
|
-
dtypes[dt_col] = 'datetime64[ns, UTC]'
|
115
|
-
return dtypes
|
113
|
+
return {}
|
116
114
|
|
117
115
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
118
116
|
from meerschaum.utils.dtypes import to_pandas_dtype
|
119
|
-
columns_types = self.get_columns_types(debug=debug)
|
120
117
|
|
121
118
|
### NOTE: get_columns_types() may return either the types as
|
122
119
|
### PostgreSQL- or Pandas-style.
|
123
|
-
|
120
|
+
columns_types = self.get_columns_types(debug=debug)
|
121
|
+
|
122
|
+
remote_pd_dtypes = {
|
124
123
|
c: (
|
125
124
|
get_pd_type_from_db_type(t, allow_custom_dtypes=True)
|
126
125
|
if str(t).isupper()
|
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
|
|
128
127
|
)
|
129
128
|
for c, t in columns_types.items()
|
130
129
|
} if columns_types else {}
|
131
|
-
if persist:
|
132
|
-
|
133
|
-
|
134
|
-
|
130
|
+
if not persist:
|
131
|
+
return remote_pd_dtypes
|
132
|
+
|
133
|
+
dtypes = self.parameters.get('dtypes', {})
|
134
|
+
dtypes.update({
|
135
|
+
col: typ
|
136
|
+
for col, typ in remote_pd_dtypes.items()
|
137
|
+
if col not in dtypes
|
138
|
+
})
|
139
|
+
self.dtypes = dtypes
|
140
|
+
self.edit(interactive=False, debug=debug)
|
141
|
+
return remote_pd_dtypes
|
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -161,7 +161,7 @@ def sync(
|
|
161
161
|
self._exists = None
|
162
162
|
|
163
163
|
def _sync(
|
164
|
-
p:
|
164
|
+
p: mrsm.Pipe,
|
165
165
|
df: Union[
|
166
166
|
'pd.DataFrame',
|
167
167
|
Dict[str, List[Any]],
|
@@ -368,10 +368,11 @@ def sync(
|
|
368
368
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
369
369
|
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
370
370
|
|
371
|
-
### Capture `numeric`, `uuid`, and `
|
371
|
+
### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
|
372
372
|
self._persist_new_json_columns(df, debug=debug)
|
373
373
|
self._persist_new_numeric_columns(df, debug=debug)
|
374
374
|
self._persist_new_uuid_columns(df, debug=debug)
|
375
|
+
self._persist_new_bytes_columns(df, debug=debug)
|
375
376
|
|
376
377
|
if debug:
|
377
378
|
dprint(
|
@@ -617,11 +618,13 @@ def filter_existing(
|
|
617
618
|
filter_unseen_df,
|
618
619
|
add_missing_cols_to_df,
|
619
620
|
get_unhashable_cols,
|
620
|
-
get_numeric_cols,
|
621
621
|
)
|
622
622
|
from meerschaum.utils.dtypes import (
|
623
623
|
to_pandas_dtype,
|
624
624
|
none_if_null,
|
625
|
+
to_datetime,
|
626
|
+
are_dtypes_equal,
|
627
|
+
value_is_null,
|
625
628
|
)
|
626
629
|
from meerschaum.config import get_config
|
627
630
|
pd = import_pandas()
|
@@ -669,29 +672,36 @@ def filter_existing(
|
|
669
672
|
### begin is the oldest data in the new dataframe
|
670
673
|
begin, end = None, None
|
671
674
|
dt_col = pipe_columns.get('datetime', None)
|
675
|
+
primary_key = pipe_columns.get('primary', None)
|
672
676
|
dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
|
677
|
+
|
678
|
+
if autoincrement and primary_key == dt_col and dt_col not in df.columns:
|
679
|
+
if enforce_dtypes:
|
680
|
+
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
681
|
+
return df, get_empty_df(), df
|
682
|
+
|
673
683
|
try:
|
674
|
-
min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
|
684
|
+
min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
|
675
685
|
if is_dask and min_dt_val is not None:
|
676
686
|
min_dt_val = min_dt_val.compute()
|
677
687
|
min_dt = (
|
678
|
-
|
679
|
-
if min_dt_val is not None and 'datetime'
|
688
|
+
to_datetime(min_dt_val, as_pydatetime=True)
|
689
|
+
if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
|
680
690
|
else min_dt_val
|
681
691
|
)
|
682
692
|
except Exception:
|
683
693
|
min_dt = None
|
684
|
-
|
685
|
-
|
694
|
+
|
695
|
+
if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
|
696
|
+
if not are_dtypes_equal('int', str(type(min_dt))):
|
686
697
|
min_dt = None
|
687
698
|
|
688
699
|
if isinstance(min_dt, datetime):
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
)
|
700
|
+
rounded_min_dt = round_time(min_dt, to='down')
|
701
|
+
try:
|
702
|
+
begin = rounded_min_dt - timedelta(minutes=1)
|
703
|
+
except OverflowError:
|
704
|
+
begin = rounded_min_dt
|
695
705
|
elif dt_type and 'int' in dt_type.lower():
|
696
706
|
begin = min_dt
|
697
707
|
elif dt_col is None:
|
@@ -699,11 +709,11 @@ def filter_existing(
|
|
699
709
|
|
700
710
|
### end is the newest data in the new dataframe
|
701
711
|
try:
|
702
|
-
max_dt_val = df[dt_col].max(skipna=True) if dt_col else None
|
712
|
+
max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
|
703
713
|
if is_dask and max_dt_val is not None:
|
704
714
|
max_dt_val = max_dt_val.compute()
|
705
715
|
max_dt = (
|
706
|
-
|
716
|
+
to_datetime(max_dt_val, as_pydatetime=True)
|
707
717
|
if max_dt_val is not None and 'datetime' in str(dt_type)
|
708
718
|
else max_dt_val
|
709
719
|
)
|
@@ -712,8 +722,8 @@ def filter_existing(
|
|
712
722
|
traceback.print_exc()
|
713
723
|
max_dt = None
|
714
724
|
|
715
|
-
if ('datetime'
|
716
|
-
if 'int'
|
725
|
+
if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
|
726
|
+
if not are_dtypes_equal('int', str(type(max_dt))):
|
717
727
|
max_dt = None
|
718
728
|
|
719
729
|
if isinstance(max_dt, datetime):
|
@@ -723,7 +733,7 @@ def filter_existing(
|
|
723
733
|
to='down'
|
724
734
|
) + timedelta(minutes=1)
|
725
735
|
)
|
726
|
-
elif dt_type and 'int' in dt_type.lower():
|
736
|
+
elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
|
727
737
|
end = max_dt + 1
|
728
738
|
|
729
739
|
if max_dt is not None and min_dt is not None and min_dt > max_dt:
|
@@ -738,7 +748,7 @@ def filter_existing(
|
|
738
748
|
|
739
749
|
unique_index_vals = {
|
740
750
|
col: df[col].unique()
|
741
|
-
for col in pipe_columns
|
751
|
+
for col in (pipe_columns if not primary_key else [primary_key])
|
742
752
|
if col in df.columns and col != dt_col
|
743
753
|
} if not date_bound_only else {}
|
744
754
|
filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
|
@@ -777,14 +787,15 @@ def filter_existing(
|
|
777
787
|
|
778
788
|
### Separate new rows from changed ones.
|
779
789
|
on_cols = [
|
780
|
-
col
|
790
|
+
col
|
791
|
+
for col_key, col in pipe_columns.items()
|
781
792
|
if (
|
782
793
|
col
|
783
794
|
and
|
784
795
|
col_key != 'value'
|
785
796
|
and col in backtrack_df.columns
|
786
797
|
)
|
787
|
-
]
|
798
|
+
] if not primary_key else [primary_key]
|
788
799
|
self_dtypes = self.dtypes
|
789
800
|
on_cols_dtypes = {
|
790
801
|
col: to_pandas_dtype(typ)
|
@@ -949,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
949
960
|
return True, "Success"
|
950
961
|
|
951
962
|
self._attributes_sync_time = None
|
952
|
-
dt_col = self.columns.get('datetime', None)
|
953
963
|
dtypes = self.parameters.get('dtypes', {})
|
954
|
-
if dt_col not in dtypes:
|
955
|
-
dtypes[dt_col] = 'datetime'
|
956
964
|
dtypes.update({col: 'numeric' for col in numeric_cols})
|
957
965
|
self.parameters['dtypes'] = dtypes
|
958
966
|
if not self.temporary:
|
@@ -977,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
977
985
|
return True, "Success"
|
978
986
|
|
979
987
|
self._attributes_sync_time = None
|
980
|
-
dt_col = self.columns.get('datetime', None)
|
981
988
|
dtypes = self.parameters.get('dtypes', {})
|
982
|
-
if dt_col not in dtypes:
|
983
|
-
dtypes[dt_col] = 'datetime'
|
984
989
|
dtypes.update({col: 'uuid' for col in uuid_cols})
|
985
990
|
self.parameters['dtypes'] = dtypes
|
986
991
|
if not self.temporary:
|
@@ -1005,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1005
1010
|
return True, "Success"
|
1006
1011
|
|
1007
1012
|
self._attributes_sync_time = None
|
1008
|
-
dt_col = self.columns.get('datetime', None)
|
1009
1013
|
dtypes = self.parameters.get('dtypes', {})
|
1010
|
-
if dt_col not in dtypes:
|
1011
|
-
dtypes[dt_col] = 'datetime'
|
1012
1014
|
dtypes.update({col: 'json' for col in json_cols})
|
1013
1015
|
self.parameters['dtypes'] = dtypes
|
1014
1016
|
|
@@ -1020,3 +1022,29 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1020
1022
|
return edit_success, edit_msg
|
1021
1023
|
|
1022
1024
|
return True, "Success"
|
1025
|
+
|
1026
|
+
|
1027
|
+
def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1028
|
+
"""
|
1029
|
+
Check for new `bytes` columns and update the parameters.
|
1030
|
+
"""
|
1031
|
+
from meerschaum.utils.dataframe import get_bytes_cols
|
1032
|
+
bytes_cols = get_bytes_cols(df)
|
1033
|
+
existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
|
1034
|
+
new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
|
1035
|
+
if not new_bytes_cols:
|
1036
|
+
return True, "Success"
|
1037
|
+
|
1038
|
+
self._attributes_sync_time = None
|
1039
|
+
dtypes = self.parameters.get('dtypes', {})
|
1040
|
+
dtypes.update({col: 'bytes' for col in bytes_cols})
|
1041
|
+
self.parameters['dtypes'] = dtypes
|
1042
|
+
|
1043
|
+
if not self.temporary:
|
1044
|
+
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1045
|
+
if not edit_success:
|
1046
|
+
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1047
|
+
|
1048
|
+
return edit_success, edit_msg
|
1049
|
+
|
1050
|
+
return True, "Success"
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -7,9 +7,10 @@ Verify the contents of a pipe by resyncing its interval.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from datetime import datetime, timedelta
|
10
|
-
|
10
|
+
|
11
|
+
import meerschaum as mrsm
|
12
|
+
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
|
11
13
|
from meerschaum.utils.warnings import warn, info
|
12
|
-
from meerschaum.utils.debug import dprint
|
13
14
|
|
14
15
|
|
15
16
|
def verify(
|
@@ -94,9 +95,6 @@ def verify(
|
|
94
95
|
else 1
|
95
96
|
)
|
96
97
|
|
97
|
-
sync_less_than_begin = not bounded and begin is None
|
98
|
-
sync_greater_than_end = not bounded and end is None
|
99
|
-
|
100
98
|
cannot_determine_bounds = not self.exists(debug=debug)
|
101
99
|
|
102
100
|
if cannot_determine_bounds:
|
@@ -164,7 +162,7 @@ def verify(
|
|
164
162
|
)
|
165
163
|
|
166
164
|
info(
|
167
|
-
f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
|
165
|
+
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
|
168
166
|
+ f" ({'un' if not bounded else ''}bounded)"
|
169
167
|
+ f" of size '{interval_str(chunk_interval)}'"
|
170
168
|
+ f" between '{begin_to_print}' and '{end_to_print}'."
|
@@ -187,7 +185,7 @@ def verify(
|
|
187
185
|
return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
|
188
186
|
|
189
187
|
chunk_begin, chunk_end = chunk_begin_and_end
|
190
|
-
|
188
|
+
chunk_success, chunk_msg = self.sync(
|
191
189
|
begin=chunk_begin,
|
192
190
|
end=chunk_end,
|
193
191
|
params=params,
|
@@ -195,6 +193,9 @@ def verify(
|
|
195
193
|
debug=debug,
|
196
194
|
**kwargs
|
197
195
|
)
|
196
|
+
chunk_msg = chunk_msg.strip()
|
197
|
+
mrsm.pprint((chunk_success, chunk_msg))
|
198
|
+
return chunk_begin_and_end, (chunk_success, chunk_msg)
|
198
199
|
|
199
200
|
### If we have more than one chunk, attempt to sync the first one and return if it fails.
|
200
201
|
if len(chunk_bounds) > 1:
|
meerschaum/jobs/_Job.py
CHANGED
@@ -200,6 +200,8 @@ class Job:
|
|
200
200
|
if root_dir is None:
|
201
201
|
from meerschaum.config.paths import ROOT_DIR_PATH
|
202
202
|
root_dir = ROOT_DIR_PATH
|
203
|
+
else:
|
204
|
+
root_dir = pathlib.Path(root_dir)
|
203
205
|
jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
|
204
206
|
daemon_dir = jobs_dir / daemon_id
|
205
207
|
pid_file = daemon_dir / 'process.pid'
|
meerschaum/plugins/_Plugin.py
CHANGED
@@ -255,11 +255,11 @@ class Plugin:
|
|
255
255
|
|
256
256
|
|
257
257
|
def install(
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
258
|
+
self,
|
259
|
+
skip_deps: bool = False,
|
260
|
+
force: bool = False,
|
261
|
+
debug: bool = False,
|
262
|
+
) -> SuccessTuple:
|
263
263
|
"""
|
264
264
|
Extract a plugin's tar archive to the plugins directory.
|
265
265
|
|
@@ -359,7 +359,7 @@ class Plugin:
|
|
359
359
|
is_same_version = new_version and old_version and (
|
360
360
|
packaging_version.parse(old_version) == packaging_version.parse(new_version)
|
361
361
|
)
|
362
|
-
except Exception
|
362
|
+
except Exception:
|
363
363
|
is_new_version, is_same_version = True, False
|
364
364
|
|
365
365
|
### Determine where to permanently store the new plugin.
|
@@ -404,7 +404,7 @@ class Plugin:
|
|
404
404
|
dprint(f"Moving '{src_file}' to '{dst_dir}'...")
|
405
405
|
try:
|
406
406
|
shutil.move(src_file, dst_dir)
|
407
|
-
except Exception
|
407
|
+
except Exception:
|
408
408
|
success, msg = False, (
|
409
409
|
f"Failed to install plugin '{self}': " +
|
410
410
|
f"Could not move file '{src_file}' to '{dst_dir}'"
|
@@ -817,10 +817,10 @@ class Plugin:
|
|
817
817
|
|
818
818
|
|
819
819
|
def install_dependencies(
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
820
|
+
self,
|
821
|
+
force: bool = False,
|
822
|
+
debug: bool = False,
|
823
|
+
) -> bool:
|
824
824
|
"""
|
825
825
|
If specified, install dependencies.
|
826
826
|
|
@@ -841,12 +841,9 @@ class Plugin:
|
|
841
841
|
Returns
|
842
842
|
-------
|
843
843
|
A bool indicating success.
|
844
|
-
|
845
844
|
"""
|
846
845
|
from meerschaum.utils.packages import pip_install, venv_contains_package
|
847
|
-
from meerschaum.utils.debug import dprint
|
848
846
|
from meerschaum.utils.warnings import warn, info
|
849
|
-
from meerschaum.connectors.parse import parse_repo_keys
|
850
847
|
_deps = self.get_dependencies(debug=debug)
|
851
848
|
if not _deps and self.requirements_file_path is None:
|
852
849
|
return True
|
@@ -465,18 +465,25 @@ class Daemon:
|
|
465
465
|
self._write_stop_file('kill')
|
466
466
|
return True, "Process has already stopped."
|
467
467
|
|
468
|
+
psutil = attempt_import('psutil')
|
468
469
|
process = self.process
|
469
470
|
try:
|
470
471
|
process.terminate()
|
471
472
|
process.kill()
|
472
473
|
process.wait(timeout=timeout)
|
473
474
|
except Exception as e:
|
474
|
-
return False, f"Failed to kill job {self} with exception: {e}"
|
475
|
+
return False, f"Failed to kill job {self} ({process}) with exception: {e}"
|
476
|
+
|
477
|
+
try:
|
478
|
+
if process.status():
|
479
|
+
return False, "Failed to stop daemon '{self}' ({process})."
|
480
|
+
except psutil.NoSuchProcess:
|
481
|
+
pass
|
475
482
|
|
476
483
|
if self.pid_path.exists():
|
477
484
|
try:
|
478
485
|
self.pid_path.unlink()
|
479
|
-
except Exception
|
486
|
+
except Exception:
|
480
487
|
pass
|
481
488
|
|
482
489
|
self._write_stop_file('kill')
|
@@ -534,7 +541,7 @@ class Daemon:
|
|
534
541
|
if not timeout:
|
535
542
|
try:
|
536
543
|
success = self.process.status() == 'stopped'
|
537
|
-
except psutil.NoSuchProcess
|
544
|
+
except psutil.NoSuchProcess:
|
538
545
|
success = True
|
539
546
|
msg = "Success" if success else f"Failed to suspend daemon '{self.daemon_id}'."
|
540
547
|
if success:
|
@@ -677,11 +684,11 @@ class Daemon:
|
|
677
684
|
raise SystemExit(0)
|
678
685
|
|
679
686
|
def _send_signal(
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
687
|
+
self,
|
688
|
+
signal_to_send,
|
689
|
+
timeout: Union[float, int, None] = None,
|
690
|
+
check_timeout_interval: Union[float, int, None] = None,
|
691
|
+
) -> SuccessTuple:
|
685
692
|
"""Send a signal to the daemon process.
|
686
693
|
|
687
694
|
Parameters
|
@@ -709,7 +716,7 @@ class Daemon:
|
|
709
716
|
)
|
710
717
|
|
711
718
|
os.kill(pid, signal_to_send)
|
712
|
-
except Exception
|
719
|
+
except Exception:
|
713
720
|
return False, f"Failed to send signal {signal_to_send}:\n{traceback.format_exc()}"
|
714
721
|
|
715
722
|
timeout = self.get_timeout_seconds(timeout)
|
@@ -727,7 +734,7 @@ class Daemon:
|
|
727
734
|
time.sleep(check_timeout_interval)
|
728
735
|
|
729
736
|
return False, (
|
730
|
-
f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
|
737
|
+
f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
|
731
738
|
+ ('s' if timeout != 1 else '') + '.'
|
732
739
|
)
|
733
740
|
|
@@ -745,7 +752,7 @@ class Daemon:
|
|
745
752
|
if _already_exists and not allow_dirty_run:
|
746
753
|
error(
|
747
754
|
f"Daemon '{self.daemon_id}' already exists. " +
|
748
|
-
|
755
|
+
"To allow this daemon to run, do one of the following:\n"
|
749
756
|
+ " - Execute `daemon.cleanup()`.\n"
|
750
757
|
+ f" - Delete the directory '{self.path}'.\n"
|
751
758
|
+ " - Pass `allow_dirty_run=True` to `daemon.run()`.\n",
|
@@ -764,7 +771,7 @@ class Daemon:
|
|
764
771
|
if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
|
765
772
|
try:
|
766
773
|
self._process = psutil.Process(int(pid))
|
767
|
-
except Exception
|
774
|
+
except Exception:
|
768
775
|
if self.pid_path.exists():
|
769
776
|
self.pid_path.unlink()
|
770
777
|
return None
|
@@ -788,7 +795,7 @@ class Daemon:
|
|
788
795
|
if self.pid_path.exists():
|
789
796
|
try:
|
790
797
|
self.pid_path.unlink()
|
791
|
-
except Exception
|
798
|
+
except Exception:
|
792
799
|
pass
|
793
800
|
return 'stopped'
|
794
801
|
|