meerschaum 2.6.16__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +143 -91
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +4 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +22 -15
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +180 -100
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/RECORD +40 -40
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
     -------
     A serialized string for the document.
     """
+    from meerschaum.utils.dtypes import serialize_bytes
     return json.dumps(
         doc,
-        default=(
+        default=(
+            lambda x: (
+                json_serialize_datetime(x)
+                if hasattr(x, 'tzinfo')
+                else (
+                    serialize_bytes(x)
+                    if isinstance(x, bytes)
+                    else str(x)
+                )
+            )
+        ),
         separators=(',', ':'),
         sort_keys=True,
     )
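
The new `default=` callable falls through three cases: timezone-aware values are serialized as datetimes, raw `bytes` go through `serialize_bytes`, and anything else falls back to `str`. A standalone sketch of the same fallback chain, using stand-ins for the Meerschaum helpers (the base64 encoding and `isoformat()` here are assumptions, not the library's exact implementations):

    import json
    import base64
    from datetime import datetime, timezone
    from uuid import UUID

    def _serialize_bytes(b: bytes) -> str:
        # Stand-in for meerschaum.utils.dtypes.serialize_bytes (assumed base64 text).
        return base64.b64encode(b).decode('utf-8')

    def _default(x):
        # Mirror the fallback order in the diff: datetime -> bytes -> str.
        if hasattr(x, 'tzinfo'):
            return x.isoformat()   # stands in for json_serialize_datetime()
        if isinstance(x, bytes):
            return _serialize_bytes(x)
        return str(x)

    doc = {
        'ts': datetime(2024, 1, 1, tzinfo=timezone.utc),
        'payload': b'\x00\x01',
        'id': UUID('12345678-1234-5678-1234-567812345678'),
    }
    print(json.dumps(doc, default=_default, separators=(',', ':'), sort_keys=True))
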
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -106,6 +106,7 @@ class Pipe:
         upsert,
         static,
         tzinfo,
+        enforce,
         get_columns,
         get_columns_types,
         get_columns_indices,
@@ -132,6 +133,7 @@ class Pipe:
         _persist_new_json_columns,
         _persist_new_numeric_columns,
         _persist_new_uuid_columns,
+        _persist_new_bytes_columns,
     )
     from ._verify import (
         verify,
@@ -162,12 +164,14 @@ class Pipe:
         upsert: Optional[bool] = None,
         autoincrement: Optional[bool] = None,
         static: Optional[bool] = None,
+        enforce: Optional[bool] = None,
         mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
         cache: bool = False,
         debug: bool = False,
         connector_keys: Optional[str] = None,
         metric_key: Optional[str] = None,
         location_key: Optional[str] = None,
+        instance_keys: Optional[str] = None,
         indexes: Union[Dict[str, str], List[str], None] = None,
     ):
         """
@@ -219,6 +223,10 @@ class Pipe:
         static: Optional[bool], default None
             If `True`, set `static` in the parameters.

+        enforce: Optionanl[bool], default None
+            If `False`, skip data type enforcement.
+            Default behavior is `True`.
+
         temporary: bool, default False
             If `True`, prevent instance tables (pipes, users, plugins) from being created.

@@ -319,11 +327,13 @@ class Pipe:
         if isinstance(static, bool):
             self._attributes['parameters']['static'] = static

+        if isinstance(enforce, bool):
+            self._attributes['parameters']['enforce'] = enforce
+
         ### NOTE: The parameters dictionary is {} by default.
         ### A Pipe may be registered without parameters, then edited,
         ### or a Pipe may be registered with parameters set in-memory first.
-
-        _mrsm_instance = mrsm_instance if mrsm_instance is not None else instance
+        _mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
         if _mrsm_instance is None:
             _mrsm_instance = get_config('meerschaum', 'instance', patch=True)

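
With the constructor changes above, `enforce` and `instance_keys` can be passed directly when building a pipe: `instance_keys` is accepted alongside the existing `instance` / `mrsm_instance` aliases, and a boolean `enforce` is stored in the pipe's parameters. A usage sketch (the connector and metric keys are made up, and `sql:main` is assumed to be a configured instance):

    import meerschaum as mrsm

    pipe = mrsm.Pipe(
        'plugin:noaa', 'weather',
        instance_keys='sql:main',   # new alias for `instance` / `mrsm_instance`
        enforce=False,              # skip data type enforcement (defaults to True)
    )
    print(pipe.parameters.get('enforce'))   # False (stored in-memory; nothing is registered yet)
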
@@ -341,10 +351,10 @@
         Return the four keys needed to reconstruct this pipe.
         """
         return {
-            '
-            '
-            '
-            '
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
+            'instance_keys': self.instance_keys,
         }

     def keys(self) -> List[str]:
@@ -385,7 +395,7 @@ class Pipe:
             warnings.simplefilter('ignore')
             try:
                 conn = parse_instance_keys(self.connector_keys)
-            except Exception
+            except Exception:
                 conn = None
             if conn:
                 self._connector = conn
@@ -429,7 +439,7 @@ class Pipe:
         _fetch_patch = {
             'fetch': ({
                 'definition': (
-
+                    "SELECT * FROM "
                     + sql_item_name(
                         str(self.target),
                         self.instance_connector.flavor,
@@ -467,7 +477,7 @@ class Pipe:
                 and self.location_key == other.location_key
                 and self.instance_keys == other.instance_keys
             )
-        except Exception
+        except Exception:
             return False

     def __hash__(self):
@@ -496,11 +506,11 @@ class Pipe:
         Define the state dictionary (pickling).
         """
         return {
-            '
-            '
-            '
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
             'parameters': self.parameters,
-            '
+            'instance_keys': self.instance_keys,
         }

     def __setstate__(self, _state: Dict[str, Any]):
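
`__getstate__()` and the `meta` property now expose the same explicit key names (`connector_keys`, `metric_key`, `location_key`, `instance_keys`), so a pickle round trip should rebuild an equal pipe. A small sketch, assuming a default `sql:main` instance is configured and that `__setstate__` rebuilds the pipe from these keys:

    import pickle
    import meerschaum as mrsm

    pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
    clone = pickle.loads(pickle.dumps(pipe))

    assert clone == pipe                               # __eq__ compares the four keys above
    assert clone.instance_keys == pipe.instance_keys
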
meerschaum/core/Pipe/_attributes.py
CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
     If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
     """
     from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
     configured_dtypes = self.parameters.get('dtypes', {})
     remote_dtypes = self.infer_dtypes(persist=False)
     patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }


 @dtypes.setter
@@ -289,6 +294,25 @@ def tzinfo(self) -> Union[None, timezone]:
     return None


+@property
+def enforce(self) -> bool:
+    """
+    Return the `enforce` parameter for the pipe.
+    """
+    if 'enforce' not in self.parameters:
+        self.parameters['enforce'] = True
+
+    return self.parameters['enforce']
+
+
+@enforce.setter
+def enforce(self, _enforce: bool) -> None:
+    """
+    Set the `enforce` parameter for the pipe.
+    """
+    self.parameters['_enforce'] = _enforce
+
+
 def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
     """
     Check if the requested columns are defined.
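
The new `enforce` property lazily writes its default (`True`) into `parameters` on first read, while a boolean passed to the constructor (see `__init__.py` above) is returned as-is. A sketch of the expected behavior for in-memory pipes (constructing a pipe does not register it; `sql:main` is assumed to be configured):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'enforce-test', instance='sql:main')
    print(pipe.enforce)                    # True (default written into parameters on first read)
    print(pipe.parameters['enforce'])      # True

    loose_pipe = mrsm.Pipe('demo', 'loose', instance='sql:main', enforce=False)
    print(loose_pipe.enforce)              # False (set by the constructor)
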
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     pd = mrsm.attempt_import('pandas')

+
 def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
     from meerschaum.utils.packages import import_pandas
     pd = import_pandas(debug=debug)
     if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
         )
         return df

-    pipe_dtypes = self.dtypes
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }

     try:
         if isinstance(df, str):
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
     A dictionary of strings containing the pandas data types for this Pipe.
     """
     if not self.exists(debug=debug):
-
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}

     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)

     ### NOTE: get_columns_types() may return either the types as
     ### PostgreSQL- or Pandas-style.
-
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
         c: (
             get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         )
         for c, t in columns_types.items()
     } if columns_types else {}
-    if persist:
-
-
-
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -161,7 +161,7 @@ def sync(
     self._exists = None

     def _sync(
-        p:
+        p: mrsm.Pipe,
         df: Union[
             'pd.DataFrame',
             Dict[str, List[Any]],
@@ -368,10 +368,11 @@ def sync(
         ### Cast to a dataframe and ensure datatypes are what we expect.
         df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)

-        ### Capture `numeric`, `uuid`, and `
+        ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
         self._persist_new_json_columns(df, debug=debug)
         self._persist_new_numeric_columns(df, debug=debug)
         self._persist_new_uuid_columns(df, debug=debug)
+        self._persist_new_bytes_columns(df, debug=debug)

         if debug:
             dprint(
@@ -617,11 +618,13 @@ def filter_existing(
         filter_unseen_df,
         add_missing_cols_to_df,
         get_unhashable_cols,
-        get_numeric_cols,
     )
     from meerschaum.utils.dtypes import (
         to_pandas_dtype,
         none_if_null,
+        to_datetime,
+        are_dtypes_equal,
+        value_is_null,
     )
     from meerschaum.config import get_config
     pd = import_pandas()
@@ -669,29 +672,36 @@ def filter_existing(
     ### begin is the oldest data in the new dataframe
     begin, end = None, None
     dt_col = pipe_columns.get('datetime', None)
+    primary_key = pipe_columns.get('primary', None)
     dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
+
+    if autoincrement and primary_key == dt_col and dt_col not in df.columns:
+        if enforce_dtypes:
+            df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
+        return df, get_empty_df(), df
+
     try:
-        min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
+        min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and min_dt_val is not None:
             min_dt_val = min_dt_val.compute()
         min_dt = (
-
-            if min_dt_val is not None and 'datetime'
+            to_datetime(min_dt_val, as_pydatetime=True)
+            if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
             else min_dt_val
         )
     except Exception:
         min_dt = None
-
-
+
+    if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
+        if not are_dtypes_equal('int', str(type(min_dt))):
             min_dt = None

     if isinstance(min_dt, datetime):
-
-
-
-
-        )
+        rounded_min_dt = round_time(min_dt, to='down')
+        try:
+            begin = rounded_min_dt - timedelta(minutes=1)
+        except OverflowError:
+            begin = rounded_min_dt
     elif dt_type and 'int' in dt_type.lower():
         begin = min_dt
     elif dt_col is None:
@@ -699,11 +709,11 @@ def filter_existing(

     ### end is the newest data in the new dataframe
     try:
-        max_dt_val = df[dt_col].max(skipna=True) if dt_col else None
+        max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and max_dt_val is not None:
             max_dt_val = max_dt_val.compute()
         max_dt = (
-
+            to_datetime(max_dt_val, as_pydatetime=True)
             if max_dt_val is not None and 'datetime' in str(dt_type)
             else max_dt_val
         )
@@ -712,8 +722,8 @@ def filter_existing(
         traceback.print_exc()
         max_dt = None

-    if ('datetime'
-        if 'int'
+    if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
+        if not are_dtypes_equal('int', str(type(max_dt))):
             max_dt = None

     if isinstance(max_dt, datetime):
@@ -723,7 +733,7 @@ def filter_existing(
                 to='down'
             ) + timedelta(minutes=1)
         )
-    elif dt_type and 'int' in dt_type.lower():
+    elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
         end = max_dt + 1

     if max_dt is not None and min_dt is not None and min_dt > max_dt:
@@ -738,7 +748,7 @@ def filter_existing(

     unique_index_vals = {
         col: df[col].unique()
-        for col in pipe_columns
+        for col in (pipe_columns if not primary_key else [primary_key])
         if col in df.columns and col != dt_col
     } if not date_bound_only else {}
     filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -777,14 +787,15 @@ def filter_existing(

     ### Separate new rows from changed ones.
     on_cols = [
-        col
+        col
+        for col_key, col in pipe_columns.items()
         if (
             col
             and
             col_key != 'value'
             and col in backtrack_df.columns
         )
-    ]
+    ] if not primary_key else [primary_key]
     self_dtypes = self.dtypes
     on_cols_dtypes = {
         col: to_pandas_dtype(typ)
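
When a `primary` column is configured, `filter_existing()` now collapses both the uniqueness filter and the merge columns (`on_cols`) down to the primary key instead of using every index column. A pure-Python illustration of the selection logic with a stand-in columns dictionary:

    pipe_columns = {'datetime': 'ts', 'primary': 'id', 'station': 'station'}
    primary_key = pipe_columns.get('primary', None)
    backtrack_columns = ['ts', 'id', 'station', 'value']   # columns present in the backtrack frame

    on_cols = [
        col
        for col_key, col in pipe_columns.items()
        if col and col_key != 'value' and col in backtrack_columns
    ] if not primary_key else [primary_key]

    print(on_cols)   # ['id'] -- the merge happens on the primary key alone
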
@@ -949,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'numeric' for col in numeric_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -977,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'uuid' for col in uuid_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -1005,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'json' for col in json_cols})
     self.parameters['dtypes'] = dtypes

@@ -1020,3 +1022,29 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return edit_success, edit_msg

     return True, "Success"
+
+
+def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
+    """
+    Check for new `bytes` columns and update the parameters.
+    """
+    from meerschaum.utils.dataframe import get_bytes_cols
+    bytes_cols = get_bytes_cols(df)
+    existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
+    new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
+    if not new_bytes_cols:
+        return True, "Success"
+
+    self._attributes_sync_time = None
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({col: 'bytes' for col in bytes_cols})
+    self.parameters['dtypes'] = dtypes
+
+    if not self.temporary:
+        edit_success, edit_msg = self.edit(interactive=False, debug=debug)
+        if not edit_success:
+            warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
+
+        return edit_success, edit_msg
+
+    return True, "Success"
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -7,9 +7,10 @@ Verify the contents of a pipe by resyncing its interval.
 """

 from datetime import datetime, timedelta
-
+
+import meerschaum as mrsm
+from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
 from meerschaum.utils.warnings import warn, info
-from meerschaum.utils.debug import dprint


 def verify(
@@ -94,9 +95,6 @@ def verify(
         else 1
     )

-    sync_less_than_begin = not bounded and begin is None
-    sync_greater_than_end = not bounded and end is None
-
     cannot_determine_bounds = not self.exists(debug=debug)

     if cannot_determine_bounds:
@@ -164,7 +162,7 @@ def verify(
         )

     info(
-        f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
+        f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
         + f" of size '{interval_str(chunk_interval)}'"
         + f" between '{begin_to_print}' and '{end_to_print}'."
@@ -187,7 +185,7 @@ def verify(
             return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]

         chunk_begin, chunk_end = chunk_begin_and_end
-
+        chunk_success, chunk_msg = self.sync(
             begin=chunk_begin,
             end=chunk_end,
             params=params,
@@ -195,6 +193,9 @@ def verify(
             debug=debug,
             **kwargs
         )
+        chunk_msg = chunk_msg.strip()
+        mrsm.pprint((chunk_success, chunk_msg))
+        return chunk_begin_and_end, (chunk_success, chunk_msg)

     ### If we have more than one chunk, attempt to sync the first one and return if its fails.
     if len(chunk_bounds) > 1:
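
The chunk worker now returns its own `(success, message)` tuple and prints it via `mrsm.pprint()`, so a verification run reports per-chunk results as it goes. Calling it might look like the following sketch (assumes an existing pipe on a configured instance; keyword support for `bounded` is inferred from the surrounding code, not confirmed by this diff):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
    success, msg = pipe.verify(bounded=True, debug=False)
    print(success, msg)
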
meerschaum/jobs/_Job.py
CHANGED
@@ -200,6 +200,8 @@ class Job:
         if root_dir is None:
             from meerschaum.config.paths import ROOT_DIR_PATH
             root_dir = ROOT_DIR_PATH
+        else:
+            root_dir = pathlib.Path(root_dir)
         jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
         daemon_dir = jobs_dir / daemon_id
         pid_file = daemon_dir / 'process.pid'
@@ -873,7 +875,7 @@ class Job:
         """
         from meerschaum._internal.arguments import compress_pipeline_sysargs
         sysargs = compress_pipeline_sysargs(self.sysargs)
-        return shlex.join(sysargs).replace(' + ', '\n+ ')
+        return shlex.join(sysargs).replace(' + ', '\n+ ').replace(' : ', '\n: ').lstrip().rstrip()

     @property
     def _externally_managed_file(self) -> pathlib.Path:
@@ -915,6 +917,7 @@ class Job:
             'PYTHONUNBUFFERED': '1',
             'LINES': str(get_config('jobs', 'terminal', 'lines')),
             'COLUMNS': str(get_config('jobs', 'terminal', 'columns')),
+            STATIC_CONFIG['environment']['noninteractive']: 'true',
         }
         self._env = {**default_env, **_env}
         return self._env
meerschaum/plugins/_Plugin.py
CHANGED
@@ -255,11 +255,11 @@ class Plugin:


     def install(
-
-
-
-
-
+        self,
+        skip_deps: bool = False,
+        force: bool = False,
+        debug: bool = False,
+    ) -> SuccessTuple:
         """
         Extract a plugin's tar archive to the plugins directory.

@@ -359,7 +359,7 @@ class Plugin:
             is_same_version = new_version and old_version and (
                 packaging_version.parse(old_version) == packaging_version.parse(new_version)
             )
-        except Exception
+        except Exception:
             is_new_version, is_same_version = True, False

         ### Determine where to permanently store the new plugin.
@@ -404,7 +404,7 @@ class Plugin:
             dprint(f"Moving '{src_file}' to '{dst_dir}'...")
             try:
                 shutil.move(src_file, dst_dir)
-            except Exception
+            except Exception:
                 success, msg = False, (
                     f"Failed to install plugin '{self}': " +
                     f"Could not move file '{src_file}' to '{dst_dir}'"
@@ -817,10 +817,10 @@ class Plugin:


     def install_dependencies(
-
-
-
-
+        self,
+        force: bool = False,
+        debug: bool = False,
+    ) -> bool:
         """
         If specified, install dependencies.

@@ -841,12 +841,9 @@ class Plugin:
         Returns
         -------
         A bool indicating success.
-
         """
         from meerschaum.utils.packages import pip_install, venv_contains_package
-        from meerschaum.utils.debug import dprint
         from meerschaum.utils.warnings import warn, info
-        from meerschaum.connectors.parse import parse_repo_keys
         _deps = self.get_dependencies(debug=debug)
         if not _deps and self.requirements_file_path is None:
             return True