meerschaum 2.4.0.dev0__py3-none-any.whl → 2.4.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- meerschaum/_internal/arguments/_parse_arguments.py +2 -5
- meerschaum/_internal/docs/index.py +3 -2
- meerschaum/_internal/entry.py +13 -7
- meerschaum/_internal/shell/Shell.py +38 -44
- meerschaum/_internal/term/TermPageHandler.py +2 -3
- meerschaum/_internal/term/__init__.py +13 -11
- meerschaum/actions/api.py +10 -7
- meerschaum/actions/bootstrap.py +2 -1
- meerschaum/actions/copy.py +3 -3
- meerschaum/actions/delete.py +4 -1
- meerschaum/actions/register.py +1 -3
- meerschaum/actions/stack.py +24 -19
- meerschaum/actions/start.py +25 -26
- meerschaum/actions/sync.py +53 -52
- meerschaum/api/__init__.py +48 -14
- meerschaum/api/_events.py +15 -10
- meerschaum/api/_oauth2.py +2 -2
- meerschaum/api/_websockets.py +5 -4
- meerschaum/api/dash/__init__.py +1 -11
- meerschaum/api/dash/callbacks/dashboard.py +47 -55
- meerschaum/api/dash/callbacks/jobs.py +15 -16
- meerschaum/api/dash/callbacks/login.py +16 -10
- meerschaum/api/dash/callbacks/pipes.py +3 -4
- meerschaum/api/dash/callbacks/plugins.py +1 -1
- meerschaum/api/dash/callbacks/register.py +15 -11
- meerschaum/api/dash/components.py +54 -59
- meerschaum/api/dash/jobs.py +5 -9
- meerschaum/api/dash/pages/pipes.py +4 -1
- meerschaum/api/dash/pipes.py +13 -17
- meerschaum/api/dash/plugins.py +6 -4
- meerschaum/api/dash/sessions.py +176 -0
- meerschaum/api/dash/users.py +2 -53
- meerschaum/api/dash/webterm.py +12 -17
- meerschaum/api/resources/static/js/terminado.js +1 -1
- meerschaum/api/routes/_actions.py +4 -20
- meerschaum/api/routes/_jobs.py +8 -7
- meerschaum/api/routes/_webterm.py +5 -6
- meerschaum/config/_default.py +6 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +9 -7
- meerschaum/config/static/__init__.py +4 -0
- meerschaum/connectors/__init__.py +15 -9
- meerschaum/connectors/api/{APIConnector.py → _APIConnector.py} +3 -1
- meerschaum/connectors/api/__init__.py +2 -1
- meerschaum/connectors/parse.py +18 -16
- meerschaum/connectors/sql/__init__.py +3 -1
- meerschaum/connectors/sql/_pipes.py +39 -39
- meerschaum/connectors/valkey/{ValkeyConnector.py → _ValkeyConnector.py} +5 -5
- meerschaum/connectors/valkey/__init__.py +3 -1
- meerschaum/connectors/valkey/_pipes.py +13 -8
- meerschaum/core/Pipe/__init__.py +1 -0
- meerschaum/core/Pipe/_clear.py +16 -13
- meerschaum/core/Pipe/_copy.py +106 -0
- meerschaum/core/Pipe/_data.py +155 -100
- meerschaum/core/Pipe/_verify.py +11 -11
- meerschaum/jobs/_Job.py +1 -6
- meerschaum/jobs/__init__.py +7 -2
- meerschaum/utils/dataframe.py +4 -1
- meerschaum/utils/formatting/_shell.py +5 -6
- meerschaum/utils/packages/__init__.py +14 -9
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/METADATA +1 -1
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/RECORD +70 -69
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/WHEEL +1 -1
- meerschaum/api/dash/actions.py +0 -255
- /meerschaum/connectors/{Connector.py → _Connector.py} +0 -0
- /meerschaum/connectors/sql/{SQLConnector.py → _SQLConnector.py} +0 -0
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.4.0.dev0.dist-info → meerschaum-2.4.0rc1.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/__init__.py
CHANGED
meerschaum/core/Pipe/_clear.py
CHANGED
@@ -7,25 +7,28 @@ Clear pipe data within a bounded or unbounded interval.
 """
 
 from __future__ import annotations
+
+from datetime import datetime
 from meerschaum.utils.typing import SuccessTuple, Any, Optional, Dict
 
+
 def clear(
-
-
-
-
-
-
-
+    self,
+    begin: Optional[datetime] = None,
+    end: Optional[datetime] = None,
+    params: Optional[Dict[str, Any]] = None,
+    debug: bool = False,
+    **kwargs: Any
+) -> SuccessTuple:
     """
     Call the Pipe's instance connector's `clear_pipe` method.
 
     Parameters
     ----------
-    begin: Optional[datetime
+    begin: Optional[datetime], default None:
         If provided, only remove rows newer than this datetime value.
 
-    end: Optional[datetime
+    end: Optional[datetime], default None:
         If provided, only remove rows older than this datetime column (not including end).
 
     params: Optional[Dict[str, Any]], default None
@@ -41,11 +44,11 @@ def clear(
     Examples
     --------
     >>> pipe = mrsm.Pipe('test', 'test', columns={'datetime': 'dt'}, instance='sql:local')
-    >>> pipe.sync({'dt': [datetime
-    >>> pipe.sync({'dt': [datetime
-    >>> pipe.sync({'dt': [datetime
+    >>> pipe.sync({'dt': [datetime(2020, 1, 1, 0, 0)]})
+    >>> pipe.sync({'dt': [datetime(2021, 1, 1, 0, 0)]})
+    >>> pipe.sync({'dt': [datetime(2022, 1, 1, 0, 0)]})
     >>>
-    >>> pipe.clear(begin=datetime
+    >>> pipe.clear(begin=datetime(2021, 1, 1, 0, 0))
     >>> pipe.get_data()
              dt
     0 2020-01-01

meerschaum/core/Pipe/_copy.py
ADDED
@@ -0,0 +1,106 @@
+#! /usr/bin/env python3
+# vim:fenc=utf-8
+
+"""
+Define methods for copying pipes.
+"""
+
+from datetime import datetime, timedelta
+
+import meerschaum as mrsm
+from meerschaum.utils.typing import SuccessTuple, Any, Optional, Dict, Union
+
+
+def copy_to(
+    self,
+    instance_keys: str,
+    sync: bool = True,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    chunk_interval: Union[timedelta, int, None] = None,
+    debug: bool = False,
+    **kwargs: Any
+) -> SuccessTuple:
+    """
+    Copy a pipe to another instance.
+
+    Parameters
+    ----------
+    instance_keys: str
+        The instance to which to copy this pipe.
+
+    sync: bool, default True
+        If `True`, sync the source pipe's documents.
+
+    begin: Union[datetime, int, None], default None
+        Beginning datetime value to pass to `Pipe.get_data()`.
+
+    end: Union[datetime, int, None], default None
+        End datetime value to pass to `Pipe.get_data()`.
+
+    params: Optional[Dict[str, Any]], default None
+        Parameters filter to pass to `Pipe.get_data()`.
+
+    chunk_interval: Union[timedelta, int, None], default None
+        The size of chunks to retrieve from `Pipe.get_data()` for syncing.
+
+    kwargs: Any
+        Additional flags to pass to `Pipe.get_data()` and `Pipe.sync()`, e.g. `workers`.
+
+    Returns
+    -------
+    A SuccessTuple indicating success.
+    """
+    if str(instance_keys) == self.instance_keys:
+        return False, f"Cannot copy {self} to instance '{instance_keys}'."
+
+    new_pipe = mrsm.Pipe(
+        self.connector_keys,
+        self.metric_key,
+        self.location_key,
+        parameters=self.parameters.copy(),
+        instance=instance_keys,
+    )
+
+    new_pipe_is_registered = new_pipe.get_id() is not None
+
+    metadata_method = new_pipe.edit if new_pipe_is_registered else new_pipe.register
+    metadata_success, metadata_msg = metadata_method(debug=debug)
+    if not metadata_success:
+        return metadata_success, metadata_msg
+
+    if not self.exists(debug=debug):
+        return True, f"{self} does not exist; nothing to sync."
+
+    original_as_iterator = kwargs.get('as_iterator', None)
+    kwargs['as_iterator'] = True
+
+    chunk_generator = self.get_data(
+        begin=begin,
+        end=end,
+        params=params,
+        chunk_interval=chunk_interval,
+        debug=debug,
+        **kwargs
+    )
+
+    if original_as_iterator is None:
+        _ = kwargs.pop('as_iterator', None)
+    else:
+        kwargs['as_iterator'] = original_as_iterator
+
+    sync_success, sync_msg = new_pipe.sync(
+        chunk_generator,
+        begin=begin,
+        end=end,
+        params=params,
+        debug=debug,
+        **kwargs
+    )
+    msg = (
+        f"Successfully synced {new_pipe}:\n{sync_msg}"
+        if sync_success
+        else f"Failed to sync {new_pipe}:\n{sync_msg}"
+    )
+    return sync_success, msg
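
The new `Pipe.copy_to()` above registers (or edits) the target pipe and then streams chunks from `get_data(as_iterator=True)` into `sync()`. A minimal usage sketch, assuming `copy_to` is bound to `Pipe` like the other per-module methods (the connector keys below are illustrative, not from the diff):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')

    # Copy the pipe's registration and rows to another instance, chunk by chunk.
    success, msg = pipe.copy_to('sql:remote', workers=4)
    print(success, msg)
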
meerschaum/core/Pipe/_data.py
CHANGED
@@ -30,6 +30,8 @@ def get_data(
     as_chunks: bool = False,
     as_dask: bool = False,
     chunk_interval: Union[timedelta, int, None] = None,
+    order: Optional[str] = 'asc',
+    limit: Optional[int] = None,
     fresh: bool = False,
     debug: bool = False,
     **kw: Any
@@ -80,6 +82,12 @@ def get_data(
         If `chunk_interval` is a `timedelta` and the `datetime` axis an integer,
         use the number of minutes in the `timedelta`.
 
+    order: Optional[str], default 'asc'
+        If `order` is not `None`, sort the resulting dataframe by indices.
+
+    limit: Optional[int], default None
+        If provided, cap the dataframe to this many rows.
+
     fresh: bool, default True
         If `True`, skip local cache and directly query the instance connector.
         Defaults to `True`.
@@ -113,17 +121,39 @@ def get_data(
 
     as_iterator = as_iterator or as_chunks
 
+    def _sort_df(_df):
+        dt_col = self.columns.get('datetime', None)
+        indices = [] if dt_col not in _df.columns else [dt_col]
+        non_dt_cols = [
+            col
+            for col_ix, col in self.columns.items()
+            if col_ix != 'datetime' and col in _df.columns
+        ]
+        indices.extend(non_dt_cols)
+        _df.sort_values(
+            by=indices,
+            inplace=True,
+            ascending=(str(order).lower() == 'asc')
+        )
+        _df.reset_index(drop=True, inplace=True)
+        if limit is not None and len(_df) > limit:
+            return _df.head(limit)
+        return _df
+
     if as_iterator or as_chunks:
-
-            select_columns
-            omit_columns
-            begin
-            end
-            params
-            chunk_interval
-
-
+        df = self._get_data_as_iterator(
+            select_columns=select_columns,
+            omit_columns=omit_columns,
+            begin=begin,
+            end=end,
+            params=params,
+            chunk_interval=chunk_interval,
+            limit=limit,
+            order=order,
+            fresh=fresh,
+            debug=debug,
         )
+        return _sort_df(df)
 
     if as_dask:
         from multiprocessing.pool import ThreadPool
@@ -131,22 +161,24 @@ def get_data(
         dask.config.set(pool=dask_pool)
         chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
         bounds = self.get_chunk_bounds(
-            begin
-            end
-            bounded
-            chunk_interval
-            debug
+            begin=begin,
+            end=end,
+            bounded=False,
+            chunk_interval=chunk_interval,
+            debug=debug,
         )
         dask_chunks = [
             dask.delayed(self.get_data)(
-                select_columns
-                omit_columns
-                begin
-                end
-                params
-                chunk_interval
-
-
+                select_columns=select_columns,
+                omit_columns=omit_columns,
+                begin=chunk_begin,
+                end=chunk_end,
+                params=params,
+                chunk_interval=chunk_interval,
+                order=order,
+                limit=limit,
+                fresh=fresh,
+                debug=debug,
             )
             for (chunk_begin, chunk_end) in bounds
         ]
@@ -154,18 +186,18 @@ def get_data(
             col: to_pandas_dtype(typ)
             for col, typ in self.dtypes.items()
         }
-        return dd.from_delayed(dask_chunks, meta=dask_meta)
+        return _sort_df(dd.from_delayed(dask_chunks, meta=dask_meta))
 
     if not self.exists(debug=debug):
         return None
-
+
     if self.cache_pipe is not None:
         if not fresh:
             _sync_cache_tuple = self.cache_pipe.sync(
-                begin
-                end
-                params
-                debug
+                begin=begin,
+                end=end,
+                params=params,
+                debug=debug,
                 **kw
             )
             if not _sync_cache_tuple[0]:
@@ -174,27 +206,31 @@ def get_data(
             else: ### Successfully synced cache.
                 return self.enforce_dtypes(
                     self.cache_pipe.get_data(
-                        select_columns
-                        omit_columns
-                        begin
-                        end
-                        params
-
-
+                        select_columns=select_columns,
+                        omit_columns=omit_columns,
+                        begin=begin,
+                        end=end,
+                        params=params,
+                        order=order,
+                        limit=limit,
+                        debug=debug,
+                        fresh=True,
                         **kw
                     ),
-                    debug
+                    debug=debug,
                 )
 
     with Venv(get_connector_plugin(self.instance_connector)):
         df = self.instance_connector.get_pipe_data(
-            pipe
-            select_columns
-            omit_columns
-            begin
-            end
-            params
-
+            pipe=self,
+            select_columns=select_columns,
+            omit_columns=omit_columns,
+            begin=begin,
+            end=end,
+            params=params,
+            limit=limit,
+            order=order,
+            debug=debug,
             **kw
         )
         if df is None:
@@ -226,7 +262,7 @@ def get_data(
                 + "Consider adding `select_columns` and `omit_columns` support to "
                 + f"'{self.instance_connector.type}' connectors to improve performance."
             ),
-            stack
+            stack=False,
         )
         _cols_to_select = [col for col in df.columns if col not in cols_to_omit]
         df = df[_cols_to_select]
@@ -237,25 +273,31 @@ def get_data(
                 f"Specified columns {items_str(cols_to_add)} were not found on {self}. "
                 + "Adding these to the DataFrame as null columns."
             ),
-            stack
+            stack=False,
        )
         df = add_missing_cols_to_df(df, {col: 'string' for col in cols_to_add})
 
-
+    enforced_df = self.enforce_dtypes(df, debug=debug)
+
+    if order:
+        return _sort_df(enforced_df)
+    return enforced_df
 
 
 def _get_data_as_iterator(
-
-
-
-
-
-
-
-
-
-
-
+    self,
+    select_columns: Optional[List[str]] = None,
+    omit_columns: Optional[List[str]] = None,
+    begin: Optional[datetime] = None,
+    end: Optional[datetime] = None,
+    params: Optional[Dict[str, Any]] = None,
+    chunk_interval: Union[timedelta, int, None] = None,
+    limit: Optional[int] = None,
+    order: Optional[str] = 'asc',
+    fresh: bool = False,
+    debug: bool = False,
+    **kw: Any
+) -> Iterator['pd.DataFrame']:
     """
     Return a pipe's data as a generator.
     """
@@ -305,46 +347,51 @@ def _get_data_as_iterator(
         (min_dt + chunk_interval) > max_dt
     ):
         yield self.get_data(
-            select_columns
-            omit_columns
-            begin
-            end
-            params
-
-
+            select_columns=select_columns,
+            omit_columns=omit_columns,
+            begin=begin,
+            end=end,
+            params=params,
+            limit=limit,
+            order=order,
+            fresh=fresh,
+            debug=debug,
         )
         return
 
     chunk_bounds = self.get_chunk_bounds(
-        begin
-        end
-        chunk_interval
-        debug
+        begin=min_dt,
+        end=max_dt,
+        chunk_interval=chunk_interval,
+        debug=debug,
     )
 
     for chunk_begin, chunk_end in chunk_bounds:
         chunk = self.get_data(
-            select_columns
-            omit_columns
-            begin
-            end
-            params
-
-
+            select_columns=select_columns,
+            omit_columns=omit_columns,
+            begin=chunk_begin,
+            end=chunk_end,
+            params=params,
+            limit=limit,
+            order=order,
+            fresh=fresh,
+            debug=debug,
         )
         if len(chunk) > 0:
             yield chunk
 
 
 def get_backtrack_data(
-
-
-
-
-
-
-
-
+    self,
+    backtrack_minutes: Optional[int] = None,
+    begin: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    limit: Optional[int] = None,
+    fresh: bool = False,
+    debug: bool = False,
+    **kw: Any
+) -> Optional['pd.DataFrame']:
     """
     Get the most recent data from the instance connector as a Pandas DataFrame.
 
@@ -371,8 +418,10 @@ def get_backtrack_data(
 
     params: Optional[Dict[str, Any]], default None
         The standard Meerschaum `params` query dictionary.
-
-
+
+    limit: Optional[int], default None
+        If provided, cap the number of rows to be returned.
+
     fresh: bool, default False
         If `True`, Ignore local cache and pull directly from the instance connector.
         Only comes into effect if a pipe was created with `cache=True`.
@@ -409,28 +458,31 @@ def get_backtrack_data(
             else: ### Successfully synced cache.
                 return self.enforce_dtypes(
                     self.cache_pipe.get_backtrack_data(
-                        fresh
-                        begin
-                        backtrack_minutes
-                        params
-
+                        fresh=True,
+                        begin=begin,
+                        backtrack_minutes=backtrack_minutes,
+                        params=params,
+                        limit=limit,
+                        order=kw.get('order', 'desc'),
+                        debug=debug,
                         **kw
                     ),
-                    debug
+                    debug=debug,
                 )
 
     if hasattr(self.instance_connector, 'get_backtrack_data'):
         with Venv(get_connector_plugin(self.instance_connector)):
             return self.enforce_dtypes(
                 self.instance_connector.get_backtrack_data(
-                    pipe
-                    begin
-                    backtrack_minutes
-                    params
-
+                    pipe=self,
+                    begin=begin,
+                    backtrack_minutes=backtrack_minutes,
+                    params=params,
+                    limit=limit,
+                    debug=debug,
                     **kw
                 ),
-                debug
+                debug=debug,
            )
 
     if begin is None:
@@ -445,13 +497,16 @@ def get_backtrack_data(
         begin = begin - backtrack_interval
 
     return self.get_data(
-        begin
-        params
-        debug
+        begin=begin,
+        params=params,
+        debug=debug,
+        limit=limit,
+        order=kw.get('order', 'desc'),
         **kw
     )
 
 
+
 def get_rowcount(
     self,
     begin: Optional[datetime] = None,
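
The new `order` and `limit` parameters thread through the iterator, Dask, cache, and instance-connector paths above. A minimal usage sketch (the pipe keys are illustrative):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'temperature', instance='sql:main')

    # Ten most recent rows: sort the indices descending, then cap at 10.
    recent_df = pipe.get_data(order='desc', limit=10)

    # Ascending is the default; passing `order=None` skips sorting entirely.
    full_df = pipe.get_data()
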
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -12,17 +12,17 @@ from meerschaum.utils.warnings import warn, info
 from meerschaum.utils.debug import dprint
 
 def verify(
-
-
-
-
-
-
-
-
-
-
-
+    self,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    chunk_interval: Union[timedelta, int, None] = None,
+    bounded: Optional[bool] = None,
+    deduplicate: bool = False,
+    workers: Optional[int] = None,
+    debug: bool = False,
+    **kwargs: Any
+) -> SuccessTuple:
     """
     Verify the contents of the pipe by resyncing its interval.
 
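With the keyword signature above, a bounded resync can be sketched like so (the pipe keys and dates are illustrative, not taken from the diff):

    from datetime import datetime
    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'temperature', instance='sql:main')

    # Resync January 2024 in chunks, deduplicating along the way.
    success, msg = pipe.verify(
        begin=datetime(2024, 1, 1),
        end=datetime(2024, 2, 1),
        bounded=True,
        deduplicate=True,
    )
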
meerschaum/jobs/_Job.py
CHANGED
@@ -10,10 +10,7 @@ from __future__ import annotations
 
 import shlex
 import asyncio
-import threading
-import json
 import pathlib
-import os
 import sys
 import traceback
 from functools import partial
@@ -206,13 +203,11 @@ class Job:
         jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
         daemon_dir = jobs_dir / daemon_id
         pid_file = daemon_dir / 'process.pid'
-        properties_path = daemon_dir / 'properties.json'
-        pickle_path = daemon_dir / 'pickle.pkl'
 
         if pid_file.exists():
             with open(pid_file, 'r', encoding='utf-8') as f:
                 daemon_pid = int(f.read())
-
+
             if pid != daemon_pid:
                 raise EnvironmentError(f"Differing PIDs: {pid=}, {daemon_pid=}")
         else:
meerschaum/jobs/__init__.py
CHANGED
@@ -9,9 +9,9 @@ Higher-level utilities for managing `meerschaum.utils.daemon.Daemon`.
 import pathlib
 
 import meerschaum as mrsm
-from meerschaum.utils.typing import Dict, Optional, List,
+from meerschaum.utils.typing import Dict, Optional, List, SuccessTuple
 
-from meerschaum.jobs._Job import Job
+from meerschaum.jobs._Job import Job
 from meerschaum.jobs._Executor import Executor
 
 __all__ = (
@@ -403,9 +403,14 @@ def get_executor_keys_from_context() -> str:
     if _context_keys is not None:
         return _context_keys
 
+    from meerschaum.config import get_config
     from meerschaum.config.paths import ROOT_DIR_PATH, DEFAULT_ROOT_DIR_PATH
     from meerschaum.utils.misc import is_systemd_available
 
+    configured_executor = get_config('meerschaum', 'executor', warn=False)
+    if configured_executor is not None:
+        return configured_executor
+
     _context_keys = (
         'systemd'
         if is_systemd_available() and ROOT_DIR_PATH == DEFAULT_ROOT_DIR_PATH
meerschaum/utils/dataframe.py
CHANGED
@@ -1187,6 +1187,9 @@ def query_df(
             query_mask = query_mask & mask
 
     original_cols = df.columns
+
+    ### NOTE: We must cast bool columns to `boolean[pyarrow]`
+    ### to allow for `<NA>` values.
     bool_cols = [
         col
         for col, typ in df.dtypes.items()
@@ -1194,7 +1197,7 @@ def query_df(
     ]
     for col in bool_cols:
         df[col] = df[col].astype('boolean[pyarrow]')
-    df['__mrsm_mask'] = query_mask
+    df['__mrsm_mask'] = query_mask.astype('boolean[pyarrow]')
 
     if inplace:
         df.where(query_mask, other=NA, inplace=True)
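
The casts above matter because plain NumPy `bool` columns cannot hold the `<NA>` values that `df.where(query_mask, other=NA)` produces, while the nullable `boolean[pyarrow]` dtype can. A standalone sketch of the distinction (illustrative; assumes pandas 2.x with pyarrow installed):

    import pandas as pd

    df = pd.DataFrame({'flag': [True, False, True]})

    # Nullable pyarrow-backed booleans accept missing values;
    # a plain NumPy bool column would be upcast to object instead.
    df['flag'] = df['flag'].astype('boolean[pyarrow]')
    df.loc[1, 'flag'] = pd.NA
    print(df['flag'])  # 0: True, 1: <NA>, 2: True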