meerschaum 2.3.5.dev0__py3-none-any.whl → 2.4.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/__init__.py +2 -1
- meerschaum/_internal/arguments/_parse_arguments.py +86 -7
- meerschaum/_internal/entry.py +29 -13
- meerschaum/actions/api.py +16 -16
- meerschaum/actions/bootstrap.py +36 -10
- meerschaum/actions/start.py +16 -15
- meerschaum/api/_events.py +11 -7
- meerschaum/api/dash/__init__.py +7 -6
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/dashboard.py +7 -5
- meerschaum/api/dash/callbacks/pipes.py +42 -0
- meerschaum/api/dash/pages/__init__.py +1 -0
- meerschaum/api/dash/pages/pipes.py +16 -0
- meerschaum/api/dash/pipes.py +79 -47
- meerschaum/api/dash/users.py +19 -6
- meerschaum/api/routes/_actions.py +0 -98
- meerschaum/api/routes/_jobs.py +38 -18
- meerschaum/api/routes/_login.py +4 -4
- meerschaum/api/routes/_pipes.py +3 -3
- meerschaum/config/_default.py +9 -2
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +59 -18
- meerschaum/config/static/__init__.py +2 -0
- meerschaum/connectors/Connector.py +19 -13
- meerschaum/connectors/__init__.py +9 -5
- meerschaum/connectors/api/_actions.py +22 -36
- meerschaum/connectors/api/_jobs.py +1 -0
- meerschaum/connectors/poll.py +30 -24
- meerschaum/connectors/sql/_pipes.py +126 -154
- meerschaum/connectors/sql/_plugins.py +45 -43
- meerschaum/connectors/sql/_users.py +46 -38
- meerschaum/connectors/valkey/ValkeyConnector.py +535 -0
- meerschaum/connectors/valkey/__init__.py +8 -0
- meerschaum/connectors/valkey/_fetch.py +75 -0
- meerschaum/connectors/valkey/_pipes.py +839 -0
- meerschaum/connectors/valkey/_plugins.py +265 -0
- meerschaum/connectors/valkey/_users.py +305 -0
- meerschaum/core/Pipe/__init__.py +2 -0
- meerschaum/core/Pipe/_attributes.py +1 -2
- meerschaum/core/Pipe/_drop.py +4 -4
- meerschaum/core/Pipe/_dtypes.py +14 -14
- meerschaum/core/Pipe/_edit.py +15 -14
- meerschaum/core/Pipe/_sync.py +134 -51
- meerschaum/core/User/_User.py +14 -12
- meerschaum/jobs/_Job.py +26 -8
- meerschaum/jobs/systemd.py +20 -8
- meerschaum/plugins/_Plugin.py +17 -13
- meerschaum/utils/_get_pipes.py +14 -20
- meerschaum/utils/dataframe.py +288 -101
- meerschaum/utils/dtypes/__init__.py +31 -6
- meerschaum/utils/dtypes/sql.py +4 -4
- meerschaum/utils/misc.py +3 -3
- meerschaum/utils/packages/_packages.py +1 -0
- meerschaum/utils/prompt.py +1 -1
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/METADATA +3 -1
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/RECORD +62 -54
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/WHEEL +1 -1
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/LICENSE +0 -0
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/NOTICE +0 -0
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.3.5.dev0.dist-info → meerschaum-2.4.0.dev0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,839 @@
|
|
1
|
+
#! /usr/bin/env python3
|
2
|
+
# vim:fenc=utf-8
|
3
|
+
|
4
|
+
"""
|
5
|
+
Define pipes methods for `ValkeyConnector`.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import json
|
9
|
+
from datetime import datetime, timedelta, timezone
|
10
|
+
|
11
|
+
import meerschaum as mrsm
|
12
|
+
from meerschaum.utils.typing import SuccessTuple, Any, Union, Optional, Dict, List, Tuple
|
13
|
+
from meerschaum.utils.misc import json_serialize_datetime, string_to_dict
|
14
|
+
from meerschaum.utils.warnings import dprint, warn
|
15
|
+
|
16
|
+
PIPES_TABLE: str = 'mrsm_pipes'
|
17
|
+
PIPES_COUNTER: str = 'mrsm_pipes:counter'
|
18
|
+
|
19
|
+
|
20
|
+
def get_pipe_key(pipe: mrsm.Pipe) -> str:
    """
    Return the Valkey key under which a pipe's ID is stored.

    The key is built from the pipe's three identifying keys
    (connector, metric, location).
    """
    keys = (pipe.connector_keys, pipe.metric_key, pipe.location_key)
    return "mrsm_pipe:" + ":".join(str(key) for key in keys)
|
25
|
+
|
26
|
+
|
27
|
+
def get_pipe_parameters_key(pipe: mrsm.Pipe) -> str:
    """
    Return the Valkey key under which a pipe's parameters are stored
    (the pipe's ID key with a ':parameters' suffix).
    """
    return f"{get_pipe_key(pipe)}:parameters"
|
32
|
+
|
33
|
+
|
34
|
+
def serialize_document(doc: Dict[str, Any]) -> str:
    """
    Serialize a document into a deterministic JSON string.

    Keys are sorted and separators are compact so equal documents always
    produce byte-identical strings (required for set-membership lookups).

    Parameters
    ----------
    doc: Dict[str, Any]
        The document to be serialized.

    Returns
    -------
    A serialized string for the document.
    """
    def _default(val: Any) -> str:
        # Objects with `tzinfo` (datetimes) get ISO serialization; everything
        # else non-JSON-native falls back to `str()`.
        return json_serialize_datetime(val) if hasattr(val, 'tzinfo') else str(val)

    return json.dumps(
        doc,
        default=_default,
        separators=(',', ':'),
        sort_keys=True,
    )
|
53
|
+
|
54
|
+
|
55
|
+
def get_document_key(
    doc: Dict[str, Any],
    indices: List[str],
    table_name: Optional[str] = None,
) -> str:
    """
    Return a serialized string of a document's index values only.

    Parameters
    ----------
    doc: Dict[str, Any]
        The document containing index values to be serialized.

    indices: List[str]
        The names of the indices to be serialized.

    table_name: Optional[str], default None
        If provided, prepend the table to the key.
        Ignored when `indices` is empty (the whole document is serialized instead).

    Returns
    -------
    A serialized string of the document's indices.
    """
    from meerschaum.utils.dtypes import coerce_timezone

    if not indices:
        # Without indices there is nothing to key on: fall back to the
        # full serialized document (no table prefix in this case).
        return serialize_document(doc)

    def _index_val_str(val: Any) -> str:
        # Datetimes become integer UTC epoch seconds so keys are stable
        # regardless of the incoming timezone representation.
        if isinstance(val, datetime):
            return str(int(coerce_timezone(val).replace(tzinfo=timezone.utc).timestamp()))
        return str(val)

    pairs = sorted(
        f"{key}:{_index_val_str(val)}"
        for key, val in doc.items()
        if key in indices
    )
    prefix = f"{table_name}:indices:" if table_name else ''
    return prefix + ','.join(pairs)
|
97
|
+
|
98
|
+
|
99
|
+
def get_table_quoted_doc_key(
    table_name: str,
    doc: Dict[str, Any],
    indices: List[str],
    datetime_column: Optional[str] = None,
) -> str:
    """
    Return the document string as stored in the underlying set.

    The stored member maps the document's index key to its full serialized
    form, plus the datetime value (defaulting to 0) when a datetime column
    is configured.
    """
    payload = {
        get_document_key(doc, indices, table_name): serialize_document(doc),
    }
    if datetime_column:
        payload[datetime_column] = doc.get(datetime_column, 0)
    return json.dumps(
        payload,
        sort_keys=True,
        separators=(',', ':'),
        default=(lambda x: json_serialize_datetime(x) if hasattr(x, 'tzinfo') else str(x)),
    )
|
121
|
+
|
122
|
+
|
123
|
+
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Insert the pipe's attributes into the internal `pipes` table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be registered.

    Returns
    -------
    A `SuccessTuple` of the result.
    """
    pipe_key = get_pipe_key(pipe)
    parameters_key = get_pipe_parameters_key(pipe)
    attributes = {
        'connector_keys': str(pipe.connector_keys),
        'metric_key': str(pipe.metric_key),
        'location_key': str(pipe.location_key),
    }
    # Read parameters from `_attributes` directly to avoid triggering
    # a lazy fetch from the instance connector.
    parameters_str = json.dumps(
        pipe._attributes.get('parameters', {}),
        separators=(',', ':'),
    )

    try:
        if self.get(pipe_key) is not None:
            return False, f"{pipe} is already registered."

        # Atomically allocate the next pipe ID.
        pipe_id = self.client.incr(PIPES_COUNTER)
        _ = self.push_docs(
            [{'pipe_id': pipe_id, **attributes}],
            PIPES_TABLE,
            datetime_column='pipe_id',
            debug=debug,
        )
        self.set(pipe_key, pipe_id)
        self.set(parameters_key, parameters_str)
    except Exception as e:
        return False, f"Failed to register {pipe}:\n{e}"

    return True, "Success"
|
173
|
+
|
174
|
+
|
175
|
+
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[str, int, None]:
    """
    Return the `_id` for the pipe if it exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose `_id` to fetch.

    Returns
    -------
    The `_id` for the pipe's document or `None`.
    """
    try:
        # `get` returns None for missing keys; int(None) raises, so
        # unregistered pipes fall through to the None return below.
        return int(self.get(get_pipe_key(pipe)))
    except Exception:
        return None
|
199
|
+
|
200
|
+
|
201
|
+
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Return the pipe's document from the internal `pipes` collection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose attributes should be retrieved.

    Returns
    -------
    The document that matches the keys of the pipe,
    or an empty dictionary if the pipe is not registered.
    """
    pipe_id = pipe.get_id(debug=debug)
    if pipe_id is None:
        return {}

    parameters_str = self.get(get_pipe_parameters_key(pipe))
    return {
        'connector_keys': pipe.connector_keys,
        'metric_key': pipe.metric_key,
        'location_key': pipe.location_key,
        'parameters': (json.loads(parameters_str) if parameters_str else {}),
        'pipe_id': pipe_id,
    }
|
236
|
+
|
237
|
+
|
238
|
+
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Edit the attributes of the pipe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose in-memory parameters must be persisted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    if pipe.get_id(debug=debug) is None:
        return False, f"{pipe} is not registered."

    self.set(
        get_pipe_parameters_key(pipe),
        json.dumps(pipe.parameters, separators=(',', ':')),
    )
    return True, "Success"
|
264
|
+
|
265
|
+
|
266
|
+
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> bool:
    """
    Check whether a pipe's target table exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to check whether its table exists.

    Returns
    -------
    A `bool` indicating the table exists.
    """
    # `EXISTS` returns the number of matching keys (0 or 1 here).
    return bool(self.client.exists(self.quote_table(pipe.target)))
|
286
|
+
|
287
|
+
|
288
|
+
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop a pipe's collection if it exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    try:
        self.drop_table(pipe.target, debug=debug)
    except Exception as e:
        return False, f"Failed to drop {pipe}:\n{e}"

    if 'valkey' not in pipe.parameters:
        return True, "Success"

    # The table is gone, so clear the cached dtypes and persist the
    # updated parameters (unless the pipe is in-memory only).
    pipe.parameters['valkey']['dtypes'] = {}
    if pipe.temporary:
        return True, "Success"

    edit_success, edit_msg = pipe.edit(debug=debug)
    if not edit_success:
        return edit_success, edit_msg

    return True, "Success"
|
321
|
+
|
322
|
+
|
323
|
+
def delete_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Delete a pipe's registration from the `pipes` collection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be deleted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    drop_success, drop_message = pipe.drop(debug=debug)
    if not drop_success:
        return drop_success, drop_message

    pipe_id = self.get_pipe_id(pipe, debug=debug)
    if pipe_id is None:
        return False, f"{pipe} is not registered."

    # Remove the ID and parameters keys.
    self.client.delete(get_pipe_key(pipe))
    self.client.delete(get_pipe_parameters_key(pipe))

    # Remove the registration document from the pipes table's sorted set.
    registration_df = self.read(PIPES_TABLE, params={'pipe_id': pipe_id})
    registration_docs = registration_df.to_dict(orient='records')
    if registration_docs:
        # `serialize_document` produces the exact member string
        # (sorted keys, compact separators) that was stored.
        self.client.zrem(PIPES_TABLE, serialize_document(registration_docs[0]))
    return True, "Success"
|
365
|
+
|
366
|
+
|
367
|
+
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union['pd.DataFrame', None]:
    """
    Query a pipe's target table and return the DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, int, None], default None
        The earliest `datetime` value to search from (inclusive).

    end: Union[datetime, int, None], default None
        The latest `datetime` value to search from (exclusive).

    params: Optional[Dict[str, Any]], default None
        Additional filters to apply to the query.

    Returns
    -------
    The target table's data as a DataFrame, or `None` if the table
    does not exist.
    """
    if not pipe.exists(debug=debug):
        return None

    from meerschaum.utils.dataframe import query_df, parse_df_datetimes

    # dtypes cached at sync time under parameters['valkey']['dtypes'].
    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]
    # First pass: read the index documents (each carries an 'ix' key string)
    # within the `begin`/`end` range.
    ix_docs = [
        string_to_dict(doc['ix'])
        for doc in self.read_docs(
            pipe.target,
            begin=begin,
            end=end,
            debug=debug,
        )
    ]
    # Second pass: resolve each index key to its full serialized document.
    try:
        docs_strings = [
            self.get(get_document_key(
                doc, indices, table_name
            ))
            for doc in ix_docs
        ]
    except Exception as e:
        warn(f"Failed to fetch documents for {pipe}:\n{e}")
        docs_strings = []

    # Drop keys which resolved to nothing (e.g. deleted documents).
    docs = [
        json.loads(doc_str)
        for doc_str in docs_strings
        if doc_str
    ]
    # Only attempt datetime parsing on columns typed as datetimes.
    ignore_dt_cols = [
        col
        for col, dtype in pipe.dtypes.items()
        if 'datetime' not in str(dtype)
    ]

    df = parse_df_datetimes(
        docs,
        ignore_cols=ignore_dt_cols,
        chunksize=kwargs.get('chunksize', None),
        debug=debug,
    )
    # Best-effort cast back to the cached dtypes; failures are tolerated
    # because `enforce_dtypes` below is the authoritative pass.
    for col, typ in valkey_dtypes.items():
        try:
            df[col] = df[col].astype(typ)
        except Exception:
            pass

    df = pipe.enforce_dtypes(df, debug=debug)

    if len(df) == 0:
        # Nothing to filter; still apply column selection for a
        # consistently-shaped empty frame.
        return query_df(df, select_columns=select_columns, omit_columns=omit_columns)

    return query_df(
        df,
        select_columns=select_columns,
        omit_columns=omit_columns,
        params=params,
        begin=begin,
        end=end,
        datetime_column=dt_col,
        inplace=True,
        reset_index=True,
    )
|
474
|
+
|
475
|
+
|
476
|
+
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame' = None,
    check_existing: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Upsert new documents into the pipe's collection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose collection should receive the new documents.

    df: Union['pd.DataFrame', Iterator['pd.DataFrame']], default None
        The data to be synced.

    check_existing: bool, default True
        If `False`, do not check the documents against existing data and instead insert directly.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    dt_col = pipe.columns.get('datetime', None)
    indices = [col for col in pipe.columns.values() if col]
    table_name = self.quote_table(pipe.target)
    # Dask frames must be materialized before row-wise serialization.
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = df.compute()

    def _serialize_indices_docs(_docs):
        # Build the lightweight index documents stored in the table's set:
        # the 'ix' key string plus the datetime value (default 0) for scoring.
        return [
            {
                'ix': get_document_key(doc, indices),
                **(
                    {
                        dt_col: doc.get(dt_col, 0)
                    }
                    if dt_col
                    else {}
                )
            }
            for doc in _docs
        ]

    # Merge any new columns' dtypes into the cached valkey dtypes.
    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    new_dtypes = {
        str(key): str(val)
        for key, val in df.dtypes.items()
        if str(key) not in valkey_dtypes
    }
    # Iterate over a copy: the loop may mutate `valkey_dtypes` on cast failure.
    for col, typ in {c: v for c, v in valkey_dtypes.items()}.items():
        if col in df.columns:
            try:
                df[col] = df[col].astype(typ)
            except Exception:
                # Incompatible incoming values: degrade the column to 'string'.
                valkey_dtypes[col] = 'string'
                new_dtypes[col] = 'string'
                df[col] = df[col].astype('string')

    if new_dtypes:
        valkey_dtypes.update(new_dtypes)
        if 'valkey' not in pipe.parameters:
            pipe.parameters['valkey'] = {}
        pipe.parameters['valkey']['dtypes'] = valkey_dtypes
        # Persist the updated dtypes before writing rows (skip for temporary pipes).
        if not pipe.temporary:
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                return edit_success, edit_msg

    # df = pipe.enforce_dtypes(df, debug=debug)

    # Split incoming rows into brand-new rows, updates to existing rows,
    # and the combined delta.
    unseen_df, update_df, delta_df = (
        pipe.filter_existing(df, include_unchanged_columns=True, debug=debug)
        if check_existing
        else (df, None, df)
    )
    num_insert = len(unseen_df) if unseen_df is not None else 0
    num_update = len(update_df) if update_df is not None else 0
    msg = f"Inserted {num_insert}, updated {num_update} rows."
    if len(delta_df) == 0:
        return True, msg

    # Inserts: write each full document under its index key...
    unseen_docs = unseen_df.to_dict(orient='records')
    unseen_indices_docs = _serialize_indices_docs(unseen_docs)
    unseen_ix_vals = {
        get_document_key(doc, indices, table_name): serialize_document(doc)
        for doc in unseen_docs
    }
    for key, val in unseen_ix_vals.items():
        try:
            self.set(key, val)
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    # ...then register the index documents in the table's set.
    try:
        self.push_docs(
            unseen_indices_docs,
            pipe.target,
            datetime_column=dt_col,
            debug=debug,
        )
    except Exception as e:
        return False, f"Failed to push docs to '{pipe.target}':\n{e}"

    # Updates: merge the new values into each existing document in place.
    update_docs = update_df.to_dict(orient='records') if update_df is not None else []
    update_ix_docs = {
        get_document_key(doc, indices, table_name): doc
        for doc in update_docs
    }
    for key, doc in update_ix_docs.items():
        try:
            old_doc = json.loads(self.get(key))
            old_doc.update(doc)
            self.set(key, serialize_document(old_doc))
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    return True, msg
|
598
|
+
|
599
|
+
|
600
|
+
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, str]:
    """
    Return the data types for the columns in the target table for data type enforcement.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table contains columns and data types.

    Returns
    -------
    A dictionary mapping columns to (database-style) data types.
    """
    if not pipe.exists(debug=debug):
        return {}

    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    # Translate the cached pandas dtypes into DB type names.
    cached_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    return {
        col: get_db_type_from_pd_type(typ)
        for col, typ in cached_dtypes.items()
    }
|
626
|
+
|
627
|
+
|
628
|
+
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Delete rows within `begin`, `end`, and `params`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows to clear.

    begin: Union[datetime, int, None], default None
        If provided, remove rows >= `begin`.

    end: Union[datetime, int, None], default None
        If provided, remove rows < `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, only remove rows which match the `params` filter.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    # No filters at all means "clear everything": dropping is cheaper.
    if begin is None and end is None and params is None:
        return self.drop_pipe(pipe, debug=debug)

    dt_col = pipe.columns.get('datetime', None)

    # Read the rows to delete so their index keys can be reconstructed.
    existing_df = pipe.get_data(
        begin=begin,
        end=end,
        params=params,
        debug=debug,
    )
    if existing_df is None or len(existing_df) == 0:
        return True, "Deleted 0 rows."

    docs = existing_df.to_dict(orient='records')
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]
    for doc in docs:
        # Key without the table prefix: the member in the table's set.
        set_doc_key = get_document_key(doc, indices)
        # Key with the table prefix: the standalone full-document key.
        table_doc_key = get_document_key(doc, indices, table_name)
        try:
            # NOTE(review): datetime tables are treated as sorted sets (zrem)
            # and non-datetime tables as plain sets (srem) — presumably
            # mirroring how `push_docs` stores them; confirm the stored
            # member string matches `set_doc_key` exactly.
            if dt_col:
                self.client.zrem(table_name, set_doc_key)
            else:
                self.client.srem(table_name, set_doc_key)
            self.client.delete(table_doc_key)
        except Exception as e:
            return False, f"Failed to delete documents:\n{e}"
    msg = (
        f"Deleted {len(docs)} row"
        + ('s' if len(docs) != 1 else '')
        + '.'
    )
    return True, msg
|
691
|
+
|
692
|
+
|
693
|
+
def get_sync_time(
    self,
    pipe: mrsm.Pipe,
    newest: bool = True,
    **kwargs: Any
) -> Union[datetime, int, None]:
    """
    Return the newest (or oldest) timestamp in a pipe,
    or `None` if it cannot be determined.
    """
    from meerschaum.utils.dtypes import are_dtypes_equal
    dt_col = pipe.columns.get('datetime', None)
    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
    if not dt_col:
        return None

    dateutil_parser = mrsm.attempt_import('dateutil.parser')
    table_name = self.quote_table(pipe.target)
    try:
        # The sorted set is scored by the datetime column, so the first
        # member from either end is the extreme document.
        range_func = self.client.zrevrange if newest else self.client.zrange
        members = range_func(table_name, 0, 0)
        if not members:
            return None
        val = members[0]
    except Exception:
        return None

    dt_val = json.loads(val).get(dt_col, None)
    if dt_val is None:
        return None

    try:
        if are_dtypes_equal(dt_typ, 'int'):
            return int(dt_val)
        # Return a naive datetime for consistency with other instance connectors.
        return dateutil_parser.parse(str(dt_val)).replace(tzinfo=None)
    except Exception as e:
        warn(f"Failed to parse sync time for {pipe}:\n{e}")

    return None
|
737
|
+
|
738
|
+
|
739
|
+
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union[int, None]:
    """
    Return the number of documents in the pipe's set,
    `0` if the pipe does not exist, or `None` on failure.
    """
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)

    if not pipe.exists():
        return 0

    try:
        # Unfiltered counts come straight from the server.
        if begin is None and end is None and params is None:
            # NOTE(review): `llen` is a list operation, but the sibling
            # `clear_pipe` removes non-datetime members with `srem` (a set
            # operation) — one of the two looks inconsistent; verify against
            # how `push_docs` stores non-datetime tables (scard vs llen).
            return (
                self.client.zcard(table_name)
                if dt_col
                else self.client.llen(table_name)
            )
    except Exception:
        return None

    # Filtered counts fall back to materializing the matching rows.
    df = pipe.get_data(begin=begin, end=end, params=params, debug=debug)
    if df is None:
        return 0

    return len(df)
|
772
|
+
|
773
|
+
|
774
|
+
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Optional[List[Tuple[str, str, Optional[str]]]]:
    """
    Return the keys for the registered pipes,
    optionally filtered by keys, tags, and parameters.
    """
    from meerschaum.utils.dataframe import query_df
    from meerschaum.utils.misc import separate_negation_values

    try:
        df = self.read(PIPES_TABLE, debug=debug)
    except Exception:
        return []

    if df is None or len(df) == 0:
        return []

    # Build the registration-table filter from the provided keys.
    query = {}
    if connector_keys:
        query['connector_keys'] = [str(k) for k in connector_keys]
    if metric_keys:
        query['metric_key'] = [str(k) for k in metric_keys]
    if location_keys:
        query['location_key'] = [str(k) for k in location_keys]
    if params:
        query.update(params)

    df = query_df(df, query, inplace=True)

    keys = [
        (doc['connector_keys'], doc['metric_key'], doc['location_key'])
        for doc in df.to_dict(orient='records')
    ]
    if not tags:
        return keys

    # Each tag argument is a comma-separated group, split into
    # include-tags and exclude-tags (negated values).
    in_ex_tag_groups = [
        separate_negation_values(tag.split(','))
        for tag in tags
    ]

    def _matches(pipe_tags):
        # A pipe matches when, for every group, all include-tags are
        # present and no exclude-tag is.
        return all(
            all(tag in pipe_tags for tag in in_tags)
            and not any(tag in pipe_tags for tag in ex_tags)
            for in_tags, ex_tags in in_ex_tag_groups
        )

    return [
        (ck, mk, lk)
        for ck, mk, lk in keys
        if _matches(set(mrsm.Pipe(ck, mk, lk, instance=self).tags))
    ]
|