meerschaum 2.3.6__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. meerschaum/_internal/arguments/_parse_arguments.py +2 -5
  2. meerschaum/_internal/docs/index.py +3 -2
  3. meerschaum/_internal/entry.py +13 -7
  4. meerschaum/_internal/shell/Shell.py +38 -44
  5. meerschaum/_internal/term/TermPageHandler.py +2 -3
  6. meerschaum/_internal/term/__init__.py +13 -11
  7. meerschaum/actions/api.py +10 -7
  8. meerschaum/actions/bootstrap.py +38 -11
  9. meerschaum/actions/copy.py +3 -3
  10. meerschaum/actions/delete.py +4 -1
  11. meerschaum/actions/register.py +1 -3
  12. meerschaum/actions/stack.py +24 -19
  13. meerschaum/actions/start.py +38 -40
  14. meerschaum/actions/sync.py +53 -52
  15. meerschaum/api/__init__.py +48 -14
  16. meerschaum/api/_events.py +15 -10
  17. meerschaum/api/_oauth2.py +2 -2
  18. meerschaum/api/_websockets.py +5 -4
  19. meerschaum/api/dash/__init__.py +7 -16
  20. meerschaum/api/dash/callbacks/__init__.py +1 -0
  21. meerschaum/api/dash/callbacks/dashboard.py +52 -58
  22. meerschaum/api/dash/callbacks/jobs.py +15 -16
  23. meerschaum/api/dash/callbacks/login.py +16 -10
  24. meerschaum/api/dash/callbacks/pipes.py +41 -0
  25. meerschaum/api/dash/callbacks/plugins.py +1 -1
  26. meerschaum/api/dash/callbacks/register.py +15 -11
  27. meerschaum/api/dash/components.py +54 -59
  28. meerschaum/api/dash/jobs.py +5 -9
  29. meerschaum/api/dash/pages/__init__.py +1 -0
  30. meerschaum/api/dash/pages/pipes.py +19 -0
  31. meerschaum/api/dash/pipes.py +86 -58
  32. meerschaum/api/dash/plugins.py +6 -4
  33. meerschaum/api/dash/sessions.py +176 -0
  34. meerschaum/api/dash/users.py +3 -41
  35. meerschaum/api/dash/webterm.py +12 -17
  36. meerschaum/api/resources/static/js/terminado.js +1 -1
  37. meerschaum/api/routes/_actions.py +4 -20
  38. meerschaum/api/routes/_jobs.py +8 -7
  39. meerschaum/api/routes/_login.py +4 -4
  40. meerschaum/api/routes/_pipes.py +3 -3
  41. meerschaum/api/routes/_webterm.py +5 -6
  42. meerschaum/config/_default.py +15 -2
  43. meerschaum/config/_version.py +1 -1
  44. meerschaum/config/stack/__init__.py +64 -19
  45. meerschaum/config/static/__init__.py +4 -0
  46. meerschaum/connectors/{Connector.py → _Connector.py} +19 -13
  47. meerschaum/connectors/__init__.py +24 -14
  48. meerschaum/connectors/api/{APIConnector.py → _APIConnector.py} +3 -1
  49. meerschaum/connectors/api/__init__.py +2 -1
  50. meerschaum/connectors/parse.py +18 -16
  51. meerschaum/connectors/poll.py +30 -24
  52. meerschaum/connectors/sql/__init__.py +3 -1
  53. meerschaum/connectors/sql/_pipes.py +172 -197
  54. meerschaum/connectors/sql/_plugins.py +45 -43
  55. meerschaum/connectors/sql/_users.py +46 -38
  56. meerschaum/connectors/valkey/_ValkeyConnector.py +535 -0
  57. meerschaum/connectors/valkey/__init__.py +10 -0
  58. meerschaum/connectors/valkey/_fetch.py +75 -0
  59. meerschaum/connectors/valkey/_pipes.py +844 -0
  60. meerschaum/connectors/valkey/_plugins.py +265 -0
  61. meerschaum/connectors/valkey/_users.py +305 -0
  62. meerschaum/core/Pipe/__init__.py +3 -0
  63. meerschaum/core/Pipe/_attributes.py +1 -2
  64. meerschaum/core/Pipe/_clear.py +16 -13
  65. meerschaum/core/Pipe/_copy.py +106 -0
  66. meerschaum/core/Pipe/_data.py +165 -101
  67. meerschaum/core/Pipe/_drop.py +4 -4
  68. meerschaum/core/Pipe/_dtypes.py +14 -14
  69. meerschaum/core/Pipe/_edit.py +15 -14
  70. meerschaum/core/Pipe/_sync.py +134 -53
  71. meerschaum/core/Pipe/_verify.py +11 -11
  72. meerschaum/core/User/_User.py +14 -12
  73. meerschaum/jobs/_Job.py +1 -6
  74. meerschaum/jobs/__init__.py +7 -2
  75. meerschaum/plugins/_Plugin.py +17 -13
  76. meerschaum/utils/_get_pipes.py +14 -20
  77. meerschaum/utils/dataframe.py +291 -101
  78. meerschaum/utils/dtypes/__init__.py +31 -6
  79. meerschaum/utils/dtypes/sql.py +4 -4
  80. meerschaum/utils/formatting/_shell.py +5 -6
  81. meerschaum/utils/misc.py +3 -3
  82. meerschaum/utils/packages/__init__.py +14 -9
  83. meerschaum/utils/packages/_packages.py +2 -0
  84. meerschaum/utils/schedule.py +1 -0
  85. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/METADATA +7 -1
  86. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/RECORD +93 -84
  87. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/WHEEL +1 -1
  88. meerschaum/api/dash/actions.py +0 -255
  89. /meerschaum/connectors/sql/{SQLConnector.py → _SQLConnector.py} +0 -0
  90. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/LICENSE +0 -0
  91. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/NOTICE +0 -0
  92. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/entry_points.txt +0 -0
  93. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/top_level.txt +0 -0
  94. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,844 @@
1
+ #! /usr/bin/env python3
2
+ # vim:fenc=utf-8
3
+
4
+ """
5
+ Define pipes methods for `ValkeyConnector`.
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timezone
10
+
11
+ import meerschaum as mrsm
12
+ from meerschaum.utils.typing import SuccessTuple, Any, Union, Optional, Dict, List, Tuple
13
+ from meerschaum.utils.misc import json_serialize_datetime, string_to_dict
14
+ from meerschaum.utils.warnings import warn
15
+ from meerschaum.config.static import STATIC_CONFIG
16
+
17
+ PIPES_TABLE: str = 'mrsm_pipes'
18
+ PIPES_COUNTER: str = 'mrsm_pipes:counter'
19
+ COLON: str = STATIC_CONFIG['valkey']['colon']
20
+
21
+
22
def get_pipe_key(pipe: mrsm.Pipe) -> str:
    """
    Build the Valkey key under which a pipe's ID is stored.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose connector, metric, and location keys form the key.

    Returns
    -------
    The string `mrsm_pipe:<connector_keys>:<metric_key>:<location_key>`.
    """
    key_parts = (pipe.connector_keys, pipe.metric_key, pipe.location_key)
    return "mrsm_pipe:" + ":".join(f"{part}" for part in key_parts)
27
+
28
+
29
def get_pipe_parameters_key(pipe: mrsm.Pipe) -> str:
    """
    Build the Valkey key under which a pipe's parameters are stored.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose parameters key is requested.

    Returns
    -------
    The pipe's ID key (see `get_pipe_key()`) with a `:parameters` suffix.
    """
    pipe_key = get_pipe_key(pipe)
    return f"{pipe_key}:parameters"
34
+
35
+
36
def serialize_document(doc: Dict[str, Any]) -> str:
    """
    Serialize a document into a compact, key-sorted JSON string.

    Parameters
    ----------
    doc: Dict[str, Any]
        The document to be serialized.

    Returns
    -------
    The JSON text for the document, with keys sorted and no whitespace
    between separators.
    """
    def _fallback(value: Any) -> str:
        # Only consulted for values `json` cannot encode natively:
        # anything exposing `tzinfo` goes through the datetime serializer,
        # everything else is stringified.
        if hasattr(value, 'tzinfo'):
            return json_serialize_datetime(value)
        return str(value)

    return json.dumps(
        doc,
        sort_keys=True,
        separators=(',', ':'),
        default=_fallback,
    )
55
+
56
+
57
def get_document_key(
    doc: Dict[str, Any],
    indices: List[str],
    table_name: Optional[str] = None,
) -> str:
    """
    Build the key string identifying a document by its index values.

    Parameters
    ----------
    doc: Dict[str, Any]
        The document containing index values to be serialized.

    indices: List[str]
        The names of the index columns to include in the key.

    table_name: Optional[str], default None
        If provided (and `indices` is non-empty), the key is prefixed
        with `<table_name>:indices:`.

    Returns
    -------
    The sorted, comma-joined `<column><COLON><value>` pairs for the index
    columns present in `doc`. When `indices` is empty, the whole document
    is serialized instead (without any table prefix).
    """
    from meerschaum.utils.dtypes import coerce_timezone

    # Without indices there is nothing to key on, so the full document is used.
    if not indices:
        return serialize_document(doc)

    def _index_value_to_str(val: Any) -> str:
        # Datetimes are reduced to whole epoch seconds (stamped as UTC);
        # all other values are simply stringified.
        if isinstance(val, datetime):
            return str(int(coerce_timezone(val).replace(tzinfo=timezone.utc).timestamp()))
        return str(val)

    index_pairs = sorted(
        f'{key}{COLON}{_index_value_to_str(val)}'
        for key, val in doc.items()
        if key in indices
    )
    prefix = (table_name + ':indices:') if table_name else ''
    return prefix + ','.join(index_pairs)
99
+
100
+
101
def get_table_quoted_doc_key(
    table_name: str,
    doc: Dict[str, Any],
    indices: List[str],
    datetime_column: Optional[str] = None,
) -> str:
    """
    Return the JSON string pairing a document's table-prefixed key with
    its serialized contents (described by the original author as the
    document string stored in the underlying set).

    Parameters
    ----------
    table_name: str
        The table name to prepend to the document key.

    doc: Dict[str, Any]
        The document to be keyed and serialized.

    indices: List[str]
        The index columns used to build the document key.

    datetime_column: Optional[str], default None
        If provided, the document's value for this column (or `0` if
        absent) is included under the same name.

    Returns
    -------
    A compact, key-sorted JSON string.
    """
    def _fallback(value: Any) -> str:
        if hasattr(value, 'tzinfo'):
            return json_serialize_datetime(value)
        return str(value)

    payload = {
        get_document_key(doc, indices, table_name): serialize_document(doc),
    }
    if datetime_column:
        payload[datetime_column] = doc.get(datetime_column, 0)

    return json.dumps(
        payload,
        default=_fallback,
        separators=(',', ':'),
        sort_keys=True,
    )
123
+
124
+
125
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Register a pipe: assign it a new ID, record its keys in the internal
    pipes table, and store its parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be registered.

    Returns
    -------
    A `SuccessTuple` of the result. Registration fails if the pipe's key
    already holds an ID.
    """
    connector_keys = str(pipe.connector_keys)
    metric_key = str(pipe.metric_key)
    location_key = str(pipe.location_key)
    parameters_str = json.dumps(
        pipe._attributes.get('parameters', {}),
        separators=(',', ':'),
    )

    pipe_key = get_pipe_key(pipe)
    parameters_key = get_pipe_parameters_key(pipe)

    try:
        if self.get(pipe_key) is not None:
            return False, f"{pipe} is already registered."

        # The counter key hands out the next pipe ID.
        new_pipe_id = self.client.incr(PIPES_COUNTER)
        registration_doc = {
            'pipe_id': new_pipe_id,
            'connector_keys': connector_keys,
            'metric_key': metric_key,
            'location_key': location_key,
        }
        self.push_docs(
            [registration_doc],
            PIPES_TABLE,
            datetime_column='pipe_id',
            debug=debug,
        )
        self.set(pipe_key, new_pipe_id)
        self.set(parameters_key, parameters_str)
    except Exception as err:
        return False, f"Failed to register {pipe}:\n{err}"

    return True, "Success"
175
+
176
+
177
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[str, int, None]:
    """
    Look up the ID stored under the pipe's key.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose ID to fetch.

    Returns
    -------
    The stored ID cast to `int`, or `None` if the lookup or the cast fails
    (e.g. nothing is stored under the pipe's key).
    """
    pipe_key = get_pipe_key(pipe)
    try:
        stored_id = self.get(pipe_key)
        return int(stored_id)
    except Exception:
        # Any failure is treated as "not registered".
        return None
201
+
202
+
203
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Assemble the pipe's attributes from its keys, ID, and stored parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose attributes should be retrieved.

    Returns
    -------
    A dictionary with `connector_keys`, `metric_key`, `location_key`,
    `parameters`, and `pipe_id`, or an empty dictionary if the pipe has no ID.
    """
    pipe_id = pipe.get_id(debug=debug)
    if pipe_id is None:
        return {}

    raw_parameters = self.get(get_pipe_parameters_key(pipe))
    # A missing or empty parameters value is treated as no parameters.
    parameters = json.loads(raw_parameters) if raw_parameters else {}

    return {
        'connector_keys': pipe.connector_keys,
        'metric_key': pipe.metric_key,
        'location_key': pipe.location_key,
        'parameters': parameters,
        'pipe_id': pipe_id,
    }
238
+
239
+
240
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Persist the pipe's in-memory parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose in-memory parameters must be persisted.

    Returns
    -------
    A `SuccessTuple` indicating success. Fails if the pipe has no ID
    or if the parameters cannot be serialized or stored.
    """
    pipe_id = pipe.get_id(debug=debug)
    if pipe_id is None:
        return False, f"{pipe} is not registered."

    parameters_key = get_pipe_parameters_key(pipe)
    # Report serialization and storage failures through the SuccessTuple
    # (as `register_pipe()` and `drop_pipe()` do) instead of raising.
    try:
        parameters_str = json.dumps(pipe.parameters, separators=(',', ':'))
        self.set(parameters_key, parameters_str)
    except Exception as e:
        return False, f"Failed to edit {pipe}:\n{e}"

    return True, "Success"
266
+
267
+
268
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> bool:
    """
    Check whether a key exists for the pipe's target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to check whether its table exists.

    Returns
    -------
    `True` if the client reports a non-zero result for the quoted target name.
    """
    quoted_target = self.quote_table(pipe.target)
    key_count = self.client.exists(quoted_target)
    return key_count != 0
288
+
289
+
290
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop the pipe's target table and reset its recorded Valkey dtypes.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    try:
        self.drop_table(pipe.target, debug=debug)
    except Exception as err:
        return False, f"Failed to drop {pipe}:\n{err}"

    if 'valkey' in pipe.parameters:
        # The table is gone, so the dtypes recorded for it no longer apply.
        pipe.parameters['valkey']['dtypes'] = {}
        if not pipe.temporary:
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                return edit_success, edit_msg

    return True, "Success"
323
+
324
+
325
def delete_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop the pipe and remove its registration (ID key, parameters key,
    and its document in the internal pipes table).

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be deleted.

    Returns
    -------
    A `SuccessTuple` indicating success. Fails if the drop fails, if the
    pipe has no ID, or if removing the registration raises.
    """
    drop_success, drop_message = pipe.drop(debug=debug)
    if not drop_success:
        return drop_success, drop_message

    pipe_id = self.get_pipe_id(pipe, debug=debug)
    if pipe_id is None:
        return False, f"{pipe} is not registered."

    pipe_key = get_pipe_key(pipe)
    parameters_key = get_pipe_parameters_key(pipe)
    try:
        self.client.delete(pipe_key)
        self.client.delete(parameters_key)

        df = self.read(PIPES_TABLE, params={'pipe_id': pipe_id})
        # `read()` may yield `None` (as guarded in `fetch_pipes_keys()`).
        docs = df.to_dict(orient='records') if df is not None else []
        if docs:
            # Re-serialize the first matching document with the shared
            # serializer and remove that member from the pipes table.
            self.client.zrem(PIPES_TABLE, serialize_document(docs[0]))
    except Exception as e:
        return False, f"Failed to delete {pipe}:\n{e}"

    return True, "Success"
367
+
368
+
369
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union['pd.DataFrame', None]:
    """
    Read a pipe's documents and return them as a DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns.

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, int, None], default None
        The earliest `datetime` value to search from (inclusive).

    end: Union[datetime, int, None], default None
        The latest `datetime` value to search from (exclusive).

    params: Optional[Dict[str, str]], default None
        Additional filters to apply to the query.

    Returns
    -------
    The target table's data as a DataFrame, or `None` if the pipe
    does not exist.
    """
    if not pipe.exists(debug=debug):
        return None

    from meerschaum.utils.dataframe import query_df, parse_df_datetimes

    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]

    # The target table holds index documents; each one's `ix` string is
    # parsed back into a dictionary of index values.
    ix_docs = []
    for ix_row in self.read_docs(pipe.target, begin=begin, end=end, debug=debug):
        ix_docs.append(string_to_dict(ix_row['ix'].replace(COLON, ':')))

    # The full documents live under their own table-prefixed keys.
    try:
        docs_strings = [
            self.get(get_document_key(ix_doc, indices, table_name))
            for ix_doc in ix_docs
        ]
    except Exception as e:
        warn(f"Failed to fetch documents for {pipe}:\n{e}")
        docs_strings = []

    docs = [json.loads(doc_str) for doc_str in docs_strings if doc_str]

    # Only columns with a datetime dtype should be parsed as datetimes.
    ignore_dt_cols = [
        col
        for col, dtype in pipe.dtypes.items()
        if 'datetime' not in str(dtype)
    ]
    df = parse_df_datetimes(
        docs,
        ignore_cols=ignore_dt_cols,
        chunksize=kwargs.get('chunksize', None),
        debug=debug,
    )

    # Best-effort cast to the recorded dtypes; failures are skipped.
    for col, typ in valkey_dtypes.items():
        try:
            df[col] = df[col].astype(typ)
        except Exception:
            pass

    df = pipe.enforce_dtypes(df, debug=debug)

    # An empty frame only needs its columns selected; otherwise apply
    # the row filters as well.
    filter_kwargs = (
        {}
        if len(df) == 0
        else {
            'params': params,
            'begin': begin,
            'end': end,
            'datetime_column': dt_col,
            'inplace': True,
            'reset_index': True,
        }
    )
    return query_df(
        df,
        select_columns=select_columns,
        omit_columns=omit_columns,
        **filter_kwargs
    )
476
+
477
+
478
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame' = None,
    check_existing: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Upsert new documents into the pipe's table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose table should receive the new documents.

    df: Union['pd.DataFrame', Iterator['pd.DataFrame']], default None
        The data to be synced. Passing `None` fails with a message
        rather than raising.

    check_existing: bool, default True
        If `False`, do not check the documents against existing data and instead insert directly.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    # `df` defaults to `None`, which would otherwise raise on `df.__module__`.
    if df is None:
        return False, f"DataFrame is None. Cannot sync {pipe}."

    dt_col = pipe.columns.get('datetime', None)
    indices = [col for col in pipe.columns.values() if col]
    table_name = self.quote_table(pipe.target)
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = df.compute()
    upsert = pipe.parameters.get('upsert', False)

    def _serialize_indices_docs(_docs):
        """Build the index documents (`ix` plus the datetime value) pushed to the table."""
        return [
            {
                'ix': get_document_key(doc, indices),
                **(
                    {
                        dt_col: doc.get(dt_col, 0)
                    }
                    if dt_col
                    else {}
                )
            }
            for doc in _docs
        ]

    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    new_dtypes = {
        str(key): str(val)
        for key, val in df.dtypes.items()
        if str(key) not in valkey_dtypes
    }
    # Iterate over a snapshot because `valkey_dtypes` is modified in the loop.
    for col, typ in list(valkey_dtypes.items()):
        if col in df.columns:
            try:
                df[col] = df[col].astype(typ)
            except Exception:
                # The recorded dtype no longer fits: fall back to strings
                # and record the change so it gets persisted below.
                valkey_dtypes[col] = 'string'
                new_dtypes[col] = 'string'
                df[col] = df[col].astype('string')

    if new_dtypes:
        valkey_dtypes.update(new_dtypes)
        if 'valkey' not in pipe.parameters:
            pipe.parameters['valkey'] = {}
        pipe.parameters['valkey']['dtypes'] = valkey_dtypes
        if not pipe.temporary:
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                return edit_success, edit_msg

    unseen_df, update_df, delta_df = (
        pipe.filter_existing(df, include_unchanged_columns=True, debug=debug)
        if check_existing and not upsert
        else (df, None, df)
    )
    num_insert = len(unseen_df) if unseen_df is not None else 0
    num_update = len(update_df) if update_df is not None else 0
    msg = (
        f"Inserted {num_insert}, updated {num_update} rows."
        if not upsert
        else f"Upserted {num_insert} rows."
    )
    if len(delta_df) == 0:
        return True, msg

    # New documents: store each full document under its table-prefixed key,
    # then push the index documents to the table itself.
    unseen_docs = unseen_df.to_dict(orient='records')
    unseen_indices_docs = _serialize_indices_docs(unseen_docs)
    unseen_ix_vals = {
        get_document_key(doc, indices, table_name): serialize_document(doc)
        for doc in unseen_docs
    }
    for key, val in unseen_ix_vals.items():
        try:
            self.set(key, val)
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    try:
        self.push_docs(
            unseen_indices_docs,
            pipe.target,
            datetime_column=dt_col,
            debug=debug,
        )
    except Exception as e:
        return False, f"Failed to push docs to '{pipe.target}':\n{e}"

    # Updated documents: merge the new values over the stored document.
    update_docs = update_df.to_dict(orient='records') if update_df is not None else []
    update_ix_docs = {
        get_document_key(doc, indices, table_name): doc
        for doc in update_docs
    }
    for key, doc in update_ix_docs.items():
        try:
            old_doc = json.loads(self.get(key))
            old_doc.update(doc)
            self.set(key, serialize_document(old_doc))
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    return True, msg
603
+
604
+
605
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, str]:
    """
    Map the pipe's recorded Valkey dtypes to database types for data type enforcement.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose parameters hold the recorded column dtypes.

    Returns
    -------
    A dictionary mapping columns to data types
    (empty if the pipe does not exist).
    """
    if not pipe.exists(debug=debug):
        return {}

    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    recorded_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    columns_types = {}
    for col, typ in recorded_dtypes.items():
        columns_types[col] = get_db_type_from_pd_type(typ)
    return columns_types
631
+
632
+
633
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Delete the rows matching `begin`, `end`, and `params`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows to clear.

    begin: Union[datetime, int, None], default None
        If provided, remove rows >= `begin`.

    end: Union[datetime, int, None], default None
        If provided, remove rows < `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, only remove rows which match the `params` filter.

    Returns
    -------
    A `SuccessTuple` indicating success. With no filters at all,
    the pipe is dropped instead.
    """
    no_filters = begin is None and end is None and params is None
    if no_filters:
        return self.drop_pipe(pipe, debug=debug)

    dt_col = pipe.columns.get('datetime', None)

    existing_df = pipe.get_data(begin=begin, end=end, params=params, debug=debug)
    if existing_df is None or len(existing_df) == 0:
        return True, "Deleted 0 rows."

    docs = existing_df.to_dict(orient='records')
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]

    for doc in docs:
        member_key = get_document_key(doc, indices)
        stored_doc_key = get_document_key(doc, indices, table_name)
        try:
            # Tables with a datetime column are addressed as sorted sets,
            # the others as plain sets.
            remove_member = self.client.zrem if dt_col else self.client.srem
            remove_member(table_name, member_key)
            self.client.delete(stored_doc_key)
        except Exception as err:
            return False, f"Failed to delete documents:\n{err}"

    num_deleted = len(docs)
    plural_suffix = '' if num_deleted == 1 else 's'
    return True, f"Deleted {num_deleted} row{plural_suffix}."
696
+
697
+
698
def get_sync_time(
    self,
    pipe: mrsm.Pipe,
    newest: bool = True,
    **kwargs: Any
) -> Union[datetime, int, None]:
    """
    Return the newest (or oldest) timestamp in a pipe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose sync time to read.

    newest: bool, default True
        If `True`, read the last member of the pipe's sorted set;
        otherwise read the first.

    Returns
    -------
    An `int` when the datetime column's dtype is an integer type, otherwise
    a timezone-naive `datetime`. Returns `None` if the pipe has no datetime
    column, the set is empty or unreadable, or the value cannot be parsed.
    """
    from meerschaum.utils.dtypes import are_dtypes_equal

    dt_col = pipe.columns.get('datetime', None)
    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
    if not dt_col:
        return None

    dateutil_parser = mrsm.attempt_import('dateutil.parser')
    table_name = self.quote_table(pipe.target)
    try:
        fetch_edge = self.client.zrevrange if newest else self.client.zrange
        edge_members = fetch_edge(table_name, 0, 0)
        if not edge_members:
            return None
        edge_member = edge_members[0]
    except Exception:
        return None

    dt_val = json.loads(edge_member).get(dt_col, None)
    if dt_val is None:
        return None

    try:
        if are_dtypes_equal(dt_typ, 'int'):
            return int(dt_val)
        # Drop any timezone info so a naive datetime is returned.
        return dateutil_parser.parse(str(dt_val)).replace(tzinfo=None)
    except Exception as e:
        warn(f"Failed to parse sync time for {pipe}:\n{e}")

    return None
742
+
743
+
744
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union[int, None]:
    """
    Return the number of documents in the pipe's set.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose documents to count.

    begin: Union[datetime, int, None], default None
        If provided, only count rows >= `begin`.

    end: Union[datetime, int, None], default None
        If provided, only count rows < `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, only count rows which match the `params` filter.

    Returns
    -------
    The row count, `0` if the pipe does not exist,
    or `None` if the unfiltered count could not be read.
    """
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)

    if not pipe.exists(debug=debug):
        return 0

    if begin is None and end is None and params is None:
        # Unfiltered: ask the server for the cardinality directly.
        # Tables with a datetime column are sorted sets (ZCARD). The others
        # are removed from with SREM in `clear_pipe()`, i.e. they are plain
        # sets, so SCARD is the matching count (LLEN is a list command).
        try:
            return (
                self.client.zcard(table_name)
                if dt_col
                else self.client.scard(table_name)
            )
        except Exception:
            return None

    # Filtered: read the matching rows and count them.
    df = pipe.get_data(begin=begin, end=end, params=params, debug=debug)
    if df is None:
        return 0

    return len(df)
777
+
778
+
779
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Optional[List[Tuple[str, str, Optional[str]]]]:
    """
    Return the keys for the registered pipes.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        If provided, only return pipes with these connector keys.

    metric_keys: Optional[List[str]], default None
        If provided, only return pipes with these metric keys.

    location_keys: Optional[List[str]], default None
        If provided, only return pipes with these location keys.

    tags: Optional[List[str]], default None
        If provided, only return pipes satisfying every tag group.
        Each entry is a comma-separated group of tags.

    params: Optional[Dict[str, Any]], default None
        Additional filters merged into the query on the pipes table.

    Returns
    -------
    A list of `(connector_keys, metric_key, location_key)` tuples.
    An empty list is returned if the pipes table cannot be read or is empty.
    """
    from meerschaum.utils.dataframe import query_df
    from meerschaum.utils.misc import separate_negation_values

    try:
        df = self.read(PIPES_TABLE, debug=debug)
    except Exception:
        return []

    if df is None or len(df) == 0:
        return []

    # Only the key filters which were actually provided become query terms.
    query = {
        column: [str(k) for k in values]
        for column, values in (
            ('connector_keys', connector_keys),
            ('metric_key', metric_keys),
            ('location_key', location_keys),
        )
        if values
    }
    if params:
        query.update(params)

    df = query_df(df, query, inplace=True)

    keys = [
        (doc['connector_keys'], doc['metric_key'], doc['location_key'])
        for doc in df.to_dict(orient='records')
    ]
    if not tags:
        return keys

    tag_groups = [tag.split(',') for tag in tags]
    in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]

    def _satisfies_all_groups(pipe_tags):
        # A pipe qualifies only if, for every group, it carries all of the
        # group's included tags and none of its excluded tags.
        return all(
            all(tag in pipe_tags for tag in in_tags)
            and not any(tag in pipe_tags for tag in ex_tags)
            for in_tags, ex_tags in in_ex_tag_groups
        )

    filtered_keys = []
    for ck, mk, lk in keys:
        pipe = mrsm.Pipe(ck, mk, lk, instance=self)
        if _satisfies_all_groups(set(pipe.tags)):
            filtered_keys.append((ck, mk, lk))

    return filtered_keys