meerschaum 2.3.6__py3-none-any.whl → 2.4.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. meerschaum/actions/bootstrap.py +36 -10
  2. meerschaum/actions/copy.py +3 -3
  3. meerschaum/actions/start.py +13 -14
  4. meerschaum/api/dash/__init__.py +7 -6
  5. meerschaum/api/dash/callbacks/__init__.py +1 -0
  6. meerschaum/api/dash/callbacks/dashboard.py +7 -5
  7. meerschaum/api/dash/callbacks/pipes.py +42 -0
  8. meerschaum/api/dash/pages/__init__.py +1 -0
  9. meerschaum/api/dash/pages/pipes.py +16 -0
  10. meerschaum/api/dash/pipes.py +79 -47
  11. meerschaum/api/dash/users.py +19 -6
  12. meerschaum/api/routes/_login.py +4 -4
  13. meerschaum/api/routes/_pipes.py +3 -3
  14. meerschaum/config/_default.py +9 -1
  15. meerschaum/config/_version.py +1 -1
  16. meerschaum/config/stack/__init__.py +59 -16
  17. meerschaum/connectors/Connector.py +19 -13
  18. meerschaum/connectors/__init__.py +9 -5
  19. meerschaum/connectors/poll.py +30 -24
  20. meerschaum/connectors/sql/_pipes.py +126 -154
  21. meerschaum/connectors/sql/_plugins.py +45 -43
  22. meerschaum/connectors/sql/_users.py +46 -38
  23. meerschaum/connectors/valkey/ValkeyConnector.py +535 -0
  24. meerschaum/connectors/valkey/__init__.py +8 -0
  25. meerschaum/connectors/valkey/_fetch.py +75 -0
  26. meerschaum/connectors/valkey/_pipes.py +839 -0
  27. meerschaum/connectors/valkey/_plugins.py +265 -0
  28. meerschaum/connectors/valkey/_users.py +305 -0
  29. meerschaum/core/Pipe/__init__.py +3 -0
  30. meerschaum/core/Pipe/_attributes.py +1 -2
  31. meerschaum/core/Pipe/_clear.py +16 -13
  32. meerschaum/core/Pipe/_copy.py +106 -0
  33. meerschaum/core/Pipe/_drop.py +4 -4
  34. meerschaum/core/Pipe/_dtypes.py +14 -14
  35. meerschaum/core/Pipe/_edit.py +15 -14
  36. meerschaum/core/Pipe/_sync.py +134 -51
  37. meerschaum/core/Pipe/_verify.py +11 -11
  38. meerschaum/core/User/_User.py +14 -12
  39. meerschaum/plugins/_Plugin.py +17 -13
  40. meerschaum/utils/_get_pipes.py +14 -20
  41. meerschaum/utils/dataframe.py +288 -101
  42. meerschaum/utils/dtypes/__init__.py +31 -6
  43. meerschaum/utils/dtypes/sql.py +4 -4
  44. meerschaum/utils/misc.py +3 -3
  45. meerschaum/utils/packages/_packages.py +1 -0
  46. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/METADATA +3 -1
  47. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/RECORD +53 -44
  48. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/WHEEL +1 -1
  49. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/LICENSE +0 -0
  50. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/NOTICE +0 -0
  51. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/entry_points.txt +0 -0
  52. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/top_level.txt +0 -0
  53. {meerschaum-2.3.6.dist-info → meerschaum-2.4.0.dev1.dist-info}/zip-safe +0 -0
@@ -0,0 +1,839 @@
1
+ #! /usr/bin/env python3
2
+ # vim:fenc=utf-8
3
+
4
+ """
5
+ Define pipes methods for `ValkeyConnector`.
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timedelta, timezone
10
+
11
+ import meerschaum as mrsm
12
+ from meerschaum.utils.typing import SuccessTuple, Any, Union, Optional, Dict, List, Tuple
13
+ from meerschaum.utils.misc import json_serialize_datetime, string_to_dict
14
+ from meerschaum.utils.warnings import dprint, warn
15
+
16
# Name of the internal table to which pipe registration documents are pushed.
PIPES_TABLE: str = 'mrsm_pipes'
# Key of the counter which is incremented to allocate each new `pipe_id`.
PIPES_COUNTER: str = 'mrsm_pipes:counter'
18
+
19
+
20
def get_pipe_key(pipe: mrsm.Pipe) -> str:
    """
    Build the key under which a pipe's ID is stored.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose connector, metric, and location keys make up the key.

    Returns
    -------
    A string of the form `mrsm_pipe:<connector_keys>:<metric_key>:<location_key>`.
    """
    return "mrsm_pipe:{}:{}:{}".format(
        pipe.connector_keys,
        pipe.metric_key,
        pipe.location_key,
    )
25
+
26
+
27
def get_pipe_parameters_key(pipe: mrsm.Pipe) -> str:
    """
    Build the key under which a pipe's parameters are stored.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose parameters key should be built.

    Returns
    -------
    The pipe's ID key (see `get_pipe_key()`) with `:parameters` appended.
    """
    base_key = get_pipe_key(pipe)
    return base_key + ':parameters'
32
+
33
+
34
def serialize_document(doc: Dict[str, Any]) -> str:
    """
    Serialize a document into a compact JSON string with sorted keys.

    Parameters
    ----------
    doc: Dict[str, Any]
        The document to be serialized.

    Returns
    -------
    A serialized string for the document.
    """
    def _fallback(value):
        # Only invoked by `json.dumps` for values it cannot encode natively:
        # values exposing `tzinfo` go through `json_serialize_datetime`,
        # everything else is stringified.
        if hasattr(value, 'tzinfo'):
            return json_serialize_datetime(value)
        return str(value)

    return json.dumps(
        doc,
        sort_keys=True,
        separators=(',', ':'),
        default=_fallback,
    )
53
+
54
+
55
def get_document_key(
    doc: Dict[str, Any],
    indices: List[str],
    table_name: Optional[str] = None,
) -> str:
    """
    Build a string key from a document's index values.

    Parameters
    ----------
    doc: Dict[str, Any]
        The document containing index values to be serialized.

    indices: List[str]
        The names of the index columns to include in the key.

    table_name: Optional[str], default None
        If provided (and `indices` is non-empty), prefix the key
        with `<table_name>:indices:`.

    Returns
    -------
    The sorted, comma-joined `column:value` pairs for the document's indices.
    If `indices` is empty, the whole serialized document is returned instead
    (without any table prefix).
    """
    from meerschaum.utils.dtypes import coerce_timezone

    # No indices: the entire document acts as its own key.
    if not indices:
        return serialize_document(doc)

    def _stringify(value):
        # Datetimes are reduced to whole-second epoch timestamps.
        if isinstance(value, datetime):
            utc_value = coerce_timezone(value).replace(tzinfo=timezone.utc)
            return str(int(utc_value.timestamp()))
        return str(value)

    pairs = [
        f'{column}:{_stringify(value)}'
        for column, value in doc.items()
        if column in indices
    ]
    prefix = (table_name + ':indices:') if table_name else ''
    return prefix + ','.join(sorted(pairs))
97
+
98
+
99
def get_table_quoted_doc_key(
    table_name: str,
    doc: Dict[str, Any],
    indices: List[str],
    datetime_column: Optional[str] = None,
) -> str:
    """
    Serialize a JSON object which maps the document's table-qualified key
    to the serialized document (presumably mirroring a member of the
    underlying set — confirm against the code which writes the set).

    Parameters
    ----------
    table_name: str
        The table name used to prefix the document's key.

    doc: Dict[str, Any]
        The document to be serialized.

    indices: List[str]
        The names of the index columns used to build the document's key.

    datetime_column: Optional[str], default None
        If provided, also include this column's value from `doc`
        (defaulting to `0` when missing).

    Returns
    -------
    A compact JSON string with sorted keys.
    """
    payload = {
        get_document_key(doc, indices, table_name): serialize_document(doc),
    }
    if datetime_column:
        payload[datetime_column] = doc.get(datetime_column, 0)

    def _fallback(value):
        if hasattr(value, 'tzinfo'):
            return json_serialize_datetime(value)
        return str(value)

    return json.dumps(
        payload,
        default=_fallback,
        separators=(',', ':'),
        sort_keys=True,
    )
121
+
122
+
123
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Register a pipe: allocate an ID, push its keys to the internal pipes table,
    and store its ID and parameters under the pipe's keys.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be registered.

    Returns
    -------
    A `SuccessTuple` of the result.
    """
    pipe_key = get_pipe_key(pipe)
    parameters_key = get_pipe_parameters_key(pipe)
    attributes = {
        'connector_keys': str(pipe.connector_keys),
        'metric_key': str(pipe.metric_key),
        'location_key': str(pipe.location_key),
    }
    parameters_str = json.dumps(
        pipe._attributes.get('parameters', {}),
        separators=(',', ':'),
    )

    try:
        if self.get(pipe_key) is not None:
            return False, f"{pipe} is already registered."

        # The counter provides the new pipe's ID.
        pipe_id = self.client.incr(PIPES_COUNTER)
        registration_doc = {'pipe_id': pipe_id}
        registration_doc.update(attributes)
        self.push_docs(
            [registration_doc],
            PIPES_TABLE,
            datetime_column='pipe_id',
            debug=debug,
        )
        self.set(pipe_key, pipe_id)
        self.set(parameters_key, parameters_str)
    except Exception as e:
        return False, f"Failed to register {pipe}:\n{e}"

    return True, "Success"
173
+
174
+
175
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[str, int, None]:
    """
    Return the ID stored under the pipe's key, if any.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose ID to fetch.

    Returns
    -------
    The pipe's ID as an `int`, or `None` if the lookup or conversion fails.
    """
    pipe_key = get_pipe_key(pipe)
    try:
        pipe_id = int(self.get(pipe_key))
    except Exception:
        # Best-effort: any failure (e.g. a value which cannot be cast to `int`)
        # is treated as "no ID".
        pipe_id = None
    return pipe_id
199
+
200
+
201
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Assemble the pipe's attributes from its keys, ID, and stored parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose attributes should be retrieved.

    Returns
    -------
    A dictionary with the pipe's keys, `parameters`, and `pipe_id`,
    or an empty dictionary if the pipe has no ID.
    """
    pipe_id = pipe.get_id(debug=debug)
    if pipe_id is None:
        return {}

    stored_parameters = self.get(get_pipe_parameters_key(pipe))
    if stored_parameters:
        parameters = json.loads(stored_parameters)
    else:
        parameters = {}

    return {
        'connector_keys': pipe.connector_keys,
        'metric_key': pipe.metric_key,
        'location_key': pipe.location_key,
        'parameters': parameters,
        'pipe_id': pipe_id,
    }
236
+
237
+
238
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Persist the pipe's in-memory parameters under its parameters key.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose in-memory parameters must be persisted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    if pipe.get_id(debug=debug) is None:
        return False, f"{pipe} is not registered."

    parameters_key = get_pipe_parameters_key(pipe)
    serialized_parameters = json.dumps(pipe.parameters, separators=(',', ':'))
    self.set(parameters_key, serialized_parameters)
    return True, "Success"
264
+
265
+
266
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> bool:
    """
    Check whether a key exists for the pipe's (quoted) target table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to check whether its table exists.

    Returns
    -------
    A `bool` indicating the table exists.
    """
    quoted_target = self.quote_table(pipe.target)
    existing_count = self.client.exists(quoted_target)
    return existing_count != 0
286
+
287
+
288
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop the pipe's target table and reset its stored `valkey` dtypes.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    try:
        self.drop_table(pipe.target, debug=debug)
    except Exception as e:
        return False, f"Failed to drop {pipe}:\n{e}"

    if 'valkey' in pipe.parameters:
        # The table is gone, so the remembered dtypes no longer apply.
        pipe.parameters['valkey']['dtypes'] = {}
        if not pipe.temporary:
            edited, edit_message = pipe.edit(debug=debug)
            if not edited:
                return edited, edit_message

    return True, "Success"
321
+
322
+
323
def delete_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop the pipe and remove its registration (ID key, parameters key,
    and its document in the internal pipes table).

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be deleted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    dropped, drop_message = pipe.drop(debug=debug)
    if not dropped:
        return dropped, drop_message

    pipe_id = self.get_pipe_id(pipe, debug=debug)
    if pipe_id is None:
        return False, f"{pipe} is not registered."

    registration_keys = (get_pipe_key(pipe), get_pipe_parameters_key(pipe))
    for registration_key in registration_keys:
        self.client.delete(registration_key)

    registry_df = self.read(PIPES_TABLE, params={'pipe_id': pipe_id})
    matching_docs = registry_df.to_dict(orient='records')
    if not matching_docs:
        return True, "Success"

    # Re-serialize the registration document so it can be removed from the sorted set.
    member = serialize_document(matching_docs[0])
    self.client.zrem(PIPES_TABLE, member)
    return True, "Success"
365
+
366
+
367
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union['pd.DataFrame', None]:
    """
    Read a pipe's documents and return them as a filtered DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, int, None], default None
        The earliest `datetime` value to search from (inclusive).

    end: Union[datetime, int, None], default None
        The latest `datetime` value to search from (exclusive).

    params: Optional[Dict[str, str]], default None
        Additional filters to apply to the query.

    Returns
    -------
    The target table's data as a DataFrame, or `None` if the pipe does not exist.
    """
    if not pipe.exists(debug=debug):
        return None

    from meerschaum.utils.dataframe import query_df, parse_df_datetimes

    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]

    # The target table holds index documents; each `ix` value is parsed
    # back into a dictionary from which the full document's key is rebuilt.
    index_docs = self.read_docs(
        pipe.target,
        begin=begin,
        end=end,
        debug=debug,
    )
    ix_docs = [string_to_dict(index_doc['ix']) for index_doc in index_docs]

    fetched_strings = []
    try:
        for ix_doc in ix_docs:
            document_key = get_document_key(ix_doc, indices, table_name)
            fetched_strings.append(self.get(document_key))
    except Exception as e:
        warn(f"Failed to fetch documents for {pipe}:\n{e}")
        # Discard partial results on failure.
        fetched_strings = []

    docs = [json.loads(fetched) for fetched in fetched_strings if fetched]
    ignore_dt_cols = [
        col
        for col, dtype in pipe.dtypes.items()
        if 'datetime' not in str(dtype)
    ]

    df = parse_df_datetimes(
        docs,
        ignore_cols=ignore_dt_cols,
        chunksize=kwargs.get('chunksize', None),
        debug=debug,
    )
    for col, typ in valkey_dtypes.items():
        # Best-effort cast to the remembered dtypes; failures are ignored.
        try:
            df[col] = df[col].astype(typ)
        except Exception:
            pass

    df = pipe.enforce_dtypes(df, debug=debug)

    query_kwargs = {
        'select_columns': select_columns,
        'omit_columns': omit_columns,
    }
    if len(df) != 0:
        # Row filters are only applied when there are rows to filter.
        query_kwargs.update(
            params=params,
            begin=begin,
            end=end,
            datetime_column=dt_col,
            inplace=True,
            reset_index=True,
        )
    return query_df(df, **query_kwargs)
474
+
475
+
476
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame' = None,
    check_existing: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Upsert new documents into the pipe's collection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose collection should receive the new documents.

    df: Union['pd.DataFrame', Iterator['pd.DataFrame']], default None
        The data to be synced. If `None`, nothing is synced and a failure tuple is returned.

    check_existing: bool, default True
        If `False`, do not check the documents against existing data and instead insert directly.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    # `df` defaults to `None`; fail gracefully rather than raising below.
    if df is None:
        return False, f"DataFrame is None. Cannot sync {pipe}."

    dt_col = pipe.columns.get('datetime', None)
    indices = [col for col in pipe.columns.values() if col]
    table_name = self.quote_table(pipe.target)
    is_dask = 'dask' in df.__module__
    if is_dask:
        df = df.compute()

    def _serialize_indices_docs(_docs):
        # Build the index documents pushed to the pipe's target table:
        # the un-prefixed document key plus the datetime value (if any).
        return [
            {
                'ix': get_document_key(doc, indices),
                **(
                    {
                        dt_col: doc.get(dt_col, 0)
                    }
                    if dt_col
                    else {}
                )
            }
            for doc in _docs
        ]

    valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    new_dtypes = {
        str(key): str(val)
        for key, val in df.dtypes.items()
        if str(key) not in valkey_dtypes
    }
    # Iterate over a snapshot because `valkey_dtypes` is modified in the loop.
    for col, typ in list(valkey_dtypes.items()):
        if col not in df.columns:
            continue
        try:
            df[col] = df[col].astype(typ)
        except Exception:
            # Fall back to strings when the remembered dtype no longer fits.
            valkey_dtypes[col] = 'string'
            new_dtypes[col] = 'string'
            df[col] = df[col].astype('string')

    if new_dtypes:
        valkey_dtypes.update(new_dtypes)
        if 'valkey' not in pipe.parameters:
            pipe.parameters['valkey'] = {}
        pipe.parameters['valkey']['dtypes'] = valkey_dtypes
        if not pipe.temporary:
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                return edit_success, edit_msg

    unseen_df, update_df, delta_df = (
        pipe.filter_existing(df, include_unchanged_columns=True, debug=debug)
        if check_existing
        else (df, None, df)
    )
    num_insert = len(unseen_df) if unseen_df is not None else 0
    num_update = len(update_df) if update_df is not None else 0
    msg = f"Inserted {num_insert}, updated {num_update} rows."
    if delta_df is None or len(delta_df) == 0:
        return True, msg

    # Guard against `None` here as well, consistent with the counts above.
    unseen_docs = unseen_df.to_dict(orient='records') if unseen_df is not None else []
    unseen_indices_docs = _serialize_indices_docs(unseen_docs)
    unseen_ix_vals = {
        get_document_key(doc, indices, table_name): serialize_document(doc)
        for doc in unseen_docs
    }
    for key, val in unseen_ix_vals.items():
        try:
            self.set(key, val)
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    try:
        self.push_docs(
            unseen_indices_docs,
            pipe.target,
            datetime_column=dt_col,
            debug=debug,
        )
    except Exception as e:
        return False, f"Failed to push docs to '{pipe.target}':\n{e}"

    update_docs = update_df.to_dict(orient='records') if update_df is not None else []
    update_ix_docs = {
        get_document_key(doc, indices, table_name): doc
        for doc in update_docs
    }
    for key, doc in update_ix_docs.items():
        try:
            # Merge the changed values into the previously stored document.
            old_doc = json.loads(self.get(key))
            old_doc.update(doc)
            self.set(key, serialize_document(old_doc))
        except Exception as e:
            return False, f"Failed to set keys for {pipe}:\n{e}"

    return True, msg
598
+
599
+
600
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, str]:
    """
    Map the pipe's remembered `valkey` dtypes to database types for data type enforcement.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose target table contains columns and data types.

    Returns
    -------
    A dictionary mapping columns to data types (empty if the pipe does not exist).
    """
    if not pipe.exists(debug=debug):
        return {}

    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    stored_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
    columns_types = {}
    for column, pd_type in stored_dtypes.items():
        columns_types[column] = get_db_type_from_pd_type(pd_type)
    return columns_types
626
+
627
+
628
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Delete rows within `begin`, `end`, and `params`.
    If no bounds or filters are given, the pipe is dropped instead.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows to clear.

    begin: Union[datetime, int, None], default None
        If provided, remove rows >= `begin`.

    end: Union[datetime, int, None], default None
        If provided, remove rows < `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, only remove rows which match the `params` filter.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    if all(bound is None for bound in (begin, end, params)):
        return self.drop_pipe(pipe, debug=debug)

    dt_col = pipe.columns.get('datetime', None)

    existing_df = pipe.get_data(
        begin=begin,
        end=end,
        params=params,
        debug=debug,
    )
    if existing_df is None or len(existing_df) == 0:
        return True, "Deleted 0 rows."

    docs = existing_df.to_dict(orient='records')
    table_name = self.quote_table(pipe.target)
    indices = [col for col in pipe.columns.values() if col]
    for doc in docs:
        member_key = get_document_key(doc, indices)
        stored_doc_key = get_document_key(doc, indices, table_name)
        try:
            # NOTE(review): `sync_pipe` pushes `{'ix': ..., <datetime column>: ...}`
            # documents to this table and `get_sync_time` JSON-decodes its members,
            # so the bare index string removed here may not match the stored
            # member — confirm against `push_docs`.
            if dt_col:
                self.client.zrem(table_name, member_key)
            else:
                self.client.srem(table_name, member_key)
            self.client.delete(stored_doc_key)
        except Exception as e:
            return False, f"Failed to delete documents:\n{e}"

    plural_suffix = '' if len(docs) == 1 else 's'
    return True, f"Deleted {len(docs)} row" + plural_suffix + '.'
691
+
692
+
693
def get_sync_time(
    self,
    pipe: mrsm.Pipe,
    newest: bool = True,
    **kwargs: Any
) -> Union[datetime, int, None]:
    """
    Return the newest (or oldest) timestamp in a pipe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose sync time to fetch.

    newest: bool, default True
        If `True`, read the last member of the pipe's sorted set;
        otherwise read the first.

    Returns
    -------
    An `int` if the datetime column's dtype is integer-like,
    a `datetime` without `tzinfo` otherwise,
    or `None` if there is no datetime column, no data, or the value cannot be parsed.
    """
    from meerschaum.utils.dtypes import are_dtypes_equal

    dt_col = pipe.columns.get('datetime', None)
    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
    if not dt_col:
        return None

    dateutil_parser = mrsm.attempt_import('dateutil.parser')
    table_name = self.quote_table(pipe.target)
    try:
        if newest:
            members = self.client.zrevrange(table_name, 0, 0)
        else:
            members = self.client.zrange(table_name, 0, 0)
        if not members:
            return None
        edge_member = members[0]
    except Exception:
        return None

    # Members are JSON documents carrying the datetime column's value.
    edge_doc = json.loads(edge_member)
    dt_val = edge_doc.get(dt_col, None)
    if dt_val is None:
        return None

    try:
        if are_dtypes_equal(dt_typ, 'int'):
            return int(dt_val)
        return dateutil_parser.parse(str(dt_val)).replace(tzinfo=None)
    except Exception as e:
        warn(f"Failed to parse sync time for {pipe}:\n{e}")

    return None
737
+
738
+
739
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union[int, None]:
    """
    Return the number of documents in the pipe's set.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows should be counted.

    begin: Union[datetime, int, None], default None
        If provided, only count rows >= `begin`.

    end: Union[datetime, int, None], default None
        If provided, only count rows < `end`.

    params: Optional[Dict[str, Any]], default None
        If provided, only count rows which match the `params` filter.

    Returns
    -------
    The number of rows, `0` if the pipe does not exist,
    or `None` if the cardinality could not be read.
    """
    dt_col = pipe.columns.get('datetime', None)
    table_name = self.quote_table(pipe.target)

    if not pipe.exists(debug=debug):
        return 0

    if begin is None and end is None and params is None:
        try:
            # Pipes with a datetime column live in a sorted set; others are removed
            # from with `srem` in `clear_pipe()`, i.e. a plain set, so count with
            # `scard` (a list command like `llen` would fail on a set key).
            return (
                self.client.zcard(table_name)
                if dt_col
                else self.client.scard(table_name)
            )
        except Exception:
            return None

    df = pipe.get_data(begin=begin, end=end, params=params, debug=debug)
    if df is None:
        return 0

    return len(df)
772
+
773
+
774
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Optional[List[Tuple[str, str, Optional[str]]]]:
    """
    Return the keys for the registered pipes.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        If provided, only return pipes with these connector keys.

    metric_keys: Optional[List[str]], default None
        If provided, only return pipes with these metric keys.

    location_keys: Optional[List[str]], default None
        If provided, only return pipes with these location keys.

    tags: Optional[List[str]], default None
        If provided, only return pipes which satisfy every tag group.
        Each element may hold several comma-separated tags.

    params: Optional[Dict[str, Any]], default None
        Additional filters merged into the query on the pipes table.

    Returns
    -------
    A list of `(connector_keys, metric_key, location_key)` tuples.
    """
    from meerschaum.utils.dataframe import query_df
    from meerschaum.utils.misc import separate_negation_values

    try:
        df = self.read(PIPES_TABLE, debug=debug)
    except Exception:
        return []

    if df is None or len(df) == 0:
        return []

    key_filters = (
        ('connector_keys', connector_keys),
        ('metric_key', metric_keys),
        ('location_key', location_keys),
    )
    query = {
        column: [str(value) for value in values]
        for column, values in key_filters
        if values
    }
    if params:
        query.update(params)

    df = query_df(df, query, inplace=True)

    keys = [
        (record['connector_keys'], record['metric_key'], record['location_key'])
        for record in df.to_dict(orient='records')
    ]
    if not tags:
        return keys

    in_ex_tag_groups = [
        separate_negation_values(tag.split(','))
        for tag in tags
    ]

    def _satisfies_every_group(pipe_tags):
        # A pipe must hold all included tags and none of the excluded tags of each group.
        for in_tags, ex_tags in in_ex_tag_groups:
            if not all(tag in pipe_tags for tag in in_tags):
                return False
            if any(tag in pipe_tags for tag in ex_tags):
                return False
        return True

    filtered_keys = []
    for ck, mk, lk in keys:
        pipe = mrsm.Pipe(ck, mk, lk, instance=self)
        if _satisfies_every_group(set(pipe.tags)):
            filtered_keys.append((ck, mk, lk))

    return filtered_keys