detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,581 @@
1
+ """
2
+ Internal tables manager for detectk.
3
+
4
+ High-level wrapper over BaseDatabaseManager for working with internal tables
5
+ (_dtk_datapoints, _dtk_detections, _dtk_tasks).
6
+
7
+ This class provides convenient methods that use the UNIVERSAL BaseDatabaseManager
8
+ methods underneath. It does NOT duplicate logic - just provides semantic wrappers.
9
+ """
10
+
11
+ from datetime import datetime, timezone
12
+ from typing import Dict, Optional
13
+
14
+ import numpy as np
15
+
16
+ from detectkit.database.manager import BaseDatabaseManager
17
+ from detectkit.database.tables import (
18
+ INTERNAL_TABLES,
19
+ TABLE_DATAPOINTS,
20
+ TABLE_DETECTIONS,
21
+ TABLE_TASKS,
22
+ )
23
+
24
+
25
class InternalTablesManager:
    """
    Manager for internal detectk tables.

    Provides high-level methods for working with _dtk_* tables:
    - Ensure tables exist
    - Save datapoints and detections
    - Task locking and status management
    - Query last timestamps

    This is a WRAPPER over BaseDatabaseManager - it only calls methods of the
    manager it is given (get_full_table_name, table_exists, create_table,
    insert_batch, get_last_timestamp, execute_query, upsert_task_status).

    Example:
        >>> manager = ClickHouseDatabaseManager(...)
        >>> internal = InternalTablesManager(manager)
        >>> internal.ensure_tables()
        >>> internal.save_datapoints("cpu_usage", data, 600, ["hour"])
    """

    # Second-resolution format used for timestamp bounds in WHERE clauses.
    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, manager: "BaseDatabaseManager"):
        """
        Initialize internal tables manager.

        Args:
            manager: Database manager instance (ClickHouse, PostgreSQL, etc.)
        """
        self._manager = manager

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _utc_now_ms() -> np.datetime64:
        """Return the current UTC time as a millisecond-precision datetime64.

        numpy's datetime64 has no timezone representation and warns when it is
        handed a timezone-aware datetime, so the tzinfo is dropped after the
        value has been taken in UTC. The stored instant is unchanged.
        """
        naive_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        return np.datetime64(naive_utc, "ms")

    @classmethod
    def _build_filter(
        cls,
        metric_name: str,
        detector_id: Optional[str] = None,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> tuple[str, Dict[str, str]]:
        """Build a parameterized WHERE clause and its parameter dict.

        Values are never interpolated into the SQL text; they are returned
        separately and bound through ``%(name)s`` placeholders, the same style
        the other queries in this class use.

        Args:
            metric_name: Metric identifier (always filtered on)
            detector_id: Optional detector filter (skipped when falsy)
            from_timestamp: Optional inclusive lower bound
            to_timestamp: Optional exclusive upper bound

        Returns:
            Tuple of (where_clause, params)
        """
        conditions = ["metric_name = %(metric_name)s"]
        params: Dict[str, str] = {"metric_name": metric_name}

        if detector_id:
            conditions.append("detector_id = %(detector_id)s")
            params["detector_id"] = detector_id
        if from_timestamp:
            conditions.append("timestamp >= %(from_timestamp)s")
            params["from_timestamp"] = from_timestamp.strftime(cls._TIMESTAMP_FORMAT)
        if to_timestamp:
            conditions.append("timestamp < %(to_timestamp)s")
            params["to_timestamp"] = to_timestamp.strftime(cls._TIMESTAMP_FORMAT)

        return " AND ".join(conditions), params

    # ------------------------------------------------------------------
    # Table management
    # ------------------------------------------------------------------

    def ensure_tables(self) -> None:
        """
        Create all internal tables if they don't exist.

        Iterates over INTERNAL_TABLES (table name -> model factory) and creates
        every table that the manager reports as missing from its internal
        location. Creation is requested with ``if_not_exists=True``, so calling
        this repeatedly is safe.

        Example:
            >>> internal.ensure_tables()
        """
        for table_name, model_factory in INTERNAL_TABLES.items():
            if self._manager.table_exists(
                table_name, schema=self._manager.internal_location
            ):
                continue

            # Only resolve the qualified name and build the model when needed.
            full_table_name = self._manager.get_full_table_name(
                table_name, use_internal=True
            )
            self._manager.create_table(
                full_table_name, model_factory(), if_not_exists=True
            )

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------

    def save_datapoints(
        self,
        metric_name: str,
        data: Dict[str, np.ndarray],
        interval_seconds: int,
        seasonality_columns: list[str],
    ) -> int:
        """
        Save metric datapoints to _dtk_datapoints table.

        Args:
            metric_name: Metric identifier
            data: Dictionary with keys:
                - timestamp: np.array of datetime64
                - value: np.array of float64 (nullable)
                - seasonality_data: np.array of JSON strings
            interval_seconds: Interval in seconds
            seasonality_columns: List of seasonality column names; stored as a
                single comma-joined string repeated on every row

        Returns:
            Whatever the manager's insert_batch returns (number of rows inserted)

        Example:
            >>> data = {
            ...     "timestamp": np.array([dt1, dt2], dtype="datetime64[ms]"),
            ...     "value": np.array([0.5, 0.6]),
            ...     "seasonality_data": np.array(['{"hour": 10}', '{"hour": 11}']),
            ... }
            >>> rows = internal.save_datapoints(
            ...     "cpu_usage", data, 600, ["hour", "day_of_week"]
            ... )
        """
        num_rows = len(data["timestamp"])

        # Scalar columns are broadcast to one value per row for insert_batch.
        insert_data = {
            "metric_name": np.full(num_rows, metric_name, dtype=object),
            "timestamp": data["timestamp"],
            "value": data["value"],
            "seasonality_data": data["seasonality_data"],
            "interval_seconds": np.full(num_rows, interval_seconds, dtype=np.int32),
            "seasonality_columns": np.full(
                num_rows, ",".join(seasonality_columns), dtype=object
            ),
            "created_at": np.full(
                num_rows, self._utc_now_ms(), dtype="datetime64[ms]"
            ),
        }

        full_table_name = self._manager.get_full_table_name(
            TABLE_DATAPOINTS, use_internal=True
        )

        return self._manager.insert_batch(
            full_table_name, insert_data, conflict_strategy="ignore"
        )

    def save_detections(
        self,
        metric_name: str,
        detector_id: str,
        data: Dict[str, np.ndarray],
        detector_params: str,
    ) -> int:
        """
        Save detection results to _dtk_detections table.

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier (hash)
            data: Dictionary with keys:
                - timestamp: np.array of datetime64
                - is_anomaly: np.array of bool
                - confidence_lower: np.array of float64 (nullable)
                - confidence_upper: np.array of float64 (nullable)
                - value: np.array of float64 (nullable)
                - detection_metadata: np.array of JSON strings
            detector_params: JSON string with sorted detector parameters

        Returns:
            Whatever the manager's insert_batch returns (number of rows inserted)

        Example:
            >>> data = {
            ...     "timestamp": np.array([dt1, dt2]),
            ...     "is_anomaly": np.array([False, True]),
            ...     "confidence_lower": np.array([0.4, 0.5]),
            ...     "confidence_upper": np.array([0.6, 0.7]),
            ...     "value": np.array([0.5, 0.9]),
            ...     "detection_metadata": np.array(['{"severity": 0.0}', '{"severity": 0.8}']),
            ... }
            >>> rows = internal.save_detections(
            ...     "cpu_usage", "mad_abc123", data, '{"threshold": 3.0}'
            ... )
        """
        num_rows = len(data["timestamp"])

        # Scalar columns are broadcast to one value per row for insert_batch.
        insert_data = {
            "metric_name": np.full(num_rows, metric_name, dtype=object),
            "detector_id": np.full(num_rows, detector_id, dtype=object),
            "timestamp": data["timestamp"],
            "is_anomaly": data["is_anomaly"],
            "confidence_lower": data["confidence_lower"],
            "confidence_upper": data["confidence_upper"],
            "value": data["value"],
            "detector_params": np.full(num_rows, detector_params, dtype=object),
            "detection_metadata": data["detection_metadata"],
            "created_at": np.full(
                num_rows, self._utc_now_ms(), dtype="datetime64[ms]"
            ),
        }

        full_table_name = self._manager.get_full_table_name(
            TABLE_DETECTIONS, use_internal=True
        )

        return self._manager.insert_batch(
            full_table_name, insert_data, conflict_strategy="ignore"
        )

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------

    def get_last_datapoint_timestamp(self, metric_name: str) -> Optional[datetime]:
        """
        Get last saved timestamp for a metric in _dtk_datapoints.

        Delegates to the manager's get_last_timestamp.

        Args:
            metric_name: Metric identifier

        Returns:
            Last timestamp or None if no data

        Example:
            >>> last_ts = internal.get_last_datapoint_timestamp("cpu_usage")
            >>> if last_ts:
            ...     print(f"Last data at {last_ts}")
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_DATAPOINTS, use_internal=True
        )

        return self._manager.get_last_timestamp(full_table_name, metric_name)

    def get_last_detection_timestamp(
        self, metric_name: str, detector_id: str
    ) -> Optional[datetime]:
        """
        Get last saved timestamp for a detector in _dtk_detections.

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier

        Returns:
            Last timestamp, or None when the query yields no rows or a falsy
            ``last_ts`` value

        Example:
            >>> last_ts = internal.get_last_detection_timestamp("cpu_usage", "mad_abc123")
            >>> if last_ts:
            ...     print(f"Last detection at {last_ts}")
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_DETECTIONS, use_internal=True
        )

        # Need to filter by both metric_name AND detector_id
        query = f"""
            SELECT max(timestamp) as last_ts
            FROM {full_table_name}
            WHERE metric_name = %(metric_name)s
              AND detector_id = %(detector_id)s
        """

        result = self._manager.execute_query(
            query, {"metric_name": metric_name, "detector_id": detector_id}
        )

        # NOTE(review): some backends may return an epoch default instead of
        # NULL for max() over an empty set - confirm for the target database.
        if result and result[0]["last_ts"]:
            return result[0]["last_ts"]

        return None

    def load_datapoints(
        self,
        metric_name: str,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> Dict[str, np.ndarray]:
        """
        Load datapoints from _dtk_datapoints table, ordered by timestamp.

        Args:
            metric_name: Metric identifier
            from_timestamp: Start timestamp (inclusive, optional)
            to_timestamp: End timestamp (exclusive, optional)

        Returns:
            Dict with keys:
                - timestamp: np.array of datetime64[ms]
                - value: np.array of float64
                - seasonality_data: np.array of objects
                - seasonality_columns: plain list of column names, parsed from
                  the comma-separated string on the first row (note: a list,
                  not a numpy array)

        Example:
            >>> data = internal.load_datapoints("cpu_usage", from_timestamp=start, to_timestamp=end)
            >>> print(f"Loaded {len(data['timestamp'])} points")
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_DATAPOINTS, use_internal=True
        )

        # Values are bound as parameters, never spliced into the SQL text.
        where_clause, params = self._build_filter(
            metric_name,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        query = f"""
            SELECT
                timestamp,
                value,
                seasonality_data,
                seasonality_columns
            FROM {full_table_name}
            WHERE {where_clause}
            ORDER BY timestamp
        """

        results = self._manager.execute_query(query, params)

        if not results:
            return {
                "timestamp": np.array([], dtype="datetime64[ms]"),
                "value": np.array([], dtype=np.float64),
                "seasonality_data": np.array([], dtype=object),
                "seasonality_columns": [],
            }

        timestamps = [row["timestamp"] for row in results]
        values = [row["value"] for row in results]
        seasonality = [row["seasonality_data"] for row in results]

        # Get seasonality_columns from first row (comma-separated string)
        seasonality_columns_str = results[0].get("seasonality_columns", "")
        seasonality_columns = (
            [c.strip() for c in seasonality_columns_str.split(",") if c.strip()]
            if seasonality_columns_str
            else []
        )

        return {
            "timestamp": np.array(timestamps, dtype="datetime64[ms]"),
            "value": np.array(values, dtype=np.float64),
            "seasonality_data": np.array(seasonality, dtype=object),
            "seasonality_columns": seasonality_columns,
        }

    # ------------------------------------------------------------------
    # Deletes
    # ------------------------------------------------------------------

    def delete_datapoints(
        self,
        metric_name: str,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> int:
        """
        Delete datapoints for a metric.

        Args:
            metric_name: Metric name
            from_timestamp: Optional start timestamp (inclusive)
            to_timestamp: Optional end timestamp (exclusive)

        Returns:
            Always 0 - the number of deleted rows is not reported
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_DATAPOINTS, use_internal=True
        )

        # Values are bound as parameters, never spliced into the SQL text.
        where_clause, params = self._build_filter(
            metric_name,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        query = f"ALTER TABLE {full_table_name} DELETE WHERE {where_clause}"
        self._manager.execute_query(query, params)

        # ClickHouse ALTER TABLE DELETE is async, return 0
        # Other databases might return affected rows
        return 0

    def delete_detections(
        self,
        metric_name: str,
        detector_id: Optional[str] = None,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> int:
        """
        Delete detections for a metric.

        Args:
            metric_name: Metric name
            detector_id: Optional detector ID filter (ignored when falsy)
            from_timestamp: Optional start timestamp (inclusive)
            to_timestamp: Optional end timestamp (exclusive)

        Returns:
            Always 0 - the number of deleted rows is not reported
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_DETECTIONS, use_internal=True
        )

        # Values are bound as parameters, never spliced into the SQL text.
        where_clause, params = self._build_filter(
            metric_name,
            detector_id=detector_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        query = f"ALTER TABLE {full_table_name} DELETE WHERE {where_clause}"
        self._manager.execute_query(query, params)

        # ClickHouse ALTER TABLE DELETE is async, return 0
        return 0

    # ------------------------------------------------------------------
    # Task locking
    # ------------------------------------------------------------------

    def acquire_lock(
        self,
        metric_name: str,
        detector_id: str,
        process_type: str,
        timeout_seconds: int = 3600,
    ) -> bool:
        """
        Acquire task lock by creating task record with status='running'.

        This implements task locking to prevent concurrent execution. The
        existence check and the status write are two separate calls to the
        manager, so this method by itself does not make the pair atomic.

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier (or "load" for loading tasks)
            process_type: Process type ("load" or "detect")
            timeout_seconds: Task timeout in seconds; forwarded to the task
                record but not yet used here to expire stale locks (see TODO)

        Returns:
            True if lock acquired, False if a running task record already exists

        Example:
            >>> if internal.acquire_lock("cpu_usage", "load", "load"):
            ...     try:
            ...         # Do work
            ...         pass
            ...     finally:
            ...         internal.release_lock("cpu_usage", "load", "load", "completed")
        """
        existing_status = self.check_lock(metric_name, detector_id, process_type)

        if existing_status:
            # Task is locked
            # TODO: Check if lock expired based on timeout
            return False

        # Acquire lock by creating task record
        self._manager.upsert_task_status(
            metric_name=metric_name,
            detector_id=detector_id,
            process_type=process_type,
            status="running",
            timeout_seconds=timeout_seconds,
        )

        return True

    def release_lock(
        self,
        metric_name: str,
        detector_id: str,
        process_type: str,
        status: str,
        last_processed_timestamp: Optional[datetime] = None,
        error_message: Optional[str] = None,
    ) -> None:
        """
        Release task lock by updating status to 'completed' or 'failed'.

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier
            process_type: Process type
            status: Final status ("completed" or "failed")
            last_processed_timestamp: Last successfully processed timestamp
            error_message: Error message if status is "failed"

        Example:
            >>> internal.release_lock(
            ...     "cpu_usage", "load", "load",
            ...     status="completed",
            ...     last_processed_timestamp=datetime(2024, 1, 1, 23, 59)
            ... )
        """
        self._manager.upsert_task_status(
            metric_name=metric_name,
            detector_id=detector_id,
            process_type=process_type,
            status=status,
            last_processed_timestamp=last_processed_timestamp,
            error_message=error_message,
        )

    def check_lock(
        self, metric_name: str, detector_id: str, process_type: str
    ) -> Optional[Dict]:
        """
        Check if task is locked (running).

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier
            process_type: Process type

        Returns:
            First matching task row with status 'running', or None if there is
            no such row

        Example:
            >>> status = internal.check_lock("cpu_usage", "load", "load")
            >>> if status and status["status"] == "running":
            ...     print("Task is locked")
        """
        full_table_name = self._manager.get_full_table_name(
            TABLE_TASKS, use_internal=True
        )

        query = f"""
            SELECT *
            FROM {full_table_name}
            WHERE metric_name = %(metric_name)s
              AND detector_id = %(detector_id)s
              AND process_type = %(process_type)s
              AND status = 'running'
        """

        results = self._manager.execute_query(
            query,
            {
                "metric_name": metric_name,
                "detector_id": detector_id,
                "process_type": process_type,
            },
        )

        if results:
            return results[0]
        return None

    def update_task_progress(
        self,
        metric_name: str,
        detector_id: str,
        process_type: str,
        last_processed_timestamp: datetime,
    ) -> None:
        """
        Update task progress (last_processed_timestamp) while task is running.

        Writes the task record with status 'running' and the given timestamp,
        so that a later run can resume from last_processed_timestamp.

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier
            process_type: Process type
            last_processed_timestamp: Last successfully processed timestamp

        Example:
            >>> # Update progress every 1000 rows
            >>> internal.update_task_progress(
            ...     "cpu_usage", "load", "load",
            ...     datetime(2024, 1, 1, 12, 0)
            ... )
        """
        self._manager.upsert_task_status(
            metric_name=metric_name,
            detector_id=detector_id,
            process_type=process_type,
            status="running",
            last_processed_timestamp=last_processed_timestamp,
        )