detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,324 @@
1
+ """
2
+ Base database manager interface.
3
+
4
+ Provides universal methods for database operations WITHOUT hardcoding
5
+ specific table logic (e.g., _dtk_datapoints, _dtk_detections).
6
+
7
+ The manager is database-agnostic and provides generic operations:
8
+ - execute_query(): Run SQL and return results
9
+ - create_table(): Create table from TableModel
10
+ - table_exists(): Check if table exists
11
+ - insert_batch(): Insert batch of data
12
+ - get_last_timestamp(): Get last timestamp for a metric
13
+ """
14
+
15
+ from abc import ABC, abstractmethod
16
+ from datetime import datetime
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ import numpy as np
20
+
21
+ from detectkit.core.models import TableModel
22
+
23
+
24
class BaseDatabaseManager(ABC):
    """
    Abstract, backend-neutral contract for database access.

    Concrete managers implement the abstract operations below for one
    particular database. Every table-level operation receives the table
    through a ``table_name`` argument, so the layout of the internal tables
    (_dtk_datapoints, _dtk_detections, ...) is not baked into this class;
    higher-level classes compose these generic calls to manage them.

    Intended responsibilities of an implementation:
        1. Offer universal (not table-specific) operations
        2. Work against any table named by the caller
        3. Perform type conversion internally
        4. Own connection pooling and error handling

    Instances are context managers: leaving a ``with`` block calls close().
    """

    @abstractmethod
    def execute_query(
        self,
        query: str,
        params: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """
        Run a SQL statement and hand back its rows.

        Args:
            query: SQL text to execute
            params: Values for a parameterized query, or None when the
                query has no placeholders

        Returns:
            One dictionary per result row

        Raises:
            DatabaseError: If the query cannot be executed

        Example:
            >>> rows = manager.execute_query(
            ...     "SELECT * FROM metrics WHERE name = %(name)s",
            ...     {"name": "cpu_usage"}
            ... )
            >>> for row in rows:
            ...     print(row['timestamp'], row['value'])
        """

    @abstractmethod
    def create_table(
        self,
        table_name: str,
        table_model: TableModel,
        if_not_exists: bool = True
    ) -> None:
        """
        Create a table described by a TableModel.

        The implementation translates the database-agnostic model into
        the DDL dialect of its own backend.

        Args:
            table_name: Name of the table to create
            table_model: Schema definition to materialize
            if_not_exists: Whether to add an IF NOT EXISTS clause

        Raises:
            DatabaseError: If the table cannot be created

        Example:
            >>> model = TableModel(
            ...     columns=[
            ...         ColumnDefinition("id", "Int32"),
            ...         ColumnDefinition("value", "Float64", nullable=True),
            ...     ],
            ...     primary_key=["id"],
            ...     engine="MergeTree",
            ...     order_by=["id"]
            ... )
            >>> manager.create_table("my_metrics", model)
        """

    @abstractmethod
    def table_exists(
        self,
        table_name: str,
        schema: Optional[str] = None
    ) -> bool:
        """
        Report whether a table is present in the database.

        Args:
            table_name: Name of the table to look for
            schema: Schema/database to look in; None means the default one

        Returns:
            True when the table exists, otherwise False

        Example:
            >>> if not manager.table_exists("_dtk_datapoints"):
            ...     manager.create_table("_dtk_datapoints", datapoints_model)
        """

    @abstractmethod
    def insert_batch(
        self,
        table_name: str,
        data: Dict[str, np.ndarray],
        conflict_strategy: str = "ignore"
    ) -> int:
        """
        Write a columnar batch of rows into a table.

        The operation is generic: it is not tied to the internal tables.

        Args:
            table_name: Destination table
            data: Column name -> numpy array; all arrays must share one length
            conflict_strategy: What to do when a row collides with an
                existing primary key:
                - "ignore": skip the colliding rows
                - "replace": overwrite the existing rows
                - "fail": raise an error

        Returns:
            Count of rows actually inserted (can be lower than the batch
            size when collisions are ignored)

        Raises:
            ValueError: If the arrays differ in length
            DatabaseError: If the insert fails

        Example:
            >>> data = {
            ...     "metric_name": np.array(["cpu", "cpu"]),
            ...     "timestamp": np.array([dt1, dt2]),
            ...     "value": np.array([0.5, 0.6]),
            ... }
            >>> rows_inserted = manager.insert_batch(
            ...     "_dtk_datapoints", data, conflict_strategy="ignore"
            ... )
        """

    @abstractmethod
    def get_last_timestamp(
        self,
        table_name: str,
        metric_name: str,
        timestamp_column: str = "timestamp"
    ) -> Optional[datetime]:
        """
        Look up the most recent timestamp stored for one metric.

        Usable with any table that has a metric_name column and a
        timestamp column.

        Args:
            table_name: Table to search
            metric_name: Value the metric_name column must match
            timestamp_column: Column holding the timestamps
                (default: "timestamp")

        Returns:
            The latest timestamp, or None when the metric has no rows

        Example:
            >>> last_ts = manager.get_last_timestamp(
            ...     "_dtk_datapoints", "cpu_usage"
            ... )
            >>> if last_ts:
            ...     print(f"Last data point at {last_ts}")
        """

    @abstractmethod
    def upsert_task_status(
        self,
        metric_name: str,
        detector_id: str,
        process_type: str,
        status: str,
        last_processed_timestamp: Optional[datetime] = None,
        error_message: Optional[str] = None,
        timeout_seconds: int = 3600
    ) -> None:
        """
        Insert or update the status row of a task.

        The status row serves two purposes:
            1. Locking - prevents concurrent runs of the same task
            2. Idempotency - last_processed_timestamp lets an interrupted
               task resume where it stopped

        How the upsert is achieved is backend-specific:
            - ClickHouse: DELETE + INSERT (no native UPSERT)
            - PostgreSQL: INSERT ... ON CONFLICT DO UPDATE
            - MySQL: INSERT ... ON DUPLICATE KEY UPDATE

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier (or "load" for loading tasks)
            process_type: Kind of process ("load" or "detect")
            status: Task state ("running", "completed", "failed")
            last_processed_timestamp: Last timestamp processed successfully
            error_message: Failure description when status is "failed"
            timeout_seconds: Task timeout in seconds

        Example:
            >>> # Start task
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "running",
            ...     timeout_seconds=3600
            ... )
            >>> # Update progress
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "running",
            ...     last_processed_timestamp=datetime(2024, 1, 1, 12, 0)
            ... )
            >>> # Complete task
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "completed",
            ...     last_processed_timestamp=datetime(2024, 1, 1, 23, 59)
            ... )
        """

    @property
    @abstractmethod
    def internal_location(self) -> str:
        """
        Location that holds the internal tables.

        What the string denotes depends on the backend:
            - ClickHouse: "database_name"
            - PostgreSQL: "schema_name"

        Returns:
            Path of the internal schema/database

        Example:
            >>> manager.internal_location
            'detectk_internal'
        """

    @property
    @abstractmethod
    def data_location(self) -> str:
        """
        Location that holds the user data tables.

        What the string denotes depends on the backend:
            - ClickHouse: "database_name"
            - PostgreSQL: "schema_name"

        Returns:
            Path of the data schema/database

        Example:
            >>> manager.data_location
            'analytics'
        """

    def get_full_table_name(
        self,
        table_name: str,
        use_internal: bool = True
    ) -> str:
        """
        Qualify a table name with its location.

        Args:
            table_name: Unqualified table name
            use_internal: True to prefix with internal_location, False to
                prefix with data_location

        Returns:
            "<location>.<table_name>"

        Example:
            >>> manager.get_full_table_name("_dtk_datapoints", use_internal=True)
            'detectk_internal._dtk_datapoints'
        """
        if use_internal:
            prefix = self.internal_location
        else:
            prefix = self.data_location
        return "{}.{}".format(prefix, table_name)

    @abstractmethod
    def close(self) -> None:
        """
        Release the database connection and any related resources.

        Example:
            >>> manager.close()
        """

    def __enter__(self):
        """Enter the ``with`` block; the manager itself is the target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the ``with`` block by closing the manager."""
        # Nothing is returned, so an exception raised inside the block
        # continues to propagate after close() has run.
        self.close()
@@ -0,0 +1,134 @@
1
+ """
2
+ Internal table models for detectkit.
3
+
4
+ Defines schemas for internal tables:
5
+ - _dtk_datapoints: Metric data points
6
+ - _dtk_detections: Anomaly detections
7
+ - _dtk_tasks: Task status and locking
8
+ """
9
+
10
+ from detectkit.core.models import ColumnDefinition, TableModel
11
+
12
+
13
def get_datapoints_table_model() -> TableModel:
    """
    Build the schema of the _dtk_datapoints table.

    Columns:
        - metric_name: Metric identifier
        - timestamp: Data point timestamp (UTC, millisecond precision)
        - value: Metric value (nullable, so missing data can be stored)
        - seasonality_data: JSON with seasonality components
          (hour, day_of_week, etc.)
        - interval_seconds: Interval in seconds
        - seasonality_columns: Comma-separated list of the seasonality
          columns used
        - created_at: When the record was created (UTC, millisecond precision)

    Primary key and sort order: (metric_name, timestamp).
    Engine string: "ReplacingMergeTree(created_at)".

    Returns:
        A freshly constructed TableModel.
    """
    utc_millis = "DateTime64(3, 'UTC')"
    key_columns = ("metric_name", "timestamp")

    # NOTE(review): "value" carries a Nullable(...) type string *and*
    # nullable=True - confirm the DDL builder does not wrap it twice.
    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("timestamp", utc_millis),
        ColumnDefinition("value", "Nullable(Float64)", nullable=True),
        ColumnDefinition("seasonality_data", "String"),
        ColumnDefinition("interval_seconds", "Int32"),
        ColumnDefinition("seasonality_columns", "String"),
        ColumnDefinition("created_at", utc_millis),
    ]

    # Separate list objects are passed for primary_key and order_by.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="ReplacingMergeTree(created_at)",
        order_by=list(key_columns),
    )
42
+
43
+
44
def get_detections_table_model() -> TableModel:
    """
    Build the schema of the _dtk_detections table.

    Columns:
        - metric_name: Metric identifier
        - detector_id: Detector identifier (hash of class + params)
        - timestamp: Detection timestamp (UTC, millisecond precision)
        - is_anomaly: Whether the point is anomalous
        - confidence_lower: Lower confidence bound (nullable)
        - confidence_upper: Upper confidence bound (nullable)
        - value: Actual metric value (nullable)
        - detector_params: JSON with sorted detector parameters
        - detection_metadata: JSON with missing_ratio, severity,
          direction, etc.
        - created_at: When the detection was performed
          (UTC, millisecond precision)

    Primary key and sort order: (metric_name, detector_id, timestamp).
    Engine string: "ReplacingMergeTree(created_at)".

    Returns:
        A freshly constructed TableModel.
    """
    utc_millis = "DateTime64(3, 'UTC')"
    nullable_float = "Nullable(Float64)"
    key_columns = ("metric_name", "detector_id", "timestamp")

    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("detector_id", "String"),
        ColumnDefinition("timestamp", utc_millis),
        ColumnDefinition("is_anomaly", "Bool"),
        ColumnDefinition("confidence_lower", nullable_float, nullable=True),
        ColumnDefinition("confidence_upper", nullable_float, nullable=True),
        ColumnDefinition("value", nullable_float, nullable=True),
        ColumnDefinition("detector_params", "String"),
        ColumnDefinition("detection_metadata", "String"),
        ColumnDefinition("created_at", utc_millis),
    ]

    # Separate list objects are passed for primary_key and order_by.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="ReplacingMergeTree(created_at)",
        order_by=list(key_columns),
    )
79
+
80
+
81
def get_tasks_table_model() -> TableModel:
    """
    Build the schema of the _dtk_tasks table.

    Columns:
        - metric_name: Metric identifier
        - detector_id: Detector identifier (or "load" for loading tasks)
        - process_type: Kind of process ("load" or "detect")
        - status: Task state ("running", "completed", "failed")
        - started_at: When the task started (UTC, millisecond precision)
        - updated_at: Last update time (UTC, millisecond precision)
        - last_processed_timestamp: Last successfully processed
          timestamp (nullable)
        - error_message: Failure description (nullable)
        - timeout_seconds: Task timeout in seconds

    Primary key and sort order: (metric_name, detector_id, process_type).
    Engine string: "MergeTree".

    The table is used for two things:
        1. Locking - a single process per (metric, detector, type)
        2. Resume - last_processed_timestamp marks where to continue
           after an interruption

    Returns:
        A freshly constructed TableModel.
    """
    utc_millis = "DateTime64(3, 'UTC')"
    key_columns = ("metric_name", "detector_id", "process_type")

    last_processed = ColumnDefinition(
        "last_processed_timestamp",
        "Nullable(DateTime64(3, 'UTC'))",
        nullable=True
    )
    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("detector_id", "String"),
        ColumnDefinition("process_type", "String"),
        ColumnDefinition("status", "String"),
        ColumnDefinition("started_at", utc_millis),
        ColumnDefinition("updated_at", utc_millis),
    ]
    columns.append(last_processed)
    columns.append(
        ColumnDefinition("error_message", "Nullable(String)", nullable=True)
    )
    columns.append(ColumnDefinition("timeout_seconds", "Int32"))

    # Separate list objects are passed for primary_key and order_by.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="MergeTree",
        order_by=list(key_columns),
    )
122
+
123
+
124
# Names of the internal tables, kept as constants so callers do not
# repeat the string literals.
TABLE_DATAPOINTS = "_dtk_datapoints"
TABLE_DETECTIONS = "_dtk_detections"
TABLE_TASKS = "_dtk_tasks"

# Registry: internal table name -> zero-argument factory returning that
# table's TableModel. The factories are stored uncalled, so a model is
# only built when a caller invokes the factory.
INTERNAL_TABLES = {
    TABLE_DATAPOINTS: get_datapoints_table_model,
    TABLE_DETECTIONS: get_detections_table_model,
    TABLE_TASKS: get_tasks_table_model,
}
+ }
@@ -0,0 +1,6 @@
1
+ """Anomaly detectors for detectkit."""
2
+
3
+ from detectkit.detectors.base import BaseDetector, DetectionResult
4
+ from detectkit.detectors.factory import DetectorFactory
5
+
6
# Names re-exported as the public API of detectkit.detectors.
__all__ = ["BaseDetector", "DetectionResult", "DetectorFactory"]
@@ -0,0 +1,222 @@
1
+ """
2
+ Base detector interface for anomaly detection.
3
+
4
+ All detectors must inherit from BaseDetector and implement:
5
+ - _validate_params() - parameter validation
6
+ - detect() - main detection method
7
+ - _get_non_default_params() - for hash generation
8
+ """
9
+
10
+ import hashlib
11
+ from abc import ABC, abstractmethod
12
+ from dataclasses import dataclass
13
+ from typing import Any, Dict, Optional
14
+
15
+ import numpy as np
16
+
17
# orjson is an optional accelerator; the standard-library json module is
# only imported when orjson is unavailable.
try:
    import orjson
    HAS_ORJSON = True
except ImportError:
    import json
    HAS_ORJSON = False


def json_dumps_sorted(obj):
    """
    Serialize ``obj`` to a JSON string with keys sorted.

    Uses orjson when it is installed and the standard json module
    otherwise. The fallback is configured to mirror orjson's formatting
    (no whitespace after separators, non-ASCII characters emitted as-is)
    so the same object yields the same string whichever backend is
    active. Previously the fallback added spaces and ``\\u`` escapes, so
    stored JSON differed between environments.

    Args:
        obj: JSON-serializable object (typically a dict of parameters
            or metadata)

    Returns:
        JSON text as ``str``

    Raises:
        TypeError: If ``obj`` contains a value the active backend cannot
            serialize (orjson raises a TypeError subclass)

    Note:
        Non-finite floats remain backend-dependent: orjson writes
        ``null`` while the json module writes ``NaN``/``Infinity``.
    """
    if HAS_ORJSON:
        return orjson.dumps(obj, option=orjson.OPT_SORT_KEYS).decode('utf-8')
    return json.dumps(
        obj,
        sort_keys=True,
        separators=(",", ":"),  # compact, like orjson
        ensure_ascii=False,     # raw UTF-8, like orjson
    )
31
+
32
+
33
@dataclass
class DetectionResult:
    """
    Outcome of anomaly detection for one data point.

    Attributes:
        timestamp: Timestamp of the data point
        value: Observed metric value
        is_anomaly: True when the point is considered anomalous
        confidence_lower: Lower bound of the confidence interval, if the
            detector provides one
        confidence_upper: Upper bound of the confidence interval, if the
            detector provides one
        detection_metadata: Extra details (severity, direction, etc.)
    """

    timestamp: np.datetime64
    value: float
    is_anomaly: bool
    confidence_lower: Optional[float] = None
    confidence_upper: Optional[float] = None
    detection_metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Flatten the result into a dictionary for database storage.

        Returns:
            Dictionary with one entry per field; detection_metadata is
            serialized to a sorted-key JSON string, and a missing or
            empty metadata dict is written as an empty JSON object.
        """
        metadata_json = json_dumps_sorted(self.detection_metadata or {})
        return dict(
            timestamp=self.timestamp,
            value=self.value,
            is_anomaly=self.is_anomaly,
            confidence_lower=self.confidence_lower,
            confidence_upper=self.confidence_upper,
            detection_metadata=metadata_json,
        )
64
+
65
+
66
class BaseDetector(ABC):
    """
    Common base for every anomaly detector.

    A concrete detector has to provide:
        1. _validate_params() - reject invalid parameters
        2. detect() - produce a DetectionResult per data point
        3. _get_non_default_params() - parameters that feed the ID hash

    The detector_id (hash) identifies the detector when:
        - detections are stored in the _dtk_detections table
        - tasks are locked in the _dtk_tasks table

    Example:
        >>> class MyDetector(BaseDetector):
        ...     def __init__(self, threshold: float = 3.0):
        ...         super().__init__(threshold=threshold)
        ...
        ...     def _validate_params(self):
        ...         if self.params["threshold"] <= 0:
        ...             raise ValueError("threshold must be positive")
        ...
        ...     def detect(self, data):
        ...         # Detection logic here
        ...         pass
        ...
        ...     def _get_non_default_params(self):
        ...         defaults = {"threshold": 3.0}
        ...         return {k: v for k, v in self.params.items() if v != defaults.get(k)}
    """

    def __init__(self, **params):
        """
        Store the parameters and validate them immediately.

        Args:
            **params: Detector-specific parameters, kept as ``self.params``

        Raises:
            Whatever _validate_params() raises for invalid parameters
            (ValueError by convention).
        """
        self.params = params
        self._validate_params()

    @abstractmethod
    def _validate_params(self):
        """
        Check ``self.params`` for validity.

        Implementations should raise ValueError when a parameter is
        invalid.

        Example:
            >>> def _validate_params(self):
            ...     if self.params.get("threshold", 0) <= 0:
            ...         raise ValueError("threshold must be positive")
        """

    @abstractmethod
    def detect(self, data: Dict[str, np.ndarray]) -> list[DetectionResult]:
        """
        Run anomaly detection over metric data.

        Args:
            data: Dictionary from MetricLoader.load() with keys:
                - timestamp: np.array of datetime64[ms]
                - value: np.array of float64 (NaN marks missing data)
                - seasonality_data: np.array of JSON strings
                - seasonality_columns: list of column names

        Returns:
            One DetectionResult per data point

        Notes:
            - NaN values (missing data) must be handled appropriately
            - seasonality_data should be used when the detector supports it
            - confidence_lower/upper are optional
            - detection_metadata may carry severity, direction,
              missing_ratio, etc.

        Example:
            >>> results = detector.detect(data)
            >>> for result in results:
            ...     if result.is_anomaly:
            ...         print(f"Anomaly at {result.timestamp}: {result.value}")
        """

    def get_detector_id(self) -> str:
        """
        Derive the detector's identifier (a short hash).

        The hashed text is the class name followed by the string form of
        the non-default parameters sorted by name. Consequently:
            - the same class with the same parameters yields the same ID
            - different parameters yield a different ID (which allows
              parallel runs)

        Returns:
            First 16 hex characters of the SHA256 digest

        Example:
            >>> detector1 = MADDetector(threshold=3.0)
            >>> detector2 = MADDetector(threshold=3.0)
            >>> detector1.get_detector_id() == detector2.get_detector_id()
            True
            >>> detector3 = MADDetector(threshold=2.5)
            >>> detector1.get_detector_id() != detector3.get_detector_id()
            True
        """
        ordered_items = sorted(self._get_non_default_params().items())
        # The ID is built from str() of the sorted (name, value) pairs, so
        # it depends on how each parameter value renders as text.
        fingerprint = "".join((self.__class__.__name__, str(ordered_items)))
        digest = hashlib.sha256(fingerprint.encode()).hexdigest()
        return digest[:16]

    def get_detector_params(self) -> str:
        """
        Serialize the detector's non-default parameters to JSON.

        Keys are sorted for consistency. The string is what gets stored
        in _dtk_detections.detector_params. Parameters equal to their
        defaults are omitted, because only the output of
        _get_non_default_params() is serialized.

        Returns:
            JSON string with sorted keys (exact whitespace is decided by
            json_dumps_sorted)

        Example:
            >>> detector = MADDetector(threshold=2.5)
            >>> detector.get_detector_params()  # e.g. '{"threshold": 2.5}'
        """
        return json_dumps_sorted(self._get_non_default_params())

    @abstractmethod
    def _get_non_default_params(self) -> Dict[str, Any]:
        """
        Return only the parameters whose values differ from the defaults.

        The result drives both the ID hash and the stored parameter
        JSON; leaving defaults out keeps the hash identical no matter
        whether a default was passed explicitly or omitted.

        Returns:
            Dictionary of non-default parameters

        Example:
            >>> def _get_non_default_params(self):
            ...     defaults = {"threshold": 3.0, "min_samples": 30}
            ...     return {
            ...         k: v for k, v in self.params.items()
            ...         if v != defaults.get(k)
            ...     }
        """

    def __repr__(self) -> str:
        """Render the detector as ``ClassName(key=value, ...)``."""
        rendered = [f"{name}={value}" for name, value in self.params.items()]
        return "{}({})".format(self.__class__.__name__, ", ".join(rendered))