detectkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- detectkit/__init__.py +17 -0
- detectkit/alerting/__init__.py +13 -0
- detectkit/alerting/channels/__init__.py +21 -0
- detectkit/alerting/channels/base.py +191 -0
- detectkit/alerting/channels/email.py +146 -0
- detectkit/alerting/channels/factory.py +193 -0
- detectkit/alerting/channels/mattermost.py +53 -0
- detectkit/alerting/channels/slack.py +55 -0
- detectkit/alerting/channels/telegram.py +110 -0
- detectkit/alerting/channels/webhook.py +139 -0
- detectkit/alerting/orchestrator.py +368 -0
- detectkit/cli/__init__.py +1 -0
- detectkit/cli/commands/__init__.py +1 -0
- detectkit/cli/commands/init.py +282 -0
- detectkit/cli/commands/run.py +427 -0
- detectkit/cli/commands/test_alert.py +184 -0
- detectkit/cli/main.py +186 -0
- detectkit/config/__init__.py +30 -0
- detectkit/config/metric_config.py +467 -0
- detectkit/config/profile.py +285 -0
- detectkit/config/project_config.py +164 -0
- detectkit/core/__init__.py +6 -0
- detectkit/core/interval.py +132 -0
- detectkit/core/models.py +106 -0
- detectkit/database/__init__.py +27 -0
- detectkit/database/clickhouse_manager.py +385 -0
- detectkit/database/internal_tables.py +581 -0
- detectkit/database/manager.py +324 -0
- detectkit/database/tables.py +134 -0
- detectkit/detectors/__init__.py +6 -0
- detectkit/detectors/base.py +222 -0
- detectkit/detectors/factory.py +138 -0
- detectkit/detectors/statistical/__init__.py +8 -0
- detectkit/detectors/statistical/iqr.py +230 -0
- detectkit/detectors/statistical/mad.py +423 -0
- detectkit/detectors/statistical/manual_bounds.py +177 -0
- detectkit/detectors/statistical/zscore.py +225 -0
- detectkit/loaders/__init__.py +6 -0
- detectkit/loaders/metric_loader.py +470 -0
- detectkit/loaders/query_template.py +164 -0
- detectkit/orchestration/__init__.py +9 -0
- detectkit/orchestration/task_manager.py +698 -0
- detectkit/utils/__init__.py +1 -0
- detectkit-0.1.0.dist-info/METADATA +231 -0
- detectkit-0.1.0.dist-info/RECORD +49 -0
- detectkit-0.1.0.dist-info/WHEEL +5 -0
- detectkit-0.1.0.dist-info/entry_points.txt +2 -0
- detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- detectkit-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base database manager interface.
|
|
3
|
+
|
|
4
|
+
Provides universal methods for database operations WITHOUT hardcoding
|
|
5
|
+
specific table logic (e.g., _dtk_datapoints, _dtk_detections).
|
|
6
|
+
|
|
7
|
+
The manager is database-agnostic and provides generic operations:
|
|
8
|
+
- execute_query(): Run SQL and return results
|
|
9
|
+
- create_table(): Create table from TableModel
|
|
10
|
+
- table_exists(): Check if table exists
|
|
11
|
+
- insert_batch(): Insert batch of data
|
|
12
|
+
- get_last_timestamp(): Get last timestamp for a metric
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
from detectkit.core.models import TableModel
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BaseDatabaseManager(ABC):
    """
    Abstract, database-agnostic contract for storage backends.

    Every operation declared here is generic: the target table is passed in
    by name, and the interface does not encode the layout of the internal
    tables (_dtk_datapoints, etc.). Internal table management is left to
    higher-level classes that are built on top of these primitives.

    Key Design Principles:
        1. Methods are universal, not table-specific
        2. Any table can be addressed through the table_name parameter
        3. Type conversion is handled inside the implementation
        4. Connection pooling and error handling belong to the implementation
    """

    @abstractmethod
    def execute_query(
        self,
        query: str,
        params: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """
        Run a SQL statement and hand back its rows as dictionaries.

        Args:
            query: SQL text to execute
            params: Values for a parameterized query, if any

        Returns:
            One dictionary per result row

        Raises:
            DatabaseError: If the query cannot be executed

        Example:
            >>> rows = manager.execute_query(
            ...     "SELECT * FROM metrics WHERE name = %(name)s",
            ...     {"name": "cpu_usage"}
            ... )
            >>> for row in rows:
            ...     print(row['timestamp'], row['value'])
        """
        ...

    @abstractmethod
    def create_table(
        self,
        table_name: str,
        table_model: TableModel,
        if_not_exists: bool = True
    ) -> None:
        """
        Create a table described by a TableModel.

        The implementation translates the database-agnostic TableModel
        into DDL for its own database.

        Args:
            table_name: Name of the table to create
            table_model: Schema definition of the table
            if_not_exists: Whether to add an IF NOT EXISTS clause

        Raises:
            DatabaseError: If the table cannot be created

        Example:
            >>> model = TableModel(
            ...     columns=[
            ...         ColumnDefinition("id", "Int32"),
            ...         ColumnDefinition("value", "Float64", nullable=True),
            ...     ],
            ...     primary_key=["id"],
            ...     engine="MergeTree",
            ...     order_by=["id"]
            ... )
            >>> manager.create_table("my_metrics", model)
        """
        ...

    @abstractmethod
    def table_exists(
        self,
        table_name: str,
        schema: Optional[str] = None
    ) -> bool:
        """
        Report whether a table is present in the database.

        Args:
            table_name: Name of the table to look for
            schema: Schema/database to look in (None means the default one)

        Returns:
            True when the table exists, False otherwise

        Example:
            >>> if not manager.table_exists("_dtk_datapoints"):
            ...     manager.create_table("_dtk_datapoints", datapoints_model)
        """
        ...

    @abstractmethod
    def insert_batch(
        self,
        table_name: str,
        data: Dict[str, np.ndarray],
        conflict_strategy: str = "ignore"
    ) -> int:
        """
        Write a batch of column-oriented data into a table.

        This is a universal method: it works with any table and is not
        specific to the internal tables.

        Args:
            table_name: Name of the table to insert into
            data: Mapping of column name to numpy array; all arrays must
                have the same length
            conflict_strategy: What to do on a conflict:
                - "ignore": Skip rows with duplicate primary keys
                - "replace": Replace existing rows
                - "fail": Raise error on conflict

        Returns:
            Number of rows inserted (can be lower than the input size when
            conflicts are ignored)

        Raises:
            ValueError: If the arrays have different lengths
            DatabaseError: If the insertion fails

        Example:
            >>> data = {
            ...     "metric_name": np.array(["cpu", "cpu"]),
            ...     "timestamp": np.array([dt1, dt2]),
            ...     "value": np.array([0.5, 0.6]),
            ... }
            >>> rows_inserted = manager.insert_batch(
            ...     "_dtk_datapoints", data, conflict_strategy="ignore"
            ... )
        """
        ...

    @abstractmethod
    def get_last_timestamp(
        self,
        table_name: str,
        metric_name: str,
        timestamp_column: str = "timestamp"
    ) -> Optional[datetime]:
        """
        Look up the most recent timestamp stored for one metric.

        Works with any table that has a metric_name column and a
        timestamp column.

        Args:
            table_name: Table to query
            metric_name: Value to match against the metric_name column
            timestamp_column: Name of the timestamp column
                (default: "timestamp")

        Returns:
            The last timestamp, or None when no data was found

        Example:
            >>> last_ts = manager.get_last_timestamp(
            ...     "_dtk_datapoints", "cpu_usage"
            ... )
            >>> if last_ts:
            ...     print(f"Last data point at {last_ts}")
        """
        ...

    @abstractmethod
    def upsert_task_status(
        self,
        metric_name: str,
        detector_id: str,
        process_type: str,
        status: str,
        last_processed_timestamp: Optional[datetime] = None,
        error_message: Optional[str] = None,
        timeout_seconds: int = 3600
    ) -> None:
        """
        Insert or update the status record of a task.

        The status record serves two purposes:
        1. Task locking: prevents concurrent runs of the same task
        2. Idempotency: last_processed_timestamp is stored so that an
           interrupted task can resume

        How the upsert is done depends on the database:
        - ClickHouse: DELETE + INSERT (no native UPSERT)
        - PostgreSQL: INSERT ... ON CONFLICT DO UPDATE
        - MySQL: INSERT ... ON DUPLICATE KEY UPDATE

        Args:
            metric_name: Metric identifier
            detector_id: Detector identifier (or "load" for loading tasks)
            process_type: Type of process ("load" or "detect")
            status: Task status ("running", "completed", "failed")
            last_processed_timestamp: Last successfully processed timestamp
            error_message: Error message if status is "failed"
            timeout_seconds: Task timeout in seconds

        Example:
            >>> # Start task
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "running",
            ...     timeout_seconds=3600
            ... )
            >>> # Update progress
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "running",
            ...     last_processed_timestamp=datetime(2024, 1, 1, 12, 0)
            ... )
            >>> # Complete task
            >>> manager.upsert_task_status(
            ...     "cpu_usage", "load", "load", "completed",
            ...     last_processed_timestamp=datetime(2024, 1, 1, 23, 59)
            ... )
        """
        ...

    @property
    @abstractmethod
    def internal_location(self) -> str:
        """
        Location (schema/database) that holds the internal tables.

        The format depends on the database:
        - ClickHouse: "database_name"
        - PostgreSQL: "schema_name"

        Returns:
            Full path to the internal schema/database

        Example:
            >>> manager.internal_location
            'detectk_internal'
        """
        ...

    @property
    @abstractmethod
    def data_location(self) -> str:
        """
        Location (schema/database) that holds the user data tables.

        The format depends on the database:
        - ClickHouse: "database_name"
        - PostgreSQL: "schema_name"

        Returns:
            Full path to the data schema/database

        Example:
            >>> manager.data_location
            'analytics'
        """
        ...

    def get_full_table_name(
        self,
        table_name: str,
        use_internal: bool = True
    ) -> str:
        """
        Build the fully qualified name of a table.

        Args:
            table_name: Unqualified table name
            use_internal: Prefix with internal_location when True,
                with data_location otherwise

        Returns:
            "<location>.<table_name>"

        Example:
            >>> manager.get_full_table_name("_dtk_datapoints", use_internal=True)
            'detectk_internal._dtk_datapoints'
        """
        # Only the selected location property is read.
        if use_internal:
            prefix = self.internal_location
        else:
            prefix = self.data_location
        return "{}.{}".format(prefix, table_name)

    @abstractmethod
    def close(self) -> None:
        """
        Close the database connection and release resources.

        Example:
            >>> manager.close()
        """
        ...

    def __enter__(self):
        """Enter the context manager; the manager itself is the target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context manager and close the connection."""
        # Nothing is returned, so an exception raised inside the ``with``
        # body keeps propagating after close() has run.
        self.close()
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Internal table models for detectkit.
|
|
3
|
+
|
|
4
|
+
Defines schemas for internal tables:
|
|
5
|
+
- _dtk_datapoints: Metric data points
|
|
6
|
+
- _dtk_detections: Anomaly detections
|
|
7
|
+
- _dtk_tasks: Task status and locking
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from detectkit.core.models import ColumnDefinition, TableModel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_datapoints_table_model() -> TableModel:
    """
    Build the TableModel describing the _dtk_datapoints table.

    Columns:
        - metric_name: Metric identifier
        - timestamp: Data point timestamp (UTC, millisecond precision)
        - value: Metric value (nullable for missing data)
        - seasonality_data: JSON with seasonality components (hour, day_of_week, etc.)
        - interval_seconds: Interval in seconds
        - seasonality_columns: Comma-separated list of seasonality columns used
        - created_at: When the record was created (UTC, millisecond precision)

    Primary Key: (metric_name, timestamp)

    Returns:
        A new TableModel instance on every call
    """
    utc_millis = "DateTime64(3, 'UTC')"
    key_columns = ("metric_name", "timestamp")

    # NOTE(review): "value" carries both a Nullable(...) type string and
    # nullable=True - confirm the DDL generator does not wrap it twice.
    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("timestamp", utc_millis),
        ColumnDefinition("value", "Nullable(Float64)", nullable=True),
        ColumnDefinition("seasonality_data", "String"),
        ColumnDefinition("interval_seconds", "Int32"),
        ColumnDefinition("seasonality_columns", "String"),
        ColumnDefinition("created_at", utc_millis),
    ]

    # primary_key and order_by get separate list objects, so mutating one
    # on the returned model never affects the other.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="ReplacingMergeTree(created_at)",
        order_by=list(key_columns),
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_detections_table_model() -> TableModel:
    """
    Build the TableModel describing the _dtk_detections table.

    Columns:
        - metric_name: Metric identifier
        - detector_id: Detector identifier (hash of class + params)
        - timestamp: Detection timestamp (UTC, millisecond precision)
        - is_anomaly: Whether the point is anomalous
        - confidence_lower: Lower confidence bound
        - confidence_upper: Upper confidence bound
        - value: Actual metric value
        - detector_params: JSON with sorted detector parameters
        - detection_metadata: JSON with missing_ratio, severity, direction, etc.
        - created_at: When detection was performed (UTC, millisecond precision)

    Primary Key: (metric_name, detector_id, timestamp)

    Returns:
        A new TableModel instance on every call
    """
    utc_millis = "DateTime64(3, 'UTC')"
    nullable_float = "Nullable(Float64)"
    key_columns = ("metric_name", "detector_id", "timestamp")

    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("detector_id", "String"),
        ColumnDefinition("timestamp", utc_millis),
        ColumnDefinition("is_anomaly", "Bool"),
        ColumnDefinition("confidence_lower", nullable_float, nullable=True),
        ColumnDefinition("confidence_upper", nullable_float, nullable=True),
        ColumnDefinition("value", nullable_float, nullable=True),
        ColumnDefinition("detector_params", "String"),
        ColumnDefinition("detection_metadata", "String"),
        ColumnDefinition("created_at", utc_millis),
    ]

    # Separate list objects for primary_key and order_by, as in a literal.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="ReplacingMergeTree(created_at)",
        order_by=list(key_columns),
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_tasks_table_model() -> TableModel:
    """
    Build the TableModel describing the _dtk_tasks table.

    Columns:
        - metric_name: Metric identifier
        - detector_id: Detector identifier (or "load" for loading tasks)
        - process_type: Type of process ("load" or "detect")
        - status: Task status ("running", "completed", "failed")
        - started_at: When the task started (UTC, millisecond precision)
        - updated_at: Last update timestamp (UTC, millisecond precision)
        - last_processed_timestamp: Last successfully processed timestamp
        - error_message: Error message if failed (nullable)
        - timeout_seconds: Task timeout in seconds

    Primary Key: (metric_name, detector_id, process_type)

    The table has a dual purpose:
        1. Locking: only one process can run for a given (metric, detector, type)
        2. Resume: last_processed_timestamp is kept to resume from interruptions

    Returns:
        A new TableModel instance on every call
    """
    utc_millis = "DateTime64(3, 'UTC')"
    key_columns = ("metric_name", "detector_id", "process_type")

    last_processed = ColumnDefinition(
        "last_processed_timestamp",
        "Nullable(DateTime64(3, 'UTC'))",
        nullable=True,
    )
    columns = [
        ColumnDefinition("metric_name", "String"),
        ColumnDefinition("detector_id", "String"),
        ColumnDefinition("process_type", "String"),
        ColumnDefinition("status", "String"),
        ColumnDefinition("started_at", utc_millis),
        ColumnDefinition("updated_at", utc_millis),
        last_processed,
        ColumnDefinition("error_message", "Nullable(String)", nullable=True),
        ColumnDefinition("timeout_seconds", "Int32"),
    ]

    # Separate list objects for primary_key and order_by, as in a literal.
    return TableModel(
        columns=columns,
        primary_key=list(key_columns),
        engine="MergeTree",
        order_by=list(key_columns),
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Names of the internal tables, exposed as module-level constants
TABLE_DATAPOINTS = "_dtk_datapoints"
TABLE_DETECTIONS = "_dtk_detections"
TABLE_TASKS = "_dtk_tasks"

# Registry mapping each internal table name to the zero-argument factory
# that builds its TableModel
INTERNAL_TABLES = {
    TABLE_DATAPOINTS: get_datapoints_table_model,
    TABLE_DETECTIONS: get_detections_table_model,
    TABLE_TASKS: get_tasks_table_model,
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base detector interface for anomaly detection.
|
|
3
|
+
|
|
4
|
+
All detectors must inherit from BaseDetector and implement:
|
|
5
|
+
- _validate_params() - parameter validation
|
|
6
|
+
- detect() - main detection method
|
|
7
|
+
- _get_non_default_params() - for hash generation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
try:
    import orjson
    HAS_ORJSON = True
except ImportError:
    import json
    HAS_ORJSON = False


def json_dumps_sorted(obj):
    """
    Serialize ``obj`` to a JSON string with sorted keys.

    orjson is used when it is installed, the standard-library json module
    otherwise. The fallback is configured to emit the same compact text as
    orjson (no whitespace after separators, non-ASCII characters written
    as-is), so the stored string does not depend on which backend happens
    to be available in the environment.

    Args:
        obj: JSON-serializable object (typically a dict of parameters or
            detection metadata)

    Returns:
        Compact JSON string with keys in sorted order

    Raises:
        TypeError: If ``obj`` contains a value the active backend cannot
            serialize

    Example:
        >>> json_dumps_sorted({"b": 1, "a": 2})
        '{"a":2,"b":1}'
    """
    if HAS_ORJSON:
        return orjson.dumps(obj, option=orjson.OPT_SORT_KEYS).decode("utf-8")

    # NOTE(review): NaN/Infinity are still backend-dependent (orjson is
    # documented to write null, json writes NaN) - confirm whether callers
    # can pass such values.
    return json.dumps(
        obj,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class DetectionResult:
    """
    Outcome of anomaly detection for one data point.

    Attributes:
        timestamp: Timestamp of the data point
        value: Actual metric value
        is_anomaly: Whether the point is anomalous
        confidence_lower: Lower bound of the confidence interval (if available)
        confidence_upper: Upper bound of the confidence interval (if available)
        detection_metadata: Extra metadata (severity, direction, etc.)
    """

    timestamp: np.datetime64
    value: float
    is_anomaly: bool
    confidence_lower: Optional[float] = None
    confidence_upper: Optional[float] = None
    detection_metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Flatten the result into a dictionary for database storage.

        Returns:
            Dictionary with the scalar fields copied as-is and
            detection_metadata encoded as a JSON string (an empty JSON
            object when no metadata is set)
        """
        metadata = self.detection_metadata or {}
        passthrough = (
            "timestamp",
            "value",
            "is_anomaly",
            "confidence_lower",
            "confidence_upper",
        )
        row = {name: getattr(self, name) for name in passthrough}
        # Added last so the key order matches the declared field order.
        row["detection_metadata"] = json_dumps_sorted(metadata)
        return row
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class BaseDetector(ABC):
    """
    Abstract base class for anomaly detectors.

    Subclasses must:
        1. Validate their parameters in _validate_params()
        2. Implement detect() to return a DetectionResult for each point
        3. Implement _get_non_default_params() for hash generation

    The detector_id (hash) is used for:
        - Storing detections in the _dtk_detections table
        - Task locking in the _dtk_tasks table

    Example:
        >>> class MyDetector(BaseDetector):
        ...     def __init__(self, threshold: float = 3.0):
        ...         super().__init__(threshold=threshold)
        ...
        ...     def _validate_params(self):
        ...         if self.params["threshold"] <= 0:
        ...             raise ValueError("threshold must be positive")
        ...
        ...     def detect(self, data):
        ...         # Detection logic here
        ...         pass
        ...
        ...     def _get_non_default_params(self):
        ...         defaults = {"threshold": 3.0}
        ...         return {k: v for k, v in self.params.items() if v != defaults.get(k)}
    """

    def __init__(self, **params):
        """
        Store the parameters and validate them immediately.

        Args:
            **params: Detector-specific parameters

        Raises:
            ValueError: If _validate_params() rejects the parameters
        """
        self.params = params
        self._validate_params()

    @abstractmethod
    def _validate_params(self):
        """
        Validate detector parameters.

        Should raise ValueError if the parameters are invalid. Called from
        __init__ after self.params has been set.

        Example:
            >>> def _validate_params(self):
            ...     if self.params.get("threshold", 0) <= 0:
            ...         raise ValueError("threshold must be positive")
        """
        pass

    # The return annotation is a string so it is not evaluated when the
    # class body runs; the rest of the file uses typing.Dict/Optional, and
    # this keeps the signature independent of subscriptable builtins.
    @abstractmethod
    def detect(self, data: Dict[str, np.ndarray]) -> "list[DetectionResult]":
        """
        Perform anomaly detection on metric data.

        Args:
            data: Dictionary from MetricLoader.load() with keys:
                - timestamp: np.array of datetime64[ms]
                - value: np.array of float64 (may contain NaN for missing data)
                - seasonality_data: np.array of JSON strings
                - seasonality_columns: list of column names

        Returns:
            List of DetectionResult, one for each data point

        Notes:
            - Handle NaN values appropriately (missing data)
            - Use seasonality_data if the detector supports it
            - confidence_lower/upper are optional (only if the detector provides them)
            - detection_metadata can include: severity, direction, missing_ratio, etc.

        Example:
            >>> results = detector.detect(data)
            >>> for result in results:
            ...     if result.is_anomaly:
            ...         print(f"Anomaly at {result.timestamp}: {result.value}")
        """
        pass

    def get_detector_id(self) -> str:
        """
        Generate the unique detector ID (hash).

        The hash is based on:
            - The detector class name
            - The non-default parameters, sorted by name

        This ensures:
            - Same detector with same params = same ID
            - Different params = different ID (allows parallel runs)

        Returns:
            16-character hex string (first 16 chars of SHA256)

        Example:
            >>> detector1 = MADDetector(threshold=3.0)
            >>> detector2 = MADDetector(threshold=3.0)
            >>> detector1.get_detector_id() == detector2.get_detector_id()
            True
            >>> detector3 = MADDetector(threshold=2.5)
            >>> detector1.get_detector_id() != detector3.get_detector_id()
            True
        """
        # The hashed text is deliberately left exactly as it was (class
        # name + str() of the sorted items): IDs are persisted, so any
        # change here would orphan previously stored detections and tasks.
        sorted_params = sorted(self._get_non_default_params().items())
        hash_string = self.__class__.__name__ + str(sorted_params)
        return hashlib.sha256(hash_string.encode()).hexdigest()[:16]

    def get_detector_params(self) -> str:
        """
        Get the non-default detector parameters as a JSON string.

        Keys are sorted for consistency. Used for storing in
        _dtk_detections.detector_params. The exact text (including
        whitespace) is whatever json_dumps_sorted() produces.

        Returns:
            JSON string with the sorted non-default parameters

        Example:
            >>> detector = MADDetector(threshold=2.5)
            >>> json.loads(detector.get_detector_params())
            {'threshold': 2.5}
        """
        return json_dumps_sorted(self._get_non_default_params())

    @abstractmethod
    def _get_non_default_params(self) -> Dict[str, Any]:
        """
        Get the parameters that differ from their defaults.

        Used for hash generation and parameter storage. Only non-default
        parameters are included to ensure consistent hashing across
        different instantiations.

        Returns:
            Dictionary of non-default parameters

        Example:
            >>> def _get_non_default_params(self):
            ...     defaults = {"threshold": 3.0, "min_samples": 30}
            ...     return {
            ...         k: v for k, v in self.params.items()
            ...         if v != defaults.get(k)
            ...     }
        """
        pass

    def __repr__(self) -> str:
        """
        Return ``ClassName(key=value, ...)`` for all stored parameters.

        Values are rendered with repr() so string parameters keep their
        quotes and an empty string stays visible.
        """
        params_str = ", ".join(f"{k}={v!r}" for k, v in self.params.items())
        return f"{self.__class__.__name__}({params_str})"
|