detectkit-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,467 @@
1
+ """
2
+ Metric configuration models.
3
+
4
+ Defines configuration structure for individual metrics loaded from YAML files.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
+ from pydantic import BaseModel, Field, field_validator, model_validator
11
+
12
+ from detectkit.core.interval import Interval
13
+
14
+
15
class DetectorConfig(BaseModel):
    """
    Settings for one detector attached to a metric.

    Attributes:
        type: Detector identifier. Validated against the supported set
            ("mad", "zscore", "iqr", "manual_bounds", "prophet", "timesfm").
        params: Free-form parameter mapping for the detector. It mixes:
            - Algorithm params: threshold, window_size, etc.
            - Execution params: start_time, batch_size, min_samples, etc.
            - Seasonality params: seasonality_components (with grouping support)

    Note:
        ``get_algorithm_params`` strips only ``start_time``, ``batch_size``
        and ``seasonality_components``; every other key in ``params`` is
        returned unchanged.

    Example YAML:
        ```yaml
        detectors:
          - type: mad
            params:
              # Algorithm parameters
              threshold: 3.0
              window_size: 4320

              # Execution parameters (optional)
              start_time: "2024-02-01 00:00:00"  # When to start detection
              batch_size: 500                    # Detection batch size
              min_samples: 100                   # Min points before detection
              min_samples_per_group: 10          # Min points per seasonal group
              weighting: null                    # null, 'linear', 'exponential'

              # Seasonality grouping (optional)
              seasonality_components:
                - "day_of_week"                  # Single component
                - ["league_day", "hour"]         # Grouped components
        ```
    """

    type: str = Field(..., description="Detector type")
    params: Dict[str, Any] = Field(
        description="Detector parameters", default_factory=dict
    )

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Return ``v`` unchanged if it names a supported detector, else raise."""
        allowed_types = {
            "mad",
            "zscore",
            "iqr",
            "manual_bounds",
            "prophet",
            "timesfm",
        }
        if v in allowed_types:
            return v
        raise ValueError(
            f"Invalid detector type: {v}. "
            f"Allowed: {', '.join(sorted(allowed_types))}"
        )

    def get_algorithm_params(self) -> Dict[str, Any]:
        """
        Return ``params`` without the execution-only keys.

        Keys that are dropped:
            - start_time: When to start detection
            - batch_size: Detection batch size
            - seasonality_components: Seasonality grouping config

        Returns:
            New dict holding every remaining key/value pair of ``params``.
        """
        algorithm_params: Dict[str, Any] = {}
        for key, value in self.params.items():
            if key in ("start_time", "batch_size", "seasonality_components"):
                continue
            algorithm_params[key] = value
        return algorithm_params

    def get_start_time(self) -> Optional[str]:
        """Return the ``start_time`` entry of ``params``, or None if absent."""
        return self.params.get("start_time")

    def get_batch_size(self) -> Optional[int]:
        """Return the ``batch_size`` entry of ``params``, or None if absent."""
        return self.params.get("batch_size")

    def get_seasonality_components(self) -> Optional[List[Union[str, List[str]]]]:
        """Return the ``seasonality_components`` entry of ``params``, or None if absent."""
        return self.params.get("seasonality_components")
99
+
100
+
101
class QueryColumnsConfig(BaseModel):
    """
    Names of the columns produced by a metric's SQL query.

    Lets a query use its own column names, which are then mapped to the
    internal ones.

    Attributes:
        timestamp: Timestamp column name in the query result (default: "timestamp")
        metric: Metric value column name in the query result (default: "value")
        seasonality: Optional list of seasonality column names in the query result

    Example YAML:
        ```yaml
        query_columns:
          timestamp: "time_interval"
          metric: "metric_value"
          seasonality: ["day_of_week", "league_day", "hour"]
        ```
    """

    timestamp: str = Field(
        "timestamp",
        description="Timestamp column name in query",
    )
    metric: str = Field(
        "value",
        description="Metric value column name in query",
    )
    seasonality: Optional[List[str]] = Field(
        None,
        description="Seasonality column names in query",
    )
128
+
129
+
130
class AlertConfig(BaseModel):
    """
    Alert configuration for a metric.

    Attributes:
        enabled: Whether alerting is enabled (default: True)
        timezone: Timezone for displaying timestamps in alerts (e.g., "Europe/Moscow")
        channels: List of alert channel names to use
        min_detectors: Minimum number of detectors that must agree (>= 1, default: 1)
        direction: Required anomaly direction ("same", "any", "up", "down")
        consecutive_anomalies: Minimum consecutive anomalies to trigger alert
            (>= 1, default: 3)
        no_data_alert: Whether to alert when data is missing (default: False)
        template_single: Custom template for single anomaly alert
        template_consecutive: Custom template for consecutive anomalies alert
    """

    enabled: bool = Field(default=True, description="Enable alerting")
    timezone: Optional[str] = Field(
        default=None, description="Timezone for displaying timestamps (e.g., 'Europe/Moscow')"
    )
    channels: List[str] = Field(
        default_factory=list, description="Alert channel names"
    )
    min_detectors: int = Field(
        default=1, description="Minimum detectors that must agree"
    )
    direction: str = Field(
        default="same", description="Required anomaly direction: 'same', 'any', 'up', 'down'"
    )
    consecutive_anomalies: int = Field(
        default=3, description="Consecutive anomalies to trigger alert"
    )
    no_data_alert: bool = Field(
        default=False, description="Alert when no data is available"
    )
    template_single: Optional[str] = Field(
        default=None, description="Custom template for single anomaly"
    )
    template_consecutive: Optional[str] = Field(
        default=None, description="Custom template for consecutive anomalies"
    )

    @field_validator("consecutive_anomalies")
    @classmethod
    def validate_consecutive(cls, v: int) -> int:
        """Validate consecutive anomalies threshold (must be >= 1)."""
        if v < 1:
            raise ValueError("consecutive_anomalies must be at least 1")
        return v

    @field_validator("min_detectors")
    @classmethod
    def validate_min_detectors(cls, v: int) -> int:
        """Validate min_detectors (must be >= 1)."""
        if v < 1:
            raise ValueError("min_detectors must be at least 1")
        return v

    @field_validator("direction")
    @classmethod
    def validate_direction(cls, v: str) -> str:
        """Validate direction against the allowed set of values."""
        allowed = {"same", "any", "up", "down"}
        if v not in allowed:
            # Sort before joining: set iteration order is not stable across
            # runs, which would make the error message non-deterministic.
            raise ValueError(
                f"direction must be one of: {', '.join(sorted(allowed))}"
            )
        return v
196
+
197
+
198
class TablesConfig(BaseModel):
    """
    Per-metric overrides for internal table names.

    Any field left as None means no custom name is configured for that table.

    Attributes:
        datapoints: Custom name for datapoints table
        detections: Custom name for detections table

    Note: tasks table cannot be overridden (shared across all metrics)

    Example YAML:
        ```yaml
        tables:
          datapoints: "_dtk_datapoints_sales"
          detections: "_dtk_detections_sales"
        ```
    """

    datapoints: Optional[str] = Field(
        None,
        description="Custom datapoints table name",
    )
    detections: Optional[str] = Field(
        None,
        description="Custom detections table name",
    )
224
+
225
+
226
class MetricConfig(BaseModel):
    """
    Configuration for a single metric.

    Loaded from YAML files in metrics/ directory.

    Attributes:
        name: Metric name (unique identifier)
        profile: Profile name to use (overrides default_profile from project config)
        query: Inline SQL query (mutually exclusive with query_file)
        query_file: Path to SQL file (mutually exclusive with query)
        query_columns: Column name mapping for query results
        interval: Data interval ("10min", "1h", or seconds as int)
        loading_start_time: Start time for initial data loading (UTC)
        seasonality_columns: List of seasonality features to extract
        loading_batch_size: Number of rows to load per batch
        detectors: List of detector configurations
        alerting: Alert configuration (optional)
        tables: Custom table names (optional, overrides defaults)
        enabled: Whether metric is enabled for processing

    Example YAML:
        ```yaml
        name: cpu_usage
        profile: clickhouse_prod
        query_file: sql/cpu_usage.sql
        query_columns:
          timestamp: "time_interval"
          metric: "cpu_pct"
          seasonality: ["hour", "day_of_week"]
        interval: 10min
        loading_start_time: "2024-01-01 00:00:00"
        seasonality_columns:
          - hour
          - day_of_week
          - is_weekend
        loading_batch_size: 10000
        detectors:
          - type: mad
            params:
              threshold: 3.0
          - type: zscore
            params:
              threshold: 3.0
        alerting:
          enabled: true
          channels:
            - mattermost_alerts
          consecutive_anomalies: 3
        ```
    """

    name: str = Field(..., description="Metric name")
    profile: Optional[str] = Field(
        default=None, description="Profile name to use (overrides default_profile)"
    )
    query: Optional[str] = Field(default=None, description="Inline SQL query")
    query_file: Optional[Path] = Field(default=None, description="Path to SQL file")
    query_columns: Optional[QueryColumnsConfig] = Field(
        default=None, description="Column name mapping for query results"
    )
    interval: Union[int, str] = Field(..., description="Data interval")
    loading_start_time: Optional[str] = Field(
        default=None,
        description="Start time for initial data loading (UTC, format: YYYY-MM-DD HH:MM:SS)",
    )
    seasonality_columns: List[str] = Field(
        default_factory=list, description="Seasonality features to extract"
    )
    loading_batch_size: int = Field(
        default=10000, description="Batch size for loading"
    )
    detectors: List[DetectorConfig] = Field(
        default_factory=list, description="Detector configurations"
    )
    alerting: Optional[AlertConfig] = Field(
        default=None, description="Alert configuration"
    )
    tables: Optional[TablesConfig] = Field(
        default=None, description="Custom table names (overrides defaults)"
    )
    enabled: bool = Field(default=True, description="Whether metric is enabled")

    # Parsed interval (computed lazily from the string/int in `interval`
    # on the first get_interval() call, then reused).
    _interval: Optional[Interval] = None

    @model_validator(mode="after")
    def validate_query_source(self) -> "MetricConfig":
        """Validate that exactly one of query or query_file is specified."""
        if self.query is None and self.query_file is None:
            raise ValueError("Either 'query' or 'query_file' must be specified")

        if self.query is not None and self.query_file is not None:
            raise ValueError(
                "Only one of 'query' or 'query_file' can be specified, not both"
            )

        return self

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate metric name (non-empty; alphanumerics, '_' and '-' only)."""
        if not v:
            raise ValueError("Metric name cannot be empty")
        # Allow alphanumeric, underscore, dash
        if not all(c.isalnum() or c in ("_", "-") for c in v):
            raise ValueError(
                "Metric name can only contain alphanumeric characters, "
                "underscores, and dashes"
            )
        return v

    @field_validator("loading_batch_size")
    @classmethod
    def validate_batch_size(cls, v: int) -> int:
        """Validate batch size (1 <= v <= 1,000,000)."""
        if v < 1:
            raise ValueError("loading_batch_size must be at least 1")
        if v > 1_000_000:
            raise ValueError(
                "loading_batch_size too large (max 1,000,000). "
                "Use smaller batches to avoid memory issues."
            )
        return v

    @field_validator("seasonality_columns")
    @classmethod
    def validate_seasonality_columns(cls, v: List[str]) -> List[str]:
        """Validate seasonality columns (known names only, no duplicates)."""
        allowed_columns = {
            "hour",
            "day_of_week",
            "day_of_month",
            "month",
            "is_weekend",
            "is_holiday",
        }

        for col in v:
            if col not in allowed_columns:
                raise ValueError(
                    f"Invalid seasonality column: '{col}'. "
                    f"Allowed: {', '.join(sorted(allowed_columns))}"
                )

        # Check for duplicates
        if len(v) != len(set(v)):
            raise ValueError("Duplicate seasonality columns not allowed")

        return v

    def get_interval(self) -> Interval:
        """
        Get parsed Interval object.

        The Interval is built from ``self.interval`` on first use and cached
        on the instance for subsequent calls.

        Returns:
            Interval instance

        Example:
            >>> config = MetricConfig(name="test", interval="10min", query="SELECT 1")
            >>> config.get_interval().seconds
            600
        """
        if self._interval is None:
            self._interval = Interval(self.interval)
        return self._interval

    def get_query_text(self, project_root: Optional[Path] = None) -> str:
        """
        Get SQL query text (from inline query or file).

        Args:
            project_root: Root directory for resolving query_file paths.
                When None, query_file is used as given.

        Returns:
            SQL query text

        Raises:
            FileNotFoundError: If query_file doesn't exist

        Example:
            >>> config = MetricConfig(
            ...     name="test",
            ...     interval=600,
            ...     query="SELECT timestamp, value FROM metrics"
            ... )
            >>> config.get_query_text()
            'SELECT timestamp, value FROM metrics'
        """
        if self.query is not None:
            return self.query

        # Load from file. Path() coercion lets callers pass a plain string
        # as project_root without breaking the `/` join.
        if project_root is not None:
            query_path = Path(project_root) / self.query_file
        else:
            query_path = self.query_file

        if not query_path.exists():
            raise FileNotFoundError(f"Query file not found: {query_path}")

        # Explicit UTF-8: without it open() uses the locale's preferred
        # encoding, so the same SQL file could decode differently per host.
        with open(query_path, "r", encoding="utf-8") as f:
            return f.read()

    @classmethod
    def from_yaml_file(cls, path: Path) -> "MetricConfig":
        """
        Load metric configuration from YAML file.

        Supports both flat and nested structures:
        - Flat: name: "cpu_usage" at root level
        - Nested: metric: { name: "cpu_usage", ... }

        Args:
            path: Path to YAML file

        Returns:
            MetricConfig instance

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If the file is empty or its content does not
                validate as a MetricConfig (pydantic's ValidationError is a
                ValueError subclass)

        Example:
            >>> config = MetricConfig.from_yaml_file(Path("metrics/cpu_usage.yml"))
        """
        import yaml

        # Accept str as well as Path so `.exists()` below cannot fail on type.
        path = Path(path)

        if not path.exists():
            raise FileNotFoundError(f"Metric config file not found: {path}")

        # Explicit UTF-8 instead of the platform-dependent default encoding.
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not data:
            raise ValueError(f"Empty metric config file: {path}")

        # Support nested structure: metric: { ... }
        # Only probe for the "metric" key when the document is a mapping;
        # a scalar or list document would otherwise raise TypeError here
        # instead of failing validation below.
        if isinstance(data, dict) and isinstance(data.get("metric"), dict):
            data = data["metric"]

        return cls.model_validate(data)