detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,282 @@
1
+ """
2
+ Implementation of 'dtk init' command.
3
+
4
+ Creates a new detectkit project with proper structure.
5
+ """
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+ import click
11
+
12
+
13
def run_init(project_name: str, target_dir: str):
    """
    Initialize a new detectkit project.

    If the target directory already exists, an error message is printed and
    nothing is created or overwritten.

    All generated files are written as UTF-8 so the result does not depend on
    the platform's default encoding (the project name is interpolated into
    detectkit_project.yml and README.md).

    Args:
        project_name: Name of the project (also the name of the new directory)
        target_dir: Directory to create project in

    Creates:
        project_name/
        ├── detectkit_project.yml
        ├── profiles.yml
        ├── README.md
        ├── metrics/
        │   ├── .gitkeep
        │   └── example_cpu_usage.yml
        └── sql/
            └── .gitkeep
    """
    target_path = Path(target_dir) / project_name

    # Check if project already exists
    if target_path.exists():
        click.echo(
            click.style(
                f"Error: Directory '{target_path}' already exists!",
                fg="red",
                bold=True,
            )
        )
        return

    # Create project directory
    click.echo(f"Creating detectkit project '{project_name}' in {target_dir}...")

    target_path.mkdir(parents=True, exist_ok=True)

    # Create subdirectories
    (target_path / "metrics").mkdir(exist_ok=True)
    (target_path / "sql").mkdir(exist_ok=True)

    # Create .gitkeep files so the empty directories survive in version control
    (target_path / "metrics" / ".gitkeep").touch()
    (target_path / "sql" / ".gitkeep").touch()

    # Create detectkit_project.yml
    project_config = f"""# detectkit project configuration
name: {project_name}
version: '1.0'

# Paths
metrics_path: metrics
sql_path: sql

# Default profile to use
default_profile: dev

# Default table names (can be overridden in metrics)
tables:
  datapoints: _dtk_datapoints
  detections: _dtk_detections
  tasks: _dtk_tasks

# Default timeouts (seconds)
timeouts:
  load: 1800 # 30 minutes
  detect: 3600 # 1 hour
  alert: 300 # 5 minutes
"""

    (target_path / "detectkit_project.yml").write_text(
        project_config, encoding="utf-8"
    )

    # Create profiles.yml
    # NOTE: plain (non-f) string - the "{{ ... }}" placeholders must reach the
    # file verbatim.
    profiles_config = """# Database connection profiles
# Copy this file to ~/.detectkit/profiles.yml for user-level config

dev:
  type: clickhouse
  host: localhost
  port: 9000
  database: default
  user: default
  password: ""

  # ClickHouse specific settings
  settings:
    max_execution_time: 300

prod:
  type: clickhouse
  host: "{{ env_var('CLICKHOUSE_HOST') }}"
  port: 9000
  database: monitoring
  user: "{{ env_var('CLICKHOUSE_USER') }}"
  password: "{{ env_var('CLICKHOUSE_PASSWORD') }}"

  settings:
    max_execution_time: 600

# Example PostgreSQL profile
# postgres_dev:
#   type: postgres
#   host: localhost
#   port: 5432
#   database: monitoring
#   user: postgres
#   password: postgres
#   schema: public

# Example MySQL profile
# mysql_dev:
#   type: mysql
#   host: localhost
#   port: 3306
#   database: monitoring
#   user: root
#   password: root

# Alert channels configuration
alert_channels:
  # Mattermost channel
  mattermost_alerts:
    type: mattermost
    webhook_url: "{{ env_var('MATTERMOST_WEBHOOK_URL') }}"
    username: detectkit
    icon_url: https://example.com/detectkit-icon.png

  # Slack channel example
  # slack_alerts:
  #   type: slack
  #   webhook_url: "{{ env_var('SLACK_WEBHOOK_URL') }}"
  #   channel: "#alerts"
  #   username: detectkit

  # Generic webhook example
  # webhook_alerts:
  #   type: webhook
  #   url: "{{ env_var('WEBHOOK_URL') }}"
  #   method: POST
  #   headers:
  #     Authorization: "Bearer {{ env_var('WEBHOOK_TOKEN') }}"
"""

    (target_path / "profiles.yml").write_text(profiles_config, encoding="utf-8")

    # Create example metric
    # NOTE: plain (non-f) string - "{{ from_date }}" / "{{ to_date }}" are
    # written to the file as-is.
    example_metric = """# Example metric configuration
name: example_cpu_usage
description: CPU usage monitoring example

# Data source
query: |
  SELECT
    timestamp,
    cpu_usage as value
  FROM system_metrics
  WHERE metric_name = 'cpu_usage'
    AND timestamp >= {{ from_date }}
    AND timestamp < {{ to_date }}
  ORDER BY timestamp

# Or use external SQL file:
# query_file: sql/cpu_usage.sql

# Time interval between datapoints
interval: 1min

# Loading configuration
loading:
  fill_gaps: true
  max_gap_fill: 10 # Fill up to 10 missing points

  # Seasonality extraction
  extract_seasonality:
    - minute_of_hour
    - hour_of_day
    - day_of_week

# Anomaly detectors
detectors:
  - type: zscore
    params:
      threshold: 3.0
      window_size: 100

  - type: mad
    params:
      threshold: 3.0
      window_size: 100

# Alerting (optional)
alerting:
  enabled: true

  # Alert channel names (defined in profiles.yml)
  channels:
    - mattermost_alerts

  # Alert conditions
  consecutive_anomalies: 3
  alert_on_missing_data: false

# Tags for selection
tags:
  - critical
  - system
"""

    (target_path / "metrics" / "example_cpu_usage.yml").write_text(
        example_metric, encoding="utf-8"
    )

    # Create README
    readme = f"""# {project_name}

detectkit monitoring project.

## Getting Started

1. Configure your database connection in `profiles.yml`

2. Create metric definitions in `metrics/` directory

3. Run metrics:
```bash
cd {project_name}
dtk run --select example_cpu_usage
```

## Project Structure

- `detectkit_project.yml` - Project configuration
- `profiles.yml` - Database connection profiles
- `metrics/` - Metric definitions (YAML files)
- `sql/` - SQL query files (optional)

## Commands

```bash
# Run single metric
dtk run --select cpu_usage

# Run with specific steps
dtk run --select cpu_usage --steps load,detect

# Run metrics by tag
dtk run --select tag:critical

# Reload data from specific date
dtk run --select cpu_usage --from 2024-01-01

# Full refresh
dtk run --select cpu_usage --full-refresh
```

## Documentation

See https://github.com/alexeiveselov92/detectkit for full documentation.
"""

    (target_path / "README.md").write_text(readme, encoding="utf-8")

    # Success message
    click.echo()
    click.echo(click.style("✓ Project created successfully!", fg="green", bold=True))
    click.echo()
    click.echo("Your new detectkit project is ready!")
    click.echo()
    click.echo("Next steps:")
    click.echo(f" 1. cd {project_name}")
    click.echo(" 2. Configure database connection in profiles.yml")
    click.echo(" 3. Create or edit metric definitions in metrics/")
    click.echo(" 4. Run: dtk run --select example_cpu_usage")
    click.echo()
@@ -0,0 +1,427 @@
1
+ """
2
+ Implementation of 'dtk run' command.
3
+
4
+ Executes metric processing pipeline.
5
+ """
6
+
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import List, Optional
10
+
11
+ import click
12
+
13
+ from detectkit.config.metric_config import MetricConfig
14
+ from detectkit.config.profile import ProfilesConfig
15
+ from detectkit.database.internal_tables import InternalTablesManager
16
+ from detectkit.orchestration.task_manager import PipelineStep, TaskManager
17
+
18
+
19
+ def run_command(
20
+ select: str,
21
+ exclude: Optional[str],
22
+ steps: str,
23
+ from_date: Optional[str],
24
+ to_date: Optional[str],
25
+ full_refresh: bool,
26
+ force: bool,
27
+ profile: Optional[str],
28
+ ):
29
+ """
30
+ Execute metric processing pipeline.
31
+
32
+ Args:
33
+ select: Metric selector (name, path, or tag)
34
+ exclude: Metrics to exclude (name, path, or tag)
35
+ steps: Comma-separated pipeline steps
36
+ from_date: Start date string
37
+ to_date: End date string
38
+ full_refresh: Delete and reload all data
39
+ force: Ignore task locks
40
+ profile: Profile name to use
41
+ """
42
+ # Parse steps
43
+ step_list = parse_steps(steps)
44
+
45
+ # Parse dates
46
+ from_dt = parse_date(from_date) if from_date else None
47
+ to_dt = parse_date(to_date) if to_date else None
48
+
49
+ # Find project root and load config
50
+ project_root = find_project_root()
51
+ if not project_root:
52
+ click.echo(
53
+ click.style(
54
+ "Error: Not in a detectkit project directory!",
55
+ fg="red",
56
+ bold=True,
57
+ )
58
+ )
59
+ click.echo("Run 'dtk init <project_name>' to create a new project.")
60
+ return
61
+
62
+ click.echo(f"Project root: {project_root}")
63
+
64
+ # Load project config
65
+ # project_config = load_project_config(project_root)
66
+
67
+ # Select metrics based on selector
68
+ metrics = select_metrics(select, project_root)
69
+
70
+ # Exclude metrics if specified
71
+ if exclude:
72
+ excluded_metrics = select_metrics(exclude, project_root)
73
+ excluded_names = {m.name for m in excluded_metrics}
74
+ metrics = [m for m in metrics if m.name not in excluded_names]
75
+
76
+ if excluded_metrics:
77
+ click.echo(f"Excluded {len(excluded_metrics)} metric(s) matching: {exclude}")
78
+
79
+ if not metrics:
80
+ click.echo(
81
+ click.style(
82
+ f"No metrics found matching selector: {select}",
83
+ fg="yellow",
84
+ )
85
+ )
86
+ return
87
+
88
+ click.echo(f"Found {len(metrics)} metric(s) to process")
89
+ click.echo()
90
+
91
+ # Load profiles.yml
92
+ profiles_path = project_root / "profiles.yml"
93
+ if not profiles_path.exists():
94
+ click.echo(
95
+ click.style(
96
+ "Error: profiles.yml not found!",
97
+ fg="red",
98
+ bold=True,
99
+ )
100
+ )
101
+ click.echo(f"Expected at: {profiles_path}")
102
+ return
103
+
104
+ try:
105
+ profiles_config = ProfilesConfig.from_yaml(profiles_path)
106
+ except Exception as e:
107
+ click.echo(
108
+ click.style(
109
+ f"Error loading profiles.yml: {e}",
110
+ fg="red",
111
+ bold=True,
112
+ )
113
+ )
114
+ return
115
+
116
+ # Create database manager
117
+ try:
118
+ db_manager = profiles_config.create_manager(profile)
119
+ except Exception as e:
120
+ click.echo(
121
+ click.style(
122
+ f"Error creating database manager: {e}",
123
+ fg="red",
124
+ bold=True,
125
+ )
126
+ )
127
+ return
128
+
129
+ # Create internal tables manager
130
+ internal_manager = InternalTablesManager(db_manager)
131
+
132
+ # Initialize internal tables if needed
133
+ try:
134
+ internal_manager.ensure_tables()
135
+ except Exception as e:
136
+ click.echo(
137
+ click.style(
138
+ f"Error initializing internal tables: {e}",
139
+ fg="red",
140
+ bold=True,
141
+ )
142
+ )
143
+ return
144
+
145
+ # Create task manager
146
+ task_manager = TaskManager(
147
+ internal_manager=internal_manager,
148
+ db_manager=db_manager,
149
+ profiles_config=profiles_config,
150
+ )
151
+
152
+ # Process each metric
153
+ for metric_path in metrics:
154
+ process_metric(
155
+ metric_path=metric_path,
156
+ project_root=project_root,
157
+ task_manager=task_manager,
158
+ steps=step_list,
159
+ from_date=from_dt,
160
+ to_date=to_dt,
161
+ full_refresh=full_refresh,
162
+ force=force,
163
+ )
164
+
165
+
166
+ def parse_steps(steps_str: str) -> List[PipelineStep]:
167
+ """
168
+ Parse comma-separated steps string.
169
+
170
+ Args:
171
+ steps_str: Comma-separated steps (e.g., "load,detect,alert")
172
+
173
+ Returns:
174
+ List of PipelineStep enums
175
+
176
+ Example:
177
+ >>> parse_steps("load,detect")
178
+ [PipelineStep.LOAD, PipelineStep.DETECT]
179
+ """
180
+ step_map = {
181
+ "load": PipelineStep.LOAD,
182
+ "detect": PipelineStep.DETECT,
183
+ "alert": PipelineStep.ALERT,
184
+ }
185
+
186
+ steps = []
187
+ for step_str in steps_str.split(","):
188
+ step_str = step_str.strip().lower()
189
+ if step_str not in step_map:
190
+ raise click.BadParameter(
191
+ f"Invalid step: {step_str}. Valid steps: load, detect, alert"
192
+ )
193
+ steps.append(step_map[step_str])
194
+
195
+ return steps
196
+
197
+
198
+ def parse_date(date_str: str) -> datetime:
199
+ """
200
+ Parse date string to datetime.
201
+
202
+ Supports formats:
203
+ - YYYY-MM-DD
204
+ - YYYY-MM-DD HH:MM:SS
205
+
206
+ Args:
207
+ date_str: Date string
208
+
209
+ Returns:
210
+ datetime object
211
+
212
+ Raises:
213
+ click.BadParameter: If date format is invalid
214
+ """
215
+ formats = [
216
+ "%Y-%m-%d",
217
+ "%Y-%m-%d %H:%M:%S",
218
+ ]
219
+
220
+ for fmt in formats:
221
+ try:
222
+ return datetime.strptime(date_str, fmt)
223
+ except ValueError:
224
+ continue
225
+
226
+ raise click.BadParameter(
227
+ f"Invalid date format: {date_str}. "
228
+ f"Use YYYY-MM-DD or 'YYYY-MM-DD HH:MM:SS'"
229
+ )
230
+
231
+
232
+ def find_project_root() -> Optional[Path]:
233
+ """
234
+ Find detectkit project root by looking for detectkit_project.yml.
235
+
236
+ Searches current directory and parent directories.
237
+
238
+ Returns:
239
+ Path to project root or None if not found
240
+ """
241
+ current = Path.cwd()
242
+
243
+ # Search up to 10 levels up
244
+ for _ in range(10):
245
+ if (current / "detectkit_project.yml").exists():
246
+ return current
247
+
248
+ if current.parent == current:
249
+ # Reached filesystem root
250
+ break
251
+
252
+ current = current.parent
253
+
254
+ return None
255
+
256
+
257
+ def select_metrics(selector: str, project_root: Path) -> List[Path]:
258
+ """
259
+ Select metrics based on selector.
260
+
261
+ Selector types:
262
+ - Metric name: "cpu_usage"
263
+ - Path pattern: "metrics/critical/*.yml"
264
+ - Tag: "tag:critical"
265
+
266
+ Args:
267
+ selector: Selector string
268
+ project_root: Project root path
269
+
270
+ Returns:
271
+ List of metric file paths
272
+ """
273
+ metrics_dir = project_root / "metrics"
274
+
275
+ if not metrics_dir.exists():
276
+ return []
277
+
278
+ # Tag selector
279
+ if selector.startswith("tag:"):
280
+ tag = selector[4:]
281
+ return find_metrics_by_tag(metrics_dir, tag)
282
+
283
+ # Path pattern selector
284
+ if "*" in selector or "/" in selector:
285
+ pattern = selector if selector.startswith("metrics/") else f"metrics/{selector}"
286
+ return list(project_root.glob(pattern))
287
+
288
+ # Metric name selector
289
+ metric_file = metrics_dir / f"{selector}.yml"
290
+ if metric_file.exists():
291
+ return [metric_file]
292
+
293
+ # Try with .yaml extension
294
+ metric_file = metrics_dir / f"{selector}.yaml"
295
+ if metric_file.exists():
296
+ return [metric_file]
297
+
298
+ return []
299
+
300
+
301
+ def find_metrics_by_tag(metrics_dir: Path, tag: str) -> List[Path]:
302
+ """
303
+ Find all metrics with specific tag.
304
+
305
+ Args:
306
+ metrics_dir: Metrics directory path
307
+ tag: Tag to search for
308
+
309
+ Returns:
310
+ List of metric paths with this tag
311
+ """
312
+ import yaml
313
+
314
+ matching_metrics = []
315
+
316
+ for metric_file in metrics_dir.glob("**/*.yml"):
317
+ try:
318
+ with open(metric_file) as f:
319
+ config = yaml.safe_load(f)
320
+
321
+ if config and "tags" in config:
322
+ if tag in config["tags"]:
323
+ matching_metrics.append(metric_file)
324
+ except Exception:
325
+ # Skip files that can't be parsed
326
+ continue
327
+
328
+ return matching_metrics
329
+
330
+
331
+ def process_metric(
332
+ metric_path: Path,
333
+ project_root: Path,
334
+ task_manager: TaskManager,
335
+ steps: List[PipelineStep],
336
+ from_date: Optional[datetime],
337
+ to_date: Optional[datetime],
338
+ full_refresh: bool,
339
+ force: bool,
340
+ ):
341
+ """
342
+ Process a single metric.
343
+
344
+ Args:
345
+ metric_path: Path to metric YAML file
346
+ project_root: Project root directory
347
+ task_manager: Task manager instance
348
+ steps: Pipeline steps to execute
349
+ from_date: Start date
350
+ to_date: End date
351
+ full_refresh: Full refresh flag
352
+ force: Force flag
353
+ """
354
+ metric_name = metric_path.stem
355
+
356
+ click.echo(click.style(f"Processing: {metric_name}", fg="cyan", bold=True))
357
+ click.echo(f" File: {metric_path}")
358
+ click.echo(f" Steps: {', '.join(s.value for s in steps)}")
359
+
360
+ if from_date:
361
+ click.echo(f" From: {from_date}")
362
+ if to_date:
363
+ click.echo(f" To: {to_date}")
364
+ if full_refresh:
365
+ click.echo(click.style(" Full refresh: YES", fg="yellow"))
366
+ if force:
367
+ click.echo(click.style(" Force: YES (ignoring locks)", fg="yellow"))
368
+
369
+ click.echo()
370
+
371
+ # Load metric configuration
372
+ try:
373
+ config = MetricConfig.from_yaml_file(metric_path)
374
+ except Exception as e:
375
+ click.echo(
376
+ click.style(
377
+ f" ✗ Error loading metric config: {e}",
378
+ fg="red",
379
+ )
380
+ )
381
+ click.echo()
382
+ return
383
+
384
+ # Run pipeline
385
+ try:
386
+ result = task_manager.run_metric(
387
+ config=config,
388
+ steps=steps,
389
+ from_date=from_date,
390
+ to_date=to_date,
391
+ full_refresh=full_refresh,
392
+ force=force,
393
+ )
394
+
395
+ # Display results
396
+ if result["status"] == "success":
397
+ click.echo(click.style(" ✓ Success!", fg="green", bold=True))
398
+
399
+ if PipelineStep.LOAD in steps:
400
+ click.echo(f" Loaded: {result['datapoints_loaded']} datapoints")
401
+
402
+ if PipelineStep.DETECT in steps:
403
+ click.echo(f" Detected: {result['anomalies_detected']} anomalies")
404
+
405
+ if PipelineStep.ALERT in steps:
406
+ click.echo(f" Sent: {result['alerts_sent']} alerts")
407
+ else:
408
+ click.echo(
409
+ click.style(
410
+ f" ✗ Failed: {result['error']}",
411
+ fg="red",
412
+ bold=True,
413
+ )
414
+ )
415
+
416
+ except Exception as e:
417
+ click.echo(
418
+ click.style(
419
+ f" ✗ Pipeline error: {e}",
420
+ fg="red",
421
+ bold=True,
422
+ )
423
+ )
424
+ import traceback
425
+ click.echo(traceback.format_exc())
426
+
427
+ click.echo()