detectkit 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- detectkit-0.3.0/LICENSE +21 -0
- detectkit-0.3.0/MANIFEST.in +7 -0
- detectkit-0.3.0/PKG-INFO +237 -0
- detectkit-0.3.0/README.md +175 -0
- detectkit-0.3.0/detectkit/__init__.py +17 -0
- detectkit-0.3.0/detectkit/alerting/__init__.py +13 -0
- detectkit-0.3.0/detectkit/alerting/channels/__init__.py +21 -0
- detectkit-0.3.0/detectkit/alerting/channels/base.py +193 -0
- detectkit-0.3.0/detectkit/alerting/channels/email.py +146 -0
- detectkit-0.3.0/detectkit/alerting/channels/factory.py +193 -0
- detectkit-0.3.0/detectkit/alerting/channels/mattermost.py +53 -0
- detectkit-0.3.0/detectkit/alerting/channels/slack.py +55 -0
- detectkit-0.3.0/detectkit/alerting/channels/telegram.py +110 -0
- detectkit-0.3.0/detectkit/alerting/channels/webhook.py +139 -0
- detectkit-0.3.0/detectkit/alerting/orchestrator.py +533 -0
- detectkit-0.3.0/detectkit/cli/__init__.py +1 -0
- detectkit-0.3.0/detectkit/cli/commands/__init__.py +1 -0
- detectkit-0.3.0/detectkit/cli/commands/init.py +284 -0
- detectkit-0.3.0/detectkit/cli/commands/run.py +500 -0
- detectkit-0.3.0/detectkit/cli/commands/test_alert.py +184 -0
- detectkit-0.3.0/detectkit/cli/main.py +186 -0
- detectkit-0.3.0/detectkit/config/__init__.py +30 -0
- detectkit-0.3.0/detectkit/config/metric_config.py +520 -0
- detectkit-0.3.0/detectkit/config/profile.py +285 -0
- detectkit-0.3.0/detectkit/config/project_config.py +167 -0
- detectkit-0.3.0/detectkit/config/validator.py +124 -0
- detectkit-0.3.0/detectkit/core/__init__.py +6 -0
- detectkit-0.3.0/detectkit/core/interval.py +132 -0
- detectkit-0.3.0/detectkit/core/models.py +106 -0
- detectkit-0.3.0/detectkit/database/__init__.py +27 -0
- detectkit-0.3.0/detectkit/database/clickhouse_manager.py +442 -0
- detectkit-0.3.0/detectkit/database/internal_tables.py +965 -0
- detectkit-0.3.0/detectkit/database/manager.py +372 -0
- detectkit-0.3.0/detectkit/database/tables.py +208 -0
- detectkit-0.3.0/detectkit/detectors/__init__.py +6 -0
- detectkit-0.3.0/detectkit/detectors/base.py +441 -0
- detectkit-0.3.0/detectkit/detectors/factory.py +138 -0
- detectkit-0.3.0/detectkit/detectors/statistical/__init__.py +8 -0
- detectkit-0.3.0/detectkit/detectors/statistical/iqr.py +508 -0
- detectkit-0.3.0/detectkit/detectors/statistical/mad.py +478 -0
- detectkit-0.3.0/detectkit/detectors/statistical/manual_bounds.py +206 -0
- detectkit-0.3.0/detectkit/detectors/statistical/zscore.py +491 -0
- detectkit-0.3.0/detectkit/loaders/__init__.py +6 -0
- detectkit-0.3.0/detectkit/loaders/metric_loader.py +470 -0
- detectkit-0.3.0/detectkit/loaders/query_template.py +164 -0
- detectkit-0.3.0/detectkit/orchestration/__init__.py +9 -0
- detectkit-0.3.0/detectkit/orchestration/task_manager.py +777 -0
- detectkit-0.3.0/detectkit/utils/__init__.py +17 -0
- detectkit-0.3.0/detectkit/utils/stats.py +196 -0
- detectkit-0.3.0/detectkit.egg-info/PKG-INFO +237 -0
- detectkit-0.3.0/detectkit.egg-info/SOURCES.txt +57 -0
- detectkit-0.3.0/detectkit.egg-info/dependency_links.txt +1 -0
- detectkit-0.3.0/detectkit.egg-info/entry_points.txt +2 -0
- detectkit-0.3.0/detectkit.egg-info/requires.txt +45 -0
- detectkit-0.3.0/detectkit.egg-info/top_level.txt +1 -0
- detectkit-0.3.0/pyproject.toml +151 -0
- detectkit-0.3.0/requirements.txt +8 -0
- detectkit-0.3.0/setup.cfg +4 -0
- detectkit-0.3.0/setup.py +6 -0
detectkit-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 detectkit team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
detectkit-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: detectkit
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Metric monitoring with automatic anomaly detection
|
|
5
|
+
Author: detectkit team
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/alexeiveselov92/detectkit
|
|
8
|
+
Project-URL: Documentation, https://github.com/alexeiveselov92/detectkit
|
|
9
|
+
Project-URL: Repository, https://github.com/alexeiveselov92/detectkit
|
|
10
|
+
Project-URL: Issues, https://github.com/alexeiveselov92/detectkit/issues
|
|
11
|
+
Keywords: monitoring,anomaly-detection,metrics,timeseries,alerting
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Classifier: Topic :: System :: Monitoring
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: numpy>=1.24.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0.0
|
|
27
|
+
Requires-Dist: pyyaml>=6.0
|
|
28
|
+
Requires-Dist: click>=8.0
|
|
29
|
+
Requires-Dist: jinja2>=3.0
|
|
30
|
+
Requires-Dist: orjson>=3.0
|
|
31
|
+
Requires-Dist: requests>=2.25.0
|
|
32
|
+
Provides-Extra: clickhouse
|
|
33
|
+
Requires-Dist: clickhouse-driver>=0.2.0; extra == "clickhouse"
|
|
34
|
+
Provides-Extra: postgres
|
|
35
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
36
|
+
Provides-Extra: mysql
|
|
37
|
+
Requires-Dist: pymysql>=1.0.0; extra == "mysql"
|
|
38
|
+
Provides-Extra: all-db
|
|
39
|
+
Requires-Dist: clickhouse-driver>=0.2.0; extra == "all-db"
|
|
40
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all-db"
|
|
41
|
+
Requires-Dist: pymysql>=1.0.0; extra == "all-db"
|
|
42
|
+
Provides-Extra: prophet
|
|
43
|
+
Requires-Dist: prophet>=1.1.0; extra == "prophet"
|
|
44
|
+
Provides-Extra: timesfm
|
|
45
|
+
Requires-Dist: timesfm>=0.1.0; extra == "timesfm"
|
|
46
|
+
Provides-Extra: advanced-detectors
|
|
47
|
+
Requires-Dist: prophet>=1.1.0; extra == "advanced-detectors"
|
|
48
|
+
Requires-Dist: timesfm>=0.1.0; extra == "advanced-detectors"
|
|
49
|
+
Provides-Extra: all
|
|
50
|
+
Requires-Dist: clickhouse-driver>=0.2.0; extra == "all"
|
|
51
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
52
|
+
Requires-Dist: pymysql>=1.0.0; extra == "all"
|
|
53
|
+
Requires-Dist: prophet>=1.1.0; extra == "all"
|
|
54
|
+
Requires-Dist: timesfm>=0.1.0; extra == "all"
|
|
55
|
+
Provides-Extra: dev
|
|
56
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
57
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
58
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
59
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
60
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
61
|
+
Dynamic: license-file
|
|
62
|
+
|
|
63
|
+
# detectkit
|
|
64
|
+
|
|
65
|
+
**Metric monitoring with automatic anomaly detection**
|
|
66
|
+
|
|
67
|
+
`detectkit` is a Python library for data analysts and engineers to monitor time-series metrics with automatic anomaly detection and alerting.
|
|
68
|
+
|
|
69
|
+
## Status
|
|
70
|
+
|
|
71
|
+
✅ **Production Ready** - Version 0.1.2
|
|
72
|
+
|
|
73
|
+
Published to PyPI: https://pypi.org/project/detectkit/
|
|
74
|
+
|
|
75
|
+
Complete rewrite with modern architecture and full documentation (2025).
|
|
76
|
+
|
|
77
|
+
## Features
|
|
78
|
+
|
|
79
|
+
- ✅ **Pure numpy arrays** - No pandas dependency in core logic
|
|
80
|
+
- ✅ **Batch processing** - Efficient vectorized operations
|
|
81
|
+
- ✅ **Multiple detectors** - Statistical methods (Z-Score, MAD, IQR, Manual Bounds)
|
|
82
|
+
- ✅ **Alert channels** - Mattermost, Slack, Webhook support
|
|
83
|
+
- ✅ **Database agnostic** - ClickHouse, PostgreSQL, MySQL support
|
|
84
|
+
- ✅ **Idempotent operations** - Resume from interruptions
|
|
85
|
+
- 🚧 **CLI interface** - dbt-like commands (coming soon)
|
|
86
|
+
|
|
87
|
+
## Installation
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install detectkit
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Or from source:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
git clone https://github.com/alexeiveselov92/detectkit
|
|
97
|
+
cd detectkit
|
|
98
|
+
pip install -e .
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Optional dependencies
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# ClickHouse support
|
|
105
|
+
pip install detectkit[clickhouse]
|
|
106
|
+
|
|
107
|
+
# All database drivers
|
|
108
|
+
pip install detectkit[all-db]
|
|
109
|
+
|
|
110
|
+
# Development dependencies
|
|
111
|
+
pip install detectkit[dev]
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Quick Start
|
|
115
|
+
|
|
116
|
+
### CLI Usage (Recommended)
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Create a new project
|
|
120
|
+
dtk init my_monitoring_project
|
|
121
|
+
cd my_monitoring_project
|
|
122
|
+
|
|
123
|
+
# Configure database in profiles.yml
|
|
124
|
+
# Then run your metrics
|
|
125
|
+
dtk run --select example_cpu_usage
|
|
126
|
+
|
|
127
|
+
# Run specific pipeline steps
|
|
128
|
+
dtk run --select cpu_usage --steps load,detect
|
|
129
|
+
|
|
130
|
+
# Run all critical metrics
|
|
131
|
+
dtk run --select tag:critical
|
|
132
|
+
|
|
133
|
+
# Reload data from specific date
|
|
134
|
+
dtk run --select cpu_usage --from 2024-01-01
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Python API Usage
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import numpy as np
|
|
141
|
+
from detectkit.detectors.statistical import ZScoreDetector
|
|
142
|
+
|
|
143
|
+
# Your time-series data
|
|
144
|
+
timestamps = np.array([...], dtype='datetime64[ms]')
|
|
145
|
+
values = np.array([1.0, 2.0, 1.5, 10.0, 1.8]) # 10.0 is anomaly
|
|
146
|
+
|
|
147
|
+
# Create detector
|
|
148
|
+
detector = ZScoreDetector(threshold=3.0, window_size=100)
|
|
149
|
+
|
|
150
|
+
# Detect anomalies
|
|
151
|
+
data = {
|
|
152
|
+
'timestamp': timestamps,
|
|
153
|
+
'value': values
|
|
154
|
+
}
|
|
155
|
+
results = detector.detect(data)
|
|
156
|
+
|
|
157
|
+
# Check results
|
|
158
|
+
for result in results:
|
|
159
|
+
if result.is_anomaly:
|
|
160
|
+
print(f"Anomaly at {result.timestamp}: {result.value}")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Architecture
|
|
164
|
+
|
|
165
|
+
- **Detectors** - Statistical and ML-based anomaly detection
|
|
166
|
+
- **Loaders** - Metric data loading from databases with gap filling
|
|
167
|
+
- **Alerting** - Multi-channel notifications with orchestration
|
|
168
|
+
- **Config** - YAML-based configuration (dbt-like)
|
|
169
|
+
|
|
170
|
+
## Testing
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
# Run tests
|
|
174
|
+
pytest tests/
|
|
175
|
+
|
|
176
|
+
# With coverage
|
|
177
|
+
pytest tests/ --cov=detectkit --cov-report=html
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**Current status:** 287 tests passing, 87% coverage
|
|
181
|
+
|
|
182
|
+
## Development Status
|
|
183
|
+
|
|
184
|
+
### ✅ Completed (Phases 1-6)
|
|
185
|
+
- ✅ **Phase 1**: Core models (Interval, TableModel, ColumnDefinition)
|
|
186
|
+
- ✅ **Phase 2**: Database managers & data loading (MetricLoader, gap filling, seasonality)
|
|
187
|
+
- ✅ **Phase 3**: Statistical detectors (Z-Score, MAD, IQR, Manual Bounds)
|
|
188
|
+
- ✅ **Phase 4**: Alerting system (Channels, Orchestrator, consecutive anomalies)
|
|
189
|
+
- ✅ **Phase 5**: Task manager (Pipeline execution, locking, idempotency)
|
|
190
|
+
- ✅ **Phase 6**: CLI commands (dtk init, dtk run with selectors)
|
|
191
|
+
|
|
192
|
+
### 🔄 Integration Status
|
|
193
|
+
- ⚠️ Full end-to-end integration pending (database connection required)
|
|
194
|
+
- ⚠️ Advanced detectors (Prophet, TimesFM) - optional extras
|
|
195
|
+
- ⚠️ Additional alert channels (Telegram, Email) - optional
|
|
196
|
+
|
|
197
|
+
## Documentation
|
|
198
|
+
|
|
199
|
+
📚 **Complete documentation available at: https://github.com/alexeiveselov92/detectkit/tree/main/docs**
|
|
200
|
+
|
|
201
|
+
- [Getting Started](https://github.com/alexeiveselov92/detectkit/blob/main/docs/getting-started/quickstart.md) - 5-minute quickstart
|
|
202
|
+
- [Configuration Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/configuration.md) - All configuration options
|
|
203
|
+
- [Detectors Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/detectors.md) - Choosing the right detector
|
|
204
|
+
- [Alerting Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/alerting.md) - Setting up alerts
|
|
205
|
+
- [CLI Reference](https://github.com/alexeiveselov92/detectkit/blob/main/docs/reference/cli.md) - Command-line documentation
|
|
206
|
+
- [Examples](https://github.com/alexeiveselov92/detectkit/tree/main/docs/examples) - Real-world monitoring scenarios
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
## Requirements
|
|
210
|
+
|
|
211
|
+
- Python 3.10+
|
|
212
|
+
- numpy >= 1.24.0
|
|
213
|
+
- pydantic >= 2.0.0
|
|
214
|
+
- click >= 8.0
|
|
215
|
+
- PyYAML >= 6.0
|
|
216
|
+
- Jinja2 >= 3.0
|
|
217
|
+
|
|
218
|
+
## License
|
|
219
|
+
|
|
220
|
+
MIT License - See LICENSE file for details
|
|
221
|
+
|
|
222
|
+
## Contributing
|
|
223
|
+
|
|
224
|
+
This project is currently in active development. Contributions are welcome once we reach v1.0.0.
|
|
225
|
+
|
|
226
|
+
## Changelog
|
|
227
|
+
|
|
228
|
+
### 0.1.0 (2025-11-07)
|
|
229
|
+
- Initial release with complete rewrite
|
|
230
|
+
- ✅ Core foundation: models, database, config
|
|
231
|
+
- ✅ Metric loading with gap filling and seasonality extraction
|
|
232
|
+
- ✅ Statistical detectors (Z-Score, MAD, IQR, Manual Bounds)
|
|
233
|
+
- ✅ Alert channels (Webhook, Mattermost, Slack)
|
|
234
|
+
- ✅ Alert orchestration with consecutive anomaly logic
|
|
235
|
+
- ✅ Task manager for pipeline execution
|
|
236
|
+
- ✅ CLI commands (dtk init, dtk run)
|
|
237
|
+
- 📊 287 unit tests, 87% coverage
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# detectkit
|
|
2
|
+
|
|
3
|
+
**Metric monitoring with automatic anomaly detection**
|
|
4
|
+
|
|
5
|
+
`detectkit` is a Python library for data analysts and engineers to monitor time-series metrics with automatic anomaly detection and alerting.
|
|
6
|
+
|
|
7
|
+
## Status
|
|
8
|
+
|
|
9
|
+
✅ **Production Ready** - Version 0.3.0
|
|
10
|
+
|
|
11
|
+
Published to PyPI: https://pypi.org/project/detectkit/
|
|
12
|
+
|
|
13
|
+
Complete rewrite with modern architecture and full documentation (2025).
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- ✅ **Pure numpy arrays** - No pandas dependency in core logic
|
|
18
|
+
- ✅ **Batch processing** - Efficient vectorized operations
|
|
19
|
+
- ✅ **Multiple detectors** - Statistical methods (Z-Score, MAD, IQR, Manual Bounds)
|
|
20
|
+
- ✅ **Alert channels** - Mattermost, Slack, Webhook support
|
|
21
|
+
- ✅ **Database agnostic** - ClickHouse, PostgreSQL, MySQL support
|
|
22
|
+
- ✅ **Idempotent operations** - Resume from interruptions
|
|
23
|
+
- ✅ **CLI interface** - dbt-like commands (`dtk init`, `dtk run`)
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install detectkit
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or from source:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/alexeiveselov92/detectkit
|
|
35
|
+
cd detectkit
|
|
36
|
+
pip install -e .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Optional dependencies
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# ClickHouse support
|
|
43
|
+
pip install detectkit[clickhouse]
|
|
44
|
+
|
|
45
|
+
# All database drivers
|
|
46
|
+
pip install detectkit[all-db]
|
|
47
|
+
|
|
48
|
+
# Development dependencies
|
|
49
|
+
pip install detectkit[dev]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Quick Start
|
|
53
|
+
|
|
54
|
+
### CLI Usage (Recommended)
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Create a new project
|
|
58
|
+
dtk init my_monitoring_project
|
|
59
|
+
cd my_monitoring_project
|
|
60
|
+
|
|
61
|
+
# Configure database in profiles.yml
|
|
62
|
+
# Then run your metrics
|
|
63
|
+
dtk run --select example_cpu_usage
|
|
64
|
+
|
|
65
|
+
# Run specific pipeline steps
|
|
66
|
+
dtk run --select cpu_usage --steps load,detect
|
|
67
|
+
|
|
68
|
+
# Run all critical metrics
|
|
69
|
+
dtk run --select tag:critical
|
|
70
|
+
|
|
71
|
+
# Reload data from specific date
|
|
72
|
+
dtk run --select cpu_usage --from 2024-01-01
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Python API Usage
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import numpy as np
|
|
79
|
+
from detectkit.detectors.statistical import ZScoreDetector
|
|
80
|
+
|
|
81
|
+
# Your time-series data
|
|
82
|
+
timestamps = np.array([...], dtype='datetime64[ms]')
|
|
83
|
+
values = np.array([1.0, 2.0, 1.5, 10.0, 1.8]) # 10.0 is anomaly
|
|
84
|
+
|
|
85
|
+
# Create detector
|
|
86
|
+
detector = ZScoreDetector(threshold=3.0, window_size=100)
|
|
87
|
+
|
|
88
|
+
# Detect anomalies
|
|
89
|
+
data = {
|
|
90
|
+
'timestamp': timestamps,
|
|
91
|
+
'value': values
|
|
92
|
+
}
|
|
93
|
+
results = detector.detect(data)
|
|
94
|
+
|
|
95
|
+
# Check results
|
|
96
|
+
for result in results:
|
|
97
|
+
if result.is_anomaly:
|
|
98
|
+
print(f"Anomaly at {result.timestamp}: {result.value}")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Architecture
|
|
102
|
+
|
|
103
|
+
- **Detectors** - Statistical and ML-based anomaly detection
|
|
104
|
+
- **Loaders** - Metric data loading from databases with gap filling
|
|
105
|
+
- **Alerting** - Multi-channel notifications with orchestration
|
|
106
|
+
- **Config** - YAML-based configuration (dbt-like)
|
|
107
|
+
|
|
108
|
+
## Testing
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Run tests
|
|
112
|
+
pytest tests/
|
|
113
|
+
|
|
114
|
+
# With coverage
|
|
115
|
+
pytest tests/ --cov=detectkit --cov-report=html
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Current status:** 287 tests passing, 87% coverage
|
|
119
|
+
|
|
120
|
+
## Development Status
|
|
121
|
+
|
|
122
|
+
### ✅ Completed (Phases 1-6)
|
|
123
|
+
- ✅ **Phase 1**: Core models (Interval, TableModel, ColumnDefinition)
|
|
124
|
+
- ✅ **Phase 2**: Database managers & data loading (MetricLoader, gap filling, seasonality)
|
|
125
|
+
- ✅ **Phase 3**: Statistical detectors (Z-Score, MAD, IQR, Manual Bounds)
|
|
126
|
+
- ✅ **Phase 4**: Alerting system (Channels, Orchestrator, consecutive anomalies)
|
|
127
|
+
- ✅ **Phase 5**: Task manager (Pipeline execution, locking, idempotency)
|
|
128
|
+
- ✅ **Phase 6**: CLI commands (dtk init, dtk run with selectors)
|
|
129
|
+
|
|
130
|
+
### 🔄 Integration Status
|
|
131
|
+
- ⚠️ Full end-to-end integration pending (database connection required)
|
|
132
|
+
- ⚠️ Advanced detectors (Prophet, TimesFM) - optional extras
|
|
133
|
+
- ⚠️ Additional alert channels (Telegram, Email) - optional
|
|
134
|
+
|
|
135
|
+
## Documentation
|
|
136
|
+
|
|
137
|
+
📚 **Complete documentation available at: https://github.com/alexeiveselov92/detectkit/tree/main/docs**
|
|
138
|
+
|
|
139
|
+
- [Getting Started](https://github.com/alexeiveselov92/detectkit/blob/main/docs/getting-started/quickstart.md) - 5-minute quickstart
|
|
140
|
+
- [Configuration Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/configuration.md) - All configuration options
|
|
141
|
+
- [Detectors Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/detectors.md) - Choosing the right detector
|
|
142
|
+
- [Alerting Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/alerting.md) - Setting up alerts
|
|
143
|
+
- [CLI Reference](https://github.com/alexeiveselov92/detectkit/blob/main/docs/reference/cli.md) - Command-line documentation
|
|
144
|
+
- [Examples](https://github.com/alexeiveselov92/detectkit/tree/main/docs/examples) - Real-world monitoring scenarios
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
## Requirements
|
|
148
|
+
|
|
149
|
+
- Python 3.10+
|
|
150
|
+
- numpy >= 1.24.0
|
|
151
|
+
- pydantic >= 2.0.0
|
|
152
|
+
- click >= 8.0
|
|
153
|
+
- PyYAML >= 6.0
|
|
154
|
+
- Jinja2 >= 3.0
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT License - See LICENSE file for details
|
|
159
|
+
|
|
160
|
+
## Contributing
|
|
161
|
+
|
|
162
|
+
This project is currently in active development. Contributions are welcome once we reach v1.0.0.
|
|
163
|
+
|
|
164
|
+
## Changelog
|
|
165
|
+
|
|
166
|
+
### 0.1.0 (2025-11-07)
|
|
167
|
+
- Initial release with complete rewrite
|
|
168
|
+
- ✅ Core foundation: models, database, config
|
|
169
|
+
- ✅ Metric loading with gap filling and seasonality extraction
|
|
170
|
+
- ✅ Statistical detectors (Z-Score, MAD, IQR, Manual Bounds)
|
|
171
|
+
- ✅ Alert channels (Webhook, Mattermost, Slack)
|
|
172
|
+
- ✅ Alert orchestration with consecutive anomaly logic
|
|
173
|
+
- ✅ Task manager for pipeline execution
|
|
174
|
+
- ✅ CLI commands (dtk init, dtk run)
|
|
175
|
+
- 📊 287 unit tests, 87% coverage
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
detectkit - Anomaly Detection for Time-Series Metrics
|
|
3
|
+
|
|
4
|
+
A Python library for data analysts and engineers to monitor metrics with automatic anomaly detection.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Keep in sync with the distribution version declared in the package metadata.
__version__ = "0.3.0"
|
|
8
|
+
|
|
9
|
+
from detectkit.core.interval import Interval
|
|
10
|
+
from detectkit.core.models import ColumnDefinition, TableModel
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"Interval",
|
|
14
|
+
"ColumnDefinition",
|
|
15
|
+
"TableModel",
|
|
16
|
+
"__version__",
|
|
17
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Alert channels for external notifications."""
|
|
2
|
+
|
|
3
|
+
from detectkit.alerting.channels.base import AlertData, BaseAlertChannel
|
|
4
|
+
from detectkit.alerting.channels.mattermost import MattermostChannel
|
|
5
|
+
from detectkit.alerting.channels.slack import SlackChannel
|
|
6
|
+
from detectkit.alerting.channels.webhook import WebhookChannel
|
|
7
|
+
from detectkit.alerting.channels.telegram import TelegramChannel
|
|
8
|
+
from detectkit.alerting.channels.email import EmailChannel
|
|
9
|
+
|
|
10
|
+
from detectkit.alerting.channels.factory import AlertChannelFactory
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AlertData",
|
|
14
|
+
"BaseAlertChannel",
|
|
15
|
+
"WebhookChannel",
|
|
16
|
+
"MattermostChannel",
|
|
17
|
+
"SlackChannel",
|
|
18
|
+
"TelegramChannel",
|
|
19
|
+
"EmailChannel",
|
|
20
|
+
"AlertChannelFactory",
|
|
21
|
+
]
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base alert channel interface.
|
|
3
|
+
|
|
4
|
+
All alert channels must inherit from BaseAlertChannel and implement
|
|
5
|
+
the send() method for delivering alerts to specific destinations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from detectkit.detectors.base import DetectionResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class AlertData:
    """
    Payload describing a single anomaly alert.

    Bundles every value a channel needs to render and deliver a notification.

    Attributes:
        metric_name: Metric the anomaly belongs to
        timestamp: Moment of the anomaly (datetime64 or datetime)
        timezone: Timezone label used for display (e.g., "Europe/Moscow")
        value: Observed metric value
        confidence_lower: Lower confidence bound, or None if unavailable
        confidence_upper: Upper confidence bound, or None if unavailable
        detector_name: Name/ID of the detector that flagged the point
        detector_params: Detector parameters as a JSON string
        direction: Which way the value deviated ("above" or "below")
        severity: Severity score
        detection_metadata: Extra detector-specific metadata
        consecutive_count: Number of consecutive anomalies (defaults to 1)
    """

    # Identification of the anomalous point
    metric_name: str
    timestamp: Any  # datetime64 or datetime
    timezone: str

    # Observed value and its confidence bounds
    value: float
    confidence_lower: Optional[float]
    confidence_upper: Optional[float]

    # Detector that produced the anomaly
    detector_name: str
    detector_params: str

    # Anomaly characteristics
    direction: str
    severity: float
    detection_metadata: Dict[str, Any]
    consecutive_count: int = 1
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BaseAlertChannel(ABC):
    """
    Abstract base class for alert channels.

    Alert channels deliver notifications to external systems when
    anomalies are detected. Each channel implements a specific
    delivery mechanism (webhook, email, etc.) by overriding send();
    message rendering is shared through format_message().

    Example:
        >>> class MyChannel(BaseAlertChannel):
        ...     def send(self, alert_data, template=None):
        ...         message = self.format_message(alert_data, template)
        ...         # Send via specific mechanism
        ...         return True
    """

    @abstractmethod
    def send(
        self,
        alert_data: "AlertData",
        template: Optional[str] = None,
    ) -> bool:
        """
        Send alert to this channel.

        Args:
            alert_data: Alert data to send
            template: Optional custom message template
                Uses default template if None

        Returns:
            True if sent successfully, False otherwise

        Raises:
            Exception: If sending fails critically

        Example:
            >>> alert = AlertData(
            ...     metric_name="cpu_usage",
            ...     timestamp=datetime.now(),
            ...     value=95.0,
            ...     ...
            ... )
            >>> success = channel.send(alert)
        """
        pass

    def format_message(
        self,
        alert_data: "AlertData",
        template: Optional[str] = None,
    ) -> str:
        """
        Format alert message from template.

        Uses default template if none provided. Template variables:
        - {metric_name}
        - {timestamp} (pre-formatted string, timezone label appended if set)
        - {timezone}
        - {value}
        - {confidence_lower} (may be None)
        - {confidence_upper} (may be None)
        - {confidence_interval} (pre-formatted "[lower, upper]" or "N/A")
        - {detector_name}
        - {detector_params}
        - {direction}
        - {severity}
        - {consecutive_count}

        If the template references an unknown variable, the default
        template is rendered instead.

        Args:
            alert_data: Alert data to format
            template: Optional custom template string

        Returns:
            Formatted message string

        Raises:
            KeyError: If the default template itself references an
                unknown variable

        Example:
            >>> template = "Anomaly in {metric_name}: {value}"
            >>> message = channel.format_message(alert_data, template)
        """
        if template is None:
            template = self.get_default_template()

        # Imported locally, as in the original implementation
        from datetime import datetime
        import numpy as np

        ts = alert_data.timestamp
        if isinstance(ts, np.datetime64):
            # Normalize to microseconds first: converting a nanosecond (or
            # finer) datetime64 straight to a Python object yields an int,
            # which has no strftime().
            ts = ts.astype("datetime64[us]").astype(datetime)

        # Format timestamp with timezone label
        ts_str = ts.strftime("%Y-%m-%d %H:%M:%S")
        if alert_data.timezone:
            ts_str = f"{ts_str} ({alert_data.timezone})"

        # Format confidence interval; both bounds are required to show it
        lower = alert_data.confidence_lower
        upper = alert_data.confidence_upper
        if lower is not None and upper is not None:
            confidence_str = f"[{lower:.2f}, {upper:.2f}]"
        else:
            confidence_str = "N/A"

        fields = {
            "metric_name": alert_data.metric_name,
            "timestamp": ts_str,
            "timezone": alert_data.timezone,
            "value": alert_data.value,
            "confidence_lower": lower,
            "confidence_upper": upper,
            "confidence_interval": confidence_str,
            "detector_name": alert_data.detector_name,
            "detector_params": alert_data.detector_params,
            "direction": alert_data.direction,
            "severity": alert_data.severity,
            "consecutive_count": alert_data.consecutive_count,
        }

        try:
            return template.format(**fields)
        except KeyError:
            # Template has unknown variables: fall back to the default.
            # The default is formatted directly (not via a recursive call) so
            # a broken default template raises KeyError instead of recursing
            # without bound.
            return self.get_default_template().format(**fields)

    def get_default_template(self) -> str:
        """
        Get default message template.

        Returns:
            Default template string
        """
        return (
            "Anomaly detected in metric: {metric_name}\n"
            "Time: {timestamp}\n"
            "Value: {value}\n"
            "Confidence interval: {confidence_interval}\n"
            "Detector: {detector_name}\n"
            "Parameters: {detector_params}\n"
            "Direction: {direction}\n"
            "Severity: {severity:.2f}"
        )

    def __repr__(self) -> str:
        """String representation of channel."""
        return f"{self.__class__.__name__}()"
|