detectkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- detectkit/__init__.py +17 -0
- detectkit/alerting/__init__.py +13 -0
- detectkit/alerting/channels/__init__.py +21 -0
- detectkit/alerting/channels/base.py +191 -0
- detectkit/alerting/channels/email.py +146 -0
- detectkit/alerting/channels/factory.py +193 -0
- detectkit/alerting/channels/mattermost.py +53 -0
- detectkit/alerting/channels/slack.py +55 -0
- detectkit/alerting/channels/telegram.py +110 -0
- detectkit/alerting/channels/webhook.py +139 -0
- detectkit/alerting/orchestrator.py +368 -0
- detectkit/cli/__init__.py +1 -0
- detectkit/cli/commands/__init__.py +1 -0
- detectkit/cli/commands/init.py +282 -0
- detectkit/cli/commands/run.py +427 -0
- detectkit/cli/commands/test_alert.py +184 -0
- detectkit/cli/main.py +186 -0
- detectkit/config/__init__.py +30 -0
- detectkit/config/metric_config.py +467 -0
- detectkit/config/profile.py +285 -0
- detectkit/config/project_config.py +164 -0
- detectkit/core/__init__.py +6 -0
- detectkit/core/interval.py +132 -0
- detectkit/core/models.py +106 -0
- detectkit/database/__init__.py +27 -0
- detectkit/database/clickhouse_manager.py +385 -0
- detectkit/database/internal_tables.py +581 -0
- detectkit/database/manager.py +324 -0
- detectkit/database/tables.py +134 -0
- detectkit/detectors/__init__.py +6 -0
- detectkit/detectors/base.py +222 -0
- detectkit/detectors/factory.py +138 -0
- detectkit/detectors/statistical/__init__.py +8 -0
- detectkit/detectors/statistical/iqr.py +230 -0
- detectkit/detectors/statistical/mad.py +423 -0
- detectkit/detectors/statistical/manual_bounds.py +177 -0
- detectkit/detectors/statistical/zscore.py +225 -0
- detectkit/loaders/__init__.py +6 -0
- detectkit/loaders/metric_loader.py +470 -0
- detectkit/loaders/query_template.py +164 -0
- detectkit/orchestration/__init__.py +9 -0
- detectkit/orchestration/task_manager.py +698 -0
- detectkit/utils/__init__.py +1 -0
- detectkit-0.1.0.dist-info/METADATA +231 -0
- detectkit-0.1.0.dist-info/RECORD +49 -0
- detectkit-0.1.0.dist-info/WHEEL +5 -0
- detectkit-0.1.0.dist-info/entry_points.txt +2 -0
- detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- detectkit-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Database managers for detectk."""
|
|
2
|
+
|
|
3
|
+
from detectkit.database.manager import BaseDatabaseManager
|
|
4
|
+
from detectkit.database.clickhouse_manager import ClickHouseDatabaseManager
|
|
5
|
+
from detectkit.database.internal_tables import InternalTablesManager
|
|
6
|
+
from detectkit.database.tables import (
|
|
7
|
+
TABLE_DATAPOINTS,
|
|
8
|
+
TABLE_DETECTIONS,
|
|
9
|
+
TABLE_TASKS,
|
|
10
|
+
INTERNAL_TABLES,
|
|
11
|
+
get_datapoints_table_model,
|
|
12
|
+
get_detections_table_model,
|
|
13
|
+
get_tasks_table_model,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"BaseDatabaseManager",
|
|
18
|
+
"ClickHouseDatabaseManager",
|
|
19
|
+
"InternalTablesManager",
|
|
20
|
+
"TABLE_DATAPOINTS",
|
|
21
|
+
"TABLE_DETECTIONS",
|
|
22
|
+
"TABLE_TASKS",
|
|
23
|
+
"INTERNAL_TABLES",
|
|
24
|
+
"get_datapoints_table_model",
|
|
25
|
+
"get_detections_table_model",
|
|
26
|
+
"get_tasks_table_model",
|
|
27
|
+
]
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ClickHouse database manager implementation.
|
|
3
|
+
|
|
4
|
+
Implements BaseDatabaseManager for ClickHouse using universal methods.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from clickhouse_driver import Client
|
|
14
|
+
CLICKHOUSE_AVAILABLE = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
CLICKHOUSE_AVAILABLE = False
|
|
17
|
+
|
|
18
|
+
from detectkit.core.models import ColumnDefinition, TableModel
|
|
19
|
+
from detectkit.database.manager import BaseDatabaseManager
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ClickHouseDatabaseManager(BaseDatabaseManager):
|
|
23
|
+
"""
|
|
24
|
+
ClickHouse implementation of BaseDatabaseManager.
|
|
25
|
+
|
|
26
|
+
Uses universal methods - does NOT hardcode internal table logic.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
host: ClickHouse host
|
|
30
|
+
port: ClickHouse port (default: 9000 for native protocol)
|
|
31
|
+
user: Database user
|
|
32
|
+
password: Database password
|
|
33
|
+
internal_database: Database for internal tables (_dtk_*)
|
|
34
|
+
data_database: Database for user data tables
|
|
35
|
+
settings: Optional ClickHouse settings
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(
    self,
    host: str = "localhost",
    port: int = 9000,
    user: str = "default",
    password: str = "",
    internal_database: str = "detectk_internal",
    data_database: str = "default",
    settings: Optional[Dict[str, Any]] = None,
):
    """
    Initialize ClickHouse manager.

    Stores the two database names, opens a driver client and then makes
    sure both databases exist.

    Args:
        host: ClickHouse host
        port: ClickHouse port (default: 9000 for native protocol)
        user: Database user
        password: Database password
        internal_database: Database for internal tables (_dtk_*)
        data_database: Database for user data tables
        settings: Optional ClickHouse settings forwarded to the driver
            client; an empty dict is used when omitted

    Raises:
        ImportError: If clickhouse-driver could not be imported.
    """
    if not CLICKHOUSE_AVAILABLE:
        # The distribution is published as "detectkit"; the install hint
        # must name that package or users are sent to the wrong one.
        raise ImportError(
            "clickhouse-driver is not installed. "
            "Install with: pip install detectkit[clickhouse]"
        )

    self._internal_database = internal_database
    self._data_database = data_database

    # Create client
    self._client = Client(
        host=host,
        port=port,
        user=user,
        password=password,
        settings=settings or {},
    )

    # Ensure databases exist (issues statements through the new client)
    self._ensure_databases()
|
|
69
|
+
|
|
70
|
+
def _ensure_databases(self) -> None:
    """Run CREATE DATABASE IF NOT EXISTS for the internal and the data database."""
    targets = (self._internal_database, self._data_database)
    for database_name in targets:
        statement = f"CREATE DATABASE IF NOT EXISTS {database_name}"
        self._client.execute(statement)
|
|
74
|
+
|
|
75
|
+
def execute_query(
    self,
    query: str,
    params: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """
    Run a SQL statement and return every row as a column-name -> value dict.

    Args:
        query: SQL query to execute
        params: Optional query parameters; a missing or empty mapping is
            not forwarded to the driver

    Returns:
        One dictionary per result row, keyed by column name
    """
    # Only pass params positionally when there is something to substitute.
    call_args = (query, params) if params else (query,)

    # With with_column_types=True the driver hands back (rows, [(name, type), ...]).
    fetched, column_meta = self._client.execute(*call_args, with_column_types=True)
    keys = [meta[0] for meta in column_meta]

    records = []
    for values in fetched:
        records.append(dict(zip(keys, values)))
    return records
|
|
106
|
+
|
|
107
|
+
def create_table(
    self,
    table_name: str,
    table_model: TableModel,
    if_not_exists: bool = True
) -> None:
    """
    Create ClickHouse table from TableModel.

    Converts generic TableModel to ClickHouse-specific DDL and executes it.

    Args:
        table_name: Name of table to create (used verbatim in the DDL)
        table_model: Table schema definition; ``columns``, ``engine``,
            ``order_by`` and ``primary_key`` are read from it
        if_not_exists: Add IF NOT EXISTS clause
    """
    # Build column definitions: "<name> <type>[ DEFAULT <literal>]"
    col_defs = []
    for col in table_model.columns:
        col_def = f"{col.name} {col.type}"
        if col.default is not None:
            col_def += f" DEFAULT {self._format_default(col.default)}"
        col_defs.append(col_def)

    columns_sql = ",\n    ".join(col_defs)

    if_not_exists_clause = "IF NOT EXISTS " if if_not_exists else ""

    # For ClickHouse, use engine and order_by from table_model
    engine = table_model.engine or "MergeTree"

    # Add parentheses only if engine doesn't already have them
    engine_clause = engine if "(" in engine else f"{engine}()"

    # The sorting key falls back to the primary key when order_by is unset.
    ordering = table_model.order_by or table_model.primary_key
    if isinstance(ordering, str):
        # A bare string would otherwise be joined character by character.
        ordering = [ordering]
    if ordering:
        order_by_sql = f"({', '.join(ordering)})"
    else:
        # No key at all: previously this crashed in str.join(None).
        # "ORDER BY tuple()" is the ClickHouse spelling for "no sorting key".
        order_by_sql = "tuple()"

    ddl = (
        f"CREATE TABLE {if_not_exists_clause}{table_name} (\n"
        f"    {columns_sql}\n"
        f")\n"
        f"ENGINE = {engine_clause}\n"
        f"ORDER BY {order_by_sql}"
    )

    self._client.execute(ddl)
|
|
157
|
+
|
|
158
|
+
def _format_default(self, value: Any) -> str:
    """
    Format a column default value as a SQL literal.

    Args:
        value: Default value taken from a column definition

    Returns:
        ``NULL`` for None, a single-quoted and escaped literal for strings,
        and ``str(value)`` for everything else (numbers included)
    """
    if value is None:
        return "NULL"

    if isinstance(value, str):
        # Escape backslashes first, then quotes, so an embedded quote cannot
        # terminate the literal early and corrupt the generated DDL.
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    return str(value)
|
|
168
|
+
|
|
169
|
+
def table_exists(
    self,
    table_name: str,
    schema: Optional[str] = None
) -> bool:
    """
    Report whether a table is listed in ``system.tables``.

    Args:
        table_name: Name of table to check
        schema: Database name; when empty or None the internal database is
            checked first, then the data database

    Returns:
        True as soon as one of the candidate databases contains the table
    """
    lookup_sql = """
        SELECT 1
        FROM system.tables
        WHERE database = %(database)s
          AND name = %(table)s
    """

    candidates = [schema] if schema else [self._internal_database, self._data_database]

    # any() stops at the first database that yields a non-empty result.
    return any(
        self.execute_query(lookup_sql, {"database": candidate, "table": table_name})
        for candidate in candidates
    )
|
|
204
|
+
|
|
205
|
+
def insert_batch(
    self,
    table_name: str,
    data: Dict[str, np.ndarray],
    conflict_strategy: str = "ignore"
) -> int:
    """
    Insert batch of data into ClickHouse table.

    Args:
        table_name: Table to insert into
        data: Dictionary mapping column names to numpy arrays; all arrays
            must have the same length
        conflict_strategy: "ignore" or "replace". Accepted for interface
            compatibility only: this implementation never reads it.

    Returns:
        Number of rows inserted (0 when ``data`` is empty or has no rows)

    Raises:
        ValueError: If the column arrays differ in length.
    """
    if not data:
        return 0

    # Validate all arrays have same length
    lengths = [len(arr) for arr in data.values()]
    if len(set(lengths)) > 1:
        raise ValueError(
            f"All arrays must have same length, got: {dict(zip(data.keys(), lengths))}"
        )

    num_rows = lengths[0]
    if num_rows == 0:
        return 0

    def to_native(value: Any) -> Any:
        """Turn one numpy cell into a plain Python value for the driver."""
        if isinstance(value, (np.datetime64, np.timedelta64)):
            # NaT has no datetime equivalent; it used to blow up inside the
            # converter, so treat it as a missing value instead.
            if np.isnat(value):
                return None
            return self._convert_numpy_datetime(value)
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, (np.integer, np.floating)):
            # NOTE(review): numpy float NaN takes this branch and stays a
            # float NaN; only plain Python float NaN becomes None below.
            return value.item()
        if isinstance(value, float) and np.isnan(value):
            return None
        return value

    # Convert numpy arrays to row-wise lists for ClickHouse driver
    column_names = list(data.keys())
    rows = [
        [to_native(data[col_name][i]) for col_name in column_names]
        for i in range(num_rows)
    ]

    # NOTE(review): no de-duplication is performed here; whether duplicate
    # keys are collapsed depends on the target table's engine - confirm.
    self._client.execute(
        f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES",
        rows
    )

    return num_rows
|
|
270
|
+
|
|
271
|
+
def _convert_numpy_datetime(self, dt: np.datetime64) -> datetime:
    """
    Convert numpy datetime64 to Python datetime with UTC timezone.

    The offset from the Unix epoch is taken as a whole number of
    microseconds and added to an aware epoch datetime. This avoids the
    float round-trip and the platform limits of ``datetime.fromtimestamp``.

    Args:
        dt: Value to convert; finer-than-microsecond precision is floored

    Returns:
        Timezone-aware datetime in UTC

    Raises:
        ValueError: If ``dt`` is NaT.
    """
    # Local import: only datetime and timezone are imported at module level.
    from datetime import timedelta

    if np.isnat(dt):
        raise ValueError("Cannot convert NaT to a Python datetime")

    since_epoch = dt - np.datetime64(0, "us")
    micros = int(since_epoch // np.timedelta64(1, "us"))

    return datetime(1970, 1, 1, tzinfo=timezone.utc) + timedelta(microseconds=micros)
|
|
276
|
+
|
|
277
|
+
def get_last_timestamp(
    self,
    table_name: str,
    metric_name: str,
    timestamp_column: str = "timestamp"
) -> Optional[datetime]:
    """
    Get last timestamp for a metric in a table.

    Args:
        table_name: Table to query (used verbatim in the SQL)
        metric_name: Metric name to filter by (bound as a query parameter)
        timestamp_column: Name of timestamp column

    Returns:
        Last timestamp or None if no data
    """
    # maxOrNull yields NULL when no row matches. Plain max() on a
    # non-Nullable column returns the type's default value for an empty
    # set, which would be reported as a real "last timestamp".
    query = f"""
        SELECT maxOrNull({timestamp_column}) AS last_ts
        FROM {table_name}
        WHERE metric_name = %(metric_name)s
    """

    result = self.execute_query(query, {"metric_name": metric_name})

    if not result:
        return None

    # Explicit None check: do not rely on the truthiness of the value.
    last_ts = result[0]["last_ts"]
    return last_ts if last_ts is not None else None
|
|
306
|
+
|
|
307
|
+
def upsert_task_status(
    self,
    metric_name: str,
    detector_id: str,
    process_type: str,
    status: str,
    last_processed_timestamp: Optional[datetime] = None,
    error_message: Optional[str] = None,
    timeout_seconds: int = 3600
) -> None:
    """
    Update or insert task status in ClickHouse.

    ClickHouse doesn't have native UPSERT, so we use DELETE + INSERT pattern:
    the row matching (metric_name, detector_id, process_type) is removed
    with ALTER TABLE ... DELETE and a fresh row is inserted afterwards.
    Both ``started_at`` and ``updated_at`` of the new row are set to the
    current UTC time.

    Args:
        metric_name: Metric identifier
        detector_id: Detector identifier
        process_type: Process type
        status: Task status
        last_processed_timestamp: Last processed timestamp (naive values
            are stored as given; aware values are converted to UTC)
        error_message: Error message if failed
        timeout_seconds: Timeout in seconds
    """
    from detectkit.database.tables import TABLE_TASKS

    def to_naive_utc(moment: datetime) -> datetime:
        """Drop tzinfo after shifting to UTC; datetime64 has no timezone."""
        if moment.tzinfo is not None:
            moment = moment.astimezone(timezone.utc).replace(tzinfo=None)
        return moment

    tasks_table = self.get_full_table_name(TABLE_TASKS, use_internal=True)

    # Current UTC time, made naive so numpy does not have to convert (and
    # warn about) a timezone-aware datetime when building datetime64 arrays.
    now = to_naive_utc(datetime.now(timezone.utc))

    # First, delete existing record (if any)
    # NOTE(review): ALTER TABLE ... DELETE is a mutation; confirm whether it
    # needs to complete before the insert becomes visible to readers.
    delete_query = f"""
        ALTER TABLE {tasks_table}
        DELETE WHERE metric_name = %(metric_name)s
          AND detector_id = %(detector_id)s
          AND process_type = %(process_type)s
    """

    self._client.execute(
        delete_query,
        {
            "metric_name": metric_name,
            "detector_id": detector_id,
            "process_type": process_type,
        }
    )

    # A missing timestamp is kept as None in an object array so that the
    # inserted value is NULL.
    if last_processed_timestamp is not None:
        last_processed = np.array(
            [to_naive_utc(last_processed_timestamp)], dtype="datetime64[ms]"
        )
    else:
        last_processed = np.array([None])

    # Then insert new record (single-row column arrays)
    insert_data = {
        "metric_name": np.array([metric_name]),
        "detector_id": np.array([detector_id]),
        "process_type": np.array([process_type]),
        "status": np.array([status]),
        "started_at": np.array([now], dtype="datetime64[ms]"),
        "updated_at": np.array([now], dtype="datetime64[ms]"),
        "last_processed_timestamp": last_processed,
        "error_message": np.array([error_message]),
        "timeout_seconds": np.array([timeout_seconds], dtype=np.int32),
    }

    self.insert_batch(
        tasks_table,
        insert_data,
        conflict_strategy="ignore"
    )
|
|
371
|
+
|
|
372
|
+
@property
def internal_location(self) -> str:
    """Name of the database that holds the internal tables."""
    location = self._internal_database
    return location
|
|
376
|
+
|
|
377
|
+
@property
def data_location(self) -> str:
    """Name of the database that holds the user data tables."""
    location = self._data_database
    return location
|
|
381
|
+
|
|
382
|
+
def close(self) -> None:
    """Disconnect the ClickHouse client, if one was ever created."""
    # __init__ may have raised before the client attribute was assigned.
    if not hasattr(self, "_client"):
        return
    self._client.disconnect()
|