detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,27 @@
1
+ """Database managers for detectkit."""
2
+
3
+ from detectkit.database.manager import BaseDatabaseManager
4
+ from detectkit.database.clickhouse_manager import ClickHouseDatabaseManager
5
+ from detectkit.database.internal_tables import InternalTablesManager
6
+ from detectkit.database.tables import (
7
+ TABLE_DATAPOINTS,
8
+ TABLE_DETECTIONS,
9
+ TABLE_TASKS,
10
+ INTERNAL_TABLES,
11
+ get_datapoints_table_model,
12
+ get_detections_table_model,
13
+ get_tasks_table_model,
14
+ )
15
+
16
+ __all__ = [
17
+ "BaseDatabaseManager",
18
+ "ClickHouseDatabaseManager",
19
+ "InternalTablesManager",
20
+ "TABLE_DATAPOINTS",
21
+ "TABLE_DETECTIONS",
22
+ "TABLE_TASKS",
23
+ "INTERNAL_TABLES",
24
+ "get_datapoints_table_model",
25
+ "get_detections_table_model",
26
+ "get_tasks_table_model",
27
+ ]
@@ -0,0 +1,385 @@
1
+ """
2
+ ClickHouse database manager implementation.
3
+
4
+ Implements BaseDatabaseManager for ClickHouse using universal methods.
5
+ """
6
+
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ import numpy as np
11
+
12
+ try:
13
+ from clickhouse_driver import Client
14
+ CLICKHOUSE_AVAILABLE = True
15
+ except ImportError:
16
+ CLICKHOUSE_AVAILABLE = False
17
+
18
+ from detectkit.core.models import ColumnDefinition, TableModel
19
+ from detectkit.database.manager import BaseDatabaseManager
20
+
21
+
22
+ class ClickHouseDatabaseManager(BaseDatabaseManager):
23
+ """
24
+ ClickHouse implementation of BaseDatabaseManager.
25
+
26
+ Uses universal methods - does NOT hardcode internal table logic.
27
+
28
+ Args:
29
+ host: ClickHouse host
30
+ port: ClickHouse port (default: 9000 for native protocol)
31
+ user: Database user
32
+ password: Database password
33
+ internal_database: Database for internal tables (_dtk_*)
34
+ data_database: Database for user data tables
35
+ settings: Optional ClickHouse settings
36
+ """
37
+
38
def __init__(
    self,
    host: str = "localhost",
    port: int = 9000,
    user: str = "default",
    password: str = "",
    internal_database: str = "detectk_internal",
    data_database: str = "default",
    settings: Optional[Dict[str, Any]] = None,
):
    """
    Open a ClickHouse connection and make sure both databases exist.

    Args:
        host: ClickHouse host
        port: ClickHouse port (default: 9000 for native protocol)
        user: Database user
        password: Database password
        internal_database: Database for internal tables
        data_database: Database for user data tables
        settings: Optional ClickHouse settings passed to the driver client

    Raises:
        ImportError: If clickhouse-driver could not be imported
    """
    if not CLICKHOUSE_AVAILABLE:
        # The distribution is published as "detectkit"; the previous hint
        # pointed users at a different package name.
        raise ImportError(
            "clickhouse-driver is not installed. "
            "Install with: pip install detectkit[clickhouse]"
        )

    self._internal_database = internal_database
    self._data_database = data_database

    # Create client
    self._client = Client(
        host=host,
        port=port,
        user=user,
        password=password,
        settings=settings or {},
    )

    # Ensure databases exist (runs CREATE DATABASE IF NOT EXISTS statements)
    self._ensure_databases()
69
+
70
def _ensure_databases(self) -> None:
    """Issue CREATE DATABASE IF NOT EXISTS for the internal and data databases."""
    required = (self._internal_database, self._data_database)
    for database_name in required:
        statement = f"CREATE DATABASE IF NOT EXISTS {database_name}"
        self._client.execute(statement)
74
+
75
def execute_query(
    self,
    query: str,
    params: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """
    Run a SQL statement and return every row as a column-name keyed dict.

    Args:
        query: SQL query to execute
        params: Optional query parameters; forwarded to the driver only
            when non-empty

    Returns:
        List of dictionaries where each dict represents a row
    """
    # Leave the params argument out entirely when there is nothing to bind
    positional = (query, params) if params else (query,)

    # The driver answers with (rows, [(column_name, column_type), ...])
    fetched, column_meta = self._client.execute(*positional, with_column_types=True)

    names = [meta[0] for meta in column_meta]

    records = []
    for values in fetched:
        records.append(dict(zip(names, values)))
    return records
106
+
107
def create_table(
    self,
    table_name: str,
    table_model: "TableModel",
    if_not_exists: bool = True
) -> None:
    """
    Create ClickHouse table from TableModel.

    Converts generic TableModel to ClickHouse-specific DDL and executes it.

    Args:
        table_name: Name of table to create
        table_model: Table schema definition; reads ``columns`` (each with
            ``name``, ``type``, ``default``), ``engine``, ``order_by`` and
            ``primary_key``
        if_not_exists: Add IF NOT EXISTS clause
    """
    # Build column definitions
    col_defs = []
    for col in table_model.columns:
        col_def = f"{col.name} {col.type}"
        if col.default is not None:
            col_def += f" DEFAULT {self._format_default(col.default)}"
        col_defs.append(col_def)

    columns_sql = ",\n    ".join(col_defs)

    if_not_exists_clause = "IF NOT EXISTS " if if_not_exists else ""

    # Engine falls back to MergeTree; add parentheses only if the engine
    # string does not already carry its own argument list.
    engine = table_model.engine or "MergeTree"
    engine_clause = engine if "(" in engine else f"{engine}()"

    # Sorting key: explicit order_by wins, then primary_key. Neither being
    # set used to crash in str.join(None); fall back to an empty key.
    sort_key = table_model.order_by or table_model.primary_key or []
    if isinstance(sort_key, str):
        # A bare string would otherwise be joined character by character
        sort_key = [sort_key]
    order_by_sql = f"({', '.join(sort_key)})" if sort_key else "tuple()"

    ddl = (
        f"CREATE TABLE {if_not_exists_clause}{table_name} (\n"
        f"    {columns_sql}\n"
        f")\n"
        f"ENGINE = {engine_clause}\n"
        f"ORDER BY {order_by_sql}"
    )

    self._client.execute(ddl)
157
+
158
def _format_default(self, value: Any) -> str:
    """
    Format a column default value as a SQL literal.

    Args:
        value: Default value taken from a column definition

    Returns:
        ``NULL`` for None, a single-quoted and escaped literal for strings,
        and ``str(value)`` for everything else (numbers included)
    """
    if value is None:
        return "NULL"
    if isinstance(value, str):
        # Escape backslashes first, then quotes, so a default such as
        # "it's" cannot terminate the literal early and break the DDL.
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"
    return str(value)
168
+
169
def table_exists(
    self,
    table_name: str,
    schema: Optional[str] = None
) -> bool:
    """
    Report whether a table is listed in ClickHouse's system.tables.

    Args:
        table_name: Name of table to check
        schema: Database name (if None, the internal database is checked
            first, then the data database)

    Returns:
        True if table exists
    """
    lookup_sql = """
        SELECT 1
        FROM system.tables
        WHERE database = %(database)s
        AND name = %(table)s
    """

    candidates = [schema] if schema else [self._internal_database, self._data_database]

    # any() stops at the first database that yields a row
    return any(
        self.execute_query(lookup_sql, {"database": candidate, "table": table_name})
        for candidate in candidates
    )
204
+
205
def insert_batch(
    self,
    table_name: str,
    data: Dict[str, np.ndarray],
    conflict_strategy: str = "ignore"
) -> int:
    """
    Insert batch of data into ClickHouse table.

    Args:
        table_name: Table to insert into
        data: Dictionary mapping column names to numpy arrays of equal length
        conflict_strategy: "ignore" or "replace". Accepted for interface
            compatibility only; it does not change the statement sent.

    Returns:
        Number of rows sent to ClickHouse (0 for empty input)

    Raises:
        ValueError: If the arrays do not all have the same length
    """
    if not data:
        return 0

    # Validate all arrays have same length
    lengths = [len(arr) for arr in data.values()]
    if len(set(lengths)) > 1:
        raise ValueError(
            f"All arrays must have same length, got: {dict(zip(data.keys(), lengths))}"
        )

    num_rows = lengths[0]
    if num_rows == 0:
        return 0

    def to_native(value: Any) -> Any:
        """Turn one numpy element into a plain Python value for the driver."""
        if isinstance(value, (np.datetime64, np.timedelta64)):
            # NaT has no Python equivalent; send it as NULL instead of
            # crashing inside the datetime conversion.
            if np.isnat(value):
                return None
            if isinstance(value, np.datetime64):
                return self._convert_numpy_datetime(value)
            # timedelta64 cannot go through the datetime converter;
            # .item() yields datetime.timedelta (int for sub-us units).
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, (np.integer, np.floating, np.bool_)):
            # Numpy float NaN intentionally stays NaN here (unchanged).
            return value.item()
        if isinstance(value, float) and np.isnan(value):
            # Plain Python float NaN (e.g. from object arrays) becomes NULL
            return None
        return value

    # Convert column-oriented numpy data to row lists for the driver
    column_names = list(data.keys())
    rows = [
        [to_native(data[col_name][i]) for col_name in column_names]
        for i in range(num_rows)
    ]

    # Every row is sent as a plain INSERT regardless of conflict_strategy.
    # Plain MergeTree keeps duplicate keys; de-duplication only happens if
    # the target table uses an engine that does it (e.g. ReplacingMergeTree).
    self._client.execute(
        f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES",
        rows
    )

    return num_rows
270
+
271
def _convert_numpy_datetime(self, dt: np.datetime64) -> datetime:
    """
    Convert numpy datetime64 to Python datetime with UTC timezone.

    Uses whole microseconds since the Unix epoch added to an aware epoch
    datetime, instead of a float timestamp passed to
    ``datetime.fromtimestamp``, so the result does not depend on the
    platform's supported timestamp range. Precision finer than one
    microsecond is dropped.

    Args:
        dt: Value to convert (interpreted as UTC)

    Returns:
        Timezone-aware datetime in UTC

    Raises:
        ValueError: If ``dt`` is NaT
    """
    from datetime import timedelta

    if np.isnat(dt):
        raise ValueError("Cannot convert NaT to a Python datetime")

    micros = int(dt.astype("datetime64[us]").astype(np.int64))
    return datetime(1970, 1, 1, tzinfo=timezone.utc) + timedelta(microseconds=micros)
276
+
277
def get_last_timestamp(
    self,
    table_name: str,
    metric_name: str,
    timestamp_column: str = "timestamp"
) -> Optional[datetime]:
    """
    Get last timestamp for a metric in a table.

    Args:
        table_name: Table to query
        metric_name: Metric name to filter by (matched against the
            ``metric_name`` column)
        timestamp_column: Name of timestamp column

    Returns:
        Last timestamp or None if no data
    """
    # ClickHouse aggregates over an empty set return the type's default
    # value (epoch for DateTime) rather than NULL, so max() alone cannot
    # distinguish "no rows" from a real timestamp. Fetch the row count too.
    query = f"""
        SELECT max({timestamp_column}) AS last_ts,
               count() AS row_count
        FROM {table_name}
        WHERE metric_name = %(metric_name)s
    """

    result = self.execute_query(query, {"metric_name": metric_name})
    if not result:
        return None

    row = result[0]
    if not row["row_count"]:
        return None

    # May still be None if every matching timestamp is NULL
    return row["last_ts"]
306
+
307
def upsert_task_status(
    self,
    metric_name: str,
    detector_id: str,
    process_type: str,
    status: str,
    last_processed_timestamp: Optional[datetime] = None,
    error_message: Optional[str] = None,
    timeout_seconds: int = 3600
) -> None:
    """
    Update or insert task status in ClickHouse.

    ClickHouse doesn't have native UPSERT, so we use DELETE + INSERT pattern.
    The DELETE is an ALTER TABLE mutation, which ClickHouse runs in the
    background by default; it is executed with ``mutations_sync = 1`` so the
    old row is gone before the replacement row is inserted.

    Args:
        metric_name: Metric identifier
        detector_id: Detector identifier
        process_type: Process type
        status: Task status
        last_processed_timestamp: Last processed timestamp
        error_message: Error message if failed
        timeout_seconds: Timeout in seconds
    """
    from detectkit.database.tables import TABLE_TASKS

    def to_naive_utc(value: datetime) -> datetime:
        """Drop tzinfo after shifting to UTC; numpy datetime64 is timezone-naive."""
        if value.tzinfo is not None:
            value = value.astimezone(timezone.utc)
        return value.replace(tzinfo=None)

    tasks_table = self.get_full_table_name(TABLE_TASKS, use_internal=True)

    # Current UTC time, made naive so numpy does not have to parse an
    # aware datetime (which it warns about).
    now = to_naive_utc(datetime.now(timezone.utc))

    # First, delete existing record (if any) and wait for the mutation
    delete_query = f"""
        ALTER TABLE {tasks_table}
        DELETE WHERE metric_name = %(metric_name)s
        AND detector_id = %(detector_id)s
        AND process_type = %(process_type)s
    """

    self._client.execute(
        delete_query,
        {
            "metric_name": metric_name,
            "detector_id": detector_id,
            "process_type": process_type,
        },
        settings={"mutations_sync": 1},
    )

    if last_processed_timestamp is not None:
        last_processed = np.array(
            [to_naive_utc(last_processed_timestamp)], dtype="datetime64[ms]"
        )
    else:
        last_processed = np.array([None])

    # Then insert new record.
    # NOTE(review): started_at is rewritten on every call, so the original
    # start time is not preserved across status updates - confirm intended.
    insert_data = {
        "metric_name": np.array([metric_name]),
        "detector_id": np.array([detector_id]),
        "process_type": np.array([process_type]),
        "status": np.array([status]),
        "started_at": np.array([now], dtype="datetime64[ms]"),
        "updated_at": np.array([now], dtype="datetime64[ms]"),
        "last_processed_timestamp": last_processed,
        "error_message": np.array([error_message]),
        "timeout_seconds": np.array([timeout_seconds], dtype=np.int32),
    }

    self.insert_batch(
        tasks_table,
        insert_data,
        conflict_strategy="ignore"
    )
371
+
372
@property
def internal_location(self) -> str:
    """Name of the ClickHouse database that holds the internal tables."""
    database_name: str = self._internal_database
    return database_name
376
+
377
@property
def data_location(self) -> str:
    """Name of the ClickHouse database that holds user data tables."""
    database_name: str = self._data_database
    return database_name
381
+
382
def close(self) -> None:
    """Disconnect the ClickHouse client, if one was ever attached."""
    # Sentinel distinguishes "attribute missing" from any stored value
    missing = object()
    client = getattr(self, "_client", missing)
    if client is missing:
        return
    client.disconnect()