detectkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- detectkit/__init__.py +17 -0
- detectkit/alerting/__init__.py +13 -0
- detectkit/alerting/channels/__init__.py +21 -0
- detectkit/alerting/channels/base.py +191 -0
- detectkit/alerting/channels/email.py +146 -0
- detectkit/alerting/channels/factory.py +193 -0
- detectkit/alerting/channels/mattermost.py +53 -0
- detectkit/alerting/channels/slack.py +55 -0
- detectkit/alerting/channels/telegram.py +110 -0
- detectkit/alerting/channels/webhook.py +139 -0
- detectkit/alerting/orchestrator.py +368 -0
- detectkit/cli/__init__.py +1 -0
- detectkit/cli/commands/__init__.py +1 -0
- detectkit/cli/commands/init.py +282 -0
- detectkit/cli/commands/run.py +427 -0
- detectkit/cli/commands/test_alert.py +184 -0
- detectkit/cli/main.py +186 -0
- detectkit/config/__init__.py +30 -0
- detectkit/config/metric_config.py +467 -0
- detectkit/config/profile.py +285 -0
- detectkit/config/project_config.py +164 -0
- detectkit/core/__init__.py +6 -0
- detectkit/core/interval.py +132 -0
- detectkit/core/models.py +106 -0
- detectkit/database/__init__.py +27 -0
- detectkit/database/clickhouse_manager.py +385 -0
- detectkit/database/internal_tables.py +581 -0
- detectkit/database/manager.py +324 -0
- detectkit/database/tables.py +134 -0
- detectkit/detectors/__init__.py +6 -0
- detectkit/detectors/base.py +222 -0
- detectkit/detectors/factory.py +138 -0
- detectkit/detectors/statistical/__init__.py +8 -0
- detectkit/detectors/statistical/iqr.py +230 -0
- detectkit/detectors/statistical/mad.py +423 -0
- detectkit/detectors/statistical/manual_bounds.py +177 -0
- detectkit/detectors/statistical/zscore.py +225 -0
- detectkit/loaders/__init__.py +6 -0
- detectkit/loaders/metric_loader.py +470 -0
- detectkit/loaders/query_template.py +164 -0
- detectkit/orchestration/__init__.py +9 -0
- detectkit/orchestration/task_manager.py +698 -0
- detectkit/utils/__init__.py +1 -0
- detectkit-0.1.0.dist-info/METADATA +231 -0
- detectkit-0.1.0.dist-info/RECORD +49 -0
- detectkit-0.1.0.dist-info/WHEEL +5 -0
- detectkit-0.1.0.dist-info/entry_points.txt +2 -0
- detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- detectkit-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Internal tables manager for detectk.
|
|
3
|
+
|
|
4
|
+
High-level wrapper over BaseDatabaseManager for working with internal tables
|
|
5
|
+
(_dtk_datapoints, _dtk_detections, _dtk_tasks).
|
|
6
|
+
|
|
7
|
+
This class provides convenient methods that use the UNIVERSAL BaseDatabaseManager
|
|
8
|
+
methods underneath. It does NOT duplicate logic - just provides semantic wrappers.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from typing import Dict, Optional
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from detectkit.database.manager import BaseDatabaseManager
|
|
17
|
+
from detectkit.database.tables import (
|
|
18
|
+
INTERNAL_TABLES,
|
|
19
|
+
TABLE_DATAPOINTS,
|
|
20
|
+
TABLE_DETECTIONS,
|
|
21
|
+
TABLE_TASKS,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class InternalTablesManager:
|
|
26
|
+
"""
|
|
27
|
+
Manager for internal detectk tables.
|
|
28
|
+
|
|
29
|
+
Provides high-level methods for working with _dtk_* tables:
|
|
30
|
+
- Ensure tables exist
|
|
31
|
+
- Save datapoints and detections
|
|
32
|
+
- Task locking and status management
|
|
33
|
+
- Query last timestamps
|
|
34
|
+
|
|
35
|
+
This is a WRAPPER over BaseDatabaseManager - uses its universal methods.
|
|
36
|
+
|
|
37
|
+
Example:
|
|
38
|
+
>>> manager = ClickHouseDatabaseManager(...)
|
|
39
|
+
>>> internal = InternalTablesManager(manager)
|
|
40
|
+
>>> internal.ensure_tables()
|
|
41
|
+
>>> internal.save_datapoints("cpu_usage", data)
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
def __init__(self, manager: BaseDatabaseManager):
|
|
45
|
+
"""
|
|
46
|
+
Initialize internal tables manager.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
manager: Database manager instance (ClickHouse, PostgreSQL, etc.)
|
|
50
|
+
"""
|
|
51
|
+
self._manager = manager
|
|
52
|
+
|
|
53
|
+
def ensure_tables(self) -> None:
    """
    Create every internal table that does not exist yet.

    Iterates over INTERNAL_TABLES (table name -> model factory). For each
    entry the existence check uses the bare table name together with the
    manager's ``internal_location``; creation uses the fully qualified
    name and is issued with ``if_not_exists=True``.

    Tables that already exist are skipped, so the method can be called
    repeatedly.

    Example:
        >>> internal.ensure_tables()
    """
    db = self._manager

    for name, build_model in INTERNAL_TABLES.items():
        # Resolve the qualified name first, exactly as creation needs it
        qualified_name = db.get_full_table_name(name, use_internal=True)

        # Nothing to do when the table is already present
        if db.table_exists(name, schema=db.internal_location):
            continue

        model = build_model()
        db.create_table(qualified_name, model, if_not_exists=True)
|
|
82
|
+
|
|
83
|
+
def save_datapoints(
    self,
    metric_name: str,
    data: Dict[str, np.ndarray],
    interval_seconds: int,
    seasonality_columns: list[str],
) -> int:
    """
    Save metric datapoints to _dtk_datapoints table.

    The per-row arrays from ``data`` are passed through unchanged; the
    scalar arguments are broadcast into one array entry per row and a
    ``created_at`` column holding the current UTC time is added.

    Args:
        metric_name: Metric identifier
        data: Dictionary with keys:
            - timestamp: np.array of datetime64
            - value: np.array of float64 (nullable)
            - seasonality_data: np.array of JSON strings
        interval_seconds: Interval in seconds
        seasonality_columns: List of seasonality column names; stored as
            a single comma-joined string on every row

    Returns:
        The value returned by the manager's ``insert_batch``
        (number of rows inserted)

    Raises:
        KeyError: If ``data`` lacks one of the keys listed above

    Example:
        >>> data = {
        ...     "timestamp": np.array([dt1, dt2], dtype="datetime64[ms]"),
        ...     "value": np.array([0.5, 0.6]),
        ...     "seasonality_data": np.array(['{"hour": 10}', '{"hour": 11}']),
        ... }
        >>> rows = internal.save_datapoints(
        ...     "cpu_usage", data, 600, ["hour", "day_of_week"]
        ... )
    """
    num_rows = len(data["timestamp"])

    # datetime64 has no timezone. Passing an aware datetime makes NumPy
    # emit a timezone warning on every call, so take "now" in UTC and drop
    # the tzinfo before conversion. The stored instant is unchanged.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None)

    # Prepare data for insert_batch
    insert_data = {
        "metric_name": np.full(num_rows, metric_name, dtype=object),
        "timestamp": data["timestamp"],
        "value": data["value"],
        "seasonality_data": data["seasonality_data"],
        "interval_seconds": np.full(num_rows, interval_seconds, dtype=np.int32),
        "seasonality_columns": np.full(
            num_rows, ",".join(seasonality_columns), dtype=object
        ),
        "created_at": np.full(num_rows, created_at, dtype="datetime64[ms]"),
    }

    # Use universal insert_batch method
    full_table_name = self._manager.get_full_table_name(
        TABLE_DATAPOINTS, use_internal=True
    )

    return self._manager.insert_batch(
        full_table_name, insert_data, conflict_strategy="ignore"
    )
|
|
140
|
+
|
|
141
|
+
def save_detections(
    self,
    metric_name: str,
    detector_id: str,
    data: Dict[str, np.ndarray],
    detector_params: str,
) -> int:
    """
    Save detection results to _dtk_detections table.

    The per-row arrays from ``data`` are passed through unchanged; the
    scalar arguments are broadcast into one array entry per row and a
    ``created_at`` column holding the current UTC time is added.

    Args:
        metric_name: Metric identifier
        detector_id: Detector identifier (hash)
        data: Dictionary with keys:
            - timestamp: np.array of datetime64
            - is_anomaly: np.array of bool
            - confidence_lower: np.array of float64 (nullable)
            - confidence_upper: np.array of float64 (nullable)
            - value: np.array of float64 (nullable)
            - detection_metadata: np.array of JSON strings
        detector_params: JSON string with sorted detector parameters

    Returns:
        The value returned by the manager's ``insert_batch``
        (number of rows inserted)

    Raises:
        KeyError: If ``data`` lacks one of the keys listed above

    Example:
        >>> data = {
        ...     "timestamp": np.array([dt1, dt2]),
        ...     "is_anomaly": np.array([False, True]),
        ...     "confidence_lower": np.array([0.4, 0.5]),
        ...     "confidence_upper": np.array([0.6, 0.7]),
        ...     "value": np.array([0.5, 0.9]),
        ...     "detection_metadata": np.array(['{"severity": 0.0}', '{"severity": 0.8}']),
        ... }
        >>> rows = internal.save_detections(
        ...     "cpu_usage", "mad_abc123", data, '{"threshold": 3.0}'
        ... )
    """
    num_rows = len(data["timestamp"])

    # datetime64 has no timezone. Passing an aware datetime makes NumPy
    # emit a timezone warning on every call, so take "now" in UTC and drop
    # the tzinfo before conversion. The stored instant is unchanged.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None)

    # Prepare data for insert_batch
    insert_data = {
        "metric_name": np.full(num_rows, metric_name, dtype=object),
        "detector_id": np.full(num_rows, detector_id, dtype=object),
        "timestamp": data["timestamp"],
        "is_anomaly": data["is_anomaly"],
        "confidence_lower": data["confidence_lower"],
        "confidence_upper": data["confidence_upper"],
        "value": data["value"],
        "detector_params": np.full(num_rows, detector_params, dtype=object),
        "detection_metadata": data["detection_metadata"],
        "created_at": np.full(num_rows, created_at, dtype="datetime64[ms]"),
    }

    # Use universal insert_batch method
    full_table_name = self._manager.get_full_table_name(
        TABLE_DETECTIONS, use_internal=True
    )

    return self._manager.insert_batch(
        full_table_name, insert_data, conflict_strategy="ignore"
    )
|
|
205
|
+
|
|
206
|
+
def get_last_datapoint_timestamp(self, metric_name: str) -> Optional[datetime]:
    """
    Return the most recent timestamp stored for a metric in _dtk_datapoints.

    Thin wrapper: resolves the qualified datapoints table name and hands
    the lookup to the manager's ``get_last_timestamp``.

    Args:
        metric_name: Metric identifier

    Returns:
        Whatever ``get_last_timestamp`` returns for this table and metric
        (last timestamp, or None if no data)

    Example:
        >>> last_ts = internal.get_last_datapoint_timestamp("cpu_usage")
        >>> if last_ts:
        ...     print(f"Last data at {last_ts}")
    """
    db = self._manager
    datapoints_table = db.get_full_table_name(TABLE_DATAPOINTS, use_internal=True)
    return db.get_last_timestamp(datapoints_table, metric_name)
|
|
226
|
+
|
|
227
|
+
def get_last_detection_timestamp(
    self, metric_name: str, detector_id: str
) -> Optional[datetime]:
    """
    Return the most recent timestamp stored for a detector in _dtk_detections.

    Runs a ``max(timestamp)`` query filtered by both metric and detector,
    with the two identifiers passed as bound parameters.

    Args:
        metric_name: Metric identifier
        detector_id: Detector identifier

    Returns:
        The ``last_ts`` value of the first result row, or None when the
        query yields no rows or that value is falsy

    Example:
        >>> last_ts = internal.get_last_detection_timestamp("cpu_usage", "mad_abc123")
        >>> if last_ts:
        ...     print(f"Last detection at {last_ts}")
    """
    full_table_name = self._manager.get_full_table_name(
        TABLE_DETECTIONS, use_internal=True
    )

    # Filter on metric_name AND detector_id; values are bound, not inlined
    query = f"""
        SELECT max(timestamp) as last_ts
        FROM {full_table_name}
        WHERE metric_name = %(metric_name)s
          AND detector_id = %(detector_id)s
    """
    bindings = {"metric_name": metric_name, "detector_id": detector_id}

    rows = self._manager.execute_query(query, bindings)
    if not rows:
        return None

    # NOTE(review): this relies on the backend reporting "no data" as a
    # falsy value (e.g. NULL/None) - confirm for each supported database.
    return rows[0]["last_ts"] or None
|
|
265
|
+
|
|
266
|
+
def load_datapoints(
    self,
    metric_name: str,
    from_timestamp: Optional[datetime] = None,
    to_timestamp: Optional[datetime] = None,
) -> Dict[str, np.ndarray]:
    """
    Load datapoints from _dtk_datapoints table.

    Filter values are sent as bound query parameters rather than being
    formatted into the SQL text, so a metric name containing quotes
    cannot break or alter the query.

    Args:
        metric_name: Metric identifier
        from_timestamp: Start timestamp (inclusive, optional)
        to_timestamp: End timestamp (exclusive, optional)

    Returns:
        Dict with:
            - timestamp: np.array of datetime64[ms], ascending
            - value: np.array of float64
            - seasonality_data: np.array (object) of stored values
            - seasonality_columns: plain list of column names, parsed
              from the comma-separated string on the first row
        All four keys are present (and empty) when nothing matches.

    Example:
        >>> data = internal.load_datapoints("cpu_usage", from_timestamp=start, to_timestamp=end)
        >>> print(f"Loaded {len(data['timestamp'])} points")
    """
    full_table_name = self._manager.get_full_table_name(
        TABLE_DATAPOINTS, use_internal=True
    )

    # Bounds are compared as second-resolution strings
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # Build WHERE clause from placeholders; values travel in `params`
    where_parts = ["metric_name = %(metric_name)s"]
    params = {"metric_name": metric_name}
    if from_timestamp is not None:
        where_parts.append("timestamp >= %(from_timestamp)s")
        params["from_timestamp"] = from_timestamp.strftime(timestamp_format)
    if to_timestamp is not None:
        where_parts.append("timestamp < %(to_timestamp)s")
        params["to_timestamp"] = to_timestamp.strftime(timestamp_format)

    where_clause = " AND ".join(where_parts)

    # Query data
    query = f"""
        SELECT
            timestamp,
            value,
            seasonality_data,
            seasonality_columns
        FROM {full_table_name}
        WHERE {where_clause}
        ORDER BY timestamp
    """

    results = self._manager.execute_query(query, params)

    # Convert to numpy arrays
    if not results:
        return {
            "timestamp": np.array([], dtype="datetime64[ms]"),
            "value": np.array([], dtype=np.float64),
            "seasonality_data": np.array([], dtype=object),
            "seasonality_columns": [],
        }

    timestamps = [row["timestamp"] for row in results]
    values = [row["value"] for row in results]
    seasonality = [row["seasonality_data"] for row in results]

    # seasonality_columns is read from the first row only
    # (comma-separated string); blank entries are dropped
    seasonality_columns_str = results[0].get("seasonality_columns", "")
    if seasonality_columns_str:
        seasonality_columns = [
            column.strip()
            for column in seasonality_columns_str.split(",")
            if column.strip()
        ]
    else:
        seasonality_columns = []

    return {
        "timestamp": np.array(timestamps, dtype="datetime64[ms]"),
        "value": np.array(values, dtype=np.float64),
        "seasonality_data": np.array(seasonality, dtype=object),
        "seasonality_columns": seasonality_columns,
    }
|
|
337
|
+
|
|
338
|
+
def delete_datapoints(
    self,
    metric_name: str,
    from_timestamp: Optional[datetime] = None,
    to_timestamp: Optional[datetime] = None,
) -> int:
    """
    Delete datapoints for a metric.

    Issues ``ALTER TABLE ... DELETE`` against the datapoints table.
    Filter values are sent as bound query parameters rather than being
    formatted into the SQL text, so a metric name containing quotes
    cannot widen or break the delete.

    Args:
        metric_name: Metric name
        from_timestamp: Optional start timestamp (inclusive)
        to_timestamp: Optional end timestamp (exclusive)

    Returns:
        Always 0; the number of deleted rows is not reported
    """
    full_table_name = self._manager.get_full_table_name(
        TABLE_DATAPOINTS, use_internal=True
    )

    # Bounds are compared as second-resolution strings
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # Build WHERE clause from placeholders; values travel in `params`
    where_parts = ["metric_name = %(metric_name)s"]
    params = {"metric_name": metric_name}
    if from_timestamp is not None:
        where_parts.append("timestamp >= %(from_timestamp)s")
        params["from_timestamp"] = from_timestamp.strftime(timestamp_format)
    if to_timestamp is not None:
        where_parts.append("timestamp < %(to_timestamp)s")
        params["to_timestamp"] = to_timestamp.strftime(timestamp_format)

    where_clause = " AND ".join(where_parts)

    # Delete data
    query = f"ALTER TABLE {full_table_name} DELETE WHERE {where_clause}"
    self._manager.execute_query(query, params)

    # ClickHouse ALTER TABLE DELETE is async, return 0
    # Other databases might return affected rows
    return 0
|
|
375
|
+
|
|
376
|
+
def delete_detections(
    self,
    metric_name: str,
    detector_id: Optional[str] = None,
    from_timestamp: Optional[datetime] = None,
    to_timestamp: Optional[datetime] = None,
) -> int:
    """
    Delete detections for a metric.

    Issues ``ALTER TABLE ... DELETE`` against the detections table.
    Filter values are sent as bound query parameters rather than being
    formatted into the SQL text, so identifiers containing quotes cannot
    widen or break the delete.

    Args:
        metric_name: Metric name
        detector_id: Optional detector ID filter; a falsy value
            (None or "") means detections of every detector are deleted
        from_timestamp: Optional start timestamp (inclusive)
        to_timestamp: Optional end timestamp (exclusive)

    Returns:
        Always 0; the number of deleted rows is not reported
    """
    full_table_name = self._manager.get_full_table_name(
        TABLE_DETECTIONS, use_internal=True
    )

    # Bounds are compared as second-resolution strings
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # Build WHERE clause from placeholders; values travel in `params`
    where_parts = ["metric_name = %(metric_name)s"]
    params = {"metric_name": metric_name}
    if detector_id:
        where_parts.append("detector_id = %(detector_id)s")
        params["detector_id"] = detector_id
    if from_timestamp is not None:
        where_parts.append("timestamp >= %(from_timestamp)s")
        params["from_timestamp"] = from_timestamp.strftime(timestamp_format)
    if to_timestamp is not None:
        where_parts.append("timestamp < %(to_timestamp)s")
        params["to_timestamp"] = to_timestamp.strftime(timestamp_format)

    where_clause = " AND ".join(where_parts)

    # Delete data
    query = f"ALTER TABLE {full_table_name} DELETE WHERE {where_clause}"
    self._manager.execute_query(query, params)

    # ClickHouse ALTER TABLE DELETE is async, return 0
    return 0
|
|
416
|
+
|
|
417
|
+
def acquire_lock(
|
|
418
|
+
self,
|
|
419
|
+
metric_name: str,
|
|
420
|
+
detector_id: str,
|
|
421
|
+
process_type: str,
|
|
422
|
+
timeout_seconds: int = 3600,
|
|
423
|
+
) -> bool:
|
|
424
|
+
"""
|
|
425
|
+
Acquire task lock by creating task record with status='running'.
|
|
426
|
+
|
|
427
|
+
This implements task locking to prevent concurrent execution.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
metric_name: Metric identifier
|
|
431
|
+
detector_id: Detector identifier (or "load" for loading tasks)
|
|
432
|
+
process_type: Process type ("load" or "detect")
|
|
433
|
+
timeout_seconds: Task timeout in seconds
|
|
434
|
+
|
|
435
|
+
Returns:
|
|
436
|
+
True if lock acquired, False if already locked
|
|
437
|
+
|
|
438
|
+
Raises:
|
|
439
|
+
Exception: If lock is held by another process (check timeout)
|
|
440
|
+
|
|
441
|
+
Example:
|
|
442
|
+
>>> if internal.acquire_lock("cpu_usage", "load", "load"):
|
|
443
|
+
... try:
|
|
444
|
+
... # Do work
|
|
445
|
+
... pass
|
|
446
|
+
... finally:
|
|
447
|
+
... internal.release_lock("cpu_usage", "load", "load", "completed")
|
|
448
|
+
"""
|
|
449
|
+
# Check if task is already running
|
|
450
|
+
existing_status = self.check_lock(metric_name, detector_id, process_type)
|
|
451
|
+
|
|
452
|
+
if existing_status:
|
|
453
|
+
# Task is locked
|
|
454
|
+
# TODO: Check if lock expired based on timeout
|
|
455
|
+
return False
|
|
456
|
+
|
|
457
|
+
# Acquire lock by creating task record
|
|
458
|
+
self._manager.upsert_task_status(
|
|
459
|
+
metric_name=metric_name,
|
|
460
|
+
detector_id=detector_id,
|
|
461
|
+
process_type=process_type,
|
|
462
|
+
status="running",
|
|
463
|
+
timeout_seconds=timeout_seconds,
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
return True
|
|
467
|
+
|
|
468
|
+
def release_lock(
|
|
469
|
+
self,
|
|
470
|
+
metric_name: str,
|
|
471
|
+
detector_id: str,
|
|
472
|
+
process_type: str,
|
|
473
|
+
status: str,
|
|
474
|
+
last_processed_timestamp: Optional[datetime] = None,
|
|
475
|
+
error_message: Optional[str] = None,
|
|
476
|
+
) -> None:
|
|
477
|
+
"""
|
|
478
|
+
Release task lock by updating status to 'completed' or 'failed'.
|
|
479
|
+
|
|
480
|
+
Args:
|
|
481
|
+
metric_name: Metric identifier
|
|
482
|
+
detector_id: Detector identifier
|
|
483
|
+
process_type: Process type
|
|
484
|
+
status: Final status ("completed" or "failed")
|
|
485
|
+
last_processed_timestamp: Last successfully processed timestamp
|
|
486
|
+
error_message: Error message if status is "failed"
|
|
487
|
+
|
|
488
|
+
Example:
|
|
489
|
+
>>> internal.release_lock(
|
|
490
|
+
... "cpu_usage", "load", "load",
|
|
491
|
+
... status="completed",
|
|
492
|
+
... last_processed_timestamp=datetime(2024, 1, 1, 23, 59)
|
|
493
|
+
... )
|
|
494
|
+
"""
|
|
495
|
+
self._manager.upsert_task_status(
|
|
496
|
+
metric_name=metric_name,
|
|
497
|
+
detector_id=detector_id,
|
|
498
|
+
process_type=process_type,
|
|
499
|
+
status=status,
|
|
500
|
+
last_processed_timestamp=last_processed_timestamp,
|
|
501
|
+
error_message=error_message,
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
def check_lock(
    self, metric_name: str, detector_id: str, process_type: str
) -> Optional[Dict]:
    """
    Look up a running task record for the given task key.

    Queries the tasks table for rows matching the metric, detector and
    process type whose status is 'running'. The three identifiers are
    passed as bound parameters.

    Args:
        metric_name: Metric identifier
        detector_id: Detector identifier
        process_type: Process type

    Returns:
        The first matching row as returned by the manager's
        ``execute_query`` if the task is locked, None if not locked

    Example:
        >>> status = internal.check_lock("cpu_usage", "load", "load")
        >>> if status and status["status"] == "running":
        ...     print("Task is locked")
    """
    full_table_name = self._manager.get_full_table_name(
        TABLE_TASKS, use_internal=True
    )

    query = f"""
        SELECT *
        FROM {full_table_name}
        WHERE metric_name = %(metric_name)s
          AND detector_id = %(detector_id)s
          AND process_type = %(process_type)s
          AND status = 'running'
    """
    task_key = {
        "metric_name": metric_name,
        "detector_id": detector_id,
        "process_type": process_type,
    }

    running_rows = self._manager.execute_query(query, task_key)

    # Only the first row is reported, even if several match
    return running_rows[0] if running_rows else None
|
|
548
|
+
|
|
549
|
+
def update_task_progress(
|
|
550
|
+
self,
|
|
551
|
+
metric_name: str,
|
|
552
|
+
detector_id: str,
|
|
553
|
+
process_type: str,
|
|
554
|
+
last_processed_timestamp: datetime,
|
|
555
|
+
) -> None:
|
|
556
|
+
"""
|
|
557
|
+
Update task progress (last_processed_timestamp) while task is running.
|
|
558
|
+
|
|
559
|
+
This enables idempotency - if process crashes, it can resume from
|
|
560
|
+
last_processed_timestamp.
|
|
561
|
+
|
|
562
|
+
Args:
|
|
563
|
+
metric_name: Metric identifier
|
|
564
|
+
detector_id: Detector identifier
|
|
565
|
+
process_type: Process type
|
|
566
|
+
last_processed_timestamp: Last successfully processed timestamp
|
|
567
|
+
|
|
568
|
+
Example:
|
|
569
|
+
>>> # Update progress every 1000 rows
|
|
570
|
+
>>> internal.update_task_progress(
|
|
571
|
+
... "cpu_usage", "load", "load",
|
|
572
|
+
... datetime(2024, 1, 1, 12, 0)
|
|
573
|
+
... )
|
|
574
|
+
"""
|
|
575
|
+
self._manager.upsert_task_status(
|
|
576
|
+
metric_name=metric_name,
|
|
577
|
+
detector_id=detector_id,
|
|
578
|
+
process_type=process_type,
|
|
579
|
+
status="running",
|
|
580
|
+
last_processed_timestamp=last_processed_timestamp,
|
|
581
|
+
)
|