kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff compares publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/semantic_memory.py +2 -2
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +230 -4
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
kailash/monitoring/metrics.py
ADDED
@@ -0,0 +1,677 @@
+"""
+Metrics collection and aggregation for monitoring system.
+
+Provides detailed metrics for validation failures, security violations,
+and performance monitoring with time-series data collection.
+"""
+
+import json
+import logging
+import threading
+import time
+from collections import defaultdict, deque
+from dataclasses import dataclass, field
+from datetime import UTC, datetime, timedelta
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+logger = logging.getLogger(__name__)
+
+
+class MetricType(Enum):
+    """Types of metrics collected."""
+
+    COUNTER = "counter"  # Incrementing values
+    GAUGE = "gauge"  # Current value
+    HISTOGRAM = "histogram"  # Distribution of values
+    TIMER = "timer"  # Time-based measurements
+
+
+class MetricSeverity(Enum):
+    """Severity levels for metrics."""
+
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+    CRITICAL = "critical"
+
+
+@dataclass
+class MetricPoint:
+    """Single metric data point."""
+
+    timestamp: datetime
+    value: Union[int, float]
+    labels: Dict[str, str] = field(default_factory=dict)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class MetricSeries:
+    """Time series of metric data points."""
+
+    name: str
+    metric_type: MetricType
+    description: str
+    unit: str = ""
+    points: deque = field(default_factory=lambda: deque(maxlen=1000))
+
+    def add_point(
+        self,
+        value: Union[int, float],
+        labels: Optional[Dict[str, str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ):
+        """Add a new data point to the series."""
+        point = MetricPoint(
+            timestamp=datetime.now(UTC),
+            value=value,
+            labels=labels or {},
+            metadata=metadata or {},
+        )
+        self.points.append(point)
+
+    def get_latest_value(self) -> Optional[Union[int, float]]:
+        """Get the most recent metric value."""
+        return self.points[-1].value if self.points else None
+
+    def get_average(self, time_window: Optional[timedelta] = None) -> Optional[float]:
+        """Get average value over time window."""
+        if not self.points:
+            return None
+
+        if time_window:
+            cutoff = datetime.now(UTC) - time_window
+            relevant_points = [p for p in self.points if p.timestamp >= cutoff]
+        else:
+            relevant_points = list(self.points)
+
+        if not relevant_points:
+            return None
+
+        return sum(p.value for p in relevant_points) / len(relevant_points)
+
+    def get_max(
+        self, time_window: Optional[timedelta] = None
+    ) -> Optional[Union[int, float]]:
+        """Get maximum value over time window."""
+        if not self.points:
+            return None
+
+        if time_window:
+            cutoff = datetime.now(UTC) - time_window
+            relevant_points = [p for p in self.points if p.timestamp >= cutoff]
+        else:
+            relevant_points = list(self.points)
+
+        if not relevant_points:
+            return None
+
+        return max(p.value for p in relevant_points)
+
+    def get_rate(
+        self, time_window: timedelta = timedelta(minutes=1)
+    ) -> Optional[float]:
+        """Get rate of change over time window."""
+        if len(self.points) < 2:
+            return None
+
+        cutoff = datetime.now(UTC) - time_window
+        relevant_points = [p for p in self.points if p.timestamp >= cutoff]
+
+        if len(relevant_points) < 2:
+            return None
+
+        # Calculate rate as points per second
+        time_span = (
+            relevant_points[-1].timestamp - relevant_points[0].timestamp
+        ).total_seconds()
+        if time_span == 0:
+            return None
+
+        return len(relevant_points) / time_span
+
+
+class MetricsCollector:
+    """Base metrics collector."""
+
+    def __init__(self, max_series: int = 100):
+        """Initialize metrics collector.
+
+        Args:
+            max_series: Maximum number of metric series to track
+        """
+        self.max_series = max_series
+        self._metrics: Dict[str, MetricSeries] = {}
+        self._lock = threading.RLock()
+
+    def create_metric(
+        self, name: str, metric_type: MetricType, description: str, unit: str = ""
+    ) -> MetricSeries:
+        """Create a new metric series.
+
+        Args:
+            name: Metric name
+            metric_type: Type of metric
+            description: Description of what this metric measures
+            unit: Unit of measurement
+
+        Returns:
+            MetricSeries instance
+        """
+        with self._lock:
+            if name in self._metrics:
+                return self._metrics[name]
+
+            if len(self._metrics) >= self.max_series:
+                # Remove oldest metric
+                oldest_metric = min(
+                    self._metrics.values(),
+                    key=lambda m: (
+                        m.points[0].timestamp
+                        if m.points
+                        else datetime.min.replace(tzinfo=UTC)
+                    ),
+                )
+                del self._metrics[oldest_metric.name]
+
+            metric = MetricSeries(
+                name=name, metric_type=metric_type, description=description, unit=unit
+            )
+            self._metrics[name] = metric
+            return metric
+
+    def increment(
+        self,
+        name: str,
+        value: Union[int, float] = 1,
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        """Increment a counter metric."""
+        with self._lock:
+            if name not in self._metrics:
+                self.create_metric(name, MetricType.COUNTER, f"Counter: {name}")
+
+            current_value = self._metrics[name].get_latest_value() or 0
+            self._metrics[name].add_point(current_value + value, labels)
+
+    def set_gauge(
+        self,
+        name: str,
+        value: Union[int, float],
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        """Set a gauge metric value."""
+        with self._lock:
+            if name not in self._metrics:
+                self.create_metric(name, MetricType.GAUGE, f"Gauge: {name}")
+
+            self._metrics[name].add_point(value, labels)
+
+    def record_timer(
+        self, name: str, duration_ms: float, labels: Optional[Dict[str, str]] = None
+    ):
+        """Record a timer metric."""
+        with self._lock:
+            if name not in self._metrics:
+                self.create_metric(
+                    name, MetricType.TIMER, f"Timer: {name}", "milliseconds"
+                )
+
+            self._metrics[name].add_point(duration_ms, labels)
+
+    def record_histogram(
+        self,
+        name: str,
+        value: Union[int, float],
+        labels: Optional[Dict[str, str]] = None,
+    ):
+        """Record a histogram value."""
+        with self._lock:
+            if name not in self._metrics:
+                self.create_metric(name, MetricType.HISTOGRAM, f"Histogram: {name}")
+
+            self._metrics[name].add_point(value, labels)
+
+    def get_metric(self, name: str) -> Optional[MetricSeries]:
+        """Get a metric series by name."""
+        with self._lock:
+            return self._metrics.get(name)
+
+    def get_all_metrics(self) -> Dict[str, MetricSeries]:
+        """Get all metric series."""
+        with self._lock:
+            return self._metrics.copy()
+
+    def clear_metrics(self):
+        """Clear all metrics."""
+        with self._lock:
+            self._metrics.clear()
+
+
+class ValidationMetrics(MetricsCollector):
+    """Metrics collector for validation operations."""
+
+    def __init__(self):
+        """Initialize validation metrics collector."""
+        super().__init__(max_series=50)
+
+        # Initialize core validation metrics
+        self.create_metric(
+            "validation_total", MetricType.COUNTER, "Total validation attempts"
+        )
+        self.create_metric(
+            "validation_success", MetricType.COUNTER, "Successful validations"
+        )
+        self.create_metric(
+            "validation_failure", MetricType.COUNTER, "Failed validations"
+        )
+        self.create_metric(
+            "validation_duration",
+            MetricType.TIMER,
+            "Validation duration",
+            "milliseconds",
+        )
+        self.create_metric(
+            "validation_cache_hits", MetricType.COUNTER, "Validation cache hits"
+        )
+        self.create_metric(
+            "validation_cache_misses", MetricType.COUNTER, "Validation cache misses"
+        )
+
+    def record_validation_attempt(
+        self, node_type: str, success: bool, duration_ms: float, cached: bool = False
+    ):
+        """Record a validation attempt.
+
+        Args:
+            node_type: Type of node being validated
+            success: Whether validation succeeded
+            duration_ms: Validation duration in milliseconds
+            cached: Whether result came from cache
+        """
+        labels = {"node_type": node_type}
+
+        self.increment("validation_total", labels=labels)
+        self.record_timer("validation_duration", duration_ms, labels=labels)
+
+        if success:
+            self.increment("validation_success", labels=labels)
+        else:
+            self.increment("validation_failure", labels=labels)
+
+        if cached:
+            self.increment("validation_cache_hits", labels=labels)
+        else:
+            self.increment("validation_cache_misses", labels=labels)
+
+    def get_success_rate(self, time_window: timedelta = timedelta(hours=1)) -> float:
+        """Get validation success rate over time window."""
+        success_metric = self.get_metric("validation_success")
+        failure_metric = self.get_metric("validation_failure")
+
+        if not success_metric or not failure_metric:
+            return 0.0
+
+        success_count = len(
+            [
+                p
+                for p in success_metric.points
+                if p.timestamp >= datetime.now(UTC) - time_window
+            ]
+        )
+        failure_count = len(
+            [
+                p
+                for p in failure_metric.points
+                if p.timestamp >= datetime.now(UTC) - time_window
+            ]
+        )
+
+        total = success_count + failure_count
+        return success_count / total if total > 0 else 0.0
+
+    def get_cache_hit_rate(self, time_window: timedelta = timedelta(hours=1)) -> float:
+        """Get cache hit rate over time window."""
+        hits_metric = self.get_metric("validation_cache_hits")
+        misses_metric = self.get_metric("validation_cache_misses")
+
+        if not hits_metric or not misses_metric:
+            return 0.0
+
+        hits_count = len(
+            [
+                p
+                for p in hits_metric.points
+                if p.timestamp >= datetime.now(UTC) - time_window
+            ]
+        )
+        misses_count = len(
+            [
+                p
+                for p in misses_metric.points
+                if p.timestamp >= datetime.now(UTC) - time_window
+            ]
+        )
+
+        total = hits_count + misses_count
+        return hits_count / total if total > 0 else 0.0
+
+
+class SecurityMetrics(MetricsCollector):
+    """Metrics collector for security events."""
+
+    def __init__(self):
+        """Initialize security metrics collector."""
+        super().__init__(max_series=30)
+
+        # Initialize core security metrics
+        self.create_metric(
+            "security_violations_total", MetricType.COUNTER, "Total security violations"
+        )
+        self.create_metric(
+            "sql_injection_attempts", MetricType.COUNTER, "SQL injection attempts"
+        )
+        self.create_metric(
+            "code_injection_attempts", MetricType.COUNTER, "Code injection attempts"
+        )
+        self.create_metric(
+            "path_traversal_attempts", MetricType.COUNTER, "Path traversal attempts"
+        )
+        self.create_metric(
+            "credential_exposure_attempts",
+            MetricType.COUNTER,
+            "Credential exposure attempts",
+        )
+        self.create_metric(
+            "blocked_connections", MetricType.COUNTER, "Blocked malicious connections"
+        )
+
+    def record_security_violation(
+        self,
+        violation_type: str,
+        severity: MetricSeverity,
+        source: str,
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        """Record a security violation.
+
+        Args:
+            violation_type: Type of security violation
+            severity: Severity level
+            source: Source of the violation (node, connection, etc.)
+            details: Additional violation details
+        """
+        labels = {
+            "violation_type": violation_type,
+            "severity": severity.value,
+            "source": source,
+        }
+
+        self.increment("security_violations_total", labels=labels)
+
+        # Increment specific violation counters
+        if "sql" in violation_type.lower():
+            self.increment("sql_injection_attempts", labels=labels)
+        elif "code" in violation_type.lower():
+            self.increment("code_injection_attempts", labels=labels)
+        elif "path" in violation_type.lower():
+            self.increment("path_traversal_attempts", labels=labels)
+        elif "credential" in violation_type.lower():
+            self.increment("credential_exposure_attempts", labels=labels)
+
+    def record_blocked_connection(
+        self, source_node: str, target_node: str, reason: str
+    ):
+        """Record a blocked connection.
+
+        Args:
+            source_node: Source node identifier
+            target_node: Target node identifier
+            reason: Reason for blocking
+        """
+        labels = {
+            "source_node": source_node,
+            "target_node": target_node,
+            "reason": reason,
+        }
+
+        self.increment("blocked_connections", labels=labels)
+
+    def get_violation_rate(self, time_window: timedelta = timedelta(hours=1)) -> float:
+        """Get security violation rate per minute."""
+        violations_metric = self.get_metric("security_violations_total")
+
+        if not violations_metric:
+            return 0.0
+
+        return violations_metric.get_rate(time_window) or 0.0
+
+    def get_critical_violations(
+        self, time_window: timedelta = timedelta(hours=1)
+    ) -> int:
+        """Get count of critical violations in time window."""
+        violations_metric = self.get_metric("security_violations_total")
+
+        if not violations_metric:
+            return 0
+
+        cutoff = datetime.now(UTC) - time_window
+        critical_points = [
+            p
+            for p in violations_metric.points
+            if p.timestamp >= cutoff and p.labels.get("severity") == "critical"
+        ]
+
+        return len(critical_points)
+
+
+class PerformanceMetrics(MetricsCollector):
+    """Metrics collector for performance monitoring."""
+
+    def __init__(self):
+        """Initialize performance metrics collector."""
+        super().__init__(max_series=40)
+
+        # Initialize core performance metrics
+        self.create_metric(
+            "response_time", MetricType.TIMER, "Response time", "milliseconds"
+        )
+        self.create_metric("throughput", MetricType.GAUGE, "Requests per second", "rps")
+        self.create_metric("memory_usage", MetricType.GAUGE, "Memory usage", "MB")
+        self.create_metric("cpu_usage", MetricType.GAUGE, "CPU usage", "percent")
+        self.create_metric("error_rate", MetricType.GAUGE, "Error rate", "percent")
+        self.create_metric("slow_operations", MetricType.COUNTER, "Slow operations")
+
+    def record_operation(self, operation: str, duration_ms: float, success: bool):
+        """Record an operation performance.
+
+        Args:
+            operation: Operation name
+            duration_ms: Duration in milliseconds
+            success: Whether operation succeeded
+        """
+        labels = {"operation": operation}
+
+        self.record_timer("response_time", duration_ms, labels=labels)
+
+        if not success:
+            self.increment("error_rate", labels=labels)
+
+        # Record slow operations (>1 second)
+        if duration_ms > 1000:
+            self.increment("slow_operations", labels=labels)
+
+    def update_system_metrics(self, memory_mb: float, cpu_percent: float, rps: float):
+        """Update system-level metrics.
+
+        Args:
+            memory_mb: Memory usage in MB
+            cpu_percent: CPU usage percentage
+            rps: Requests per second
+        """
+        self.set_gauge("memory_usage", memory_mb)
+        self.set_gauge("cpu_usage", cpu_percent)
+        self.set_gauge("throughput", rps)
+
+    def get_p95_response_time(
+        self, time_window: timedelta = timedelta(hours=1)
+    ) -> Optional[float]:
+        """Get 95th percentile response time."""
+        response_time_metric = self.get_metric("response_time")
+
+        if not response_time_metric:
+            return None
+
+        cutoff = datetime.now(UTC) - time_window
+        relevant_points = [
+            p.value for p in response_time_metric.points if p.timestamp >= cutoff
+        ]
+
+        if not relevant_points:
+            return None
+
+        relevant_points.sort()
+        index = int(0.95 * len(relevant_points))
+        return relevant_points[min(index, len(relevant_points) - 1)]
+
+
+class MetricsRegistry:
+    """Global registry for metrics collectors."""
+
+    def __init__(self):
+        """Initialize metrics registry."""
+        self._collectors: Dict[str, MetricsCollector] = {}
+        self._lock = threading.RLock()
+
+    def register_collector(self, name: str, collector: MetricsCollector):
+        """Register a metrics collector.
+
+        Args:
+            name: Collector name
+            collector: MetricsCollector instance
+        """
+        with self._lock:
+            self._collectors[name] = collector
+
+    def get_collector(self, name: str) -> Optional[MetricsCollector]:
+        """Get a metrics collector by name.
+
+        Args:
+            name: Collector name
+
+        Returns:
+            MetricsCollector instance or None
+        """
+        with self._lock:
+            return self._collectors.get(name)
+
+    def get_all_collectors(self) -> Dict[str, MetricsCollector]:
+        """Get all registered collectors."""
+        with self._lock:
+            return self._collectors.copy()
+
+    def export_metrics(self, format: str = "json") -> str:
+        """Export all metrics in specified format.
+
+        Args:
+            format: Export format ("json", "prometheus")
+
+        Returns:
+            Formatted metrics string
+        """
+        with self._lock:
+            if format == "json":
+                return self._export_json()
+            elif format == "prometheus":
+                return self._export_prometheus()
+            else:
+                raise ValueError(f"Unsupported format: {format}")
+
+    def _export_json(self) -> str:
+        """Export metrics as JSON."""
+        export_data = {}
+
+        for collector_name, collector in self._collectors.items():
+            collector_data = {}
+
+            for metric_name, metric_series in collector.get_all_metrics().items():
+                series_data = {
+                    "type": metric_series.metric_type.value,
+                    "description": metric_series.description,
+                    "unit": metric_series.unit,
+                    "latest_value": metric_series.get_latest_value(),
+                    "points": [
+                        {
+                            "timestamp": point.timestamp.isoformat(),
+                            "value": point.value,
+                            "labels": point.labels,
+                        }
+                        for point in list(metric_series.points)[-10:]  # Last 10 points
+                    ],
+                }
+                collector_data[metric_name] = series_data
+
+            export_data[collector_name] = collector_data
+
+        return json.dumps(export_data, indent=2)
+
+    def _export_prometheus(self) -> str:
+        """Export metrics in Prometheus format."""
+        lines = []
+
+        for collector_name, collector in self._collectors.items():
+            for metric_name, metric_series in collector.get_all_metrics().items():
+                # Prometheus metric name
+                prom_name = f"kailash_{collector_name}_{metric_name}"
+
+                # Help text
+                lines.append(f"# HELP {prom_name} {metric_series.description}")
+                lines.append(f"# TYPE {prom_name} {metric_series.metric_type.value}")
+
+                # Latest value with labels
+                latest_point = (
+                    metric_series.points[-1] if metric_series.points else None
+                )
+                if latest_point:
+                    label_str = ""
+                    if latest_point.labels:
+                        label_pairs = [
+                            f'{k}="{v}"' for k, v in latest_point.labels.items()
+                        ]
+                        label_str = "{" + ",".join(label_pairs) + "}"
+
+                    lines.append(f"{prom_name}{label_str} {latest_point.value}")
+
+                lines.append("")  # Empty line between metrics
+
+        return "\n".join(lines)
+
+
+# Global metrics registry
+_global_registry = MetricsRegistry()
+
+# Register default collectors
+_global_registry.register_collector("validation", ValidationMetrics())
+_global_registry.register_collector("security", SecurityMetrics())
+_global_registry.register_collector("performance", PerformanceMetrics())
+
+
+def get_metrics_registry() -> MetricsRegistry:
+    """Get the global metrics registry."""
+    return _global_registry
+
+
+def get_validation_metrics() -> ValidationMetrics:
+    """Get the validation metrics collector."""
+    return _global_registry.get_collector("validation")
+
+
+def get_security_metrics() -> SecurityMetrics:
+    """Get the security metrics collector."""
+    return _global_registry.get_collector("security")
+
+
+def get_performance_metrics() -> PerformanceMetrics:
+    """Get the performance metrics collector."""
+    return _global_registry.get_collector("performance")
kailash/nodes/ai/semantic_memory.py
CHANGED
@@ -9,7 +9,7 @@ import asyncio
 import hashlib
 import json
 from dataclasses import dataclass
-from datetime import datetime
+from datetime import UTC, datetime
 from typing import Any, Dict, List, Optional, Tuple, Union
 from uuid import uuid4
 
@@ -269,7 +269,7 @@ class SemanticMemoryStoreNode(Node):
 
         # Store items
         ids = []
-        now = datetime.
+        now = datetime.now(UTC)
 
         for i, (text, embedding) in enumerate(zip(contents, result.embeddings)):
            item = SemanticMemoryItem(