skillpool 4.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skillpool/__init__.py +74 -0
- skillpool/__main__.py +6 -0
- skillpool/adapters/__init__.py +8 -0
- skillpool/adapters/base.py +41 -0
- skillpool/adapters/claude_adapter.py +36 -0
- skillpool/adapters/codex_adapter.py +92 -0
- skillpool/adapters/hermes_adapter.py +38 -0
- skillpool/audit/__init__.py +651 -0
- skillpool/bridge/__init__.py +16 -0
- skillpool/bridge/freeze_detector.py +134 -0
- skillpool/bridge/maintenance.py +119 -0
- skillpool/bridge/wal_manager.py +136 -0
- skillpool/clawmem_client.py +176 -0
- skillpool/cli.py +700 -0
- skillpool/combiner/__init__.py +31 -0
- skillpool/combiner/lifecycle.py +453 -0
- skillpool/combiner/models.py +99 -0
- skillpool/config.py +34 -0
- skillpool/cost/__init__.py +111 -0
- skillpool/cost/audit_hash.py +51 -0
- skillpool/cost/budget_tracker.py +66 -0
- skillpool/cost/dashboard.py +189 -0
- skillpool/cost/models.py +129 -0
- skillpool/cost/token_governor.py +264 -0
- skillpool/cost/trace_ceiling.py +38 -0
- skillpool/csdf.py +126 -0
- skillpool/evolver/__init__.py +978 -0
- skillpool/gain/__init__.py +285 -0
- skillpool/gate.py +282 -0
- skillpool/gate_policy/__init__.py +31 -0
- skillpool/gate_policy/incremental.py +157 -0
- skillpool/gate_policy/parser.py +258 -0
- skillpool/gate_policy/state_machine.py +432 -0
- skillpool/graph/__init__.py +14 -0
- skillpool/graph/ppr.py +279 -0
- skillpool/health/__init__.py +73 -0
- skillpool/health/check.py +85 -0
- skillpool/health/degradation.py +90 -0
- skillpool/health/models.py +43 -0
- skillpool/hooks/__init__.py +4 -0
- skillpool/hooks/security_scanner.py +288 -0
- skillpool/lifecycle.py +150 -0
- skillpool/materializer/__init__.py +124 -0
- skillpool/materializer/budget_cropper.py +178 -0
- skillpool/materializer/csdf_loader.py +114 -0
- skillpool/materializer/lazy_loader.py +265 -0
- skillpool/materializer/lifecycle_filter.py +93 -0
- skillpool/materializer/mapper.py +178 -0
- skillpool/materializer/models.py +66 -0
- skillpool/mcp_server.py +2005 -0
- skillpool/monitor/__init__.py +576 -0
- skillpool/monitor/bug_collector.py +392 -0
- skillpool/monitor/defect_classifier.py +218 -0
- skillpool/monitor/self_healing.py +530 -0
- skillpool/monitor/telemetry_bridge.py +197 -0
- skillpool/paradigm/__init__.py +312 -0
- skillpool/paradigm/override.py +285 -0
- skillpool/profile.py +94 -0
- skillpool/quality.py +254 -0
- skillpool/registry/__init__.py +509 -0
- skillpool/registry/models.py +98 -0
- skillpool/resolver/__init__.py +320 -0
- skillpool/resolver/cache.py +103 -0
- skillpool/resolver/circuit_breaker.py +103 -0
- skillpool/resolver/conflict_detector.py +111 -0
- skillpool/resolver/health_filter.py +38 -0
- skillpool/resolver/models.py +154 -0
- skillpool/resolver/rate_limiter.py +48 -0
- skillpool/resolver/skill_graph.py +183 -0
- skillpool/review/__init__.py +242 -0
- skillpool/review/async_queue.py +96 -0
- skillpool/review/checkpoint_runner.py +345 -0
- skillpool/review/models.py +164 -0
- skillpool/review/suspect_marker.py +39 -0
- skillpool/review/veto_evaluator.py +94 -0
- skillpool/router/__init__.py +481 -0
- skillpool/schemas.py +119 -0
- skillpool/synergy/__init__.py +240 -0
- skillpool/synergy/detector.py +5 -0
- skillpool/telemetry.py +126 -0
- skillpool/utils/__init__.py +21 -0
- skillpool/utils/changelog.py +218 -0
- skillpool/utils/logger.py +273 -0
- skillpool/utils/runtime_audit.py +163 -0
- skillpool/utils/time_utils.py +13 -0
- skillpool-4.3.0.dist-info/METADATA +21 -0
- skillpool-4.3.0.dist-info/RECORD +90 -0
- skillpool-4.3.0.dist-info/WHEEL +5 -0
- skillpool-4.3.0.dist-info/entry_points.txt +3 -0
- skillpool-4.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
"""Monitor Layer — Runtime observability and SLO tracking.
|
|
2
|
+
|
|
3
|
+
Architecture constraint:
|
|
4
|
+
- Monitor collects OTel telemetry and SLO data
|
|
5
|
+
- MUST NOT publish versions or replace Audit
|
|
6
|
+
- Monitor is observation only, no control
|
|
7
|
+
|
|
8
|
+
Open source enhancements:
|
|
9
|
+
- Five-dimension evaluation (SkillNet)
|
|
10
|
+
- Trajectory aggregation G(s) (SkillClaw)
|
|
11
|
+
- PRM scoring support
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
# Public API of the monitor package: the classes defined in this module plus
# the names re-exported by the imports below (bug_collector,
# defect_classifier, self_healing and telemetry_bridge submodules).
__all__ = [
    "Alert",
    "AlertSeverity",
    "BugCollector",
    "BugRecord",
    "BugSeverity",
    "DefectClassifier",
    "DefectType",
    "DefectTypeDetailed",
    "EvaluationLevel",
    "FiveDimensionEvaluation",
    "HealingAction",
    "HealingProposal",
    "HealingStatus",
    "Metric",
    "MetricType",
    "MonitorLayer",
    "SelfHealingLoop",
    "TelemetryBridge",
]
|
|
36
|
+
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from datetime import UTC, datetime
|
|
39
|
+
from enum import StrEnum
|
|
40
|
+
|
|
41
|
+
from skillpool.monitor.telemetry_bridge import TelemetryBridge
|
|
42
|
+
from skillpool.monitor.bug_collector import BugCollector as BugCollector
|
|
43
|
+
from skillpool.monitor.bug_collector import BugRecord, BugSeverity, DefectType
|
|
44
|
+
from skillpool.monitor.defect_classifier import DefectClassifier, DefectType as DefectTypeDetailed
|
|
45
|
+
from skillpool.monitor.self_healing import (
|
|
46
|
+
HealingAction,
|
|
47
|
+
HealingProposal,
|
|
48
|
+
HealingStatus,
|
|
49
|
+
SelfHealingLoop,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class MetricType(StrEnum):
    """Kind of series a recorded metric sample belongs to.

    Members are strings (``StrEnum``), so each one compares equal to its
    lowercase value.
    """

    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class AlertSeverity(StrEnum):
    """Severity attached to a monitoring alert.

    Members are strings (``StrEnum``); the lowercase value is what gets
    passed to the audit layer when an alert is created.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class EvaluationLevel(StrEnum):
    """Qualitative rating assigned to one evaluation dimension.

    Note that, unlike the other enums in this module, the values are
    capitalised ("Good", "Average", "Poor").
    """

    GOOD = "Good"
    AVERAGE = "Average"
    POOR = "Poor"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class FiveDimensionEvaluation:
|
|
80
|
+
"""
|
|
81
|
+
Five-dimension skill evaluation (from SkillNet).
|
|
82
|
+
|
|
83
|
+
Dimensions:
|
|
84
|
+
- Safety: Security and isolation quality
|
|
85
|
+
- Completeness: Feature coverage and documentation
|
|
86
|
+
- Executability: Runtime reliability and performance
|
|
87
|
+
- Maintainability: Code quality and update frequency
|
|
88
|
+
- Cost_awareness: Resource efficiency
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
skill_id: str
|
|
92
|
+
safety: EvaluationLevel
|
|
93
|
+
safety_score: float
|
|
94
|
+
safety_reason: str
|
|
95
|
+
|
|
96
|
+
completeness: EvaluationLevel
|
|
97
|
+
completeness_score: float
|
|
98
|
+
completeness_reason: str
|
|
99
|
+
|
|
100
|
+
executability: EvaluationLevel
|
|
101
|
+
executability_score: float
|
|
102
|
+
executability_reason: str
|
|
103
|
+
|
|
104
|
+
maintainability: EvaluationLevel
|
|
105
|
+
maintainability_score: float
|
|
106
|
+
maintainability_reason: str
|
|
107
|
+
|
|
108
|
+
cost_awareness: EvaluationLevel
|
|
109
|
+
cost_awareness_score: float
|
|
110
|
+
cost_awareness_reason: str
|
|
111
|
+
|
|
112
|
+
overall_score: float = 0.0
|
|
113
|
+
timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
|
114
|
+
|
|
115
|
+
def __post_init__(self) -> None:
|
|
116
|
+
"""Calculate overall score from five dimensions."""
|
|
117
|
+
self.overall_score = (
|
|
118
|
+
self.safety_score * 0.25
|
|
119
|
+
+ self.completeness_score * 0.20
|
|
120
|
+
+ self.executability_score * 0.25
|
|
121
|
+
+ self.maintainability_score * 0.15
|
|
122
|
+
+ self.cost_awareness_score * 0.15
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class Metric:
    """One recorded metric sample: a named value with its type and labels."""

    # Name of the series the sample is stored under.
    name: str
    # Measured value.
    value: float
    # Counter, gauge or histogram.
    metric_type: MetricType
    # Free-form key/value labels; a fresh dict is created per instance.
    labels: dict[str, str] = field(default_factory=dict)
    # Creation time, timezone-aware (UTC).
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass
class Alert:
    """A monitoring alert raised by the monitor layer."""

    # Identifier of the alert (MonitorLayer generates "alert-<n>").
    alert_id: str
    # How serious the alert is.
    severity: AlertSeverity
    # Human-readable description.
    message: str
    # Skill the alert refers to, or None when it is not skill-specific.
    skill_id: str | None = None
    # Free-form key/value labels; a fresh dict is created per instance.
    labels: dict[str, str] = field(default_factory=dict)
    # Creation time, timezone-aware (UTC).
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class MonitorLayer:
|
|
150
|
+
"""
|
|
151
|
+
Monitor layer — runtime observability and SLO tracking.
|
|
152
|
+
|
|
153
|
+
Hard rules:
|
|
154
|
+
- Collects OTel telemetry and SLO data
|
|
155
|
+
- MUST NOT publish versions
|
|
156
|
+
- MUST NOT replace Audit
|
|
157
|
+
- Observation only, no control
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
def __init__(self, audit_layer=None) -> None:
    """Create an empty monitor.

    Args:
        audit_layer: Optional audit sink. It is kept for auditing created
            alerts and is also handed to the internal ``BugCollector``.
    """
    self._audit = audit_layer
    # Recorded samples, grouped by metric name.
    self._metrics: dict[str, list[Metric]] = {}
    # Every alert created so far, in creation order.
    self._alerts: list[Alert] = []
    # SLO targets by key, e.g. "<skill_id>.latency_p99".
    self._slo_targets: dict[str, float] = {}
    # Latest five-dimension evaluation per skill id.
    self._evaluations: dict[str, FiveDimensionEvaluation] = {}
    # Recorded trajectories (with timestamp) per skill id.
    self._trajectories: dict[str, list[dict]] = {}
    # PRM scores per skill id.
    self._prm_scores: dict[str, list[float]] = {}
    # Error-budget bookkeeping per skill id.
    self._error_budgets: dict[str, dict] = {}
    self._bug_collector = BugCollector(audit_layer=audit_layer)
|
|
170
|
+
|
|
171
|
+
def record_metric(
    self,
    name: str,
    value: float,
    metric_type: MetricType = MetricType.GAUGE,
    labels: dict[str, str] | None = None,
) -> None:
    """Append one measurement to the series stored under ``name``.

    Args:
        name: Series name; the series is created on first use.
        value: Measured value.
        metric_type: Kind of metric (gauge when omitted).
        labels: Optional labels; an empty dict is used when none (or an
            empty mapping) is given.
    """
    sample = Metric(
        name=name,
        value=value,
        metric_type=metric_type,
        labels=labels if labels else {},
    )
    # setdefault creates the per-name list the first time a name is seen.
    self._metrics.setdefault(name, []).append(sample)
|
|
189
|
+
|
|
190
|
+
def record_latency(
    self,
    skill_id: str,
    latency_ms: float,
    success: bool,
) -> None:
    """Store one execution-latency sample and alert on an SLO breach.

    The sample is recorded as a histogram metric labelled with the skill id
    and the stringified success flag. If a truthy target is registered under
    ``"<skill_id>.latency_p99"`` and this single sample exceeds it, a
    warning alert is created.

    Args:
        skill_id: Skill that was executed.
        latency_ms: Observed latency in milliseconds.
        success: Whether the execution succeeded.
    """
    sample_labels = {
        "skill_id": skill_id,
        "success": str(success),
    }
    self.record_metric(
        name="skill_execution_latency_ms",
        value=latency_ms,
        metric_type=MetricType.HISTOGRAM,
        labels=sample_labels,
    )

    target_key = f"{skill_id}.latency_p99"
    slo = self._slo_targets.get(target_key)
    # No (or falsy) target, or sample within target: nothing to report.
    if not (slo and latency_ms > slo):
        return

    self._create_alert(
        severity=AlertSeverity.WARNING,
        message=f"Latency SLO breach: {skill_id} latency {latency_ms}ms > SLO {slo}ms",
        skill_id=skill_id,
    )
|
|
214
|
+
|
|
215
|
+
def record_error(
    self,
    skill_id: str,
    error_type: str,
    error_message: str,
) -> None:
    """Count one skill execution error and raise an error alert for it.

    Args:
        skill_id: Skill that failed.
        error_type: Short error category, used as a label.
        error_message: Detail text included in the alert message.
    """
    counter_labels = {
        "skill_id": skill_id,
        "error_type": error_type,
    }
    # Each error is stored as a separate counter sample of value 1.
    self.record_metric(
        name="skill_errors_total",
        value=1,
        metric_type=MetricType.COUNTER,
        labels=counter_labels,
    )

    alert_text = f"Skill error: {skill_id} - {error_type}: {error_message}"
    self._create_alert(
        severity=AlertSeverity.ERROR,
        message=alert_text,
        skill_id=skill_id,
        labels={"error_type": error_type},
    )
|
|
238
|
+
|
|
239
|
+
def record_bug(
    self,
    severity: BugSeverity,
    defect_type: DefectType,
    message: str,
    skill_id: str | None = None,
    context: dict | None = None,
) -> BugRecord:
    """Forward a bug report to the internal ``BugCollector``.

    All arguments are passed through unchanged as keyword arguments.

    Returns:
        Whatever the collector's ``record`` call returns.
    """
    report = {
        "severity": severity,
        "defect_type": defect_type,
        "message": message,
        "skill_id": skill_id,
        "context": context,
    }
    recorded = self._bug_collector.record(**report)
    return recorded
|
|
255
|
+
|
|
256
|
+
def set_slo_target(self, metric_name: str, target: float) -> None:
    """Register (or overwrite) the SLO target stored under ``metric_name``.

    The keys read back by this class have the form
    ``"<skill_id>.latency_p99"`` and ``"<skill_id>.error_rate"``.
    """
    targets = self._slo_targets
    targets[metric_name] = target
|
|
259
|
+
|
|
260
|
+
def check_slo_compliance(self, skill_id: str) -> dict[str, bool]:
    """Report whether a skill currently meets its registered SLO targets.

    Both checks are computed from the samples stored under
    ``"skill_execution_latency_ms"`` whose ``skill_id`` label matches.

    Args:
        skill_id: Skill to check.

    Returns:
        A dict containing only the checks that have a target configured:

        - ``"latency_p99"``: P99 of the recorded latencies is at or below
          the target stored under ``"<skill_id>.latency_p99"``.
        - ``"error_rate"``: share of samples recorded with
          ``success=False`` is at or below the target stored under
          ``"<skill_id>.error_rate"``.

        A configured check with no recorded samples is reported compliant.
    """
    compliance: dict[str, bool] = {}

    all_samples = self._metrics.get("skill_execution_latency_ms", [])
    skill_samples = [m for m in all_samples if m.labels.get("skill_id") == skill_id]

    latency_slo = self._slo_targets.get(f"{skill_id}.latency_p99")
    if latency_slo:
        if skill_samples:
            p99 = self._calculate_p99([m.value for m in skill_samples])
            compliance["latency_p99"] = p99 <= latency_slo
        else:
            compliance["latency_p99"] = True

    error_slo = self._slo_targets.get(f"{skill_id}.error_rate")
    # "is not None" so that a zero-tolerance target (0.0) is still checked.
    if error_slo is not None:
        if skill_samples:
            # record_latency stores the success flag as str(bool).
            failures = sum(1 for m in skill_samples if m.labels.get("success") == "False")
            # NOTE(review): assumes the target is a failure fraction in
            # [0, 1], not a percentage -- confirm against callers.
            compliance["error_rate"] = failures / len(skill_samples) <= error_slo
        else:
            compliance["error_rate"] = True

    return compliance
|
|
280
|
+
|
|
281
|
+
def _calculate_p99(self, values: list[float]) -> float:
|
|
282
|
+
"""Calculate P99 from list of values."""
|
|
283
|
+
if not values:
|
|
284
|
+
return 0.0
|
|
285
|
+
sorted_values = sorted(values)
|
|
286
|
+
index = int(len(sorted_values) * 0.99)
|
|
287
|
+
return sorted_values[min(index, len(sorted_values) - 1)]
|
|
288
|
+
|
|
289
|
+
def _create_alert(
    self,
    severity: AlertSeverity,
    message: str,
    skill_id: str | None = None,
    labels: dict | None = None,
) -> Alert:
    """Create an alert, store it and report it to the audit layer.

    Args:
        severity: Severity of the alert.
        message: Human-readable description.
        skill_id: Skill the alert refers to, if any.
        labels: Optional labels; an empty dict is used when omitted.

    Returns:
        The stored ``Alert``. Its id is ``"alert-<n>"`` where ``n`` is the
        number of stored alerts including this one.
    """
    alert = Alert(
        alert_id=f"alert-{len(self._alerts) + 1}",
        severity=severity,
        message=message,
        skill_id=skill_id,
        labels=labels or {},
    )
    self._alerts.append(alert)

    # Compare against None rather than relying on truthiness: an audit
    # object that evaluates as false (for example because it is still
    # empty) must not be skipped.
    if self._audit is not None:
        self._audit.append(
            action="create_alert",
            result=severity.value,
        )

    return alert
|
|
314
|
+
|
|
315
|
+
def get_alerts(
    self,
    skill_id: str | None = None,
    severity: AlertSeverity | None = None,
) -> list[Alert]:
    """Return stored alerts, optionally filtered.

    Args:
        skill_id: When given (and non-empty), keep only alerts for this
            skill.
        severity: When given, keep only alerts of this severity.

    Returns:
        A new list in creation order. The internal alert list is never
        returned directly, so callers may modify the result freely without
        affecting stored alerts or alert numbering.
    """
    return [
        alert
        for alert in self._alerts
        if (not skill_id or alert.skill_id == skill_id)
        and (not severity or alert.severity == severity)
    ]
|
|
330
|
+
|
|
331
|
+
def get_metrics(self, name: str | None = None) -> dict[str, list[Metric]]:
    """Return recorded metric samples.

    Args:
        name: When given (and non-empty), restrict the result to this
            series.

    Returns:
        Without ``name``, the internal name-to-samples mapping itself.
        With ``name``, a one-entry dict mapping it to its samples (an empty
        list when nothing was recorded under that name).
    """
    if not name:
        return self._metrics

    series = self._metrics.get(name, [])
    return {name: series}
|
|
336
|
+
|
|
337
|
+
# === Five-Dimension Evaluation ===
|
|
338
|
+
|
|
339
|
+
def evaluate_skill(
    self,
    skill_id: str,
    metrics: dict[str, float],
) -> FiveDimensionEvaluation:
    """Perform a five-dimension evaluation of a skill and store it.

    Args:
        skill_id: Skill to evaluate.
        metrics: Input measurements. Keys read (with the default used when
            a key is missing):

            - ``error_rate`` (0): failure fraction
            - ``security_issues`` (0): any positive count halves safety
            - ``coverage`` (0.5) and ``doc_completeness`` (0.5)
            - ``p99_latency_ms`` (1000): scored against a 10000 ms scale
            - ``update_frequency_days`` (30): scored against 90 days
            - ``resource_efficiency`` (0.5)

    Returns:
        The evaluation, which also replaces any previously stored
        evaluation for ``skill_id``.
    """
    # Read every input exactly once so the scores and the reason strings
    # are always derived from the same values (including defaults).
    error_rate = metrics.get("error_rate", 0)
    security_issues = metrics.get("security_issues", 0)
    coverage = metrics.get("coverage", 0.5)
    doc_completeness = metrics.get("doc_completeness", 0.5)
    p99_latency = metrics.get("p99_latency_ms", 1000)
    update_days = metrics.get("update_frequency_days", 30)
    resource_eff = metrics.get("resource_efficiency", 0.5)

    # Safety: the error-rate penalty is capped at 0.5; any security issue
    # halves the remaining score.
    safety_score = 1.0 - min(error_rate, 0.5)
    if security_issues > 0:
        safety_score *= 0.5
    safety_reason = f"Error rate: {error_rate:.2%}, Security issues: {security_issues}"

    # Completeness: equal parts coverage and documentation.
    completeness_score = coverage * 0.5 + doc_completeness * 0.5
    completeness_reason = f"Coverage: {coverage:.2%}, Docs: {doc_completeness:.2%}"

    # Executability: equal parts latency score and success rate.
    latency_score = max(0.0, 1.0 - (p99_latency / 10000))
    success_rate = 1.0 - error_rate
    executability_score = latency_score * 0.5 + success_rate * 0.5
    executability_reason = f"P99 latency: {p99_latency}ms, Success rate: {success_rate:.2%}"

    # Maintainability: decays linearly to zero at 90 days.
    maintain_score = max(0.0, 1.0 - (update_days / 90))
    maintainability_reason = f"Last update: {update_days} days ago"

    # Cost awareness: the resource efficiency is used as-is.
    cost_awareness_score = resource_eff
    cost_awareness_reason = f"Resource efficiency: {resource_eff:.2%}"

    evaluation = FiveDimensionEvaluation(
        skill_id=skill_id,
        safety=self._score_to_level(safety_score),
        safety_score=safety_score,
        safety_reason=safety_reason,
        completeness=self._score_to_level(completeness_score),
        completeness_score=completeness_score,
        completeness_reason=completeness_reason,
        executability=self._score_to_level(executability_score),
        executability_score=executability_score,
        executability_reason=executability_reason,
        maintainability=self._score_to_level(maintain_score),
        maintainability_score=maintain_score,
        maintainability_reason=maintainability_reason,
        cost_awareness=self._score_to_level(cost_awareness_score),
        cost_awareness_score=cost_awareness_score,
        cost_awareness_reason=cost_awareness_reason,
    )

    self._evaluations[skill_id] = evaluation
    return evaluation
|
|
404
|
+
|
|
405
|
+
def _score_to_level(self, score: float) -> EvaluationLevel:
    """Map a numeric score onto a qualitative level.

    Scores of at least 0.7 are Good, at least 0.4 Average, and anything
    else Poor.
    """
    # Thresholds are checked from the highest floor downwards.
    floors = (
        (0.7, EvaluationLevel.GOOD),
        (0.4, EvaluationLevel.AVERAGE),
    )
    for floor, level in floors:
        if score >= floor:
            return level
    return EvaluationLevel.POOR
|
|
413
|
+
|
|
414
|
+
def get_evaluation(self, skill_id: str) -> FiveDimensionEvaluation | None:
    """Return the stored evaluation for ``skill_id``, or None if absent."""
    try:
        return self._evaluations[skill_id]
    except KeyError:
        return None
|
|
417
|
+
|
|
418
|
+
# === Trajectory Aggregation G(s) ===
|
|
419
|
+
|
|
420
|
+
def record_trajectory(
|
|
421
|
+
self,
|
|
422
|
+
skill_id: str,
|
|
423
|
+
trajectory: dict,
|
|
424
|
+
prm_score: float | None = None,
|
|
425
|
+
) -> None:
|
|
426
|
+
"""Record an execution trajectory for aggregation."""
|
|
427
|
+
if skill_id not in self._trajectories:
|
|
428
|
+
self._trajectories[skill_id] = []
|
|
429
|
+
self._trajectories[skill_id].append(
|
|
430
|
+
{
|
|
431
|
+
"trajectory": trajectory,
|
|
432
|
+
"timestamp": datetime.now(UTC).isoformat(),
|
|
433
|
+
}
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
if prm_score is not None:
|
|
437
|
+
if skill_id not in self._prm_scores:
|
|
438
|
+
self._prm_scores[skill_id] = []
|
|
439
|
+
self._prm_scores[skill_id].append(prm_score)
|
|
440
|
+
|
|
441
|
+
def aggregate_trajectories(self, skill_id: str) -> dict:
    """Aggregate the recorded trajectories G(s) of a skill (SkillClaw method).

    A trajectory counts as failed when its payload has a truthy ``"error"``
    entry; its ``"error_type"`` (or ``"unknown"``) is the error pattern.

    Args:
        skill_id: Skill to aggregate.

    Returns:
        When nothing was recorded: ``{"skill_id", "trajectory_count": 0}``.
        Otherwise a dict with ``skill_id``, ``trajectory_count``,
        ``avg_prm_score`` (None without PRM scores), ``success_rate``,
        ``error_patterns`` (distinct patterns in first-seen order) and
        ``error_count``.
    """
    trajectories = self._trajectories.get(skill_id, [])
    if not trajectories:
        return {"skill_id": skill_id, "trajectory_count": 0}

    prm_scores = self._prm_scores.get(skill_id, [])
    avg_prm = sum(prm_scores) / len(prm_scores) if prm_scores else None

    error_types = []
    for record in trajectories:
        payload = record.get("trajectory", {})
        if payload.get("error"):
            error_types.append(payload.get("error_type", "unknown"))

    # trajectories is known to be non-empty here, so the division is safe.
    success_rate = 1.0 - (len(error_types) / len(trajectories))

    return {
        "skill_id": skill_id,
        "trajectory_count": len(trajectories),
        "avg_prm_score": avg_prm,
        "success_rate": success_rate,
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is the same on every run (a set's order is not).
        "error_patterns": list(dict.fromkeys(error_types)),
        "error_count": len(error_types),
    }
|
|
467
|
+
|
|
468
|
+
def get_skill_performance_summary(self, skill_id: str) -> dict:
    """Combine evaluation, trajectory and SLO data for one skill.

    Returns:
        A dict with ``skill_id``, ``five_dimension`` (the stored
        evaluation's attribute dict, or None when there is none),
        ``trajectory_aggregation`` and ``slo_compliance``.
    """
    evaluation = self.get_evaluation(skill_id)
    aggregation = self.aggregate_trajectories(skill_id)
    slo_compliance = self.check_slo_compliance(skill_id)

    summary: dict = {"skill_id": skill_id}
    if evaluation:
        summary["five_dimension"] = evaluation.__dict__
    else:
        summary["five_dimension"] = None
    summary["trajectory_aggregation"] = aggregation
    summary["slo_compliance"] = slo_compliance
    return summary
|
|
480
|
+
|
|
481
|
+
# === Error Budget Policy ===
|
|
482
|
+
|
|
483
|
+
def set_error_budget(self, skill_id: str, slo_target: float, window_days: int = 30) -> None:
    """Create (or reset) the error budget of a skill.

    Args:
        skill_id: Skill to set the budget for.
        slo_target: SLO target as a decimal (e.g. 0.999 for 99.9%).
        window_days: Budget window in days (default 30).
    """
    # The error budget is the share of requests allowed to fail.
    allowed_failure_share = 1.0 - slo_target
    # Request counters start from zero, discarding any earlier bookkeeping.
    self._error_budgets[skill_id] = dict(
        slo_target=slo_target,
        error_budget=allowed_failure_share,
        window_days=window_days,
        errors_consumed=0.0,
        total_requests=0,
        failed_requests=0,
    )
|
|
500
|
+
|
|
501
|
+
def record_budget_request(self, skill_id: str, success: bool) -> None:
    """Count one request against a skill's error budget.

    Does nothing when no budget has been set for ``skill_id``.

    Args:
        skill_id: Skill that handled the request.
        success: Whether the request succeeded.
    """
    entry = self._error_budgets.get(skill_id)
    if entry:
        entry["total_requests"] += 1
        if not success:
            entry["failed_requests"] += 1

        failed = entry["failed_requests"]
        total = entry["total_requests"]
        # errors_consumed holds the observed failure fraction so far.
        entry["errors_consumed"] = failed / max(total, 1)
|
|
510
|
+
|
|
511
|
+
def get_error_budget_status(self, skill_id: str) -> dict | None:
    """Get the error budget status of a skill.

    Args:
        skill_id: Skill to report on.

    Returns:
        None when no budget is set. Otherwise a dict with ``skill_id``,
        ``slo_target``, ``error_budget``, ``consumed_pct``,
        ``remaining_pct``, ``burn_rate``, ``estimated_exhaustion_days``,
        ``total_requests`` and ``failed_requests``. The ``*_pct`` values are
        fractions (1.0 means the whole budget). ``burn_rate`` and
        ``estimated_exhaustion_days`` may be ``float("inf")``.
    """
    budget = self._error_budgets.get(skill_id)
    if not budget:
        return None

    error_budget = budget["error_budget"]
    failed_requests = budget["failed_requests"]
    actual_error_rate = failed_requests / max(budget["total_requests"], 1)

    if error_budget > 0:
        # Both ratios express the failure fraction relative to the budget.
        consumed_pct = budget["errors_consumed"] / error_budget
        burn_rate = actual_error_rate / error_budget
    elif failed_requests > 0:
        # No failures are allowed at all, so the first failure has already
        # exhausted the budget instead of leaving it untouched.
        consumed_pct = 1.0
        burn_rate = float("inf")
    else:
        consumed_pct = 0.0
        burn_rate = 0.0
    remaining_pct = max(0.0, 1.0 - consumed_pct)

    # Estimated exhaustion: remaining share of the window divided by the
    # burn rate; infinite while nothing burns, zero once nothing is left.
    if burn_rate > 0 and remaining_pct > 0:
        estimated_exhaustion_days = (remaining_pct * budget["window_days"]) / burn_rate
    elif remaining_pct > 0:
        estimated_exhaustion_days = float("inf")
    else:
        estimated_exhaustion_days = 0.0

    return {
        "skill_id": skill_id,
        "slo_target": budget["slo_target"],
        "error_budget": error_budget,
        "consumed_pct": round(consumed_pct, 4),
        "remaining_pct": round(remaining_pct, 4),
        "burn_rate": round(burn_rate, 4),
        "estimated_exhaustion_days": round(estimated_exhaustion_days, 1),
        "total_requests": budget["total_requests"],
        "failed_requests": failed_requests,
    }
|
|
546
|
+
|
|
547
|
+
def to_prometheus(self) -> str:
|
|
548
|
+
"""Export metrics in Prometheus exposition format.
|
|
549
|
+
|
|
550
|
+
Returns:
|
|
551
|
+
String in Prometheus text format suitable for /metrics endpoint.
|
|
552
|
+
"""
|
|
553
|
+
lines = []
|
|
554
|
+
for name, metrics in self._metrics.items():
|
|
555
|
+
# Prometheus metric name: replace dots with underscores
|
|
556
|
+
prom_name = f"skillpool_{name.replace('.', '_').replace('-', '_')}"
|
|
557
|
+
# TYPE header
|
|
558
|
+
metric_type_map = {
|
|
559
|
+
MetricType.COUNTER: "counter",
|
|
560
|
+
MetricType.GAUGE: "gauge",
|
|
561
|
+
MetricType.HISTOGRAM: "histogram",
|
|
562
|
+
}
|
|
563
|
+
prom_type = metric_type_map.get(metrics[0].metric_type, "gauge") if metrics else "gauge"
|
|
564
|
+
lines.append(f"# TYPE {prom_name} {prom_type}")
|
|
565
|
+
# Data lines
|
|
566
|
+
for m in metrics:
|
|
567
|
+
if m.labels:
|
|
568
|
+
label_str = ",".join(f'{k}="{v}"' for k, v in sorted(m.labels.items()))
|
|
569
|
+
lines.append(f"{prom_name}{{{label_str}}} {m.value}")
|
|
570
|
+
else:
|
|
571
|
+
lines.append(f"{prom_name} {m.value}")
|
|
572
|
+
# Alerts as gauge
|
|
573
|
+
if self._alerts:
|
|
574
|
+
lines.append("# TYPE skillpool_alerts_total gauge")
|
|
575
|
+
lines.append(f"skillpool_alerts_total {len(self._alerts)}")
|
|
576
|
+
return "\n".join(lines) + "\n"
|