skillpool 4.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skillpool/__init__.py +74 -0
- skillpool/__main__.py +6 -0
- skillpool/adapters/__init__.py +8 -0
- skillpool/adapters/base.py +41 -0
- skillpool/adapters/claude_adapter.py +36 -0
- skillpool/adapters/codex_adapter.py +92 -0
- skillpool/adapters/hermes_adapter.py +38 -0
- skillpool/audit/__init__.py +651 -0
- skillpool/bridge/__init__.py +16 -0
- skillpool/bridge/freeze_detector.py +134 -0
- skillpool/bridge/maintenance.py +119 -0
- skillpool/bridge/wal_manager.py +136 -0
- skillpool/clawmem_client.py +176 -0
- skillpool/cli.py +700 -0
- skillpool/combiner/__init__.py +31 -0
- skillpool/combiner/lifecycle.py +453 -0
- skillpool/combiner/models.py +99 -0
- skillpool/config.py +34 -0
- skillpool/cost/__init__.py +111 -0
- skillpool/cost/audit_hash.py +51 -0
- skillpool/cost/budget_tracker.py +66 -0
- skillpool/cost/dashboard.py +189 -0
- skillpool/cost/models.py +129 -0
- skillpool/cost/token_governor.py +264 -0
- skillpool/cost/trace_ceiling.py +38 -0
- skillpool/csdf.py +126 -0
- skillpool/evolver/__init__.py +978 -0
- skillpool/gain/__init__.py +285 -0
- skillpool/gate.py +282 -0
- skillpool/gate_policy/__init__.py +31 -0
- skillpool/gate_policy/incremental.py +157 -0
- skillpool/gate_policy/parser.py +258 -0
- skillpool/gate_policy/state_machine.py +432 -0
- skillpool/graph/__init__.py +14 -0
- skillpool/graph/ppr.py +279 -0
- skillpool/health/__init__.py +73 -0
- skillpool/health/check.py +85 -0
- skillpool/health/degradation.py +90 -0
- skillpool/health/models.py +43 -0
- skillpool/hooks/__init__.py +4 -0
- skillpool/hooks/security_scanner.py +288 -0
- skillpool/lifecycle.py +150 -0
- skillpool/materializer/__init__.py +124 -0
- skillpool/materializer/budget_cropper.py +178 -0
- skillpool/materializer/csdf_loader.py +114 -0
- skillpool/materializer/lazy_loader.py +265 -0
- skillpool/materializer/lifecycle_filter.py +93 -0
- skillpool/materializer/mapper.py +178 -0
- skillpool/materializer/models.py +66 -0
- skillpool/mcp_server.py +2005 -0
- skillpool/monitor/__init__.py +576 -0
- skillpool/monitor/bug_collector.py +392 -0
- skillpool/monitor/defect_classifier.py +218 -0
- skillpool/monitor/self_healing.py +530 -0
- skillpool/monitor/telemetry_bridge.py +197 -0
- skillpool/paradigm/__init__.py +312 -0
- skillpool/paradigm/override.py +285 -0
- skillpool/profile.py +94 -0
- skillpool/quality.py +254 -0
- skillpool/registry/__init__.py +509 -0
- skillpool/registry/models.py +98 -0
- skillpool/resolver/__init__.py +320 -0
- skillpool/resolver/cache.py +103 -0
- skillpool/resolver/circuit_breaker.py +103 -0
- skillpool/resolver/conflict_detector.py +111 -0
- skillpool/resolver/health_filter.py +38 -0
- skillpool/resolver/models.py +154 -0
- skillpool/resolver/rate_limiter.py +48 -0
- skillpool/resolver/skill_graph.py +183 -0
- skillpool/review/__init__.py +242 -0
- skillpool/review/async_queue.py +96 -0
- skillpool/review/checkpoint_runner.py +345 -0
- skillpool/review/models.py +164 -0
- skillpool/review/suspect_marker.py +39 -0
- skillpool/review/veto_evaluator.py +94 -0
- skillpool/router/__init__.py +481 -0
- skillpool/schemas.py +119 -0
- skillpool/synergy/__init__.py +240 -0
- skillpool/synergy/detector.py +5 -0
- skillpool/telemetry.py +126 -0
- skillpool/utils/__init__.py +21 -0
- skillpool/utils/changelog.py +218 -0
- skillpool/utils/logger.py +273 -0
- skillpool/utils/runtime_audit.py +163 -0
- skillpool/utils/time_utils.py +13 -0
- skillpool-4.3.0.dist-info/METADATA +21 -0
- skillpool-4.3.0.dist-info/RECORD +90 -0
- skillpool-4.3.0.dist-info/WHEEL +5 -0
- skillpool-4.3.0.dist-info/entry_points.txt +3 -0
- skillpool-4.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""AsyncReviewQueue — in-memory review queue with cooldown tracking."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
|
|
9
|
+
from skillpool.review.models import ReviewStatus, ReviewTriggerRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class _QueueEntry:
    """Bookkeeping record the queue keeps for one submitted review."""

    # Identifier returned to the caller by ``AsyncReviewQueue.submit``.
    review_id: str
    # The request object exactly as it was submitted.
    request: ReviewTriggerRequest
    # Current lifecycle state; a new entry starts out as QUEUED.
    status: ReviewStatus = ReviewStatus.QUEUED
    # Wall-clock submission time as returned by ``time.time()``.
    submitted_at: float = field(default_factory=time.time)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AsyncReviewQueue:
    """In-memory review queue with per-skill cooldown enforcement.

    All state lives in two plain dicts on the instance: the submitted
    entries keyed by review_id, and the last submission time per skill_id.

    - max_concurrent: maximum number of reviews that can be PROCESSING at once
    - cooldown_seconds: minimum time between reviews for the same skill_id

    Usage:
        queue = AsyncReviewQueue()
        review_id = queue.submit(request)
        status = queue.get_status(review_id)
    """

    def __init__(self, max_concurrent: int = 10, cooldown_seconds: float = 86400.0) -> None:
        self.max_concurrent = max_concurrent
        self.cooldown_seconds = cooldown_seconds
        self._entries: dict[str, _QueueEntry] = {}
        self._skill_last_review: dict[str, float] = {}

    def _require_entry(self, review_id: str) -> _QueueEntry:
        """Return the entry for ``review_id`` or raise KeyError if unknown."""
        entry = self._entries.get(review_id)
        if entry is None:
            raise KeyError(f"Review '{review_id}' not found")
        return entry

    def submit(self, request: ReviewTriggerRequest) -> str:
        """Submit a review request and return its review_id.

        The cooldown timestamp of every affected skill is set to the
        submission time, so the cooldown starts at submit, not at completion.

        Raises ValueError if any affected skill is still in cooldown.
        Raises RuntimeError if max_concurrent reviews are already processing.
        """
        now = time.time()

        # Check cooldown for all affected skills. A skill with no recorded
        # submission is never in cooldown, whatever cooldown_seconds is.
        for skill_id in request.affected_skills:
            last = self._skill_last_review.get(skill_id)
            if last is not None and now - last < self.cooldown_seconds:
                remaining = round(self.cooldown_seconds - (now - last), 1)
                raise ValueError(f"Skill '{skill_id}' is in cooldown ({remaining}s remaining)")

        # Only entries currently PROCESSING count against the limit;
        # QUEUED entries do not.
        processing_count = sum(1 for e in self._entries.values() if e.status == ReviewStatus.PROCESSING)
        if processing_count >= self.max_concurrent:
            raise RuntimeError(f"Max concurrent reviews ({self.max_concurrent}) reached")

        review_id = uuid.uuid4().hex[:16]
        self._entries[review_id] = _QueueEntry(
            review_id=review_id,
            request=request,
            status=ReviewStatus.QUEUED,
            submitted_at=now,
        )

        # Mark skill cooldown timestamps
        for skill_id in request.affected_skills:
            self._skill_last_review[skill_id] = now

        return review_id

    def get_status(self, review_id: str) -> ReviewStatus:
        """Return the current status of a review.

        Raises KeyError if the review_id is unknown.
        """
        return self._require_entry(review_id).status

    def set_status(self, review_id: str, status: ReviewStatus) -> None:
        """Update the status of a review entry.

        Raises KeyError if the review_id is unknown.
        """
        self._require_entry(review_id).status = status

    def is_in_cooldown(self, skill_id: str) -> bool:
        """Return True if ``skill_id`` was submitted less than cooldown_seconds ago."""
        last = self._skill_last_review.get(skill_id)
        if last is None:
            # Never submitted: not in cooldown (previously treated as epoch 0).
            return False
        return (time.time() - last) < self.cooldown_seconds

    def clear(self) -> None:
        """Clear all entries and cooldown tracking (for testing)."""
        self._entries.clear()
        self._skill_last_review.clear()
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
"""CheckpointRunner — runs dimension scoring for each checkpoint level."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import logging
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Callable, Optional
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
from skillpool.config import get_data_dir
|
|
13
|
+
from skillpool.review.models import CheckpointLevel
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)

# Dimension groups that the checkpoint levels choose from.
SHADOW_DIMENSIONS = ("D1", "D2", "D4", "D6", "D8", "D9", "D12")
BASELINE_DIMENSIONS = ("D3", "D5", "D7", "D10", "D11")
# Baseline dimensions first, then the shadow dimensions.
ALL_DIMENSIONS = (*BASELINE_DIMENSIONS, *SHADOW_DIMENSIONS)

# Dimension -> skill IDs whose files are inspected to score that dimension.
DIMENSION_SKILLS: dict[str, list[str]] = {
    "D1": ["S01"],
    "D2": ["S02"],
    "D3": ["S05a", "S05b", "S06"],
    "D4": ["S04"],
    "D5": ["S09", "S10"],
    "D6": ["S07", "S08"],
    "D7": ["S13a", "S13b"],
    "D8": ["S11"],
    "D9": ["S12"],
    "D10": ["S19"],
    "D11": ["S20"],
    "D12": ["S21"],
}

# Top-level keys a CSDF YAML document needs for full field-completeness credit.
REQUIRED_CSDF_FIELDS = {"id", "name", "version", "dimension", "description"}
|
|
40
|
+
|
|
41
|
+
# Default skills directory: the "skills" folder under the data dir.
# Evaluated once, when this module is imported.
_DEFAULT_SKILLS_DIR = get_data_dir() / "skills"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class CheckpointRunner:
    """Runs dimension scoring for a given checkpoint level.

    Each dimension maps (via ``DIMENSION_SKILLS``) to one or more skill IDs.
    A skill is scored by inspecting its CSDF YAML file in ``skills_dir``:
    - File existence and YAML parseability
    - Required field completeness
    - Checklist item count and severity distribution
    - Dimension-specific bonuses/penalties for D3, D5, D7, D10 and D11

    Falls back to deterministic hash-based scores (5.0-9.999) when skill
    files are unavailable.

    Args:
        seed: optional seed mixed into fallback scores. ``None`` derives the
            seed from the key being scored; ``0`` is a valid explicit seed.
        skills_dir: directory holding ``<skill_id>-*.yaml`` files. Defaults
            to ``_DEFAULT_SKILLS_DIR``.
    """

    # Top-level CSDF keys that count as an explicit resilience declaration.
    _RESILIENCE_FIELDS = frozenset(
        {
            "fallback",
            "degradation",
            "recovery",
            "retry_strategy",
            "circuit_breaker",
            "timeout",
            "grace_period",
        }
    )
    # Substrings looked for in checklist descriptions by the D5 scorer.
    _RESILIENCE_KEYWORDS = ("fallback", "degrad", "recover", "retry", "timeout")
    # Substrings looked for in checklist descriptions by the D7 scorer.
    _BDD_KEYWORDS = ("given", "when", "then", "should", "must", "verify")

    def __init__(self, seed: Optional[int] = None, skills_dir: Optional[Path] = None):
        self._base_seed = seed
        self._skills_dir = skills_dir or _DEFAULT_SKILLS_DIR

    def run_checkpoint(
        self,
        level: CheckpointLevel,
        skills: list[str],
    ) -> dict[str, float]:
        """Evaluate dimensions for the given checkpoint level.

        ``skills`` is accepted for interface compatibility but is not used:
        the skills inspected are always those listed in ``DIMENSION_SKILLS``
        for each dimension of the level.

        Returns a dict of dimension → score (0.0-10.0).
        """
        return {dim: self._score_dimension(dim) for dim in self._dimensions_for_level(level)}

    def _dimensions_for_level(self, level: CheckpointLevel) -> tuple[str, ...]:
        """Return the dimensions to evaluate for each checkpoint level.

        L1 → shadow dimensions, L3/L4 → baseline dimensions, L2 and any
        unrecognised level → all dimensions.
        """
        if level == CheckpointLevel.L1:
            return SHADOW_DIMENSIONS
        if level == CheckpointLevel.L3 or level == CheckpointLevel.L4:
            return BASELINE_DIMENSIONS
        return ALL_DIMENSIONS

    def _score_dimension(self, dimension: str) -> float:
        """Score a dimension as the mean of its skills' scores (2 decimals).

        Dimensions with no associated skills get the fallback score.
        """
        skill_ids = DIMENSION_SKILLS.get(dimension, [])
        if not skill_ids:
            return self._fallback_score(dimension)

        scores = [self._score_skill(sid) for sid in skill_ids]
        return round(sum(scores) / len(scores), 2)

    def _score_skill(self, skill_id: str) -> float:
        """Score a single skill based on CSDF file quality.

        Returns the fallback score when no file is found, 3.0 when the file
        cannot be read or parsed, and 2.0 when the YAML root is not a mapping.
        """
        yaml_path = self._find_skill_yaml(skill_id)
        if yaml_path is None:
            return self._fallback_score(skill_id)

        try:
            content = yaml_path.read_text(encoding="utf-8")
            data = yaml.safe_load(content)
        except (yaml.YAMLError, OSError, UnicodeDecodeError):
            # A file that is not valid UTF-8 is as unusable as broken YAML;
            # score it low instead of aborting the whole checkpoint.
            return 3.0

        if not isinstance(data, dict):
            return 2.0

        # Use the dimension-specific scorer when one is registered for the
        # dimension this skill belongs to; otherwise score generically.
        primary_dim = self._skill_dimension(skill_id)
        if primary_dim:
            dim_scorer = self._DIMENSION_SCORERS.get(primary_dim)
            if dim_scorer:
                # The registry holds plain functions, so pass self explicitly.
                return dim_scorer(self, data, content)

        return self._score_generic(data)

    def _skill_dimension(self, skill_id: str) -> Optional[str]:
        """Return the first dimension in DIMENSION_SKILLS listing ``skill_id``."""
        for dim, skills in DIMENSION_SKILLS.items():
            if skill_id in skills:
                return dim
        return None

    @staticmethod
    def _checklist_items(data: dict) -> list:
        """Return ``data["checklist"]`` if it is a list, otherwise ``[]``.

        YAML such as ``checklist:`` (null) or ``checklist: 3`` would otherwise
        make the scorers fail when they iterate over the value.
        """
        checklist = data.get("checklist")
        return checklist if isinstance(checklist, list) else []

    @staticmethod
    def _description_mentions(item: object, keywords: tuple[str, ...]) -> bool:
        """Return True if a checklist item's description contains any keyword."""
        if not isinstance(item, dict):
            return False
        description = str(item.get("description", "")).lower()
        return any(kw in description for kw in keywords)

    # ── Dimension-specific scorers ──

    def _score_d3_security(self, data: dict, raw_content: str) -> float:
        """D3 security compliance: SecurityScanner scan + CSDF field quality."""
        score = self._score_generic(data)

        # Bonus: SecurityScanner scan passes. The scan is best effort: any
        # failure to import or run the scanner only skips the bonus.
        try:
            from skillpool.hooks.security_scanner import SecurityScanner

            result = SecurityScanner().full_check(raw_content)
            if result.is_safe:
                score += 2.0
            elif result.threat_level.value == "warning":
                score += 0.5
            # CRITICAL means is_safe=False → no bonus
        except Exception as e:
            logger.warning("Security scanner unavailable, skipping scan bonus: %s", e)

        # Bonus: explicit security fields in CSDF
        if data.get("security_scan_required"):
            score += 0.5
        if data.get("veto_rule"):
            score += 0.5

        return min(round(score, 2), 10.0)

    def _score_d5_resilience(self, data: dict, raw_content: str) -> float:
        """D5 resilience / fault tolerance: degradation config + recovery strategy."""
        score = self._score_generic(data)

        # Bonus: degradation/fallback declared
        if data.get("fallback") or data.get("degradation"):
            score += 1.5
        # Bonus: recovery/retry declared
        if data.get("recovery") or data.get("retry_strategy"):
            score += 1.0
        # Bonus: circuit breaker or timeout declared
        if data.get("circuit_breaker") or data.get("timeout"):
            score += 0.5

        # Penalty: no resilience field at all AND no checklist item whose
        # description mentions a resilience keyword.
        if not any(k in data for k in self._RESILIENCE_FIELDS):
            has_resilience_item = any(
                self._description_mentions(item, self._RESILIENCE_KEYWORDS)
                for item in self._checklist_items(data)
            )
            if not has_resilience_item:
                score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    def _score_d7_testability(self, data: dict, raw_content: str) -> float:
        """D7 testability: test coverage + BDD specification."""
        score = self._score_generic(data)

        # Bonus: test_coverage field declared. Numeric values are tiered;
        # any other truthy value earns the minimum bonus.
        coverage = data.get("test_coverage")
        if isinstance(coverage, (int, float)):
            if coverage >= 90:
                score += 2.0
            elif coverage >= 80:
                score += 1.5
            elif coverage >= 70:
                score += 1.0
            else:
                score += 0.5
        elif coverage:
            score += 0.5

        # Bonus: BDD/acceptance criteria in checklist
        bdd_count = sum(
            1 for item in self._checklist_items(data) if self._description_mentions(item, self._BDD_KEYWORDS)
        )
        if bdd_count >= 3:
            score += 1.5
        elif bdd_count >= 1:
            score += 0.5

        return min(round(score, 2), 10.0)

    def _score_d10_protocol(self, data: dict, raw_content: str) -> float:
        """D10 protocol freshness: version format + deprecation status."""
        import re

        score = self._score_generic(data)

        # Bonus: version starts with MAJOR.MINOR.PATCH digits (a suffix such
        # as a pre-release tag is allowed because only the prefix is matched).
        version = str(data.get("version", ""))
        if re.match(r"\d+\.\d+\.\d+", version):
            score += 0.5

        # Bonus: last_updated or effective_date declared
        if data.get("last_updated") or data.get("effective_date"):
            score += 0.5

        # Penalty: deprecated without replacement
        if data.get("deprecated") and not data.get("replacement"):
            score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    def _score_d11_feasibility(self, data: dict, raw_content: str) -> float:
        """D11 engineering feasibility: dependency completeness + implementation hints."""
        score = self._score_generic(data)

        # Bonus: dependencies declared (even empty = explicitly checked)
        if "dependencies" in data:
            score += 0.5
            deps = data.get("dependencies", [])
            if isinstance(deps, list) and deps:
                score += 0.5  # Has actual dependencies documented

        # Bonus: implementation hints
        if data.get("implementation") or data.get("implementation_notes"):
            score += 1.0

        # Penalty: no dependencies field and no checklist
        if "dependencies" not in data and not data.get("checklist"):
            score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    # Dimension → scorer mapping (plain functions, called with self explicitly)
    _DIMENSION_SCORERS: dict[str, Callable] = {
        "D3": _score_d3_security,
        "D5": _score_d5_resilience,
        "D7": _score_d7_testability,
        "D10": _score_d10_protocol,
        "D11": _score_d11_feasibility,
    }

    def _score_generic(self, data: dict) -> float:
        """Generic skill quality scoring (field completeness + checklist).

        The four components below add up to at most 10.0.
        """
        score = 0.0

        # 1. Required field completeness (0-4 points)
        present = REQUIRED_CSDF_FIELDS & set(data.keys())
        field_ratio = len(present) / len(REQUIRED_CSDF_FIELDS) if REQUIRED_CSDF_FIELDS else 1.0
        score += 4.0 * field_ratio

        # 2. Checklist quality (0-3.5 points: size tier + severity bonus)
        checklist = self._checklist_items(data)
        if checklist:
            item_count = len(checklist)
            if item_count >= 8:
                score += 3.0
            elif item_count >= 5:
                score += 2.0
            elif item_count >= 2:
                score += 1.0
            else:
                score += 0.5

            severities = [item.get("severity", "") for item in checklist if isinstance(item, dict)]
            if "critical" in severities:
                score += 0.5
            else:
                score += 0.3

        # 3. Description quality (0-1.5 points)
        desc = data.get("description", "")
        if desc and len(str(desc).strip()) > 20:
            score += 1.5
        elif desc:
            score += 0.7

        # 4. Weight/veto declaration (0-1 point)
        if "weight" in data:
            score += 0.5
        if "veto_rule" in data:
            score += 0.5

        return min(round(score, 2), 10.0)

    def _find_skill_yaml(self, skill_id: str) -> Optional[Path]:
        """Find the CSDF YAML file for a skill ID.

        Looks for ``<skill_id>-*.yaml`` directly inside the skills directory.
        When several files match, the one that sorts first is returned so the
        choice does not depend on directory listing order. Returns None when
        the directory is missing, is not a directory, or has no match.
        """
        if not self._skills_dir.is_dir():
            return None
        prefix = f"{skill_id}-"
        candidates = (
            p for p in self._skills_dir.iterdir() if p.suffix == ".yaml" and p.name.startswith(prefix)
        )
        return min(candidates, default=None)

    def _fallback_score(self, key: str) -> float:
        """Generate a deterministic fallback score when skill files are unavailable.

        The score is ``5.0 + (hash % 5000) / 1000`` where the hash is taken
        over ``"<key>:<seed>"``, giving a value between 5.0 and 9.999.
        """
        if self._base_seed is None:
            seed = int(hashlib.sha256(key.encode()).hexdigest()[:8], 16)
        else:
            # Compare against None so an explicit seed of 0 is honoured.
            seed = self._base_seed
        combined = f"{key}:{seed}".encode()
        h = int(hashlib.sha256(combined).hexdigest()[:8], 16)
        return round(5.0 + (h % 5000) / 1000.0, 2)
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Review models — Pydantic schemas for multi-dimension review pipeline.
|
|
2
|
+
|
|
3
|
+
Aligned with contracts/sdd/review-trigger-spec.yaml
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
from typing import Any, Optional
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ReviewTrigger(StrEnum):
|
|
15
|
+
"""What triggered this review."""
|
|
16
|
+
|
|
17
|
+
L3_REGRESSION_FAIL = "l3_regression_fail"
|
|
18
|
+
L4_E2E_FAIL = "l4_e2e_fail"
|
|
19
|
+
L5_ERROR_BUDGET_BURN = "l5_error_budget_burn"
|
|
20
|
+
MANUAL = "manual"
|
|
21
|
+
SCHEDULED_DIMENSION_SCAN = "scheduled_dimension_scan"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CheckpointLevel(StrEnum):
|
|
25
|
+
"""Review checkpoint level — determines which dimensions are evaluated."""
|
|
26
|
+
|
|
27
|
+
L1 = "L1"
|
|
28
|
+
L2 = "L2"
|
|
29
|
+
L3 = "L3"
|
|
30
|
+
L4 = "L4"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Checkpoint SLA timeouts (per spec), in seconds, keyed by checkpoint level.
CHECKPOINT_SLA_TIMEOUTS: dict[CheckpointLevel, float] = {
    CheckpointLevel.L1: 10.0,
    CheckpointLevel.L2: 60.0,
    CheckpointLevel.L3: 120.0,
    CheckpointLevel.L4: 300.0,
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class VetoRule(StrEnum):
|
|
43
|
+
"""Veto rules V1-V6 with human-readable conditions."""
|
|
44
|
+
|
|
45
|
+
V1 = "V1" # D3 < 7.0 → block
|
|
46
|
+
V2 = "V2" # D5 < 7.0 → block
|
|
47
|
+
V3 = "V3" # D7 < 7.5 → block
|
|
48
|
+
V4 = "V4" # D11 < 6.0 → block
|
|
49
|
+
V5 = "V5" # D10 < 5.5 → risk_notice (not block)
|
|
50
|
+
V6 = "V6" # baseline_avg < 7.5 → veto_explanation
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ReviewStatus(StrEnum):
|
|
54
|
+
"""Status of a review execution."""
|
|
55
|
+
|
|
56
|
+
COMPLETED = "completed"
|
|
57
|
+
PARTIAL = "partial"
|
|
58
|
+
FAILED = "failed"
|
|
59
|
+
QUEUED = "queued"
|
|
60
|
+
PROCESSING = "processing"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class UpgradeRecommendation(StrEnum):
|
|
64
|
+
"""Recommended upgrade action based on review results."""
|
|
65
|
+
|
|
66
|
+
PATCH = "PATCH"
|
|
67
|
+
MINOR = "MINOR"
|
|
68
|
+
MAJOR = "MAJOR"
|
|
69
|
+
NONE = "NONE"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class FailedTestDetail(BaseModel):
    """Structured failed test information (per schema)."""

    # Only the test name is required; every other field has a default.
    test_name: str
    expected: str = ""
    actual: str = ""
    skill_id: str = ""
    # Duration in milliseconds, per the field name.
    duration_ms: float = 0.0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class VetoDetail(BaseModel):
    """Detail of a single veto rule evaluation."""

    # Which rule was evaluated, and the dimension it was evaluated against.
    rule: VetoRule
    dimension: str
    # Observed score and the threshold it was compared with.
    score: float
    threshold: float
    # Required: the caller must state explicitly whether the veto blocks.
    blocks: bool = Field(description="Whether this veto blocks admission")
    recommendation: str = Field(default="", description="Action recommendation")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class SuspectSkill(BaseModel):
    """A skill marked as suspect during review."""

    # Skill ID and the reason it was flagged are both required.
    skill_id: str
    reason: str
    # Optional dimension the suspicion relates to; empty when not given.
    suspected_dimension: str = ""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class BlindSpotFound(BaseModel):
    """New blind spot discovered during review."""

    # ``id`` shadows the builtin, but it is a schema field name and is kept.
    id: str
    description: str
    dimension: str
    # Severity label; defaults to "P2" when not supplied.
    severity: str = "P2"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class ReviewTriggerRequest(BaseModel):
    """Request to trigger a multi-dimension review.

    Aligned with contracts/sdd/review-trigger-spec.yaml:
    - trace_id: W3C TraceContext
    - failed_tests: structured objects (not flat strings)
    - baseline_metrics: before/after metrics for comparison
    - pipeline_url: CI pipeline reference
    """

    trigger: ReviewTrigger
    checkpoint: CheckpointLevel
    # At least one skill ID is required (min_length=1).
    affected_skills: list[str] = Field(min_length=1, description="Skill IDs under review")
    # Two sources of failed tests exist side by side: ``failed_tests`` holds
    # flat test IDs (kept for backward compatibility) and
    # ``failed_test_details`` holds the structured objects.
    failed_tests: Optional[list[str]] = Field(default=None, description="Test IDs that failed (backward compat)")
    failed_test_details: list[FailedTestDetail] = Field(default_factory=list, description="Structured failed test info")
    trace_id: str = Field(default="", description="W3C TraceContext trace_id")
    baseline_metrics: dict[str, float] = Field(
        default_factory=dict,
        description="Baseline metrics (previous_recall, current_recall, etc.)",
    )
    pipeline_url: str = Field(default="", description="CI pipeline URL for audit")

    def get_all_failed_tests(self) -> list[str]:
        """Get all failed test names (from both flat and structured sources)."""
        names = list(self.failed_tests or [])
        names.extend(detail.test_name for detail in self.failed_test_details)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(names))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ReviewTriggerResponse(BaseModel):
    """Response from a review trigger execution.

    Aligned with contracts/sdd/review-trigger-spec.yaml:
    - new_blind_spots: blind spots discovered
    - estimated_cost: token/cost estimate
    - merkle_commit: ClawMem savepoint hash
    - retry_after_seconds: for async polling
    """

    # Required identification of the review and its outcome.
    review_id: str
    status: ReviewStatus
    checkpoint: CheckpointLevel

    # Scoring results; every collection defaults to empty.
    scores: dict[str, float] = Field(default_factory=dict, description="Dimension → score (D1-D12)")
    veto_triggered: bool = False
    veto_details: list[VetoDetail] = Field(default_factory=list)
    suspect_skills: list[SuspectSkill] = Field(default_factory=list)
    recommendation: UpgradeRecommendation = UpgradeRecommendation.NONE
    duration_ms: float = 0.0

    # Schema-aligned fields
    new_blind_spots: list[BlindSpotFound] = Field(default_factory=list, description="New blind spots discovered")
    estimated_cost: dict[str, Any] = Field(
        default_factory=dict,
        description="Cost estimate {review_tokens, review_cost_usd}",
    )
    merkle_commit: str = Field(default="", description="ClawMem SAVEPOINT hash")
    retry_after_seconds: float = Field(default=0.0, description="Seconds until async result available (0 = sync)")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""SuspectMarker — tracks skills marked as suspect during review."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from skillpool.review.models import SuspectSkill
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SuspectMarker:
    """Registry of skills flagged as suspect while a review runs.

    Records are keyed by skill ID, so marking the same skill again replaces
    the earlier record.

    Usage:
        marker = SuspectMarker()
        marker.mark("S05a", reason="D3 below threshold", suspected_dimension="D3")
        assert marker.is_suspect("S05a")
        marker.clear()
    """

    def __init__(self) -> None:
        self._suspects: dict[str, SuspectSkill] = {}

    def mark(self, skill_id: str, reason: str, suspected_dimension: str = "") -> None:
        """Record ``skill_id`` as suspect, overwriting any previous record."""
        record = SuspectSkill(
            skill_id=skill_id,
            reason=reason,
            suspected_dimension=suspected_dimension,
        )
        self._suspects[skill_id] = record

    def is_suspect(self, skill_id: str) -> bool:
        """Return True if ``skill_id`` currently has a suspect record."""
        return skill_id in self._suspects

    def clear(self) -> None:
        """Drop every suspect record."""
        self._suspects.clear()

    def list_suspects(self) -> list[SuspectSkill]:
        """Return a new list of the current suspect records, in insertion order."""
        return [*self._suspects.values()]
|