skillpool 4.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. skillpool/__init__.py +74 -0
  2. skillpool/__main__.py +6 -0
  3. skillpool/adapters/__init__.py +8 -0
  4. skillpool/adapters/base.py +41 -0
  5. skillpool/adapters/claude_adapter.py +36 -0
  6. skillpool/adapters/codex_adapter.py +92 -0
  7. skillpool/adapters/hermes_adapter.py +38 -0
  8. skillpool/audit/__init__.py +651 -0
  9. skillpool/bridge/__init__.py +16 -0
  10. skillpool/bridge/freeze_detector.py +134 -0
  11. skillpool/bridge/maintenance.py +119 -0
  12. skillpool/bridge/wal_manager.py +136 -0
  13. skillpool/clawmem_client.py +176 -0
  14. skillpool/cli.py +700 -0
  15. skillpool/combiner/__init__.py +31 -0
  16. skillpool/combiner/lifecycle.py +453 -0
  17. skillpool/combiner/models.py +99 -0
  18. skillpool/config.py +34 -0
  19. skillpool/cost/__init__.py +111 -0
  20. skillpool/cost/audit_hash.py +51 -0
  21. skillpool/cost/budget_tracker.py +66 -0
  22. skillpool/cost/dashboard.py +189 -0
  23. skillpool/cost/models.py +129 -0
  24. skillpool/cost/token_governor.py +264 -0
  25. skillpool/cost/trace_ceiling.py +38 -0
  26. skillpool/csdf.py +126 -0
  27. skillpool/evolver/__init__.py +978 -0
  28. skillpool/gain/__init__.py +285 -0
  29. skillpool/gate.py +282 -0
  30. skillpool/gate_policy/__init__.py +31 -0
  31. skillpool/gate_policy/incremental.py +157 -0
  32. skillpool/gate_policy/parser.py +258 -0
  33. skillpool/gate_policy/state_machine.py +432 -0
  34. skillpool/graph/__init__.py +14 -0
  35. skillpool/graph/ppr.py +279 -0
  36. skillpool/health/__init__.py +73 -0
  37. skillpool/health/check.py +85 -0
  38. skillpool/health/degradation.py +90 -0
  39. skillpool/health/models.py +43 -0
  40. skillpool/hooks/__init__.py +4 -0
  41. skillpool/hooks/security_scanner.py +288 -0
  42. skillpool/lifecycle.py +150 -0
  43. skillpool/materializer/__init__.py +124 -0
  44. skillpool/materializer/budget_cropper.py +178 -0
  45. skillpool/materializer/csdf_loader.py +114 -0
  46. skillpool/materializer/lazy_loader.py +265 -0
  47. skillpool/materializer/lifecycle_filter.py +93 -0
  48. skillpool/materializer/mapper.py +178 -0
  49. skillpool/materializer/models.py +66 -0
  50. skillpool/mcp_server.py +2005 -0
  51. skillpool/monitor/__init__.py +576 -0
  52. skillpool/monitor/bug_collector.py +392 -0
  53. skillpool/monitor/defect_classifier.py +218 -0
  54. skillpool/monitor/self_healing.py +530 -0
  55. skillpool/monitor/telemetry_bridge.py +197 -0
  56. skillpool/paradigm/__init__.py +312 -0
  57. skillpool/paradigm/override.py +285 -0
  58. skillpool/profile.py +94 -0
  59. skillpool/quality.py +254 -0
  60. skillpool/registry/__init__.py +509 -0
  61. skillpool/registry/models.py +98 -0
  62. skillpool/resolver/__init__.py +320 -0
  63. skillpool/resolver/cache.py +103 -0
  64. skillpool/resolver/circuit_breaker.py +103 -0
  65. skillpool/resolver/conflict_detector.py +111 -0
  66. skillpool/resolver/health_filter.py +38 -0
  67. skillpool/resolver/models.py +154 -0
  68. skillpool/resolver/rate_limiter.py +48 -0
  69. skillpool/resolver/skill_graph.py +183 -0
  70. skillpool/review/__init__.py +242 -0
  71. skillpool/review/async_queue.py +96 -0
  72. skillpool/review/checkpoint_runner.py +345 -0
  73. skillpool/review/models.py +164 -0
  74. skillpool/review/suspect_marker.py +39 -0
  75. skillpool/review/veto_evaluator.py +94 -0
  76. skillpool/router/__init__.py +481 -0
  77. skillpool/schemas.py +119 -0
  78. skillpool/synergy/__init__.py +240 -0
  79. skillpool/synergy/detector.py +5 -0
  80. skillpool/telemetry.py +126 -0
  81. skillpool/utils/__init__.py +21 -0
  82. skillpool/utils/changelog.py +218 -0
  83. skillpool/utils/logger.py +273 -0
  84. skillpool/utils/runtime_audit.py +163 -0
  85. skillpool/utils/time_utils.py +13 -0
  86. skillpool-4.3.0.dist-info/METADATA +21 -0
  87. skillpool-4.3.0.dist-info/RECORD +90 -0
  88. skillpool-4.3.0.dist-info/WHEEL +5 -0
  89. skillpool-4.3.0.dist-info/entry_points.txt +3 -0
  90. skillpool-4.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,96 @@
1
+ """AsyncReviewQueue — in-memory review queue with cooldown tracking."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ import uuid
7
+ from dataclasses import dataclass, field
8
+
9
+ from skillpool.review.models import ReviewStatus, ReviewTriggerRequest
10
+
11
+
12
@dataclass
class _QueueEntry:
    """Bookkeeping record the queue keeps for one submitted review."""

    # Identifier handed back to the submitter.
    review_id: str
    # The request exactly as it was submitted.
    request: ReviewTriggerRequest
    # Lifecycle state; a new entry starts out as QUEUED.
    status: ReviewStatus = ReviewStatus.QUEUED
    # time.time() value taken when the entry is created, unless supplied.
    submitted_at: float = field(default_factory=time.time)
20
+
21
+
22
class AsyncReviewQueue:
    """In-memory review queue with cooldown enforcement.

    - max_concurrent: maximum number of reviews that can be PROCESSING at once
    - cooldown_seconds: minimum time between reviews for the same skill_id

    Cooldown timestamps are recorded when a request is submitted. A skill that
    has never been submitted is never considered to be in cooldown, whatever
    the configured cooldown length is.

    NOTE: entries are only removed by ``clear()``; nothing in this class prunes
    finished reviews.

    Usage:
        queue = AsyncReviewQueue()
        review_id = queue.submit(request)
        status = queue.get_status(review_id)
    """

    def __init__(self, max_concurrent: int = 10, cooldown_seconds: float = 86400.0) -> None:
        """Create an empty queue.

        Args:
            max_concurrent: Limit on entries in PROCESSING state before
                ``submit`` starts refusing new requests.
            cooldown_seconds: Minimum number of seconds between two submissions
                that touch the same skill.
        """
        self.max_concurrent = max_concurrent
        self.cooldown_seconds = cooldown_seconds
        self._entries: dict[str, _QueueEntry] = {}
        self._skill_last_review: dict[str, float] = {}

    def _cooldown_remaining(self, skill_id: str, now: float) -> float:
        """Return the seconds of cooldown left for ``skill_id`` (0.0 when none).

        A missing timestamp means "never reviewed". It is handled explicitly
        rather than through a 0.0 sentinel, which would make every skill look
        like it is in cooldown once ``cooldown_seconds`` exceeds the current
        epoch time (for example ``float("inf")``).
        """
        last = self._skill_last_review.get(skill_id)
        if last is None:
            return 0.0
        return max(self.cooldown_seconds - (now - last), 0.0)

    def submit(self, request: ReviewTriggerRequest) -> str:
        """Submit a review request. Returns the review_id.

        Raises ValueError if any affected skill is still in cooldown.
        Raises RuntimeError if max_concurrent reviews are already processing.
        """
        now = time.time()

        # Reject before touching any state if a single skill is cooling down.
        for skill_id in request.affected_skills:
            remaining = self._cooldown_remaining(skill_id, now)
            if remaining > 0:
                remaining = round(remaining, 1)
                raise ValueError(f"Skill '{skill_id}' is in cooldown ({remaining}s remaining)")

        # Only PROCESSING entries count against the concurrency limit.
        processing_count = sum(1 for e in self._entries.values() if e.status == ReviewStatus.PROCESSING)
        if processing_count >= self.max_concurrent:
            raise RuntimeError(f"Max concurrent reviews ({self.max_concurrent}) reached")

        review_id = uuid.uuid4().hex[:16]
        self._entries[review_id] = _QueueEntry(
            review_id=review_id,
            request=request,
            status=ReviewStatus.QUEUED,
            submitted_at=now,
        )

        # Start the cooldown window for every skill covered by this request.
        for skill_id in request.affected_skills:
            self._skill_last_review[skill_id] = now

        return review_id

    def get_status(self, review_id: str) -> ReviewStatus:
        """Get the current status of a review.

        Raises KeyError if the review_id is unknown.
        """
        entry = self._entries.get(review_id)
        if entry is None:
            raise KeyError(f"Review '{review_id}' not found")
        return entry.status

    def set_status(self, review_id: str, status: ReviewStatus) -> None:
        """Update the status of a review entry.

        Raises KeyError if the review_id is unknown.
        """
        entry = self._entries.get(review_id)
        if entry is None:
            raise KeyError(f"Review '{review_id}' not found")
        entry.status = status

    def is_in_cooldown(self, skill_id: str) -> bool:
        """Check whether a skill is currently in cooldown."""
        return self._cooldown_remaining(skill_id, time.time()) > 0

    def clear(self) -> None:
        """Clear all entries and cooldown tracking (for testing)."""
        self._entries.clear()
        self._skill_last_review.clear()
@@ -0,0 +1,345 @@
1
+ """CheckpointRunner — runs dimension scoring for each checkpoint level."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Callable, Optional
9
+
10
+ import yaml
11
+
12
+ from skillpool.config import get_data_dir
13
+ from skillpool.review.models import CheckpointLevel
14
+
15
logger = logging.getLogger(__name__)

# Dimension groups. Which group a checkpoint level evaluates is decided by
# CheckpointRunner._dimensions_for_level.
SHADOW_DIMENSIONS = (
    "D1",
    "D2",
    "D4",
    "D6",
    "D8",
    "D9",
    "D12",
)
BASELINE_DIMENSIONS = (
    "D3",
    "D5",
    "D7",
    "D10",
    "D11",
)
# Baseline dimensions come first, then the shadow ones.
ALL_DIMENSIONS = BASELINE_DIMENSIONS + SHADOW_DIMENSIONS

# For every dimension, the skill IDs whose CSDF files are scored for it.
DIMENSION_SKILLS: dict[str, list[str]] = {
    "D1": ["S01"],
    "D2": ["S02"],
    "D3": ["S05a", "S05b", "S06"],
    "D4": ["S04"],
    "D5": ["S09", "S10"],
    "D6": ["S07", "S08"],
    "D7": ["S13a", "S13b"],
    "D8": ["S11"],
    "D9": ["S12"],
    "D10": ["S19"],
    "D11": ["S20"],
    "D12": ["S21"],
}

# Top-level keys a CSDF YAML document needs for full field-completeness credit.
REQUIRED_CSDF_FIELDS = {
    "id",
    "name",
    "version",
    "dimension",
    "description",
}
40
+
41
# Where skill YAML files are looked up when the runner is not given a directory.
_DEFAULT_SKILLS_DIR = get_data_dir().joinpath("skills")
43
+
44
+
45
class CheckpointRunner:
    """Runs dimension scoring for a given checkpoint level.

    Every dimension maps to one or more skill IDs (``DIMENSION_SKILLS``). A
    skill is scored from its CSDF YAML file in the skills directory:

    - File existence and YAML parseability
    - Required field completeness
    - Checklist item count and severity distribution
    - Dimension-specific bonuses and penalties for D3, D5, D7, D10 and D11

    Falls back to deterministic hash-based scores when skill files are unavailable.
    """

    def __init__(self, seed: Optional[int] = None, skills_dir: Optional[Path] = None):
        """Create a runner.

        Args:
            seed: Optional seed mixed into fallback scores. ``None`` derives
                the seed from the key being scored; any integer, including 0,
                is used as given.
            skills_dir: Directory searched for ``<skill_id>-*.yaml`` files.
                Defaults to the module-level default skills directory.
        """
        self._base_seed = seed
        self._skills_dir = skills_dir or _DEFAULT_SKILLS_DIR

    def run_checkpoint(
        self,
        level: CheckpointLevel,
        skills: list[str],
    ) -> dict[str, float]:
        """Evaluate dimensions for the given checkpoint level.

        Args:
            level: Checkpoint level; selects the set of dimensions to score.
            skills: Not consulted by the current implementation; the skills
                scored are the ones listed in ``DIMENSION_SKILLS``.
                NOTE(review): confirm whether scoring should be restricted to
                these skills.

        Returns a dict of dimension → score (0.0-10.0).
        """
        return {dim: self._score_dimension(dim) for dim in self._dimensions_for_level(level)}

    def _dimensions_for_level(self, level: CheckpointLevel) -> tuple[str, ...]:
        """Return the dimensions to evaluate for each checkpoint level."""
        if level == CheckpointLevel.L1:
            return SHADOW_DIMENSIONS
        if level == CheckpointLevel.L2:
            return ALL_DIMENSIONS
        if level in (CheckpointLevel.L3, CheckpointLevel.L4):
            return BASELINE_DIMENSIONS
        # Unknown level: evaluate everything.
        return ALL_DIMENSIONS

    def _score_dimension(self, dimension: str) -> float:
        """Score a single dimension as the mean of its skills' scores."""
        skill_ids = DIMENSION_SKILLS.get(dimension, [])
        if not skill_ids:
            return self._fallback_score(dimension)

        scores = [self._score_skill(sid) for sid in skill_ids]
        return round(sum(scores) / len(scores), 2)

    def _score_skill(self, skill_id: str) -> float:
        """Score a single skill based on CSDF file quality.

        Returns the fallback score when no file is found, 3.0 when the file
        cannot be read, decoded or parsed, and 2.0 when the parsed document is
        not a mapping.
        """
        yaml_path = self._find_skill_yaml(skill_id)
        if yaml_path is None:
            return self._fallback_score(skill_id)

        # Reading and parsing are guarded separately so that a file that is not
        # valid UTF-8 gets the low score instead of crashing the whole run.
        try:
            content = yaml_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return 3.0  # Unreadable file → low score
        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError:
            return 3.0  # Unparseable YAML → low score

        if not isinstance(data, dict):
            return 2.0

        # Use the dimension-specific scorer when the skill's dimension has one.
        primary_dim = self._skill_dimension(skill_id)
        dim_scorer = self._DIMENSION_SCORERS.get(primary_dim) if primary_dim else None
        if dim_scorer:
            return dim_scorer(self, data, content)

        # Default: generic quality scoring
        return self._score_generic(data)

    def _skill_dimension(self, skill_id: str) -> Optional[str]:
        """Return the first dimension in ``DIMENSION_SKILLS`` that lists the skill."""
        for dim, skills in DIMENSION_SKILLS.items():
            if skill_id in skills:
                return dim
        return None

    @staticmethod
    def _checklist_items(data: dict) -> list:
        """Return the ``checklist`` value when it is a list, otherwise ``[]``.

        YAML such as ``checklist:`` (null) or ``checklist: 3`` would otherwise
        make the scorers fail while iterating.
        """
        checklist = data.get("checklist")
        return checklist if isinstance(checklist, list) else []

    # ── Dimension-specific scorers ──

    def _score_d3_security(self, data: dict, raw_content: str) -> float:
        """D3 security compliance: SecurityScanner scan + CSDF field quality."""
        score = self._score_generic(data)
        # Bonus: SecurityScanner scan passes. The scan is best-effort: any
        # failure to import or run the scanner only skips the bonus.
        try:
            from skillpool.hooks.security_scanner import SecurityScanner

            scanner = SecurityScanner()
            result = scanner.full_check(raw_content)
            if result.is_safe:
                score += 2.0
            elif result.threat_level.value == "warning":
                score += 0.5
            # CRITICAL means is_safe=False → no bonus
        except Exception as e:
            logger.warning("Security scanner unavailable, skipping scan bonus: %s", e)

        # Bonus: explicit security fields in CSDF
        if data.get("security_scan_required"):
            score += 0.5
        if data.get("veto_rule"):
            score += 0.5

        return min(round(score, 2), 10.0)

    def _score_d5_resilience(self, data: dict, raw_content: str) -> float:
        """D5 resilience / fault tolerance: degradation config + recovery strategy."""
        score = self._score_generic(data)

        # Bonus: degradation/fallback declared
        if data.get("fallback") or data.get("degradation"):
            score += 1.5
        # Bonus: recovery/retry declared
        if data.get("recovery") or data.get("retry_strategy"):
            score += 1.0
        # Bonus: circuit breaker or timeout declared
        if data.get("circuit_breaker") or data.get("timeout"):
            score += 0.5

        # Penalty: no resilience fields at all
        resilience_fields = {
            "fallback",
            "degradation",
            "recovery",
            "retry_strategy",
            "circuit_breaker",
            "timeout",
            "grace_period",
        }
        if not any(k in data for k in resilience_fields):
            # A checklist item mentioning resilience also avoids the penalty.
            keywords = ("fallback", "degrad", "recover", "retry", "timeout")
            has_resilience_item = any(
                isinstance(item, dict) and any(kw in str(item.get("description", "")).lower() for kw in keywords)
                for item in self._checklist_items(data)
            )
            if not has_resilience_item:
                score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    def _score_d7_testability(self, data: dict, raw_content: str) -> float:
        """D7 testability: test coverage + BDD specification."""
        score = self._score_generic(data)

        # Bonus: test_coverage field declared
        coverage = data.get("test_coverage")
        if isinstance(coverage, (int, float)):
            if coverage >= 90:
                score += 2.0
            elif coverage >= 80:
                score += 1.5
            elif coverage >= 70:
                score += 1.0
            else:
                score += 0.5
        elif coverage:
            # Non-numeric but truthy declaration still earns the minimum bonus.
            score += 0.5

        # Bonus: BDD/acceptance criteria in checklist
        bdd_keywords = {"given", "when", "then", "should", "must", "verify"}
        bdd_count = sum(
            1
            for item in self._checklist_items(data)
            if isinstance(item, dict) and any(kw in str(item.get("description", "")).lower() for kw in bdd_keywords)
        )
        if bdd_count >= 3:
            score += 1.5
        elif bdd_count >= 1:
            score += 0.5

        return min(round(score, 2), 10.0)

    def _score_d10_protocol(self, data: dict, raw_content: str) -> float:
        """D10 protocol timeliness: version freshness + deprecation status."""
        import re

        score = self._score_generic(data)

        # Bonus: version starts with a MAJOR.MINOR.PATCH triple
        version = str(data.get("version", ""))
        if re.match(r"\d+\.\d+\.\d+", version):
            score += 0.5

        # Bonus: last_updated or effective_date declared
        if data.get("last_updated") or data.get("effective_date"):
            score += 0.5

        # Penalty: deprecated without replacement
        if data.get("deprecated") and not data.get("replacement"):
            score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    def _score_d11_feasibility(self, data: dict, raw_content: str) -> float:
        """D11 engineering feasibility: dependency completeness + implementation hints."""
        score = self._score_generic(data)

        # Bonus: dependencies declared (even empty = explicitly checked)
        if "dependencies" in data:
            score += 0.5
            deps = data.get("dependencies", [])
            if isinstance(deps, list) and deps:
                score += 0.5  # Has actual dependencies documented

        # Bonus: implementation hints
        if data.get("implementation") or data.get("implementation_notes"):
            score += 1.0

        # Penalty: no dependencies field and no checklist
        if "dependencies" not in data and not data.get("checklist"):
            score -= 1.0

        return min(max(round(score, 2), 0.0), 10.0)

    # Dimension → scorer mapping. Holds the plain functions, so callers pass
    # the instance explicitly: scorer(self, data, raw_content).
    _DIMENSION_SCORERS: dict[str, Callable] = {
        "D3": _score_d3_security,
        "D5": _score_d5_resilience,
        "D7": _score_d7_testability,
        "D10": _score_d10_protocol,
        "D11": _score_d11_feasibility,
    }

    def _score_generic(self, data: dict) -> float:
        """Generic skill quality scoring (field completeness + checklist)."""
        score = 0.0

        # 1. Required field completeness (0-4 points)
        present = REQUIRED_CSDF_FIELDS & set(data.keys())
        field_ratio = len(present) / len(REQUIRED_CSDF_FIELDS) if REQUIRED_CSDF_FIELDS else 1.0
        score += 4.0 * field_ratio

        # 2. Checklist quality: size tier plus a severity bonus
        checklist = self._checklist_items(data)
        if checklist:
            item_count = len(checklist)
            if item_count >= 8:
                score += 3.0
            elif item_count >= 5:
                score += 2.0
            elif item_count >= 2:
                score += 1.0
            else:
                score += 0.5

            severities = [item.get("severity", "") for item in checklist if isinstance(item, dict)]
            score += 0.5 if "critical" in severities else 0.3

        # 3. Description quality (0-1.5 points)
        desc = data.get("description", "")
        if desc and len(str(desc).strip()) > 20:
            score += 1.5
        elif desc:
            score += 0.7

        # 4. Weight/veto declaration (0-1 point)
        if "weight" in data:
            score += 0.5
        if "veto_rule" in data:
            score += 0.5

        return min(round(score, 2), 10.0)

    def _find_skill_yaml(self, skill_id: str) -> Optional[Path]:
        """Find the CSDF YAML file for a skill ID.

        Looks for ``<skill_id>-*.yaml`` directly inside the skills directory.
        Candidates are visited in sorted order so the choice is stable when
        several files match. Returns ``None`` when the directory is missing,
        is not a directory, or holds no matching file.
        """
        if not self._skills_dir.is_dir():
            return None
        prefix = f"{skill_id}-"
        for p in sorted(self._skills_dir.iterdir()):
            if p.name.startswith(prefix) and p.suffix == ".yaml":
                return p
        return None

    def _fallback_score(self, key: str) -> float:
        """Generate a deterministic fallback score when skill files are unavailable.

        The result lies between 5.0 and 10.0 and depends only on ``key`` and
        the configured seed.
        """
        if self._base_seed is not None:
            # An explicit seed of 0 is a valid seed and must not be discarded.
            seed = self._base_seed
        else:
            seed = int(hashlib.sha256(key.encode()).hexdigest()[:8], 16)
        combined = f"{key}:{seed}".encode()
        h = int(hashlib.sha256(combined).hexdigest()[:8], 16)
        return round(5.0 + (h % 5000) / 1000.0, 2)
@@ -0,0 +1,164 @@
1
+ """Review models — Pydantic schemas for multi-dimension review pipeline.
2
+
3
+ Aligned with contracts/sdd/review-trigger-spec.yaml
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from enum import StrEnum
9
+ from typing import Any, Optional
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+
14
+ class ReviewTrigger(StrEnum):
15
+ """What triggered this review."""
16
+
17
+ L3_REGRESSION_FAIL = "l3_regression_fail"
18
+ L4_E2E_FAIL = "l4_e2e_fail"
19
+ L5_ERROR_BUDGET_BURN = "l5_error_budget_burn"
20
+ MANUAL = "manual"
21
+ SCHEDULED_DIMENSION_SCAN = "scheduled_dimension_scan"
22
+
23
+
24
+ class CheckpointLevel(StrEnum):
25
+ """Review checkpoint level — determines which dimensions are evaluated."""
26
+
27
+ L1 = "L1"
28
+ L2 = "L2"
29
+ L3 = "L3"
30
+ L4 = "L4"
31
+
32
+
33
+ # Checkpoint SLA timeouts (per spec)
34
+ CHECKPOINT_SLA_TIMEOUTS: dict[CheckpointLevel, float] = {
35
+ CheckpointLevel.L1: 10.0, # seconds
36
+ CheckpointLevel.L2: 60.0,
37
+ CheckpointLevel.L3: 120.0,
38
+ CheckpointLevel.L4: 300.0,
39
+ }
40
+
41
+
42
+ class VetoRule(StrEnum):
43
+ """Veto rules V1-V6 with human-readable conditions."""
44
+
45
+ V1 = "V1" # D3 < 7.0 → block
46
+ V2 = "V2" # D5 < 7.0 → block
47
+ V3 = "V3" # D7 < 7.5 → block
48
+ V4 = "V4" # D11 < 6.0 → block
49
+ V5 = "V5" # D10 < 5.5 → risk_notice (not block)
50
+ V6 = "V6" # baseline_avg < 7.5 → veto_explanation
51
+
52
+
53
+ class ReviewStatus(StrEnum):
54
+ """Status of a review execution."""
55
+
56
+ COMPLETED = "completed"
57
+ PARTIAL = "partial"
58
+ FAILED = "failed"
59
+ QUEUED = "queued"
60
+ PROCESSING = "processing"
61
+
62
+
63
+ class UpgradeRecommendation(StrEnum):
64
+ """Recommended upgrade action based on review results."""
65
+
66
+ PATCH = "PATCH"
67
+ MINOR = "MINOR"
68
+ MAJOR = "MAJOR"
69
+ NONE = "NONE"
70
+
71
+
72
class FailedTestDetail(BaseModel):
    """One failed test in structured form (per schema)."""

    # Only the test name is required; every other field has an empty default.
    test_name: str
    expected: str = ""
    actual: str = ""
    skill_id: str = ""
    duration_ms: float = 0.0
80
+
81
+
82
class VetoDetail(BaseModel):
    """Outcome of evaluating a single veto rule."""

    # Which rule was evaluated and on which dimension.
    rule: VetoRule
    dimension: str
    # The score that was compared against the rule's threshold.
    score: float
    threshold: float
    blocks: bool = Field(description="Whether this veto blocks admission")
    recommendation: str = Field(default="", description="Action recommendation")
91
+
92
+
93
class SuspectSkill(BaseModel):
    """Record of a skill flagged as suspect during a review."""

    skill_id: str
    # Free-text explanation of why the skill was flagged.
    reason: str
    # Dimension the suspicion relates to; empty when not given.
    suspected_dimension: str = ""
99
+
100
+
101
class BlindSpotFound(BaseModel):
    """A blind spot newly discovered during a review."""

    id: str
    description: str
    dimension: str
    # Severity label; defaults to "P2" when not supplied.
    severity: str = "P2"
108
+
109
+
110
class ReviewTriggerRequest(BaseModel):
    """Request to trigger a multi-dimension review.

    Aligned with contracts/sdd/review-trigger-spec.yaml:
    - trace_id: W3C TraceContext
    - failed_test_details: structured failed-test objects; the flat
      failed_tests list of test IDs is kept for backward compatibility
    - baseline_metrics: before/after metrics for comparison
    - pipeline_url: CI pipeline reference
    """

    trigger: ReviewTrigger
    checkpoint: CheckpointLevel
    # At least one skill must be named.
    affected_skills: list[str] = Field(min_length=1, description="Skill IDs under review")
    failed_tests: Optional[list[str]] = Field(default=None, description="Test IDs that failed (backward compat)")
    failed_test_details: list[FailedTestDetail] = Field(default_factory=list, description="Structured failed test info")
    trace_id: str = Field(default="", description="W3C TraceContext trace_id")
    baseline_metrics: dict[str, float] = Field(
        default_factory=dict, description="Baseline metrics (previous_recall, current_recall, etc.)"
    )
    pipeline_url: str = Field(default="", description="CI pipeline URL for audit")

    def get_all_failed_tests(self) -> list[str]:
        """Return failed test names from the flat and structured fields, without duplicates.

        Flat entries come first; each name keeps the position of its first
        occurrence.
        """
        # A dict serves as an insertion-ordered set of names.
        names: dict[str, None] = dict.fromkeys(self.failed_tests or [])
        for detail in self.failed_test_details:
            names.setdefault(detail.test_name, None)
        return list(names)
137
+
138
+
139
class ReviewTriggerResponse(BaseModel):
    """Result returned for a triggered review.

    Aligned with contracts/sdd/review-trigger-spec.yaml:
    - new_blind_spots: blind spots discovered
    - estimated_cost: token/cost estimate
    - merkle_commit: ClawMem savepoint hash
    - retry_after_seconds: for async polling
    """

    # Identity and state of the review
    review_id: str
    status: ReviewStatus
    checkpoint: CheckpointLevel

    # Scoring outcome
    scores: dict[str, float] = Field(default_factory=dict, description="Dimension → score (D1-D12)")
    veto_triggered: bool = False
    veto_details: list[VetoDetail] = Field(default_factory=list)
    suspect_skills: list[SuspectSkill] = Field(default_factory=list)
    recommendation: UpgradeRecommendation = UpgradeRecommendation.NONE
    duration_ms: float = 0.0

    # Schema-aligned fields
    new_blind_spots: list[BlindSpotFound] = Field(default_factory=list, description="New blind spots discovered")
    estimated_cost: dict[str, Any] = Field(
        default_factory=dict, description="Cost estimate {review_tokens, review_cost_usd}"
    )
    merkle_commit: str = Field(default="", description="ClawMem SAVEPOINT hash")
    retry_after_seconds: float = Field(default=0.0, description="Seconds until async result available (0 = sync)")
@@ -0,0 +1,39 @@
1
+ """SuspectMarker — tracks skills marked as suspect during review."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from skillpool.review.models import SuspectSkill
6
+
7
+
8
class SuspectMarker:
    """In-memory registry of skills flagged as suspect during review.

    At most one record is kept per skill ID; marking the same skill again
    replaces the earlier record.

    Usage:
        marker = SuspectMarker()
        marker.mark("S05a", reason="D3 below threshold", suspected_dimension="D3")
        assert marker.is_suspect("S05a")
        marker.clear()
    """

    def __init__(self) -> None:
        # skill_id → the record created by the latest mark() call for that skill
        self._suspects: dict[str, SuspectSkill] = {}

    def mark(self, skill_id: str, reason: str, suspected_dimension: str = "") -> None:
        """Flag a skill as suspect, recording why and for which dimension."""
        record = SuspectSkill(
            skill_id=skill_id,
            reason=reason,
            suspected_dimension=suspected_dimension,
        )
        self._suspects[skill_id] = record

    def is_suspect(self, skill_id: str) -> bool:
        """Return True when the skill is currently flagged."""
        return skill_id in self._suspects

    def clear(self) -> None:
        """Drop every flagged skill."""
        self._suspects.clear()

    def list_suspects(self) -> list[SuspectSkill]:
        """Return the current records as a new list, in insertion order."""
        return [*self._suspects.values()]