devsquad 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsquad-3.6.0.dist-info/METADATA +944 -0
- devsquad-3.6.0.dist-info/RECORD +95 -0
- devsquad-3.6.0.dist-info/WHEEL +5 -0
- devsquad-3.6.0.dist-info/entry_points.txt +2 -0
- devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
- devsquad-3.6.0.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/ai_semantic_matcher.py +512 -0
- scripts/alert_manager.py +505 -0
- scripts/api/__init__.py +43 -0
- scripts/api/models.py +386 -0
- scripts/api/routes/__init__.py +20 -0
- scripts/api/routes/dispatch.py +348 -0
- scripts/api/routes/lifecycle.py +330 -0
- scripts/api/routes/metrics_gates.py +347 -0
- scripts/api_server.py +318 -0
- scripts/auth.py +451 -0
- scripts/cli/__init__.py +1 -0
- scripts/cli/cli_visual.py +642 -0
- scripts/cli.py +1094 -0
- scripts/collaboration/__init__.py +212 -0
- scripts/collaboration/_version.py +1 -0
- scripts/collaboration/agent_briefing.py +656 -0
- scripts/collaboration/ai_semantic_matcher.py +260 -0
- scripts/collaboration/anchor_checker.py +281 -0
- scripts/collaboration/anti_rationalization.py +470 -0
- scripts/collaboration/async_integration_example.py +255 -0
- scripts/collaboration/batch_scheduler.py +149 -0
- scripts/collaboration/checkpoint_manager.py +561 -0
- scripts/collaboration/ci_feedback_adapter.py +351 -0
- scripts/collaboration/code_map_generator.py +247 -0
- scripts/collaboration/concern_pack_loader.py +352 -0
- scripts/collaboration/confidence_score.py +496 -0
- scripts/collaboration/config_loader.py +188 -0
- scripts/collaboration/consensus.py +244 -0
- scripts/collaboration/context_compressor.py +533 -0
- scripts/collaboration/coordinator.py +668 -0
- scripts/collaboration/dispatcher.py +1636 -0
- scripts/collaboration/dual_layer_context.py +128 -0
- scripts/collaboration/enhanced_worker.py +539 -0
- scripts/collaboration/feature_usage_tracker.py +206 -0
- scripts/collaboration/five_axis_consensus.py +334 -0
- scripts/collaboration/input_validator.py +401 -0
- scripts/collaboration/integration_example.py +287 -0
- scripts/collaboration/intent_workflow_mapper.py +350 -0
- scripts/collaboration/language_parsers.py +269 -0
- scripts/collaboration/lifecycle_protocol.py +1446 -0
- scripts/collaboration/llm_backend.py +453 -0
- scripts/collaboration/llm_cache.py +448 -0
- scripts/collaboration/llm_cache_async.py +347 -0
- scripts/collaboration/llm_retry.py +387 -0
- scripts/collaboration/llm_retry_async.py +389 -0
- scripts/collaboration/mce_adapter.py +597 -0
- scripts/collaboration/memory_bridge.py +1607 -0
- scripts/collaboration/models.py +537 -0
- scripts/collaboration/null_providers.py +297 -0
- scripts/collaboration/operation_classifier.py +289 -0
- scripts/collaboration/output_slicer.py +225 -0
- scripts/collaboration/performance_monitor.py +462 -0
- scripts/collaboration/permission_guard.py +865 -0
- scripts/collaboration/prompt_assembler.py +756 -0
- scripts/collaboration/prompt_variant_generator.py +483 -0
- scripts/collaboration/protocols.py +267 -0
- scripts/collaboration/report_formatter.py +352 -0
- scripts/collaboration/retrospective.py +279 -0
- scripts/collaboration/role_matcher.py +92 -0
- scripts/collaboration/role_template_market.py +352 -0
- scripts/collaboration/rule_collector.py +678 -0
- scripts/collaboration/scratchpad.py +346 -0
- scripts/collaboration/skill_registry.py +151 -0
- scripts/collaboration/skillifier.py +878 -0
- scripts/collaboration/standardized_role_template.py +317 -0
- scripts/collaboration/task_completion_checker.py +237 -0
- scripts/collaboration/test_quality_guard.py +695 -0
- scripts/collaboration/unified_gate_engine.py +598 -0
- scripts/collaboration/usage_tracker.py +309 -0
- scripts/collaboration/user_friendly_error.py +176 -0
- scripts/collaboration/verification_gate.py +312 -0
- scripts/collaboration/warmup_manager.py +635 -0
- scripts/collaboration/worker.py +513 -0
- scripts/collaboration/workflow_engine.py +684 -0
- scripts/dashboard.py +1088 -0
- scripts/generate_benchmark_report.py +786 -0
- scripts/history_manager.py +604 -0
- scripts/mcp_server.py +289 -0
- skills/__init__.py +32 -0
- skills/dispatch/handler.py +52 -0
- skills/intent/handler.py +59 -0
- skills/registry.py +67 -0
- skills/retrospective/__init__.py +0 -0
- skills/retrospective/handler.py +125 -0
- skills/review/handler.py +356 -0
- skills/security/handler.py +454 -0
- skills/test/__init__.py +0 -0
- skills/test/handler.py +78 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
FeatureUsageTracker - V3.6.0 Feature Usage Statistics
|
|
5
|
+
|
|
6
|
+
Tracks invocation counts for all DevSquad features, enabling
|
|
7
|
+
data-driven decisions about which features to strengthen, simplify,
|
|
8
|
+
or deprecate.
|
|
9
|
+
|
|
10
|
+
Design:
|
|
11
|
+
- Thread-safe counter with atomic increments
|
|
12
|
+
- Zero overhead when disabled (feature_flag check)
|
|
13
|
+
- Periodic persistence to JSON
|
|
14
|
+
- Query API for usage reports
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import threading
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Dict, List, Optional, Any
|
|
23
|
+
from collections import defaultdict
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FeatureUsageTracker:
    """
    Thread-safe feature usage counter.

    Every public method that reads or writes the counters takes the
    instance's re-entrant lock, so the tracker can be shared between
    threads. Counts can optionally be persisted to (and restored from)
    a JSON file.

    Usage:
        tracker = FeatureUsageTracker()
        tracker.tick("anchor_check")
        tracker.tick("anchor_check")
        tracker.tick("retrospective")
        print(tracker.get_all_counts())
        # {"anchor_check": 2, "retrospective": 1}
    """

    KNOWN_FEATURES = frozenset([
        "dispatch",
        "anchor_check",
        "anchor_drift_detected",
        "retrospective",
        "retrospective_stored",
        "retrospective_loaded",
        "fallback_backend_primary",
        "fallback_backend_failover",
        "consensus_vote",
        "consensus_split",
        "consensus_escalated",
        "checkpoint_save",
        "checkpoint_load",
        "workflow_step",
        "workflow_handoff",
        "lifecycle_gate_check",
        "lifecycle_gate_blocked",
        "permission_check",
        "permission_blocked",
        "input_validation",
        "input_blocked",
        "context_compression",
        "memory_capture",
        "memory_recall",
        "skillify_proposal",
        "semantic_matcher",
        "role_matcher",
        "prompt_assembly",
        "verification_gate",
        "task_completion_check",
    ])

    def __init__(self, persist_path: Optional[str] = None, auto_persist_interval: int = 100):
        """
        Create a tracker, restoring earlier counts when a file is given.

        Args:
            persist_path: JSON file used by persist() and for the initial
                load. None disables persistence.
            auto_persist_interval: Persist automatically each time the
                session's tick total crosses a multiple of this value.
                Values <= 0 disable automatic persistence.
        """
        self._counts: Dict[str, int] = defaultdict(int)
        self._first_seen: Dict[str, str] = {}
        self._last_seen: Dict[str, str] = {}
        # Re-entrant because report() calls other locking methods while
        # it already holds the lock.
        self._lock = threading.RLock()
        self._persist_path = persist_path
        self._auto_persist_interval = auto_persist_interval
        # Ticks recorded in this session only; loaded counts are not included.
        self._total_ticks = 0
        self._session_start = datetime.now().isoformat()
        if persist_path:
            self._load(persist_path)

    def tick(self, feature: str, count: int = 1) -> int:
        """
        Increment usage count for a feature. Returns new count.

        Args:
            feature: Feature name; names outside KNOWN_FEATURES are accepted.
            count: Amount to add to the feature's counter.

        Returns:
            The feature's count after the increment.
        """
        now = datetime.now().isoformat()
        with self._lock:
            ticks_before = self._total_ticks
            self._counts[feature] += count
            self._first_seen.setdefault(feature, now)
            self._last_seen[feature] = now
            self._total_ticks = ticks_before + count
            new_count = self._counts[feature]

            interval = self._auto_persist_interval
            # Compare interval "buckets" instead of testing
            # `total % interval == 0`: with count > 1 the total can step
            # over a multiple of the interval without ever landing on it,
            # which would silently skip the scheduled persist.
            if (self._persist_path
                    and interval > 0
                    and self._total_ticks // interval > ticks_before // interval):
                self.persist()

            return new_count

    def get_count(self, feature: str) -> int:
        """Return the current count for *feature* (0 if never ticked)."""
        with self._lock:
            # .get() avoids inserting a zero entry into the defaultdict.
            return self._counts.get(feature, 0)

    def get_all_counts(self) -> Dict[str, int]:
        """Return a plain-dict snapshot of every feature's count."""
        with self._lock:
            return dict(self._counts)

    def get_unused_features(self) -> List[str]:
        """Return the sorted KNOWN_FEATURES that have no counter entry."""
        with self._lock:
            used = set(self._counts)
        return sorted(self.KNOWN_FEATURES - used)

    def get_low_usage_features(self, threshold: int = 3) -> List[str]:
        """Return sorted names of tracked features with count <= threshold."""
        # Iterating the dict must happen under the lock; a concurrent
        # tick() adding a new key would otherwise raise RuntimeError.
        with self._lock:
            return sorted(f for f, c in self._counts.items() if c <= threshold)

    def get_high_usage_features(self, top_n: int = 10) -> List[tuple]:
        """Return up to *top_n* (feature, count) pairs, highest count first."""
        with self._lock:
            sorted_features = sorted(self._counts.items(), key=lambda x: x[1], reverse=True)
            return sorted_features[:top_n]

    def report(self) -> Dict[str, Any]:
        """
        Build a usage report as a dictionary.

        The whole report is assembled under the lock, so all of its
        sections describe the same moment in time.
        """
        with self._lock:
            total = sum(self._counts.values())
            return {
                "session_start": self._session_start,
                "total_invocations": total,
                "unique_features_used": len(self._counts),
                "unique_features_available": len(self.KNOWN_FEATURES),
                "coverage_ratio": len(self._counts) / max(len(self.KNOWN_FEATURES), 1),
                "top_features": self.get_high_usage_features(10),
                "unused_features": self.get_unused_features(),
                "low_usage_features": self.get_low_usage_features(3),
                "feature_details": {
                    f: {
                        "count": c,
                        "first_seen": self._first_seen.get(f, ""),
                        "last_seen": self._last_seen.get(f, ""),
                    }
                    for f, c in sorted(self._counts.items(), key=lambda x: x[1], reverse=True)
                },
            }

    def report_markdown(self) -> str:
        """Render report() as a Markdown document."""
        r = self.report()
        details = r["feature_details"]
        lines = [
            "# Feature Usage Report",
            "",
            f"**Session**: {r['session_start']}",
            f"**Total Invocations**: {r['total_invocations']}",
            f"**Features Used**: {r['unique_features_used']}/{r['unique_features_available']} ({r['coverage_ratio']:.0%})",
            "",
            "## Top Features",
            "",
            "| Feature | Count |",
            "|---------|-------|",
        ]
        for feat, count in r["top_features"]:
            lines.append(f"| {feat} | {count} |")

        if r["unused_features"]:
            lines.extend(["", "## Unused Features", ""])
            for f in r["unused_features"]:
                lines.append(f"- {f}")

        if r["low_usage_features"]:
            lines.extend(["", "## Low Usage Features (≤3)", ""])
            for f in r["low_usage_features"]:
                # Read the count from the report snapshot rather than the
                # live counters so every number in the document agrees.
                lines.append(f"- {f} ({details[f]['count']})")

        lines.extend(["", "---", "*Generated by FeatureUsageTracker V3.6.0*"])
        return "\n".join(lines)

    def persist(self, path: Optional[str] = None):
        """
        Write report() as JSON to *path*, or to the configured persist path.

        Does nothing when neither is set. Persistence is best-effort:
        failures are logged as warnings and never raised to the caller.
        """
        target = path or self._persist_path
        if not target:
            return
        try:
            data = self.report()
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            with open(target, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            logger.debug("Feature usage persisted to %s", target)
        except Exception as e:
            logger.warning("Failed to persist feature usage: %s", e)

    def _load(self, path: str):
        """
        Restore counts and timestamps from a file written by persist().

        A missing file is ignored silently; any other failure is logged
        as a warning and leaves whatever was loaded so far in place.
        """
        try:
            if Path(path).exists():
                with open(path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                with self._lock:
                    for feat, info in data.get("feature_details", {}).items():
                        self._counts[feat] = info.get("count", 0)
                        self._first_seen[feat] = info.get("first_seen", "")
                        self._last_seen[feat] = info.get("last_seen", "")
                    loaded = len(self._counts)
                logger.info("Loaded %d feature usage records from %s", loaded, path)
        except Exception as e:
            logger.warning("Failed to load feature usage: %s", e)

    def reset(self):
        """Clear all counters, timestamps and the session tick total."""
        with self._lock:
            self._counts.clear()
            self._first_seen.clear()
            self._last_seen.clear()
            self._total_ticks = 0
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Five-Axis Consensus Engine (P1-4)
|
|
5
|
+
|
|
6
|
+
Extends voting dimensions from generic to five-axis review:
|
|
7
|
+
1. Correctness: Logic correctness, bug-free, meets requirements
|
|
8
|
+
2. Readability: Code clarity, naming, comments, structure
|
|
9
|
+
3. Architecture: Design patterns, modularity, scalability
|
|
10
|
+
4. Security: Vulnerabilities, input validation, data protection
|
|
11
|
+
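5. Performance: Efficiency, resource usage, bottlenecks
(Operability - deployment, logging, monitoring, disaster recovery, configuration - takes the place of Performance in the walkthrough engine.)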
|
|
12
|
+
|
|
13
|
+
Spec reference: SPEC_V35_Agent_Skills_Quality_Framework.md Section 7.4
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from typing import Any, Dict, List, Optional
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ReviewAxis(Enum):
    """Axes on which a code review can be scored.

    Six members are declared. Each member's value is the lowercase string
    used as the axis key in serialized output.
    """
    CORRECTNESS = "correctness"
    READABILITY = "readability"
    ARCHITECTURE = "architecture"
    SECURITY = "security"
    PERFORMANCE = "performance"
    # Sixth axis, declared in addition to the five listed above.
    OPERABILITY = "operability"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class AxisVote:
    """A single score given on one review axis.

    `score` and `confidence` are expected to lie in [0.0, 1.0]; this class
    itself does not enforce the range.
    """
    axis: ReviewAxis
    score: float  # 0.0 to 1.0
    confidence: float  # 0.0 to 1.0
    comment: str = ""
    voter_id: str = ""

    def is_positive(self) -> bool:
        """Return True when the score is at least 0.6."""
        return 0.6 <= self.score

    def is_negative(self) -> bool:
        """Return True when the score is strictly below 0.4."""
        return 0.4 > self.score

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the vote; score and confidence are rounded to 2 places."""
        return dict(
            axis=self.axis.value,
            score=round(self.score, 2),
            confidence=round(self.confidence, 2),
            comment=self.comment,
            voter_id=self.voter_id,
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class FiveAxisReview:
    """All axis votes cast by one reviewer, plus their aggregate score."""
    reviewer_id: str
    role: str
    votes: List[AxisVote] = field(default_factory=list)
    overall_score: float = 0.0
    summary: str = ""
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def calculate_overall(self) -> float:
        """Compute the confidence-weighted mean score and store it.

        Returns 0.0 without touching `overall_score` when there are no
        votes. When the confidences do not sum to a positive number the
        stored and returned score is 0.0.
        """
        if not self.votes:
            return 0.0
        confidences = [vote.confidence for vote in self.votes]
        numerator = sum(
            vote.score * weight for vote, weight in zip(self.votes, confidences)
        )
        denominator = sum(confidences)
        if denominator > 0:
            self.overall_score = numerator / denominator
        else:
            self.overall_score = 0.0
        return self.overall_score

    def get_vote_for_axis(self, axis: ReviewAxis) -> Optional[AxisVote]:
        """Return the first vote cast on *axis*, or None if there is none."""
        return next((vote for vote in self.votes if vote.axis == axis), None)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class ConsensusResult:
    """Outcome of combining several reviews into one verdict."""
    reviews: List[FiveAxisReview] = field(default_factory=list)
    axis_consensus: Dict[str, float] = field(default_factory=dict)
    overall_consensus: float = 0.0
    verdict: str = ""  # APPROVE / CONDITIONAL / REJECT
    action_items: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result.

        Reviews are reduced to their count and scores are rounded to two
        decimal places; `action_items` is passed through as-is.
        """
        rounded_axes = {
            name: round(value, 2) for name, value in self.axis_consensus.items()
        }
        payload: Dict[str, Any] = {}
        payload["review_count"] = len(self.reviews)
        payload["axis_consensus"] = rounded_axes
        payload["overall_consensus"] = round(self.overall_consensus, 2)
        payload["verdict"] = self.verdict
        payload["action_items"] = self.action_items
        return payload
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class FiveAxisConsensusEngine:
    """
    Five-axis consensus engine for multi-dimensional code review.

    Reviews are collections of per-axis votes. compute_consensus() averages
    the votes per axis, combines the axes using the configured weights and
    maps the result to APPROVE / CONDITIONAL / REJECT.

    Note that a weighted axis with no votes contributes 0.0 to the overall
    consensus, so incomplete reviews lower the result.

    Usage:
        engine = FiveAxisConsensusEngine()
        review = engine.create_review("coder_1", "solo-coder")
        engine.add_axis_vote(review, ReviewAxis.CORRECTNESS, 0.9, 0.8, "Logic looks correct")

        result = engine.compute_consensus([review])
        print(result.verdict)  # APPROVE/CONDITIONAL/REJECT
    """

    DEFAULT_AXIS_WEIGHTS: Dict[ReviewAxis, float] = {
        ReviewAxis.CORRECTNESS: 0.30,
        ReviewAxis.SECURITY: 0.25,
        ReviewAxis.ARCHITECTURE: 0.20,
        ReviewAxis.PERFORMANCE: 0.15,
        ReviewAxis.READABILITY: 0.10,
    }

    # Minimum overall consensus required for each verdict; anything below
    # the CONDITIONAL threshold is a REJECT.
    CONSENSUS_THRESHOLDS = {
        "APPROVE": 0.75,
        "CONDITIONAL": 0.50,
    }

    def __init__(
        self,
        custom_weights: Optional[Dict[ReviewAxis, float]] = None,
        strict_mode: bool = False,
        replace_weights: bool = False,
    ):
        """
        Initialize consensus engine.

        Args:
            custom_weights: Override default axis weights
            strict_mode: If True, a security axis consensus below 0.5 forces
                a REJECT verdict regardless of the overall consensus. An
                engine that received no security votes is not vetoed.
            replace_weights: If True, custom_weights fully replace defaults instead of merging
        """
        if replace_weights and custom_weights:
            self._weights = dict(custom_weights)
        else:
            self._weights = dict(self.DEFAULT_AXIS_WEIGHTS)
            if custom_weights:
                self._weights.update(custom_weights)
        self._strict_mode = strict_mode

    @staticmethod
    def _clamp_unit(value: float) -> float:
        """Clamp *value* into [0.0, 1.0], mapping NaN to 0.0."""
        # NaN is the only float that is not equal to itself. Without this
        # guard `max(0.0, min(1.0, nan))` evaluates to 1.0, which would
        # turn an unparseable score into a perfect vote.
        if value != value:
            return 0.0
        return max(0.0, min(1.0, value))

    def _verdict_for(self, overall: float) -> str:
        """Map an overall consensus value to a verdict string."""
        if overall >= self.CONSENSUS_THRESHOLDS["APPROVE"]:
            return "APPROVE"
        if overall >= self.CONSENSUS_THRESHOLDS["CONDITIONAL"]:
            return "CONDITIONAL"
        return "REJECT"

    def create_review(
        self,
        reviewer_id: str,
        role: str,
    ) -> FiveAxisReview:
        """Create a new empty review."""
        return FiveAxisReview(
            reviewer_id=reviewer_id,
            role=role,
        )

    def add_axis_vote(
        self,
        review: FiveAxisReview,
        axis: ReviewAxis,
        score: float,
        confidence: float,
        comment: str = "",
    ) -> AxisVote:
        """
        Add a vote on a specific axis to a review.

        Score and confidence are clamped into [0.0, 1.0]; NaN is treated
        as 0.0. The vote's voter_id is taken from the review.

        Returns:
            The AxisVote that was appended to review.votes.
        """
        vote = AxisVote(
            axis=axis,
            score=self._clamp_unit(score),
            confidence=self._clamp_unit(confidence),
            comment=comment,
            voter_id=review.reviewer_id,
        )
        review.votes.append(vote)
        return vote

    def compute_consensus(
        self,
        reviews: List[FiveAxisReview],
    ) -> ConsensusResult:
        """
        Compute consensus across multiple reviews.

        Each review's own overall score is recalculated as a side effect.

        Args:
            reviews: List of completed reviews

        Returns:
            ConsensusResult with verdict and details. An empty review list
            yields a REJECT verdict with no other fields populated.
        """
        result = ConsensusResult(reviews=reviews)

        if not reviews:
            result.verdict = "REJECT"
            return result

        # Collect each vote's confidence-scaled score per axis.
        axis_scores: Dict[ReviewAxis, List[float]] = {axis: [] for axis in ReviewAxis}

        for review in reviews:
            review.calculate_overall()
            for vote in review.votes:
                axis_scores[vote.axis].append(vote.score * vote.confidence)

        # Mean of the confidence-scaled scores per axis. The divisor is the
        # number of votes, not the sum of confidences, so low confidence
        # lowers the axis value. Axes without votes get no entry.
        for axis, scores in axis_scores.items():
            if scores:
                result.axis_consensus[axis.value] = sum(scores) / len(scores)

        # Combine the axes using the configured weights; a weighted axis
        # without votes counts as 0.0.
        overall = 0.0
        total_weight = 0.0
        for axis, weight in self._weights.items():
            overall += result.axis_consensus.get(axis.value, 0.0) * weight
            total_weight += weight

        result.overall_consensus = overall / total_weight if total_weight > 0 else 0.0

        # Strict mode: a weak security consensus overrides the thresholds.
        security_veto = (
            self._strict_mode
            and result.axis_consensus.get("security", 1.0) < 0.5
        )
        if security_veto:
            result.verdict = "REJECT"
            result.action_items.append({
                "axis": "security",
                "severity": "critical",
                "message": "Security concerns must be resolved before approval",
            })
        else:
            result.verdict = self._verdict_for(result.overall_consensus)

        # Generate action items for weak axes
        for axis_name, score in result.axis_consensus.items():
            if score < 0.5:
                severity = "critical" if axis_name == "security" else "warning"
                result.action_items.append({
                    "axis": axis_name,
                    "severity": severity,
                    "message": f"{axis_name.capitalize()} score ({score:.2f}) below threshold (0.50)",
                })

        return result

    def get_axis_names(self) -> List[str]:
        """Return list of axis names for this engine's configured weights."""
        return [axis.value for axis in self._weights]

    def get_default_weights(self) -> Dict[str, float]:
        """Return the engine's current (possibly customized) weights as a string-keyed dict."""
        return {k.value: v for k, v in self._weights.items()}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def create_default_engine() -> FiveAxisConsensusEngine:
    """Build an engine using the constructor defaults (default weights, non-strict)."""
    engine = FiveAxisConsensusEngine()
    return engine
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def create_strict_engine() -> FiveAxisConsensusEngine:
    """Build an engine with default weights and strict_mode switched on."""
    engine = FiveAxisConsensusEngine(strict_mode=True)
    return engine
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def create_security_focused_engine() -> FiveAxisConsensusEngine:
    """Build a non-strict engine whose largest weight (0.40) is on security.

    The weights are merged over the engine defaults; since all five default
    axes are listed, every default weight is overridden.
    """
    security_first_weights = {
        ReviewAxis.SECURITY: 0.40,
        ReviewAxis.CORRECTNESS: 0.25,
        ReviewAxis.ARCHITECTURE: 0.15,
        ReviewAxis.PERFORMANCE: 0.10,
        ReviewAxis.READABILITY: 0.10,
    }
    engine = FiveAxisConsensusEngine(custom_weights=security_first_weights)
    return engine
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# Axis weights passed to the engine by create_walkthrough_engine().
# OPERABILITY is weighted here and PERFORMANCE is absent; the five weights
# sum to 1.0.
WALKTHROUGH_AXIS_WEIGHTS: Dict[ReviewAxis, float] = {
    ReviewAxis.CORRECTNESS: 0.25,
    ReviewAxis.SECURITY: 0.25,
    ReviewAxis.ARCHITECTURE: 0.20,
    ReviewAxis.OPERABILITY: 0.15,
    ReviewAxis.READABILITY: 0.15,
}

# Identifiers of the checks grouped under the operability axis. Nothing in
# the visible part of this module reads this list - presumably it is
# consumed by callers elsewhere; verify before changing it.
WALKTHROUGH_OPERABILITY_CHECKS = [
    "deployment_feasibility",
    "logging_standards",
    "monitoring_instrumentation",
    "disaster_recovery",
    "configuration_management",
    "performance_operations",
]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def create_walkthrough_engine() -> FiveAxisConsensusEngine:
    """
    Build the consensus engine used for code walkthroughs.

    The engine is created in strict mode and its weights are taken
    entirely from WALKTHROUGH_AXIS_WEIGHTS (replacing, not merging with,
    the defaults), so Operability is weighted instead of Performance:
    - Correctness (0.25): Logic correctness, bug-free
    - Security (0.25): Vulnerabilities, compliance
    - Architecture (0.20): Design patterns, modularity
    - Operability (0.15): Deployment, monitoring, disaster recovery, config management
    - Readability (0.15): Code clarity, maintainability

    What the Operability axis is meant to cover:
    - Deployment feasibility (Docker/K8s config completeness)
    - Logging standards (key operations logged, appropriate log levels)
    - Monitoring instrumentation (core metrics monitored, alert thresholds set)
    - Disaster recovery (degradation plan, rollback mechanism)
    - Configuration management (externalized config, environment isolation)
    - Performance operations (resource usage, response time, capacity planning, SLA)
    """
    engine = FiveAxisConsensusEngine(
        replace_weights=True,
        strict_mode=True,
        custom_weights=WALKTHROUGH_AXIS_WEIGHTS,
    )
    return engine
|