devsquad 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. devsquad-3.6.0.dist-info/METADATA +944 -0
  2. devsquad-3.6.0.dist-info/RECORD +95 -0
  3. devsquad-3.6.0.dist-info/WHEEL +5 -0
  4. devsquad-3.6.0.dist-info/entry_points.txt +2 -0
  5. devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
  6. devsquad-3.6.0.dist-info/top_level.txt +2 -0
  7. scripts/__init__.py +0 -0
  8. scripts/ai_semantic_matcher.py +512 -0
  9. scripts/alert_manager.py +505 -0
  10. scripts/api/__init__.py +43 -0
  11. scripts/api/models.py +386 -0
  12. scripts/api/routes/__init__.py +20 -0
  13. scripts/api/routes/dispatch.py +348 -0
  14. scripts/api/routes/lifecycle.py +330 -0
  15. scripts/api/routes/metrics_gates.py +347 -0
  16. scripts/api_server.py +318 -0
  17. scripts/auth.py +451 -0
  18. scripts/cli/__init__.py +1 -0
  19. scripts/cli/cli_visual.py +642 -0
  20. scripts/cli.py +1094 -0
  21. scripts/collaboration/__init__.py +212 -0
  22. scripts/collaboration/_version.py +1 -0
  23. scripts/collaboration/agent_briefing.py +656 -0
  24. scripts/collaboration/ai_semantic_matcher.py +260 -0
  25. scripts/collaboration/anchor_checker.py +281 -0
  26. scripts/collaboration/anti_rationalization.py +470 -0
  27. scripts/collaboration/async_integration_example.py +255 -0
  28. scripts/collaboration/batch_scheduler.py +149 -0
  29. scripts/collaboration/checkpoint_manager.py +561 -0
  30. scripts/collaboration/ci_feedback_adapter.py +351 -0
  31. scripts/collaboration/code_map_generator.py +247 -0
  32. scripts/collaboration/concern_pack_loader.py +352 -0
  33. scripts/collaboration/confidence_score.py +496 -0
  34. scripts/collaboration/config_loader.py +188 -0
  35. scripts/collaboration/consensus.py +244 -0
  36. scripts/collaboration/context_compressor.py +533 -0
  37. scripts/collaboration/coordinator.py +668 -0
  38. scripts/collaboration/dispatcher.py +1636 -0
  39. scripts/collaboration/dual_layer_context.py +128 -0
  40. scripts/collaboration/enhanced_worker.py +539 -0
  41. scripts/collaboration/feature_usage_tracker.py +206 -0
  42. scripts/collaboration/five_axis_consensus.py +334 -0
  43. scripts/collaboration/input_validator.py +401 -0
  44. scripts/collaboration/integration_example.py +287 -0
  45. scripts/collaboration/intent_workflow_mapper.py +350 -0
  46. scripts/collaboration/language_parsers.py +269 -0
  47. scripts/collaboration/lifecycle_protocol.py +1446 -0
  48. scripts/collaboration/llm_backend.py +453 -0
  49. scripts/collaboration/llm_cache.py +448 -0
  50. scripts/collaboration/llm_cache_async.py +347 -0
  51. scripts/collaboration/llm_retry.py +387 -0
  52. scripts/collaboration/llm_retry_async.py +389 -0
  53. scripts/collaboration/mce_adapter.py +597 -0
  54. scripts/collaboration/memory_bridge.py +1607 -0
  55. scripts/collaboration/models.py +537 -0
  56. scripts/collaboration/null_providers.py +297 -0
  57. scripts/collaboration/operation_classifier.py +289 -0
  58. scripts/collaboration/output_slicer.py +225 -0
  59. scripts/collaboration/performance_monitor.py +462 -0
  60. scripts/collaboration/permission_guard.py +865 -0
  61. scripts/collaboration/prompt_assembler.py +756 -0
  62. scripts/collaboration/prompt_variant_generator.py +483 -0
  63. scripts/collaboration/protocols.py +267 -0
  64. scripts/collaboration/report_formatter.py +352 -0
  65. scripts/collaboration/retrospective.py +279 -0
  66. scripts/collaboration/role_matcher.py +92 -0
  67. scripts/collaboration/role_template_market.py +352 -0
  68. scripts/collaboration/rule_collector.py +678 -0
  69. scripts/collaboration/scratchpad.py +346 -0
  70. scripts/collaboration/skill_registry.py +151 -0
  71. scripts/collaboration/skillifier.py +878 -0
  72. scripts/collaboration/standardized_role_template.py +317 -0
  73. scripts/collaboration/task_completion_checker.py +237 -0
  74. scripts/collaboration/test_quality_guard.py +695 -0
  75. scripts/collaboration/unified_gate_engine.py +598 -0
  76. scripts/collaboration/usage_tracker.py +309 -0
  77. scripts/collaboration/user_friendly_error.py +176 -0
  78. scripts/collaboration/verification_gate.py +312 -0
  79. scripts/collaboration/warmup_manager.py +635 -0
  80. scripts/collaboration/worker.py +513 -0
  81. scripts/collaboration/workflow_engine.py +684 -0
  82. scripts/dashboard.py +1088 -0
  83. scripts/generate_benchmark_report.py +786 -0
  84. scripts/history_manager.py +604 -0
  85. scripts/mcp_server.py +289 -0
  86. skills/__init__.py +32 -0
  87. skills/dispatch/handler.py +52 -0
  88. skills/intent/handler.py +59 -0
  89. skills/registry.py +67 -0
  90. skills/retrospective/__init__.py +0 -0
  91. skills/retrospective/handler.py +125 -0
  92. skills/review/handler.py +356 -0
  93. skills/security/handler.py +454 -0
  94. skills/test/__init__.py +0 -0
  95. skills/test/handler.py +78 -0
@@ -0,0 +1,260 @@
1
+ #!/usr/bin/env python3
2
+ import json
3
+ import hashlib
4
+ import logging
5
+ from typing import Dict, List, Any, Optional
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+
9
+ from .models import ROLE_REGISTRY, ROLE_ALIASES
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@dataclass
class SemanticMatchResult:
    """One candidate role produced by a matching pass.

    The first four fields are required and positional; the remaining
    fields default to empty / zero values so callers may omit them.
    """

    # Identifier and display name of the matched role.
    role_id: str
    role_name: str

    # Score used to sort results (highest first) and a short justification.
    confidence: float
    reasoning: str

    # Optional detail; each instance gets its own fresh list / dict.
    matched_capabilities: List[str] = field(default_factory=list)
    relevance_score: float = 0.0
    explanation: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)
24
+
25
+
26
class AISemanticMatcher:
    """
    AI-powered semantic role matcher.

    Uses an LLM backend for semantic understanding of task requirements and
    falls back to keyword matching when no backend is configured, when the
    backend call raises, or when its reply yields no usable matches.

    Workflow:
    1. Analyze task requirements, extract key semantic info
    2. Understand role capabilities and responsibilities
    3. Match based on semantic similarity
    4. Generate explainable matching results
    """

    MATCH_PROMPT_TEMPLATE = """You are an intelligent role matching expert. Analyze the following task and match the most suitable roles.

## Task
**Description**: {task_description}
**Required capabilities**: {required_capabilities}

## Available Roles
{role_descriptions}

## Requirements
1. Analyze the core needs and key capability requirements of the task
2. Evaluate each role's match with the task
3. Consider the role's professional capabilities and experience
4. Provide matching reasons and confidence scores

Return the matching results in JSON format:
{{
"matches": [
{{
"role_id": "role ID",
"role_name": "role name",
"confidence": 0.0-1.0,
"reasoning": "matching reasoning",
"matched_capabilities": ["matched capability list"],
"relevance_score": 0.0-1.0,
"explanation": "detailed explanation"
}}
],
"best_match": "best match role ID",
"analysis": "overall analysis"
}}"""

    # Extra English keywords per role id, checked in addition to the
    # keywords carried by the role definition itself.
    EN_KEYWORD_MAP = {
        "architect": ["architecture", "design", "system", "microservice", "tech stack", "api design", "performance", "module", "interface"],
        "product-manager": ["requirement", "prd", "user story", "product", "feature", "acceptance", "competitive", "experience"],
        "tester": ["test", "quality", "qa", "automated", "performance test", "bug", "defect", "verification", "edge case"],
        "solo-coder": ["implement", "develop", "code", "fix", "optimize", "refactor", "review", "best practice"],
        "ui-designer": ["ui", "interface", "frontend", "visual", "interaction", "prototype", "ux", "accessibility"],
        "devops": ["ci/cd", "deploy", "monitor", "infrastructure", "docker", "kubernetes", "container", "devops"],
        "security": ["security", "vulnerability", "audit", "threat", "encryption", "auth", "owasp", "compliance"],
    }

    def __init__(self, llm_backend=None):
        """Create a matcher.

        Args:
            llm_backend: Optional object exposing ``generate(prompt)``.
                When falsy, only keyword matching is used.
        """
        self.llm_backend = llm_backend
        # Maps cache key -> top-ranked result of a successful LLM match.
        self.match_cache: Dict[str, "SemanticMatchResult"] = {}
        self.match_history: List[Dict[str, Any]] = []

    def match(
        self,
        task_description: str,
        required_capabilities: Optional[List[str]] = None,
        preferred_skills: Optional[List[str]] = None,
        use_cache: bool = True,
    ) -> List["SemanticMatchResult"]:
        """
        Perform intelligent role matching using AI.

        Args:
            task_description: Task description text
            required_capabilities: Required capability list (inserted into
                the LLM prompt and, when non-empty, into the cache key)
            preferred_skills: Preferred skill list (accepted for interface
                compatibility; not consulted by either matching path)
            use_cache: Whether to read from / write to the result cache

        Returns:
            List[SemanticMatchResult]: Matched results sorted by confidence.
            A cache hit returns a single-element list holding only the
            top-ranked result of the earlier LLM match.
        """
        cache_key = self._generate_cache_key(task_description, required_capabilities)

        if use_cache and cache_key in self.match_cache:
            logger.info("Using cached match result")
            return [self.match_cache[cache_key]]

        roles = self._build_role_list()

        if self.llm_backend:
            results: List["SemanticMatchResult"] = []
            try:
                prompt = self.MATCH_PROMPT_TEMPLATE.format(
                    task_description=task_description,
                    required_capabilities=required_capabilities or [],
                    role_descriptions=self._build_role_descriptions(roles),
                )
                ai_response = self.llm_backend.generate(prompt)
                results = self._parse_ai_response(ai_response, roles)
                if not results:
                    logger.warning(
                        "AI matching returned no usable matches, falling back to keyword"
                    )
            except Exception as e:
                # Best-effort boundary: any backend failure degrades to
                # keyword matching instead of propagating.
                logger.warning("AI matching failed, falling back to keyword: %s", e)

            # Only a non-empty LLM result short-circuits; an empty one
            # (parse failure, no matches) must not be returned as-is.
            if results:
                if use_cache:
                    self.match_cache[cache_key] = results[0]
                self._record_match(task_description, results)
                return results

        results = self._keyword_match(task_description, roles)
        self._record_match(task_description, results)
        return results

    def _build_role_list(self) -> List[Dict[str, Any]]:
        """Return a dict description of every registry role whose status is "core"."""
        return [
            {
                'id': role_id,
                'name': rdef.name,
                'description': rdef.description,
                'capabilities': rdef.keywords,
                'skills': rdef.keywords[:3],
                'keywords': rdef.keywords,
            }
            for role_id, rdef in ROLE_REGISTRY.items()
            if rdef.status == "core"
        ]

    def _build_role_descriptions(self, roles: List[Dict[str, Any]]) -> str:
        """Render the role list as the numbered text block used in the prompt."""
        descriptions = []
        for i, role in enumerate(roles, 1):
            desc = f"{i}. **{role.get('name', 'Unknown')}** ({role.get('id', 'unknown')})\n"
            desc += f" - Responsibilities: {role.get('description', '')}\n"
            desc += f" - Capabilities: {', '.join(role.get('capabilities', []))}\n"
            descriptions.append(desc)
        return "\n".join(descriptions)

    def _keyword_match(self, task_description: str, roles: List[Dict[str, Any]]) -> List["SemanticMatchResult"]:
        """Score roles by counting keywords that occur in the task text.

        Matching is a case-insensitive substring test against both the
        role's own keywords and its ``EN_KEYWORD_MAP`` entry. Roles with no
        hit are omitted; if nothing matches at all, a single default
        "solo-coder" result is returned.
        """
        task_lower = task_description.lower()
        results = []

        for role in roles:
            role_id = role.get('id', '')
            keywords = role.get('keywords', [])
            en_keywords = self.EN_KEYWORD_MAP.get(role_id, [])

            cn_match = sum(1 for kw in keywords if kw.lower() in task_lower)
            en_match = sum(1 for kw in en_keywords if kw.lower() in task_lower)
            match_count = cn_match + en_match

            if match_count > 0:
                results.append(SemanticMatchResult(
                    role_id=role_id,
                    role_name=role.get('name', ''),
                    confidence=min(0.5 + match_count * 0.1, 0.95),
                    reasoning=f"Keyword match: {match_count} keywords matched",
                    matched_capabilities=role.get('capabilities', [])[:3],
                    # Clamp: more than ten hits must not push the score
                    # past the 0.0-1.0 range used everywhere else.
                    relevance_score=min(match_count / 10.0, 1.0),
                    explanation="Task requirements are highly related to this role's responsibilities",
                ))

        results.sort(key=lambda r: r.confidence, reverse=True)

        if not results:
            results.append(SemanticMatchResult(
                role_id="solo-coder",
                role_name="Solo Developer",
                confidence=0.5,
                reasoning="Default role: no specific keyword match",
                matched_capabilities=["general development"],
                relevance_score=0.3,
                explanation="No specific role matched, using default developer role",
            ))

        return results

    @staticmethod
    def _load_json_payload(text: str) -> Any:
        """Decode JSON from an LLM reply, tolerating code fences or prose around it."""
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Retry on the outermost {...} span, which strips Markdown
            # fences and any leading/trailing commentary.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(text[start:end + 1])

    def _parse_ai_response(self, response: str, roles: List[Dict[str, Any]]) -> List["SemanticMatchResult"]:
        """Convert the backend reply into results sorted by confidence.

        Args:
            response: JSON text, or an already-decoded mapping.
            roles: Role list passed by the caller; not consulted here.

        Returns:
            Parsed results, highest confidence first. Malformed entries are
            skipped; an unusable reply yields an empty list.
        """
        try:
            data = self._load_json_payload(response) if isinstance(response, str) else response
            if not isinstance(data, dict):
                raise ValueError("expected a JSON object at the top level")

            shared_metadata = {
                'best_match': data.get('best_match'),
                'analysis': data.get('analysis'),
            }

            results = []
            for match_data in data.get('matches') or []:
                try:
                    capabilities = match_data.get('matched_capabilities') or []
                    if isinstance(capabilities, str):
                        capabilities = [capabilities]
                    results.append(SemanticMatchResult(
                        role_id=match_data.get('role_id', ''),
                        role_name=match_data.get('role_name', ''),
                        confidence=float(match_data.get('confidence', 0.0)),
                        reasoning=match_data.get('reasoning', ''),
                        matched_capabilities=list(capabilities),
                        relevance_score=float(match_data.get('relevance_score', 0.0)),
                        explanation=match_data.get('explanation', ''),
                        metadata=dict(shared_metadata),
                    ))
                except (AttributeError, TypeError, ValueError) as e:
                    # One bad entry should not discard the valid ones.
                    logger.warning("Skipping malformed match entry: %s", e)

            results.sort(key=lambda r: r.confidence, reverse=True)
            return results
        except Exception as e:
            logger.warning("Failed to parse AI response: %s", e)
            return []

    def _generate_cache_key(
        self,
        task_description: str,
        required_capabilities: Optional[List[str]] = None,
    ) -> str:
        """Build the cache key for a task.

        The key covers the task text, the current registry size and, when
        given, the required capabilities (they are part of the prompt).
        With no capabilities the key equals the task-and-registry-only form.
        MD5 is used purely as a fingerprint, not for security.
        """
        content = f"{task_description}|{len(ROLE_REGISTRY)}"
        if required_capabilities:
            content += "|" + "\x1f".join(str(cap) for cap in required_capabilities)
        return hashlib.md5(content.encode('utf-8')).hexdigest()

    def _record_match(self, task_description: str, results: List["SemanticMatchResult"]):
        """Append a summary of this match to ``match_history``."""
        top = results[0] if results else None
        self.match_history.append({
            'task_description': task_description[:100],
            'timestamp': datetime.now().isoformat(),
            'results_count': len(results),
            'top_role': top.role_id if top else None,
            'top_confidence': top.confidence if top else 0.0,
        })

    def get_match_history(self, limit: int = 10) -> List[Dict]:
        """Return up to ``limit`` most recent history records (oldest first).

        A non-positive ``limit`` returns an empty list; a plain ``[-limit:]``
        slice would return the whole history for ``limit == 0``.
        """
        if limit <= 0:
            return []
        return self.match_history[-limit:]

    def clear_cache(self):
        """Drop all cached match results."""
        self.match_cache.clear()

    def explain_match(self, result: "SemanticMatchResult") -> str:
        """Format a match result as a multi-line, human-readable summary."""
        capabilities = result.matched_capabilities
        capability_text = ', '.join(map(str, capabilities)) if capabilities else 'None'
        return (
            f"Match: {result.role_name} ({result.role_id})\n"
            f"Confidence: {result.confidence:.1%}\n"
            f"Relevance: {result.relevance_score:.1%}\n\n"
            f"Reasoning:\n{result.reasoning}\n\n"
            f"Explanation:\n{result.explanation}\n\n"
            f"Matched capabilities: {capability_text}"
        )
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ AnchorChecker - V3.6.0 Goal Alignment Engine
5
+
6
+ Checks whether current output remains aligned with the original task goal
7
+ at key decision points (anchor points), preventing goal drift during
8
+ long-running Agent tasks.
9
+
10
+ Design Principles:
11
+ - No LLM calls: Pure algorithmic matching (keyword + TF-IDF)
12
+ - Trigger only at key nodes: step complete, phase gate, conflict, direction change
13
+ - Lightweight: <50ms per check
14
+ - Non-blocking: Warnings written to Scratchpad, execution continues
15
+ """
16
+
17
+ import re
18
+ import math
19
+ import logging
20
+ from datetime import datetime
21
+ from collections import Counter
22
+ from typing import Dict, List, Optional, Any
23
+
24
+ from .models import (
25
+ StructuredGoal, GoalItem, GoalItemStatus,
26
+ AnchorResult, AnchorTrigger, DriftItem, DriftSeverity,
27
+ )
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ _GOAL_PATTERNS = [
32
+ (r"(?:需要|必须|shall|must|should|需要实现|需要完成|要求)[::\s]*(.+?)(?:[。\n;;]|$)", 0.9),
33
+ (r"(?:目标|objective|goal|target)[::\s]*(.+?)(?:[。\n;;]|$)", 0.85),
34
+ (r"(?:实现|implement|build|create|develop|design)[::\s]*(.+?)(?:[。\n;;]|$)", 0.8),
35
+ (r"(?:确保|ensure|guarantee|verify)[::\s]*(.+?)(?:[。\n;;]|$)", 0.75),
36
+ (r"(?:支持|support|provide|enable)[::\s]*(.+?)(?:[。\n;;]|$)", 0.7),
37
+ (r"^\s*[-•*]\s*(.+?)$", 0.6),
38
+ (r"^\s*\d+[.、)\]]\s*(.+?)$", 0.6),
39
+ ]
40
+
41
+ _STOP_WORDS = frozenset([
42
+ "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一", "一个",
43
+ "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好",
44
+ "自己", "这", "他", "她", "它", "们", "那", "些", "什么", "怎么", "如何",
45
+ "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
46
+ "have", "has", "had", "do", "does", "did", "will", "would", "could",
47
+ "should", "may", "might", "shall", "can", "need", "to", "of", "in",
48
+ "for", "on", "with", "at", "by", "from", "as", "into", "through",
49
+ "and", "or", "but", "if", "then", "else", "when", "up", "out",
50
+ "this", "that", "these", "those", "it", "its", "i", "me", "my",
51
+ ])
52
+
53
+
54
+ def _tokenize(text: str) -> List[str]:
55
+ result = []
56
+ english_tokens = re.findall(r'[a-zA-Z_]{2,}', text.lower())
57
+ result.extend(english_tokens)
58
+
59
+ chinese_segments = re.findall(r'[\u4e00-\u9fff]+', text)
60
+ for seg in chinese_segments:
61
+ if len(seg) <= 4:
62
+ result.append(seg)
63
+ else:
64
+ for i in range(len(seg) - 1):
65
+ result.append(seg[i:i + 2])
66
+ for i in range(len(seg) - 3):
67
+ result.append(seg[i:i + 4])
68
+
69
+ filtered = []
70
+ for t in result:
71
+ if t in _STOP_WORDS:
72
+ continue
73
+ if len(t) <= 1 and not re.match(r'[\u4e00-\u9fff]', t):
74
+ continue
75
+ filtered.append(t)
76
+ return filtered
77
+
78
+
79
+ def _compute_tfidf_vectors(documents: List[List[str]]) -> List[Dict[str, float]]:
80
+ if not documents:
81
+ return []
82
+ doc_freq = Counter()
83
+ for doc in documents:
84
+ unique_terms = set(doc)
85
+ for term in unique_terms:
86
+ doc_freq[term] += 1
87
+ n_docs = len(documents)
88
+ idf = {term: math.log(n_docs / (freq + 1)) + 1.0 for term, freq in doc_freq.items()}
89
+ vectors = []
90
+ for doc in documents:
91
+ tf = Counter(doc)
92
+ total = len(doc) if doc else 1
93
+ vec = {term: (count / total) * idf.get(term, 1.0) for term, count in tf.items()}
94
+ vectors.append(vec)
95
+ return vectors
96
+
97
+
98
+ def _cosine_similarity(v1: Dict[str, float], v2: Dict[str, float]) -> float:
99
+ common = set(v1.keys()) & set(v2.keys())
100
+ if not common:
101
+ return 0.0
102
+ dot = sum(v1[k] * v2[k] for k in common)
103
+ norm1 = math.sqrt(sum(v ** 2 for v in v1.values()))
104
+ norm2 = math.sqrt(sum(v ** 2 for v in v2.values()))
105
+ if norm1 == 0 or norm2 == 0:
106
+ return 0.0
107
+ return dot / (norm1 * norm2)
108
+
109
+
110
+ class AnchorChecker:
111
+ """
112
+ Goal alignment checker that runs at key decision points.
113
+
114
+ Usage:
115
+ checker = AnchorChecker()
116
+ goal = checker.parse_goal("Design a secure auth system with JWT and RBAC")
117
+ result = checker.check(goal, "Implemented JWT token generation...", trigger=AnchorTrigger.STEP_COMPLETE)
118
+ if not result.aligned:
119
+ print(f"DRIFT: {result.recommendation}")
120
+ """
121
+
122
+ DRIFT_THRESHOLD = 0.3
123
+ COVERAGE_THRESHOLD = 0.6
124
+
125
+ def __init__(self, drift_threshold: float = 0.3, coverage_threshold: float = 0.6):
126
+ self._drift_threshold = drift_threshold
127
+ self._coverage_threshold = coverage_threshold
128
+ self._check_history: List[AnchorResult] = []
129
+
130
+ def parse_goal(self, task_description: str) -> StructuredGoal:
131
+ """Parse a free-form task description into a StructuredGoal."""
132
+ items = []
133
+ item_id = 0
134
+ seen = set()
135
+
136
+ for pattern, _ in _GOAL_PATTERNS:
137
+ for match in re.finditer(pattern, task_description, re.MULTILINE | re.IGNORECASE):
138
+ desc = match.group(1).strip()
139
+ if desc and len(desc) > 3 and desc not in seen:
140
+ seen.add(desc)
141
+ keywords = _tokenize(desc)
142
+ items.append(GoalItem(
143
+ item_id=f"G{item_id}",
144
+ description=desc,
145
+ keywords=keywords,
146
+ ))
147
+ item_id += 1
148
+
149
+ if not items:
150
+ keywords = _tokenize(task_description)
151
+ items.append(GoalItem(
152
+ item_id="G0",
153
+ description=task_description.strip(),
154
+ keywords=keywords,
155
+ ))
156
+
157
+ return StructuredGoal(
158
+ goal_id=f"goal_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
159
+ original_description=task_description,
160
+ items=items,
161
+ created_at=datetime.now().isoformat(),
162
+ )
163
+
164
+ def check(
165
+ self,
166
+ goal: StructuredGoal,
167
+ current_output: str,
168
+ trigger: AnchorTrigger = AnchorTrigger.STEP_COMPLETE,
169
+ context: Optional[Dict[str, Any]] = None,
170
+ ) -> AnchorResult:
171
+ """
172
+ Check whether current output aligns with the original goal.
173
+
174
+ Args:
175
+ goal: The structured goal to check against.
176
+ current_output: The current output text to evaluate.
177
+ trigger: What triggered this anchor check.
178
+ context: Optional additional context (e.g., step info).
179
+
180
+ Returns:
181
+ AnchorResult with alignment status, coverage, drift info.
182
+ """
183
+ output_tokens = _tokenize(current_output)
184
+ output_text_lower = current_output.lower()
185
+
186
+ all_docs = [item.keywords for item in goal.items] + [output_tokens]
187
+ all_vectors = _compute_tfidf_vectors(all_docs)
188
+ output_vector = all_vectors[-1]
189
+
190
+ uncovered = []
191
+ drifts = []
192
+
193
+ for i, item in enumerate(goal.items):
194
+ item_vector = all_vectors[i]
195
+ sim = _cosine_similarity(item_vector, output_vector)
196
+
197
+ keyword_hits = sum(1 for kw in item.keywords if kw in output_text_lower)
198
+ keyword_ratio = keyword_hits / max(len(item.keywords), 1)
199
+
200
+ coverage = max(sim, keyword_ratio * 0.8 + sim * 0.2)
201
+ coverage = min(coverage, 1.0)
202
+
203
+ item.coverage_score = coverage
204
+
205
+ if coverage >= 0.7:
206
+ item.status = GoalItemStatus.FULLY_COVERED
207
+ elif coverage >= 0.3:
208
+ item.status = GoalItemStatus.PARTIALLY_COVERED
209
+ uncovered.append(item.description)
210
+ else:
211
+ item.status = GoalItemStatus.PENDING
212
+ uncovered.append(item.description)
213
+
214
+ overall_coverage = goal.overall_coverage
215
+ drift_score = 1.0 - overall_coverage
216
+
217
+ output_terms = set(output_tokens)
218
+ goal_terms = set()
219
+ for item in goal.items:
220
+ goal_terms.update(item.keywords)
221
+
222
+ extra_terms = output_terms - goal_terms
223
+ if extra_terms and goal_terms:
224
+ extra_ratio = len(extra_terms) / max(len(output_terms), 1)
225
+ if extra_ratio > 0.5:
226
+ drift_score = min(drift_score + 0.1, 1.0)
227
+ drifts.append(DriftItem(
228
+ content=f"Output contains significant off-topic content ({extra_ratio:.0%} new terms)",
229
+ severity=DriftSeverity.MEDIUM,
230
+ reason=f"New terms not in goal: {', '.join(list(extra_terms)[:5])}",
231
+ ))
232
+
233
+ aligned = drift_score < self._drift_threshold and overall_coverage >= self._coverage_threshold
234
+
235
+ recommendation = ""
236
+ if not aligned:
237
+ if uncovered:
238
+ recommendation = f"Goal drift detected. Uncovered goals: {'; '.join(uncovered[:3])}"
239
+ if drifts:
240
+ recommendation += f" | Drifts: {'; '.join(d.reason for d in drifts[:2])}"
241
+
242
+ result = AnchorResult(
243
+ aligned=aligned,
244
+ trigger=trigger,
245
+ coverage=overall_coverage,
246
+ drift_score=drift_score,
247
+ drifts=drifts,
248
+ uncovered_goals=uncovered,
249
+ recommendation=recommendation,
250
+ checked_at=datetime.now().isoformat(),
251
+ )
252
+
253
+ self._check_history.append(result)
254
+
255
+ if not aligned:
256
+ logger.warning(
257
+ "Anchor check FAILED: coverage=%.0f%%, drift=%.0f%%, trigger=%s, rec=%s",
258
+ overall_coverage * 100, drift_score * 100, trigger.value, recommendation[:80],
259
+ )
260
+ else:
261
+ logger.debug(
262
+ "Anchor check PASSED: coverage=%.0f%%, drift=%.0f%%, trigger=%s",
263
+ overall_coverage * 100, drift_score * 100, trigger.value,
264
+ )
265
+
266
+ return result
267
+
268
+ @property
269
+ def check_history(self) -> List[AnchorResult]:
270
+ return list(self._check_history)
271
+
272
+ @property
273
+ def drift_count(self) -> int:
274
+ return sum(1 for r in self._check_history if not r.aligned)
275
+
276
+ @property
277
+ def total_checks(self) -> int:
278
+ return len(self._check_history)
279
+
280
+ def reset(self):
281
+ self._check_history.clear()