devsquad 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsquad-3.6.0.dist-info/METADATA +944 -0
- devsquad-3.6.0.dist-info/RECORD +95 -0
- devsquad-3.6.0.dist-info/WHEEL +5 -0
- devsquad-3.6.0.dist-info/entry_points.txt +2 -0
- devsquad-3.6.0.dist-info/licenses/LICENSE +21 -0
- devsquad-3.6.0.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/ai_semantic_matcher.py +512 -0
- scripts/alert_manager.py +505 -0
- scripts/api/__init__.py +43 -0
- scripts/api/models.py +386 -0
- scripts/api/routes/__init__.py +20 -0
- scripts/api/routes/dispatch.py +348 -0
- scripts/api/routes/lifecycle.py +330 -0
- scripts/api/routes/metrics_gates.py +347 -0
- scripts/api_server.py +318 -0
- scripts/auth.py +451 -0
- scripts/cli/__init__.py +1 -0
- scripts/cli/cli_visual.py +642 -0
- scripts/cli.py +1094 -0
- scripts/collaboration/__init__.py +212 -0
- scripts/collaboration/_version.py +1 -0
- scripts/collaboration/agent_briefing.py +656 -0
- scripts/collaboration/ai_semantic_matcher.py +260 -0
- scripts/collaboration/anchor_checker.py +281 -0
- scripts/collaboration/anti_rationalization.py +470 -0
- scripts/collaboration/async_integration_example.py +255 -0
- scripts/collaboration/batch_scheduler.py +149 -0
- scripts/collaboration/checkpoint_manager.py +561 -0
- scripts/collaboration/ci_feedback_adapter.py +351 -0
- scripts/collaboration/code_map_generator.py +247 -0
- scripts/collaboration/concern_pack_loader.py +352 -0
- scripts/collaboration/confidence_score.py +496 -0
- scripts/collaboration/config_loader.py +188 -0
- scripts/collaboration/consensus.py +244 -0
- scripts/collaboration/context_compressor.py +533 -0
- scripts/collaboration/coordinator.py +668 -0
- scripts/collaboration/dispatcher.py +1636 -0
- scripts/collaboration/dual_layer_context.py +128 -0
- scripts/collaboration/enhanced_worker.py +539 -0
- scripts/collaboration/feature_usage_tracker.py +206 -0
- scripts/collaboration/five_axis_consensus.py +334 -0
- scripts/collaboration/input_validator.py +401 -0
- scripts/collaboration/integration_example.py +287 -0
- scripts/collaboration/intent_workflow_mapper.py +350 -0
- scripts/collaboration/language_parsers.py +269 -0
- scripts/collaboration/lifecycle_protocol.py +1446 -0
- scripts/collaboration/llm_backend.py +453 -0
- scripts/collaboration/llm_cache.py +448 -0
- scripts/collaboration/llm_cache_async.py +347 -0
- scripts/collaboration/llm_retry.py +387 -0
- scripts/collaboration/llm_retry_async.py +389 -0
- scripts/collaboration/mce_adapter.py +597 -0
- scripts/collaboration/memory_bridge.py +1607 -0
- scripts/collaboration/models.py +537 -0
- scripts/collaboration/null_providers.py +297 -0
- scripts/collaboration/operation_classifier.py +289 -0
- scripts/collaboration/output_slicer.py +225 -0
- scripts/collaboration/performance_monitor.py +462 -0
- scripts/collaboration/permission_guard.py +865 -0
- scripts/collaboration/prompt_assembler.py +756 -0
- scripts/collaboration/prompt_variant_generator.py +483 -0
- scripts/collaboration/protocols.py +267 -0
- scripts/collaboration/report_formatter.py +352 -0
- scripts/collaboration/retrospective.py +279 -0
- scripts/collaboration/role_matcher.py +92 -0
- scripts/collaboration/role_template_market.py +352 -0
- scripts/collaboration/rule_collector.py +678 -0
- scripts/collaboration/scratchpad.py +346 -0
- scripts/collaboration/skill_registry.py +151 -0
- scripts/collaboration/skillifier.py +878 -0
- scripts/collaboration/standardized_role_template.py +317 -0
- scripts/collaboration/task_completion_checker.py +237 -0
- scripts/collaboration/test_quality_guard.py +695 -0
- scripts/collaboration/unified_gate_engine.py +598 -0
- scripts/collaboration/usage_tracker.py +309 -0
- scripts/collaboration/user_friendly_error.py +176 -0
- scripts/collaboration/verification_gate.py +312 -0
- scripts/collaboration/warmup_manager.py +635 -0
- scripts/collaboration/worker.py +513 -0
- scripts/collaboration/workflow_engine.py +684 -0
- scripts/dashboard.py +1088 -0
- scripts/generate_benchmark_report.py +786 -0
- scripts/history_manager.py +604 -0
- scripts/mcp_server.py +289 -0
- skills/__init__.py +32 -0
- skills/dispatch/handler.py +52 -0
- skills/intent/handler.py +59 -0
- skills/registry.py +67 -0
- skills/retrospective/__init__.py +0 -0
- skills/retrospective/handler.py +125 -0
- skills/review/handler.py +356 -0
- skills/security/handler.py +454 -0
- skills/test/__init__.py +0 -0
- skills/test/handler.py +78 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import json
|
|
3
|
+
import hashlib
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Dict, List, Any, Optional
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from .models import ROLE_REGISTRY, ROLE_ALIASES
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class SemanticMatchResult:
    """One candidate role produced by a matching pass.

    Instances are created both from parsed LLM output and by the keyword
    fallback. Only the first four fields are required; the rest default to
    empty/zero values.
    """

    # Identifier of the matched role.
    role_id: str
    # Human-readable role name.
    role_name: str
    # Match confidence; results are sorted on this value, highest first.
    confidence: float
    # Short justification for the match.
    reasoning: str
    # Capabilities of the role considered relevant to the task.
    matched_capabilities: List[str] = field(default_factory=list)
    # Secondary relevance score reported alongside the confidence.
    relevance_score: float = 0.0
    # Longer free-text explanation.
    explanation: str = ""
    # Extra data attached by the producer of the result.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AISemanticMatcher:
|
|
27
|
+
"""
|
|
28
|
+
AI-powered semantic role matcher.
|
|
29
|
+
|
|
30
|
+
Uses LLM backend for deep semantic understanding of task requirements,
|
|
31
|
+
falling back to keyword matching when no LLM is available.
|
|
32
|
+
|
|
33
|
+
Workflow:
|
|
34
|
+
1. Analyze task requirements, extract key semantic info
|
|
35
|
+
2. Understand role capabilities and responsibilities
|
|
36
|
+
3. Match based on semantic similarity
|
|
37
|
+
4. Generate explainable matching results
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
MATCH_PROMPT_TEMPLATE = """You are an intelligent role matching expert. Analyze the following task and match the most suitable roles.
|
|
41
|
+
|
|
42
|
+
## Task
|
|
43
|
+
**Description**: {task_description}
|
|
44
|
+
**Required capabilities**: {required_capabilities}
|
|
45
|
+
|
|
46
|
+
## Available Roles
|
|
47
|
+
{role_descriptions}
|
|
48
|
+
|
|
49
|
+
## Requirements
|
|
50
|
+
1. Analyze the core needs and key capability requirements of the task
|
|
51
|
+
2. Evaluate each role's match with the task
|
|
52
|
+
3. Consider the role's professional capabilities and experience
|
|
53
|
+
4. Provide matching reasons and confidence scores
|
|
54
|
+
|
|
55
|
+
Return the matching results in JSON format:
|
|
56
|
+
{{
|
|
57
|
+
"matches": [
|
|
58
|
+
{{
|
|
59
|
+
"role_id": "role ID",
|
|
60
|
+
"role_name": "role name",
|
|
61
|
+
"confidence": 0.0-1.0,
|
|
62
|
+
"reasoning": "matching reasoning",
|
|
63
|
+
"matched_capabilities": ["matched capability list"],
|
|
64
|
+
"relevance_score": 0.0-1.0,
|
|
65
|
+
"explanation": "detailed explanation"
|
|
66
|
+
}}
|
|
67
|
+
],
|
|
68
|
+
"best_match": "best match role ID",
|
|
69
|
+
"analysis": "overall analysis"
|
|
70
|
+
}}"""
|
|
71
|
+
|
|
72
|
+
def __init__(self, llm_backend=None):
|
|
73
|
+
self.llm_backend = llm_backend
|
|
74
|
+
self.match_cache: Dict[str, SemanticMatchResult] = {}
|
|
75
|
+
self.match_history: List[Dict[str, Any]] = []
|
|
76
|
+
|
|
77
|
+
def match(
    self,
    task_description: str,
    required_capabilities: Optional[List[str]] = None,
    preferred_skills: Optional[List[str]] = None,
    use_cache: bool = True,
) -> List[SemanticMatchResult]:
    """
    Match a task to roles, using the LLM backend when one is configured.

    The keyword matcher is used when there is no backend, when the backend
    call raises, or when the backend's answer yields no usable matches.

    Args:
        task_description: Task description text.
        required_capabilities: Required capability list; only interpolated
            into the LLM prompt.
        preferred_skills: Accepted for interface compatibility; not used
            by this method.
        use_cache: Whether to read from and write to the result cache.

    Returns:
        List[SemanticMatchResult]: Matched results sorted by confidence.
        A cache hit returns a single-element list, because only the top
        result of an LLM match is cached.
    """
    cache_key = self._generate_cache_key(task_description)

    if use_cache and cache_key in self.match_cache:
        logger.info("Using cached match result")
        return [self.match_cache[cache_key]]

    roles = self._build_role_list()
    results: List[SemanticMatchResult] = []

    if self.llm_backend:
        try:
            # The role descriptions are only needed for the prompt, so
            # they are built on the LLM path only.
            prompt = self.MATCH_PROMPT_TEMPLATE.format(
                task_description=task_description,
                required_capabilities=required_capabilities or [],
                role_descriptions=self._build_role_descriptions(roles),
            )
            ai_response = self.llm_backend.generate(prompt)
            results = self._parse_ai_response(ai_response, roles)
        except Exception as e:
            # Deliberate best-effort boundary: any backend failure
            # degrades to keyword matching instead of propagating.
            logger.warning("AI matching failed, falling back to keyword: %s", e)
            results = []
        else:
            if results:
                if use_cache:
                    self.match_cache[cache_key] = results[0]
            else:
                # An unparseable or empty answer comes back as [] rather
                # than as an exception; treat it like a failure too.
                logger.warning(
                    "AI matching returned no usable matches, falling back to keyword"
                )

    if not results:
        results = self._keyword_match(task_description, roles)

    self._record_match(task_description, results)
    return results
|
|
126
|
+
|
|
127
|
+
def _build_role_list(self) -> List[Dict[str, Any]]:
    """Return every registry role whose status is ``"core"`` as a plain dict.

    Each dict carries ``id``, ``name``, ``description``, ``capabilities``,
    ``skills`` (the first three keywords) and ``keywords``; the capability
    and keyword entries both reference the role definition's keyword list.
    """
    return [
        {
            'id': role_id,
            'name': definition.name,
            'description': definition.description,
            'capabilities': definition.keywords,
            'skills': definition.keywords[:3],
            'keywords': definition.keywords,
        }
        for role_id, definition in ROLE_REGISTRY.items()
        if definition.status == "core"
    ]
|
|
140
|
+
|
|
141
|
+
def _build_role_descriptions(self, roles: List[Dict[str, Any]]) -> str:
|
|
142
|
+
descriptions = []
|
|
143
|
+
for i, role in enumerate(roles, 1):
|
|
144
|
+
desc = f"{i}. **{role.get('name', 'Unknown')}** ({role.get('id', 'unknown')})\n"
|
|
145
|
+
desc += f" - Responsibilities: {role.get('description', '')}\n"
|
|
146
|
+
desc += f" - Capabilities: {', '.join(role.get('capabilities', []))}\n"
|
|
147
|
+
descriptions.append(desc)
|
|
148
|
+
return "\n".join(descriptions)
|
|
149
|
+
|
|
150
|
+
# Additional English keywords per role id. The keyword matcher looks a role
# up here by its id and counts hits on these terms in addition to hits on
# the role's own keyword list.
EN_KEYWORD_MAP = {
    "architect": ["architecture", "design", "system", "microservice", "tech stack", "api design", "performance", "module", "interface"],
    "product-manager": ["requirement", "prd", "user story", "product", "feature", "acceptance", "competitive", "experience"],
    "tester": ["test", "quality", "qa", "automated", "performance test", "bug", "defect", "verification", "edge case"],
    "solo-coder": ["implement", "develop", "code", "fix", "optimize", "refactor", "review", "best practice"],
    "ui-designer": ["ui", "interface", "frontend", "visual", "interaction", "prototype", "ux", "accessibility"],
    "devops": ["ci/cd", "deploy", "monitor", "infrastructure", "docker", "kubernetes", "container", "devops"],
    "security": ["security", "vulnerability", "audit", "threat", "encryption", "auth", "owasp", "compliance"],
}
|
|
159
|
+
|
|
160
|
+
def _keyword_match(self, task_description: str, roles: List[Dict[str, Any]]) -> List[SemanticMatchResult]:
    """Score roles by counting keyword occurrences in the task text.

    A role's own ``keywords`` and its ``EN_KEYWORD_MAP`` entry are both
    checked with case-insensitive substring containment. Each hit adds 0.1
    to a base confidence of 0.5, capped at 0.95.

    Args:
        task_description: Task description text.
        roles: Role dicts with ``id``, ``name``, ``keywords`` and
            ``capabilities`` keys.

    Returns:
        Matching roles sorted by confidence (highest first). When nothing
        matches, a single default ``solo-coder`` result is returned, so
        the list is never empty.
    """
    # NOTE(review): containment is substring-based, so a short keyword such
    # as "ui" also hits inside longer words (e.g. "build").
    task_lower = task_description.lower()
    results = []

    for role in roles:
        role_id = role.get('id', '')
        keywords = role.get('keywords') or []
        en_keywords = self.EN_KEYWORD_MAP.get(role_id, [])

        # Empty keywords are skipped: "" is contained in every string and
        # would otherwise count as a hit for every task.
        cn_match = sum(1 for kw in keywords if kw and kw.lower() in task_lower)
        en_match = sum(1 for kw in en_keywords if kw and kw.lower() in task_lower)
        match_count = cn_match + en_match

        if match_count == 0:
            continue

        results.append(SemanticMatchResult(
            role_id=role_id,
            role_name=role.get('name', ''),
            confidence=min(0.5 + match_count * 0.1, 0.95),
            reasoning=f"Keyword match: {match_count} keywords matched",
            matched_capabilities=role.get('capabilities', [])[:3],
            # Capped so the score stays within 0.0-1.0 past ten hits.
            relevance_score=min(match_count / 10.0, 1.0),
            explanation="Task requirements are highly related to this role's responsibilities",
        ))

    results.sort(key=lambda r: r.confidence, reverse=True)

    if not results:
        results.append(SemanticMatchResult(
            role_id="solo-coder",
            role_name="Solo Developer",
            confidence=0.5,
            reasoning="Default role: no specific keyword match",
            matched_capabilities=["general development"],
            relevance_score=0.3,
            explanation="No specific role matched, using default developer role",
        ))

    return results
|
|
200
|
+
|
|
201
|
+
def _parse_ai_response(self, response: str, roles: List[Dict[str, Any]]) -> List[SemanticMatchResult]:
    """Convert the LLM answer into a confidence-sorted result list.

    Args:
        response: Either a JSON string or an already-decoded mapping with
            a ``matches`` list plus optional ``best_match`` and
            ``analysis`` keys. A string that is not pure JSON (for
            example wrapped in a markdown code fence) is retried on the
            span between its first ``{`` and last ``}``.
        roles: Unused here; kept for interface compatibility.

    Returns:
        Parsed results, highest confidence first. Entries that cannot be
        converted are skipped individually. An unusable payload yields an
        empty list; this method never raises for bad input.
    """
    try:
        if isinstance(response, str):
            text = response.strip()
            try:
                data = json.loads(text)
            except ValueError:
                # Models often wrap the JSON object in prose or a fenced
                # block; fall back to the outermost brace-delimited span.
                start, end = text.find('{'), text.rfind('}')
                if start == -1 or end <= start:
                    raise
                data = json.loads(text[start:end + 1])
        else:
            data = response

        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")

        matches = data.get('matches') or []
        if not isinstance(matches, list):
            raise TypeError("'matches' is not a list")
    except (ValueError, TypeError) as e:
        logger.warning("Failed to parse AI response: %s", e)
        return []

    def _unit(value) -> float:
        # The prompt asks for scores in 0.0-1.0; clamp anything outside.
        return min(max(float(value), 0.0), 1.0)

    results = []
    for match_data in matches:
        try:
            capabilities = match_data.get('matched_capabilities') or []
            if isinstance(capabilities, str):
                capabilities = [capabilities]
            results.append(SemanticMatchResult(
                role_id=match_data.get('role_id', ''),
                role_name=match_data.get('role_name', ''),
                confidence=_unit(match_data.get('confidence', 0.0)),
                reasoning=match_data.get('reasoning', ''),
                matched_capabilities=list(capabilities),
                relevance_score=_unit(match_data.get('relevance_score', 0.0)),
                explanation=match_data.get('explanation', ''),
                metadata={
                    'best_match': data.get('best_match'),
                    'analysis': data.get('analysis'),
                },
            ))
        except (AttributeError, TypeError, ValueError) as e:
            # One bad entry must not discard the valid ones around it.
            logger.warning("Skipping malformed match entry: %s", e)

    results.sort(key=lambda r: r.confidence, reverse=True)
    return results
|
|
230
|
+
|
|
231
|
+
def _generate_cache_key(self, task_description: str) -> str:
    """Return the hex MD5 digest of the task text and the registry size.

    The hashed text is ``"<task_description>|<number of registry roles>"``
    encoded as UTF-8, so the key changes when the role count changes.
    """
    hasher = hashlib.md5()
    hasher.update(f"{task_description}|{len(ROLE_REGISTRY)}".encode('utf-8'))
    return hasher.hexdigest()
|
|
234
|
+
|
|
235
|
+
def _record_match(self, task_description: str, results: List[SemanticMatchResult]):
    """Append a summary of one matching pass to ``match_history``.

    The record stores the first 100 characters of the task text, the
    current local time in ISO format, the number of results, and the id
    and confidence of the first result (``None`` / ``0.0`` when empty).
    """
    if results:
        leader = results[0]
        top_role, top_confidence = leader.role_id, leader.confidence
    else:
        top_role, top_confidence = None, 0.0

    self.match_history.append({
        'task_description': task_description[:100],
        'timestamp': datetime.now().isoformat(),
        'results_count': len(results),
        'top_role': top_role,
        'top_confidence': top_confidence,
    })
|
|
244
|
+
|
|
245
|
+
def get_match_history(self, limit: int = 10) -> List[Dict]:
    """Return the most recent match records, oldest first.

    Args:
        limit: Maximum number of records to return. Zero or a negative
            value returns an empty list.

    Returns:
        A new list holding at most ``limit`` records from the end of the
        history.
    """
    # A bare [-limit:] slice is wrong at the edges: [-0:] is the whole
    # list, and a negative limit would drop the oldest entries instead.
    if limit <= 0:
        return []
    return self.match_history[-limit:]
|
|
247
|
+
|
|
248
|
+
def clear_cache(self):
    """Empty the match cache in place; the match history is not touched."""
    cache = self.match_cache
    cache.clear()
|
|
250
|
+
|
|
251
|
+
def explain_match(self, result: SemanticMatchResult) -> str:
    """Format a match result as a multi-line, human-readable report.

    Confidence and relevance are shown as percentages with one decimal;
    an empty capability list is rendered as ``None``.
    """
    if result.matched_capabilities:
        capability_text = ', '.join(result.matched_capabilities)
    else:
        capability_text = 'None'

    sections = [
        f"Match: {result.role_name} ({result.role_id})",
        f"Confidence: {result.confidence:.1%}",
        f"Relevance: {result.relevance_score:.1%}",
        "",
        "Reasoning:",
        f"{result.reasoning}",
        "",
        "Explanation:",
        f"{result.explanation}",
        "",
        f"Matched capabilities: {capability_text}",
    ]
    return "\n".join(sections)
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
AnchorChecker - V3.6.0 Goal Alignment Engine
|
|
5
|
+
|
|
6
|
+
Checks whether current output remains aligned with the original task goal
|
|
7
|
+
at key decision points (anchor points), preventing goal drift during
|
|
8
|
+
long-running Agent tasks.
|
|
9
|
+
|
|
10
|
+
Design Principles:
|
|
11
|
+
- No LLM calls: Pure algorithmic matching (keyword + TF-IDF)
|
|
12
|
+
- Trigger only at key nodes: step complete, phase gate, conflict, direction change
|
|
13
|
+
- Lightweight: <50ms per check
|
|
14
|
+
- Non-blocking: Warnings written to Scratchpad, execution continues
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
import math
|
|
19
|
+
import logging
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from collections import Counter
|
|
22
|
+
from typing import Dict, List, Optional, Any
|
|
23
|
+
|
|
24
|
+
from .models import (
|
|
25
|
+
StructuredGoal, GoalItem, GoalItemStatus,
|
|
26
|
+
AnchorResult, AnchorTrigger, DriftItem, DriftSeverity,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# (regex, weight) pairs used to pull goal statements out of free-form text.
# Capture group 1 of each regex is the goal description. The first five
# patterns key on Chinese/English trigger words; the last two capture
# bulleted and numbered list lines.
# NOTE(review): goal parsing in this module discards the numeric weight —
# confirm whether anything else consumes it.
_GOAL_PATTERNS = [
    (r"(?:需要|必须|shall|must|should|需要实现|需要完成|要求)[::\s]*(.+?)(?:[。\n;;]|$)", 0.9),
    (r"(?:目标|objective|goal|target)[::\s]*(.+?)(?:[。\n;;]|$)", 0.85),
    (r"(?:实现|implement|build|create|develop|design)[::\s]*(.+?)(?:[。\n;;]|$)", 0.8),
    (r"(?:确保|ensure|guarantee|verify)[::\s]*(.+?)(?:[。\n;;]|$)", 0.75),
    (r"(?:支持|support|provide|enable)[::\s]*(.+?)(?:[。\n;;]|$)", 0.7),
    (r"^\s*[-•*]\s*(.+?)$", 0.6),
    (r"^\s*\d+[.、)\]]\s*(.+?)$", 0.6),
]
|
|
40
|
+
|
|
41
|
+
# Chinese and English function words that the tokenizer drops. English
# entries are lower-case because English tokens are lower-cased before the
# membership test.
_STOP_WORDS = frozenset([
    "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都", "一", "一个",
    "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好",
    "自己", "这", "他", "她", "它", "们", "那", "些", "什么", "怎么", "如何",
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "to", "of", "in",
    "for", "on", "with", "at", "by", "from", "as", "into", "through",
    "and", "or", "but", "if", "then", "else", "when", "up", "out",
    "this", "that", "these", "those", "it", "its", "i", "me", "my",
])
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _tokenize(text: str) -> List[str]:
    """Split mixed English/Chinese text into tokens, minus stop words.

    English tokens are lower-cased runs of two or more letters or
    underscores. A run of CJK characters of length four or less is kept
    whole; a longer run is expanded into all of its 2-character windows
    followed by all of its 4-character windows. English tokens come first
    in the result, then the Chinese ones in order of appearance.
    """
    candidates = re.findall(r'[a-zA-Z_]{2,}', text.lower())

    for run in re.findall(r'[\u4e00-\u9fff]+', text):
        size = len(run)
        if size <= 4:
            candidates.append(run)
            continue
        candidates.extend(run[pos:pos + 2] for pos in range(size - 1))
        candidates.extend(run[pos:pos + 4] for pos in range(size - 3))

    def _keep(token: str) -> bool:
        # Drop stop words, and any single character that is not CJK.
        if token in _STOP_WORDS:
            return False
        return len(token) > 1 or re.match(r'[\u4e00-\u9fff]', token) is not None

    return [token for token in candidates if _keep(token)]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _compute_tfidf_vectors(documents: List[List[str]]) -> List[Dict[str, float]]:
|
|
80
|
+
if not documents:
|
|
81
|
+
return []
|
|
82
|
+
doc_freq = Counter()
|
|
83
|
+
for doc in documents:
|
|
84
|
+
unique_terms = set(doc)
|
|
85
|
+
for term in unique_terms:
|
|
86
|
+
doc_freq[term] += 1
|
|
87
|
+
n_docs = len(documents)
|
|
88
|
+
idf = {term: math.log(n_docs / (freq + 1)) + 1.0 for term, freq in doc_freq.items()}
|
|
89
|
+
vectors = []
|
|
90
|
+
for doc in documents:
|
|
91
|
+
tf = Counter(doc)
|
|
92
|
+
total = len(doc) if doc else 1
|
|
93
|
+
vec = {term: (count / total) * idf.get(term, 1.0) for term, count in tf.items()}
|
|
94
|
+
vectors.append(vec)
|
|
95
|
+
return vectors
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _cosine_similarity(v1: Dict[str, float], v2: Dict[str, float]) -> float:
|
|
99
|
+
common = set(v1.keys()) & set(v2.keys())
|
|
100
|
+
if not common:
|
|
101
|
+
return 0.0
|
|
102
|
+
dot = sum(v1[k] * v2[k] for k in common)
|
|
103
|
+
norm1 = math.sqrt(sum(v ** 2 for v in v1.values()))
|
|
104
|
+
norm2 = math.sqrt(sum(v ** 2 for v in v2.values()))
|
|
105
|
+
if norm1 == 0 or norm2 == 0:
|
|
106
|
+
return 0.0
|
|
107
|
+
return dot / (norm1 * norm2)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class AnchorChecker:
|
|
111
|
+
"""
|
|
112
|
+
Goal alignment checker that runs at key decision points.
|
|
113
|
+
|
|
114
|
+
Usage:
|
|
115
|
+
checker = AnchorChecker()
|
|
116
|
+
goal = checker.parse_goal("Design a secure auth system with JWT and RBAC")
|
|
117
|
+
result = checker.check(goal, "Implemented JWT token generation...", trigger=AnchorTrigger.STEP_COMPLETE)
|
|
118
|
+
if not result.aligned:
|
|
119
|
+
print(f"DRIFT: {result.recommendation}")
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
# Class-level threshold constants. The constructor's default arguments
# carry the same values as literals and do not read these attributes.
DRIFT_THRESHOLD = 0.3
COVERAGE_THRESHOLD = 0.6
|
|
124
|
+
|
|
125
|
+
def __init__(self, drift_threshold: float = 0.3, coverage_threshold: float = 0.6):
|
|
126
|
+
self._drift_threshold = drift_threshold
|
|
127
|
+
self._coverage_threshold = coverage_threshold
|
|
128
|
+
self._check_history: List[AnchorResult] = []
|
|
129
|
+
|
|
130
|
+
def parse_goal(self, task_description: str) -> StructuredGoal:
    """Turn a free-form task description into a StructuredGoal.

    Every goal pattern is applied in order, multi-line and
    case-insensitive. Each distinct capture longer than three characters
    becomes a goal item with an id of ``G0``, ``G1``, ... and tokenised
    keywords. When no pattern yields an item, the whole stripped
    description becomes the single item ``G0``.
    """
    flags = re.MULTILINE | re.IGNORECASE
    collected = []
    known_descriptions = set()

    for regex, _weight in _GOAL_PATTERNS:
        for hit in re.finditer(regex, task_description, flags):
            text = hit.group(1).strip()
            if not text or len(text) <= 3 or text in known_descriptions:
                continue
            known_descriptions.add(text)
            tokens = _tokenize(text)
            # Ids are sequential, so the current length is the next index.
            collected.append(GoalItem(
                item_id=f"G{len(collected)}",
                description=text,
                keywords=tokens,
            ))

    if not collected:
        tokens = _tokenize(task_description)
        collected.append(GoalItem(
            item_id="G0",
            description=task_description.strip(),
            keywords=tokens,
        ))

    goal_id = f"goal_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    return StructuredGoal(
        goal_id=goal_id,
        original_description=task_description,
        items=collected,
        created_at=datetime.now().isoformat(),
    )
|
|
163
|
+
|
|
164
|
+
def check(
    self,
    goal: StructuredGoal,
    current_output: str,
    trigger: AnchorTrigger = AnchorTrigger.STEP_COMPLETE,
    context: Optional[Dict[str, Any]] = None,
) -> AnchorResult:
    """
    Check whether current output aligns with the original goal.

    Each goal item is scored against the output by TF-IDF cosine
    similarity and by the share of its keywords found in the lower-cased
    output. The item's ``coverage_score`` and ``status`` are updated in
    place, and the result is appended to the check history.

    Args:
        goal: The structured goal to check against; its items are mutated.
        current_output: The current output text to evaluate.
        trigger: What triggered this anchor check.
        context: Optional additional context (e.g., step info); not used
            by this method.

    Returns:
        AnchorResult with alignment status, coverage, drift info.
    """
    output_tokens = _tokenize(current_output)
    output_text_lower = current_output.lower()

    # Goal items and the output share one TF-IDF corpus; the output is
    # the last document, so its vector is the last one.
    all_docs = [item.keywords for item in goal.items] + [output_tokens]
    all_vectors = _compute_tfidf_vectors(all_docs)
    output_vector = all_vectors[-1]

    uncovered = []
    drifts = []

    for item, item_vector in zip(goal.items, all_vectors):
        sim = _cosine_similarity(item_vector, output_vector)

        keyword_hits = sum(1 for kw in item.keywords if kw in output_text_lower)
        keyword_ratio = keyword_hits / max(len(item.keywords), 1)

        # Take the better of pure similarity and a keyword-weighted blend.
        coverage = max(sim, keyword_ratio * 0.8 + sim * 0.2)
        coverage = min(coverage, 1.0)

        item.coverage_score = coverage

        if coverage >= 0.7:
            item.status = GoalItemStatus.FULLY_COVERED
        elif coverage >= 0.3:
            # Partially covered items are still reported as uncovered.
            item.status = GoalItemStatus.PARTIALLY_COVERED
            uncovered.append(item.description)
        else:
            item.status = GoalItemStatus.PENDING
            uncovered.append(item.description)

    overall_coverage = goal.overall_coverage
    drift_score = 1.0 - overall_coverage

    output_terms = set(output_tokens)
    goal_terms = set()
    for item in goal.items:
        goal_terms.update(item.keywords)

    extra_terms = output_terms - goal_terms
    if extra_terms and goal_terms:
        extra_ratio = len(extra_terms) / max(len(output_terms), 1)
        if extra_ratio > 0.5:
            drift_score = min(drift_score + 0.1, 1.0)
            # Sorted so the reported sample is the same on every run; set
            # iteration order varies with string hash randomisation.
            sample = ', '.join(sorted(extra_terms)[:5])
            drifts.append(DriftItem(
                content=f"Output contains significant off-topic content ({extra_ratio:.0%} new terms)",
                severity=DriftSeverity.MEDIUM,
                reason=f"New terms not in goal: {sample}",
            ))

    aligned = drift_score < self._drift_threshold and overall_coverage >= self._coverage_threshold

    recommendation = ""
    if not aligned:
        # Build the message from parts so it never starts with a dangling
        # separator and is never empty for a failed check.
        parts = []
        if uncovered:
            parts.append(f"Goal drift detected. Uncovered goals: {'; '.join(uncovered[:3])}")
        if drifts:
            parts.append(f"Drifts: {'; '.join(d.reason for d in drifts[:2])}")
        if not parts:
            parts.append(
                f"Goal drift detected. Coverage {overall_coverage:.0%}, drift {drift_score:.0%}"
            )
        recommendation = " | ".join(parts)

    result = AnchorResult(
        aligned=aligned,
        trigger=trigger,
        coverage=overall_coverage,
        drift_score=drift_score,
        drifts=drifts,
        uncovered_goals=uncovered,
        recommendation=recommendation,
        checked_at=datetime.now().isoformat(),
    )

    self._check_history.append(result)

    if not aligned:
        logger.warning(
            "Anchor check FAILED: coverage=%.0f%%, drift=%.0f%%, trigger=%s, rec=%s",
            overall_coverage * 100, drift_score * 100, trigger.value, recommendation[:80],
        )
    else:
        logger.debug(
            "Anchor check PASSED: coverage=%.0f%%, drift=%.0f%%, trigger=%s",
            overall_coverage * 100, drift_score * 100, trigger.value,
        )

    return result
|
|
267
|
+
|
|
268
|
+
@property
def check_history(self) -> List[AnchorResult]:
    """Return a shallow copy of the recorded check results."""
    return self._check_history[:]
|
|
271
|
+
|
|
272
|
+
@property
def drift_count(self) -> int:
    """Number of recorded checks whose result was not aligned."""
    misaligned = [entry for entry in self._check_history if not entry.aligned]
    return len(misaligned)
|
|
275
|
+
|
|
276
|
+
@property
def total_checks(self) -> int:
    """Number of checks recorded in the history."""
    recorded = self._check_history
    return len(recorded)
|
|
279
|
+
|
|
280
|
+
def reset(self):
    """Discard all recorded check results, emptying the history in place."""
    del self._check_history[:]
|