@jaguilar87/gaia-ops 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +137 -1
- package/README.en.md +29 -23
- package/README.md +24 -17
- package/agents/{claude-architect.md → gaia.md} +6 -6
- package/commands/{architect.md → gaia.md} +6 -6
- package/config/AGENTS.md +5 -5
- package/config/agent-catalog.md +14 -14
- package/config/context-contracts.md +4 -4
- package/config/embeddings_info.json +14 -0
- package/config/intent_embeddings.json +2002 -0
- package/config/intent_embeddings.npy +0 -0
- package/index.js +3 -1
- package/package.json +3 -2
- package/speckit/README.en.md +20 -69
- package/templates/CLAUDE.template.md +5 -13
- package/tests/README.en.md +224 -0
- package/tests/README.md +338 -0
- package/tests/fixtures/project-context.aws.json +53 -0
- package/tests/fixtures/project-context.gcp.json +53 -0
- package/tests/integration/RUN_TESTS.md +185 -0
- package/tests/integration/__init__.py +0 -0
- package/tests/integration/test_hooks_integration.py +473 -0
- package/tests/integration/test_hooks_workflow.py +397 -0
- package/tests/permissions-validation/MANUAL_VALIDATION.md +434 -0
- package/tests/permissions-validation/test_permissions_validation.py +527 -0
- package/tests/system/__init__.py +0 -0
- package/tests/system/permissions_helpers.py +318 -0
- package/tests/system/test_agent_definitions.py +166 -0
- package/tests/system/test_configuration_files.py +121 -0
- package/tests/system/test_directory_structure.py +231 -0
- package/tests/system/test_permissions_system.py +1006 -0
- package/tests/tools/__init__.py +0 -0
- package/tests/tools/test_agent_router.py +266 -0
- package/tests/tools/test_clarify_engine.py +413 -0
- package/tests/tools/test_context_provider.py +157 -0
- package/tests/validators/__init__.py +0 -0
- package/tests/validators/test_approval_gate.py +415 -0
- package/tests/validators/test_commit_validator.py +446 -0
- package/tools/context_provider.py +28 -7
- package/tools/generate_embeddings.py +3 -3
- package/tools/semantic_matcher.py +2 -2
|
File without changes
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test suite for agent_router.py
|
|
3
|
+
Tests semantic routing, intent classification, and capability validation
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Add tools directory to path
|
|
11
|
+
TOOLS_PATH = Path(__file__).resolve().parents[2] / "tools"
|
|
12
|
+
sys.path.insert(0, str(TOOLS_PATH))
|
|
13
|
+
|
|
14
|
+
from agent_router import IntentClassifier, CapabilityValidator, AgentRouter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestIntentClassifier:
    """Exercise IntentClassifier.classify against a handful of sample requests."""

    @pytest.fixture
    def classifier(self):
        """Provide a freshly constructed IntentClassifier."""
        return IntentClassifier()

    @staticmethod
    def _classify_expecting(classifier, text, expected_intent):
        """Classify *text* and assert both the intent and the 0.3 confidence floor.

        Returns the confidence so callers can make further assertions on it.
        """
        intent, confidence = classifier.classify(text)

        assert intent == expected_intent, f"Expected {expected_intent}, got {intent}"
        assert confidence > 0.3, f"Confidence {confidence} should be > 0.3"
        return confidence

    def test_infrastructure_creation_intent(self, classifier):
        """A cluster-creation request maps to infrastructure_creation."""
        confidence = self._classify_expecting(
            classifier, "create a new gke cluster", "infrastructure_creation"
        )
        assert 0 <= confidence <= 1, "Confidence should be normalized to 0-1"

    def test_infrastructure_diagnosis_intent(self, classifier):
        """A connectivity-diagnosis request maps to infrastructure_diagnosis."""
        self._classify_expecting(
            classifier, "diagnose cluster connectivity issues", "infrastructure_diagnosis"
        )

    def test_kubernetes_operations_intent(self, classifier):
        """A pod-status request maps to kubernetes_operations."""
        self._classify_expecting(
            classifier, "check pod status in tcm-non-prod namespace", "kubernetes_operations"
        )

    def test_application_development_intent(self, classifier):
        """A build-and-test request maps to application_development."""
        self._classify_expecting(
            classifier, "build docker image and run tests", "application_development"
        )

    def test_infrastructure_validation_intent(self, classifier):
        """A terraform-validation request maps to infrastructure_validation."""
        self._classify_expecting(
            classifier, "validate terraform configuration", "infrastructure_validation"
        )

    def test_ambiguous_request_low_confidence(self, classifier):
        """A vague request yields either no intent (confidence 0.0) or confidence below 0.3."""
        intent, confidence = classifier.classify("what should i do?")

        if intent is not None:
            assert confidence < 0.3, \
                "Ambiguous request should have low confidence"
        else:
            assert confidence == 0.0

    def test_classification_consistency(self, classifier):
        """Classifying the same request five times always yields one intent."""
        query = "create a new vpc network"

        observed = set()
        for _attempt in range(5):
            intent, _confidence = classifier.classify(query)
            observed.add(intent)

        assert len(observed) == 1, \
            "Classification should be deterministic and consistent"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class TestCapabilityValidator:
    """Exercise CapabilityValidator: validate(), find_fallback_agent() and the capability matrix."""

    @pytest.fixture
    def validator(self):
        """Provide a freshly constructed CapabilityValidator."""
        return CapabilityValidator()

    def test_terraform_can_create_infrastructure(self, validator):
        """validate() returns True for terraform-architect + infrastructure_creation."""
        verdict = validator.validate("terraform-architect", "infrastructure_creation")
        assert verdict is True, \
            "terraform-architect should handle infrastructure_creation"

    def test_terraform_cannot_do_kubernetes(self, validator):
        """validate() returns False for terraform-architect + kubernetes_operations."""
        verdict = validator.validate("terraform-architect", "kubernetes_operations")
        assert verdict is False, \
            "terraform-architect cannot handle kubernetes_operations"

    def test_unknown_agent_returns_false(self, validator):
        """validate() returns False for an agent name that is not registered."""
        verdict = validator.validate("unknown-agent", "infrastructure_creation")
        assert verdict is False, "Unknown agents should be invalid"

    def test_find_fallback_agent_for_diagnosis(self, validator):
        """The fallback for infrastructure_diagnosis exists and passes validate()."""
        intent = "infrastructure_diagnosis"
        candidate = validator.find_fallback_agent(intent)

        assert candidate is not None, "Should find a fallback agent"
        assert validator.validate(candidate, intent), \
            "Fallback agent should be capable"

    def test_fallback_excludes_agent(self, validator):
        """The agent passed as ``exclude`` is never returned as the fallback."""
        excluded = "terraform-architect"
        candidate = validator.find_fallback_agent(
            "infrastructure_validation", exclude=excluded
        )

        assert candidate != excluded, \
            "Fallback should not be the same as excluded agent"

    def test_capability_matrix_consistency(self, validator):
        """Every agent declares can_do/cannot_do and the two never overlap."""
        registered = list(validator.agent_capabilities.keys())
        assert len(registered) >= 4, "Should have at least 4 agents"

        for agent, capabilities in validator.agent_capabilities.items():
            assert "can_do" in capabilities, \
                f"{agent} should have 'can_do' list"
            assert "cannot_do" in capabilities, \
                f"{agent} should have 'cannot_do' list"

            # An intent listed as both allowed and forbidden is a contradiction.
            allowed = set(capabilities["can_do"])
            conflict = allowed.intersection(capabilities["cannot_do"])
            assert len(conflict) == 0, \
                f"{agent} has conflicting capabilities: {conflict}"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class TestAgentRouter:
    """Integration-level checks of AgentRouter and its _route_semantic method."""

    @pytest.fixture
    def router(self):
        """Provide a freshly constructed AgentRouter."""
        return AgentRouter()

    def test_router_has_semantic_routing(self, router):
        """The router exposes _route_semantic, intent_classifier and capability_validator."""
        required = (
            ("_route_semantic", "Router should have _route_semantic method"),
            ("intent_classifier", "Router should have intent_classifier"),
            ("capability_validator", "Router should have capability_validator"),
        )
        for attribute, message in required:
            assert hasattr(router, attribute), message

    def test_semantic_routing_returns_proper_format(self, router):
        """_route_semantic returns a (str, float, str) triple with confidence in [0, 1]."""
        agent, confidence, reason = router._route_semantic("create a cluster")

        assert isinstance(agent, str), "Agent should be a string"
        assert isinstance(confidence, float), "Confidence should be a float"
        assert isinstance(reason, str), "Reason should be a string"
        assert 0 <= confidence <= 1, "Confidence should be normalized 0-1"

    def test_semantic_routing_selects_valid_agent(self, router):
        """Each sample request is routed to one of the four known agents."""
        valid_agents = (
            "terraform-architect",
            "gitops-operator",
            "gcp-troubleshooter",
            "devops-developer",
        )
        sample_requests = (
            "create vpc",
            "diagnose connectivity",
            "check pod logs",
            "build docker image",
            "validate terraform",
        )

        for request in sample_requests:
            agent, _confidence, _reason = router._route_semantic(request)
            assert agent in valid_agents, \
                f"Got invalid agent {agent} for request: {request}"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class TestRoutingAccuracy:
    """Measure _route_semantic accuracy against a fixed golden set."""

    @pytest.fixture
    def router(self):
        """Provide a freshly constructed AgentRouter."""
        return AgentRouter()

    def test_semantic_routing_accuracy_golden_set(self, router):
        """At least 75% of the golden-set requests reach the expected agent.

        Misrouted requests are printed before the assertion so they show up
        in the captured output of a failing run.
        """
        terraform = "terraform-architect"
        troubleshooter = "gcp-troubleshooter"
        gitops = "gitops-operator"
        developer = "devops-developer"

        golden_set = [
            # infrastructure_creation
            ("create a new gke cluster", terraform),
            ("provision vpc for prod", terraform),
            ("deploy infrastructure changes", terraform),
            # infrastructure_diagnosis
            ("diagnose connectivity issues", troubleshooter),
            ("troubleshoot cluster crash", troubleshooter),
            # kubernetes_operations
            ("check pod status in default", gitops),
            ("verify flux reconciliation", gitops),
            # application_development
            ("build docker image", developer),
            ("run unit tests", developer),
            # infrastructure_validation
            ("validate terraform config", terraform),
        ]

        failures = []
        for request, expected_agent in golden_set:
            routed_agent, _confidence, _reason = router._route_semantic(request)
            if routed_agent == expected_agent:
                continue
            failures.append({
                "request": request,
                "expected": expected_agent,
                "got": routed_agent,
            })

        total = len(golden_set)
        correct = total - len(failures)
        accuracy = correct / total

        if failures:
            print("\nRouting failures:")
            for miss in failures:
                print(f" '{miss['request']}'")
                print(f" Expected: {miss['expected']}, Got: {miss['got']}")

        assert accuracy >= 0.75, \
            f"Semantic routing accuracy should be >= 75%, got {accuracy*100:.1f}%"

        print(f"\nSemantic Routing Accuracy: {accuracy*100:.1f}% ({correct}/{len(golden_set)})")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
if __name__ == "__main__":
    # Running the file directly hands it to pytest: verbose, short tracebacks.
    cli_args = [__file__, "-v", "--tb=short"]
    pytest.main(cli_args)
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for clarify_engine.py
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
# Add tools to path (gaia-ops/tools)
|
|
11
|
+
tools_path = os.path.join(os.path.dirname(__file__), '..', '..', 'tools')
|
|
12
|
+
sys.path.insert(0, tools_path)
|
|
13
|
+
|
|
14
|
+
from clarify_engine import ClarificationEngine, request_clarification, process_clarification
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.fixture
def mock_project_context():
    """Return an in-memory stand-in for project-context.json.

    The data holds three running services across two namespaces and two
    Redis terraform modules, so "the API", "the Redis" and "the cluster"
    each have more than one candidate.
    """

    def service(name, tech_stack, namespace, port):
        # All mock services share the same "running" status.
        return {
            "name": name,
            "tech_stack": tech_stack,
            "namespace": namespace,
            "port": port,
            "status": "running",
        }

    def redis_module(tier):
        # The two Redis modules differ only by tier.
        return {
            "resources": "Memorystore Redis",
            "status": "running",
            "tier": tier,
        }

    sections = {
        "application_services": [
            service("tcm-api", "NestJS", "tcm-non-prod", 3001),
            service("tcm-web", "React SPA", "tcm-non-prod", 3000),
            service("pg-api", "Spring Boot", "pg-non-prod", 8086),
        ],
        "cluster_details": {
            "primary_namespaces": ["tcm-non-prod", "pg-non-prod"],
        },
        "project_details": {
            "environment": "non-prod",
        },
        "terraform_infrastructure": {
            "modules": {
                "tcm-redis": redis_module("BASIC"),
                "pg-redis": redis_module("STANDARD_HA"),
            },
        },
    }
    return {"sections": sections}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pytest.fixture
def engine_with_mock_context(mock_project_context, tmp_path):
    """Build a ClarificationEngine that reads the mock context from a temp file.

    The mock context is serialised to ``project-context.json`` inside
    pytest's per-test ``tmp_path`` and the engine is given that path as a string.
    """
    context_file = tmp_path / "project-context.json"
    context_file.write_text(json.dumps(mock_project_context))

    return ClarificationEngine(project_context_path=str(context_file))
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_detect_service_ambiguity(engine_with_mock_context):
    """A prompt that only says "the API" is reported as a service ambiguity.

    Expects clarification to be requested, a score above 30, and the first
    ambiguity point to carry a pattern containing "service".
    """
    result = engine_with_mock_context.detect_ambiguity("Check the API")

    # Plain truthiness instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert result["ambiguity_score"] > 30
    assert len(result["ambiguity_points"]) > 0
    assert "service" in result["ambiguity_points"][0]["pattern"]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_detect_namespace_ambiguity(engine_with_mock_context):
    """A prompt that only says "cluster" is reported as a namespace ambiguity.

    Expects clarification to be requested and at least one ambiguity point
    whose pattern contains "namespace".
    """
    result = engine_with_mock_context.detect_ambiguity("Deploy to cluster")

    # Plain truthiness instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("namespace" in a["pattern"] for a in result["ambiguity_points"])
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def test_detect_environment_warning(engine_with_mock_context):
    """Asking for production against a non-prod context raises an environment ambiguity.

    Expects clarification to be requested, at least one ambiguity point whose
    pattern contains "environment", and an overall score of at least 70.
    """
    result = engine_with_mock_context.detect_ambiguity("Deploy to production")

    # Plain truthiness instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("environment" in a["pattern"] for a in result["ambiguity_points"])

    # The any() above only passes for a non-empty list, so the score can be
    # checked unconditionally. Environment ambiguity carries the highest
    # weight, hence the high floor.
    assert result["ambiguity_score"] >= 70
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_detect_resource_ambiguity(engine_with_mock_context):
    """A prompt that only says "the Redis" is reported as a resource ambiguity.

    The mock context declares two Redis modules (tcm-redis, pg-redis).
    Expects clarification to be requested and at least one ambiguity point
    whose pattern contains "resource".
    """
    result = engine_with_mock_context.detect_ambiguity("Check the Redis")

    # Plain truthiness instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("resource" in a["pattern"] for a in result["ambiguity_points"])
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_no_ambiguity_specific_prompt(engine_with_mock_context):
    """A prompt naming both service and namespace does not trigger clarification.

    Expects ``needs_clarification`` to be falsy and the score to stay at or
    below 30.
    """
    result = engine_with_mock_context.detect_ambiguity(
        "Check tcm-api service in tcm-non-prod namespace"
    )

    # Plain falsiness instead of `== False` (PEP 8).
    assert not result["needs_clarification"]
    assert result["ambiguity_score"] <= 30
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_generate_questions(engine_with_mock_context):
    """generate_questions turns one service ambiguity into one two-option question.

    Feeds a hand-built analysis with two candidate services and checks the
    result's top-level keys, the question text, the single-select flag, and
    that every option carries a non-empty label and a descriptive text.
    """
    ambiguity_analysis = {
        "needs_clarification": True,
        "ambiguity_score": 80,
        "ambiguity_points": [
            {
                "pattern": "service_ambiguity",
                "detected_keyword": "the api",
                "ambiguity_reason": "Multiple services",
                "available_options": ["tcm-api", "pg-api"],
                "services_metadata": {
                    "tcm-api": {
                        "tech_stack": "NestJS",
                        "namespace": "tcm-non-prod",
                        "port": 3001,
                        "status": "running",
                    },
                    "pg-api": {
                        "tech_stack": "Spring Boot",
                        "namespace": "pg-non-prod",
                        "port": 8086,
                        "status": "running",
                    },
                },
                "suggested_question": "Which API?",
                "weight": 80,
                "allow_multiple": False,
            }
        ],
        "suggested_questions": ["Which API?"],
    }

    result = engine_with_mock_context.generate_questions(ambiguity_analysis)

    assert "summary" in result
    assert "question_config" in result
    assert len(result["question_config"]["questions"]) == 1

    # Check question structure
    question = result["question_config"]["questions"][0]
    assert question["question"] == "Which API?"
    # Plain falsiness instead of `== False` (PEP 8).
    assert not question["multiSelect"]
    assert len(question["options"]) == 2  # one per candidate (tcm-api, pg-api)

    for option in question["options"]:
        assert "label" in option
        assert "description" in option
        # Labels may carry an emoji prefix; only non-emptiness is verified.
        assert len(option["label"]) > 0
        # The description must mention at least one of these markers.
        assert ("Namespace:" in option["description"] or "N/A" in option["description"]
                or "Tech" in option["description"])
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_generate_questions_with_catchall(engine_with_mock_context):
    """Four candidate services yield four options, the last being the catch-all.

    The final option is recognised by "Todos" or the globe emoji in its label.
    """
    candidates = ["tcm-api", "tcm-web", "pg-api", "pg-web"]  # 4+ options
    service_point = {
        "pattern": "service_ambiguity",
        "detected_keyword": "services",
        "ambiguity_reason": "Multiple services",
        "available_options": candidates,
        "services_metadata": {},
        "suggested_question": "Which services?",
        "weight": 80,
        "allow_multiple": False,
    }
    ambiguity_analysis = {
        "needs_clarification": True,
        "ambiguity_score": 80,
        "ambiguity_points": [service_point],
        "suggested_questions": ["Which services?"],
    }

    generated = engine_with_mock_context.generate_questions(ambiguity_analysis)
    options = generated["question_config"]["questions"][0]["options"]

    # 3 specific choices + 1 "All" choice
    assert len(options) == 4

    last_label = options[-1]["label"]
    assert "Todos" in last_label or "🌐" in last_label
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def test_enrich_prompt(engine_with_mock_context):
    """enrich_prompt keeps the original text and appends the chosen answer.

    The enriched prompt must contain the original prompt, the selected
    service name and a "[Clarification" marker.
    """
    service_ambiguity = {
        "pattern": "service_ambiguity",
        "suggested_question": "Which API?",
        "available_options": ["tcm-api", "pg-api"],
    }

    enriched = engine_with_mock_context.enrich_prompt(
        "Check the API",
        {"question_1": "📦 tcm-api"},
        {"ambiguities": [service_ambiguity]},
    )

    for fragment in ("Check the API", "tcm-api", "[Clarification"):
        assert fragment in enriched
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def test_clean_answer(engine_with_mock_context):
    """_clean_answer drops a leading emoji prefix and leaves plain text alone."""
    cases = (
        ("📦 tcm-api", "tcm-api"),
        ("🎯 tcm-non-prod", "tcm-non-prod"),
        ("plain text", "plain text"),
    )
    for raw_answer, expected in cases:
        assert engine_with_mock_context._clean_answer(raw_answer) == expected
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def test_validate_answer_exact_match(engine_with_mock_context):
    """An answer identical to one of the available options is returned as-is."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    validated = engine_with_mock_context._validate_answer("tcm-api", options)

    assert validated == "tcm-api"
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def test_validate_answer_fuzzy_match(engine_with_mock_context):
    """A near-miss answer is either resolved to the option or kept unchanged."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    # The user typed a space where the option has a hyphen.
    validated = engine_with_mock_context._validate_answer("tcm api", options)

    resolved_to_option = "tcm-api" in validated.lower()
    kept_verbatim = validated == "tcm api"
    assert resolved_to_option or kept_verbatim
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def test_validate_answer_all_keyword(engine_with_mock_context):
    """Answering "todos" yields a result containing "Todos" or an option name."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    validated = engine_with_mock_context._validate_answer("todos", options)

    mentions_all = "Todos" in validated
    assert mentions_all or "tcm-api" in validated
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def test_convenience_function_request_clarification():
    """request_clarification returns a dict with the expected keys.

    No context path is passed, so whatever project-context.json the function
    locates by default (if any) is used; the extra keys are only checked
    when clarification is actually requested.
    """
    outcome = request_clarification("Check the API")

    assert "needs_clarification" in outcome

    if not outcome["needs_clarification"]:
        return

    for expected_key in ("summary", "question_config", "engine_instance"):
        assert expected_key in outcome
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def test_command_context_filtering(engine_with_mock_context):
    """A command rule restricted to namespace patterns suppresses service ambiguity."""
    engine = engine_with_mock_context

    # Register a rule that enables only namespace_ambiguity for "test_command".
    engine.config["command_rules"]["test_command"] = {
        "enabled": True,
        "patterns": ["namespace_ambiguity"],
    }

    analysis = engine.detect_ambiguity(
        "Check the API",
        command_context={"command": "test_command"},
    )

    # Whatever remains must not include the filtered-out service pattern.
    if analysis["ambiguity_points"]:
        assert not any(
            point["pattern"] == "service_ambiguity"
            for point in analysis["ambiguity_points"]
        )
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def test_disabled_command(engine_with_mock_context):
    """A command whose rule has ``enabled: False`` never asks for clarification.

    Registers a disabled rule on the engine's config, then checks that an
    otherwise ambiguous prompt comes back with a falsy ``needs_clarification``.
    """
    engine_with_mock_context.config["command_rules"]["disabled_command"] = {
        "enabled": False
    }

    result = engine_with_mock_context.detect_ambiguity(
        "Check the API",
        command_context={"command": "disabled_command"}
    )

    # Plain falsiness instead of `== False` (PEP 8).
    assert not result["needs_clarification"]
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def test_multiple_ambiguities_sorted_by_weight(engine_with_mock_context):
    """When several ambiguities are found they are ordered by descending weight.

    Nothing is asserted if the prompt produces fewer than two ambiguity points.
    """
    analysis = engine_with_mock_context.detect_ambiguity(
        "Deploy the API to cluster in production"
    )
    points = analysis["ambiguity_points"]

    if len(points) <= 1:
        return

    weights = [point["weight"] for point in points]
    descending = sorted(weights, reverse=True)
    assert weights == descending
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def test_get_option_metadata_service(engine_with_mock_context):
    """Service metadata mentions tech stack, namespace, port and a status emoji."""
    tcm_api_details = {
        "tech_stack": "NestJS",
        "namespace": "tcm-non-prod",
        "port": 3001,
        "status": "running",
    }
    ambiguity = {
        "pattern": "service_ambiguity",
        "services_metadata": {"tcm-api": tcm_api_details},
    }

    metadata = engine_with_mock_context._get_option_metadata("tcm-api", ambiguity)

    # "✅" is the marker expected for a running service.
    for fragment in ("NestJS", "tcm-non-prod", "3001", "✅"):
        assert fragment in metadata
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def test_get_option_metadata_namespace(engine_with_mock_context):
    """Namespace metadata lists a hosted service and the service count.

    The count may be phrased in Spanish ("2 servicios") or English ("2 services").
    """
    namespace_details = {
        "services": ["tcm-api", "tcm-web"],
        "service_count": 2,
    }
    ambiguity = {
        "pattern": "namespace_ambiguity",
        "namespace_metadata": {"tcm-non-prod": namespace_details},
    }

    metadata = engine_with_mock_context._get_option_metadata("tcm-non-prod", ambiguity)

    assert "tcm-api" in metadata
    has_spanish_count = "2 servicios" in metadata
    assert has_spanish_count or "2 services" in metadata
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def test_spanish_keywords_detected(engine_with_mock_context):
    """A Spanish prompt ("Chequea el servicio") is reported as a service ambiguity.

    Expects clarification to be requested and at least one ambiguity point
    whose pattern contains "service".
    """
    result = engine_with_mock_context.detect_ambiguity("Chequea el servicio")

    # Plain truthiness instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("service" in a["pattern"] for a in result["ambiguity_points"])
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def test_log_clarification(engine_with_mock_context, tmp_path):
    """log_clarification writes a JSON entry with the prompts and the score.

    The engine's log path is redirected into ``tmp_path`` so the test never
    touches a real log file.
    """
    engine = engine_with_mock_context

    # Point the engine's log at a file inside the per-test temp directory.
    log_file = tmp_path / "clarifications.jsonl"
    engine.clarification_log_path = str(log_file)
    os.makedirs(tmp_path, exist_ok=True)

    analysis = {
        "ambiguity_score": 80,
        "ambiguity_points": [{"pattern": "service_ambiguity"}],
    }
    answers = {"question_1": "tcm-api"}

    engine.log_clarification("Original prompt", "Enriched prompt", analysis, answers)

    # The file must exist and parse as a single JSON document.
    assert log_file.exists()
    entry = json.loads(log_file.read_text())

    expected_fields = {
        "original_prompt": "Original prompt",
        "enriched_prompt": "Enriched prompt",
        "ambiguity_score": 80,
    }
    for field, expected_value in expected_fields.items():
        assert entry[field] == expected_value
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
if __name__ == "__main__":
    # Running the file directly hands it to pytest in verbose mode.
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)
|