@jaguilar87/gaia-ops 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +137 -1
  2. package/README.en.md +29 -23
  3. package/README.md +24 -17
  4. package/agents/{claude-architect.md → gaia.md} +6 -6
  5. package/commands/{architect.md → gaia.md} +6 -6
  6. package/config/AGENTS.md +5 -5
  7. package/config/agent-catalog.md +14 -14
  8. package/config/context-contracts.md +4 -4
  9. package/config/embeddings_info.json +14 -0
  10. package/config/intent_embeddings.json +2002 -0
  11. package/config/intent_embeddings.npy +0 -0
  12. package/index.js +3 -1
  13. package/package.json +3 -2
  14. package/speckit/README.en.md +20 -69
  15. package/templates/CLAUDE.template.md +5 -13
  16. package/tests/README.en.md +224 -0
  17. package/tests/README.md +338 -0
  18. package/tests/fixtures/project-context.aws.json +53 -0
  19. package/tests/fixtures/project-context.gcp.json +53 -0
  20. package/tests/integration/RUN_TESTS.md +185 -0
  21. package/tests/integration/__init__.py +0 -0
  22. package/tests/integration/test_hooks_integration.py +473 -0
  23. package/tests/integration/test_hooks_workflow.py +397 -0
  24. package/tests/permissions-validation/MANUAL_VALIDATION.md +434 -0
  25. package/tests/permissions-validation/test_permissions_validation.py +527 -0
  26. package/tests/system/__init__.py +0 -0
  27. package/tests/system/permissions_helpers.py +318 -0
  28. package/tests/system/test_agent_definitions.py +166 -0
  29. package/tests/system/test_configuration_files.py +121 -0
  30. package/tests/system/test_directory_structure.py +231 -0
  31. package/tests/system/test_permissions_system.py +1006 -0
  32. package/tests/tools/__init__.py +0 -0
  33. package/tests/tools/test_agent_router.py +266 -0
  34. package/tests/tools/test_clarify_engine.py +413 -0
  35. package/tests/tools/test_context_provider.py +157 -0
  36. package/tests/validators/__init__.py +0 -0
  37. package/tests/validators/test_approval_gate.py +415 -0
  38. package/tests/validators/test_commit_validator.py +446 -0
  39. package/tools/context_provider.py +28 -7
  40. package/tools/generate_embeddings.py +3 -3
  41. package/tools/semantic_matcher.py +2 -2
File without changes
@@ -0,0 +1,266 @@
1
+ """
2
+ Test suite for agent_router.py
3
+ Tests semantic routing, intent classification, and capability validation
4
+ """
5
+
6
+ import pytest
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ # Add tools directory to path
11
+ TOOLS_PATH = Path(__file__).resolve().parents[2] / "tools"
12
+ sys.path.insert(0, str(TOOLS_PATH))
13
+
14
+ from agent_router import IntentClassifier, CapabilityValidator, AgentRouter
15
+
16
+
17
class TestIntentClassifier:
    """Checks that IntentClassifier maps free-text requests to intent labels.

    Each test calls ``classify`` with one request and unpacks the returned
    ``(intent, confidence)`` pair.
    """

    @pytest.fixture
    def classifier(self):
        """Build a fresh IntentClassifier for each test."""
        return IntentClassifier()

    def test_infrastructure_creation_intent(self, classifier):
        """A cluster-creation request maps to infrastructure_creation."""
        label, score = classifier.classify("create a new gke cluster")

        assert label == "infrastructure_creation", (
            f"Expected infrastructure_creation, got {label}"
        )
        assert score > 0.3, f"Confidence {score} should be > 0.3"
        assert 0 <= score <= 1, "Confidence should be normalized to 0-1"

    def test_infrastructure_diagnosis_intent(self, classifier):
        """A connectivity-diagnosis request maps to infrastructure_diagnosis."""
        label, score = classifier.classify("diagnose cluster connectivity issues")

        assert label == "infrastructure_diagnosis", (
            f"Expected infrastructure_diagnosis, got {label}"
        )
        assert score > 0.3, f"Confidence {score} should be > 0.3"

    def test_kubernetes_operations_intent(self, classifier):
        """A pod-status request maps to kubernetes_operations."""
        label, score = classifier.classify(
            "check pod status in tcm-non-prod namespace"
        )

        assert label == "kubernetes_operations", (
            f"Expected kubernetes_operations, got {label}"
        )
        assert score > 0.3, f"Confidence {score} should be > 0.3"

    def test_application_development_intent(self, classifier):
        """A build-and-test request maps to application_development."""
        label, score = classifier.classify("build docker image and run tests")

        assert label == "application_development", (
            f"Expected application_development, got {label}"
        )
        assert score > 0.3, f"Confidence {score} should be > 0.3"

    def test_infrastructure_validation_intent(self, classifier):
        """A terraform-validation request maps to infrastructure_validation."""
        label, score = classifier.classify("validate terraform configuration")

        assert label == "infrastructure_validation", (
            f"Expected infrastructure_validation, got {label}"
        )
        assert score > 0.3, f"Confidence {score} should be > 0.3"

    def test_ambiguous_request_low_confidence(self, classifier):
        """A vague request yields either no intent (score 0.0) or a score below 0.3."""
        label, score = classifier.classify("what should i do?")

        if label is not None:
            assert score < 0.3, (
                "Ambiguous request should have low confidence"
            )
            return
        assert score == 0.0

    def test_classification_consistency(self, classifier):
        """Classifying the same request five times yields a single intent."""
        prompt = "create a new vpc network"

        observed = set()
        for _attempt in range(5):
            label, _score = classifier.classify(prompt)
            observed.add(label)

        assert len(observed) == 1, (
            "Classification should be deterministic and consistent"
        )
93
+
94
+
95
class TestCapabilityValidator:
    """Checks CapabilityValidator's agent/intent matrix and fallback lookup."""

    @pytest.fixture
    def validator(self):
        """Build a fresh CapabilityValidator for each test."""
        return CapabilityValidator()

    def test_terraform_can_create_infrastructure(self, validator):
        """terraform-architect is accepted for infrastructure_creation."""
        verdict = validator.validate("terraform-architect", "infrastructure_creation")
        assert verdict is True, (
            "terraform-architect should handle infrastructure_creation"
        )

    def test_terraform_cannot_do_kubernetes(self, validator):
        """terraform-architect is rejected for kubernetes_operations."""
        verdict = validator.validate("terraform-architect", "kubernetes_operations")
        assert verdict is False, (
            "terraform-architect cannot handle kubernetes_operations"
        )

    def test_unknown_agent_returns_false(self, validator):
        """An agent name that is not in the matrix is rejected."""
        verdict = validator.validate("unknown-agent", "infrastructure_creation")
        assert verdict is False, "Unknown agents should be invalid"

    def test_find_fallback_agent_for_diagnosis(self, validator):
        """A fallback exists for infrastructure_diagnosis and validates for it."""
        intent = "infrastructure_diagnosis"
        candidate = validator.find_fallback_agent(intent)

        assert candidate is not None, "Should find a fallback agent"
        assert validator.validate(candidate, intent), (
            "Fallback agent should be capable"
        )

    def test_fallback_excludes_agent(self, validator):
        """The agent passed as ``exclude`` is never returned as the fallback."""
        excluded = "terraform-architect"
        candidate = validator.find_fallback_agent(
            "infrastructure_validation",
            exclude=excluded,
        )

        assert candidate != excluded, (
            "Fallback should not be the same as excluded agent"
        )

    def test_capability_matrix_consistency(self, validator):
        """Every agent declares can_do/cannot_do and the two never overlap."""
        matrix = validator.agent_capabilities
        assert len(matrix) >= 4, "Should have at least 4 agents"

        for agent, capabilities in matrix.items():
            assert "can_do" in capabilities, (
                f"{agent} should have 'can_do' list"
            )
            assert "cannot_do" in capabilities, (
                f"{agent} should have 'cannot_do' list"
            )

            # An intent listed as both allowed and forbidden is a config error.
            overlap = set(capabilities["can_do"]).intersection(
                capabilities["cannot_do"]
            )
            assert not overlap, (
                f"{agent} has conflicting capabilities: {overlap}"
            )
154
+
155
+
156
class TestAgentRouter:
    """Integration checks for AgentRouter's semantic routing entry point."""

    @pytest.fixture
    def router(self):
        """Build a fresh AgentRouter for each test."""
        return AgentRouter()

    def test_router_has_semantic_routing(self, router):
        """The router exposes the method and collaborators semantic routing needs."""
        required = (
            ("_route_semantic", "Router should have _route_semantic method"),
            ("intent_classifier", "Router should have intent_classifier"),
            ("capability_validator", "Router should have capability_validator"),
        )
        for attribute, message in required:
            assert hasattr(router, attribute), message

    def test_semantic_routing_returns_proper_format(self, router):
        """_route_semantic returns a (str, float, str) triple with confidence in 0-1."""
        outcome = router._route_semantic("create a cluster")
        agent, confidence, reason = outcome

        assert isinstance(agent, str), "Agent should be a string"
        assert isinstance(confidence, float), "Confidence should be a float"
        assert isinstance(reason, str), "Reason should be a string"
        assert 0 <= confidence <= 1, "Confidence should be normalized 0-1"

    def test_semantic_routing_selects_valid_agent(self, router):
        """Every sample request routes to one of the four known agents."""
        known_agents = (
            "terraform-architect",
            "gitops-operator",
            "gcp-troubleshooter",
            "devops-developer",
        )
        samples = (
            "create vpc",
            "diagnose connectivity",
            "check pod logs",
            "build docker image",
            "validate terraform",
        )

        for request in samples:
            agent, _confidence, _reason = router._route_semantic(request)
            assert agent in known_agents, (
                f"Got invalid agent {agent} for request: {request}"
            )
203
+
204
+
205
class TestRoutingAccuracy:
    """Measures semantic routing accuracy against a fixed set of requests."""

    @pytest.fixture
    def router(self):
        """Build a fresh AgentRouter for each test."""
        return AgentRouter()

    def test_semantic_routing_accuracy_golden_set(self, router):
        """At least 75% of the golden requests must route to the expected agent.

        Misrouted requests are printed before the accuracy assertion; the
        overall accuracy line is printed only when the assertion passes.
        """
        terraform = "terraform-architect"
        troubleshooter = "gcp-troubleshooter"
        gitops = "gitops-operator"
        developer = "devops-developer"

        golden_set = [
            # infrastructure_creation
            ("create a new gke cluster", terraform),
            ("provision vpc for prod", terraform),
            ("deploy infrastructure changes", terraform),
            # infrastructure_diagnosis
            ("diagnose connectivity issues", troubleshooter),
            ("troubleshoot cluster crash", troubleshooter),
            # kubernetes_operations
            ("check pod status in default", gitops),
            ("verify flux reconciliation", gitops),
            # application_development
            ("build docker image", developer),
            ("run unit tests", developer),
            # infrastructure_validation
            ("validate terraform config", terraform),
        ]

        failures = []
        for request, expected_agent in golden_set:
            routed, _confidence, _reason = router._route_semantic(request)
            if routed != expected_agent:
                failures.append({
                    "request": request,
                    "expected": expected_agent,
                    "got": routed,
                })

        total = len(golden_set)
        hits = total - len(failures)
        accuracy = hits / total

        if failures:
            print("\nRouting failures:")
            for failure in failures:
                print(f" '{failure['request']}'")
                print(f" Expected: {failure['expected']}, Got: {failure['got']}")

        assert accuracy >= 0.75, (
            f"Semantic routing accuracy should be >= 75%, got {accuracy*100:.1f}%"
        )

        print(f"\nSemantic Routing Accuracy: {accuracy*100:.1f}% ({hits}/{total})")
263
+
264
+
265
if __name__ == "__main__":
    # pytest.main returns the run's exit code; hand it to sys.exit so that
    # running this file directly reports failure when any test fails.
    sys.exit(pytest.main([__file__, "-v", "--tb=short"]))
@@ -0,0 +1,413 @@
1
+ """
2
+ Unit tests for clarify_engine.py
3
+ """
4
+
5
+ import pytest
6
+ import json
7
+ import sys
8
+ import os
9
+
10
+ # Add tools to path (gaia-ops/tools)
11
+ tools_path = os.path.join(os.path.dirname(__file__), '..', '..', 'tools')
12
+ sys.path.insert(0, tools_path)
13
+
14
+ from clarify_engine import ClarificationEngine, request_clarification, process_clarification
15
+
16
+
17
@pytest.fixture
def mock_project_context():
    """Provide an in-memory stand-in for project-context.json.

    The payload holds three running application services spread over two
    namespaces, a non-prod environment marker, and two Memorystore Redis
    Terraform modules.
    """

    def service(name, tech_stack, namespace, port):
        # Every mock service is reported as running.
        return {
            "name": name,
            "tech_stack": tech_stack,
            "namespace": namespace,
            "port": port,
            "status": "running",
        }

    def redis_module(tier):
        return {
            "resources": "Memorystore Redis",
            "status": "running",
            "tier": tier,
        }

    sections = {
        "application_services": [
            service("tcm-api", "NestJS", "tcm-non-prod", 3001),
            service("tcm-web", "React SPA", "tcm-non-prod", 3000),
            service("pg-api", "Spring Boot", "pg-non-prod", 8086),
        ],
        "cluster_details": {
            "primary_namespaces": ["tcm-non-prod", "pg-non-prod"],
        },
        "project_details": {
            "environment": "non-prod",
        },
        "terraform_infrastructure": {
            "modules": {
                "tcm-redis": redis_module("BASIC"),
                "pg-redis": redis_module("STANDARD_HA"),
            },
        },
    }
    return {"sections": sections}
67
+
68
+
69
@pytest.fixture
def engine_with_mock_context(mock_project_context, tmp_path):
    """Return a ClarificationEngine that reads the mock project context.

    The mock context is serialized to ``project-context.json`` inside the
    per-test temporary directory, and that path is passed to the engine.
    """
    context_path = tmp_path / "project-context.json"
    context_path.write_text(json.dumps(mock_project_context))
    return ClarificationEngine(project_context_path=str(context_path))
79
+
80
+
81
def test_detect_service_ambiguity(engine_with_mock_context):
    """A generic "the API" reference is flagged as a service ambiguity.

    Expects clarification to be requested, a score above 30, and the first
    reported ambiguity point to carry a service pattern.
    """
    result = engine_with_mock_context.detect_ambiguity("Check the API")

    # Truthiness check instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert result["ambiguity_score"] > 30
    assert len(result["ambiguity_points"]) > 0
    assert "service" in result["ambiguity_points"][0]["pattern"]
89
+
90
+
91
def test_detect_namespace_ambiguity(engine_with_mock_context):
    """A bare "cluster" target is flagged with a namespace ambiguity point."""
    result = engine_with_mock_context.detect_ambiguity("Deploy to cluster")

    # Truthiness check instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("namespace" in a["pattern"] for a in result["ambiguity_points"])
97
+
98
+
99
def test_detect_environment_warning(engine_with_mock_context):
    """Asking for production is flagged with an environment ambiguity point.

    The mock project context declares a non-prod environment. The test also
    expects the overall ambiguity score to be at least 70.
    """
    result = engine_with_mock_context.detect_ambiguity("Deploy to production")

    # Truthiness check instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("environment" in a["pattern"] for a in result["ambiguity_points"])
    # The any() assertion above already proves ambiguity_points is non-empty,
    # so the score can be checked unconditionally.
    assert result["ambiguity_score"] >= 70  # High weight
108
+
109
+
110
def test_detect_resource_ambiguity(engine_with_mock_context):
    """A generic "the Redis" reference is flagged with a resource ambiguity point.

    The mock project context defines two Redis Terraform modules.
    """
    result = engine_with_mock_context.detect_ambiguity("Check the Redis")

    # Truthiness check instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("resource" in a["pattern"] for a in result["ambiguity_points"])
116
+
117
+
118
def test_no_ambiguity_specific_prompt(engine_with_mock_context):
    """A prompt naming both service and namespace needs no clarification.

    Expects clarification not to be requested and the score to be at most 30.
    """
    result = engine_with_mock_context.detect_ambiguity(
        "Check tcm-api service in tcm-non-prod namespace"
    )

    # Falsiness check instead of `== False` (PEP 8).
    assert not result["needs_clarification"]
    assert result["ambiguity_score"] <= 30
126
+
127
+
128
def test_generate_questions(engine_with_mock_context):
    """generate_questions builds one single-select question with two options.

    A hand-built analysis containing one service ambiguity (tcm-api vs
    pg-api, with per-service metadata) is passed in. The result must carry
    ``summary`` and ``question_config``, and each option must have a
    non-empty label and a description mentioning namespace, tech, or N/A.
    """
    ambiguity_analysis = {
        "needs_clarification": True,
        "ambiguity_score": 80,
        "ambiguity_points": [
            {
                "pattern": "service_ambiguity",
                "detected_keyword": "the api",
                "ambiguity_reason": "Multiple services",
                "available_options": ["tcm-api", "pg-api"],
                "services_metadata": {
                    "tcm-api": {
                        "tech_stack": "NestJS",
                        "namespace": "tcm-non-prod",
                        "port": 3001,
                        "status": "running"
                    },
                    "pg-api": {
                        "tech_stack": "Spring Boot",
                        "namespace": "pg-non-prod",
                        "port": 8086,
                        "status": "running"
                    }
                },
                "suggested_question": "Which API?",
                "weight": 80,
                "allow_multiple": False
            }
        ],
        "suggested_questions": ["Which API?"]
    }

    result = engine_with_mock_context.generate_questions(ambiguity_analysis)

    assert "summary" in result
    assert "question_config" in result
    assert len(result["question_config"]["questions"]) == 1

    # Check question structure
    question = result["question_config"]["questions"][0]
    assert question["question"] == "Which API?"
    # Falsiness check instead of `== False` (PEP 8).
    assert not question["multiSelect"]
    assert len(question["options"]) == 2  # 2 options (tcm-api, pg-api)

    # Check options have a label and a rich description
    for option in question["options"]:
        assert "label" in option
        assert "description" in option
        # The exact emoji prefix is not pinned; only require a non-empty label.
        assert len(option["label"]) > 0
        assert ("Namespace:" in option["description"] or "N/A" in option["description"]
                or "Tech" in option["description"])
182
+
183
+
184
def test_generate_questions_with_catchall(engine_with_mock_context):
    """With four candidate services the question ends in a catch-all option.

    Expects exactly four options, the last of which is labelled "Todos" or
    carries the globe emoji.
    """
    service_point = {
        "pattern": "service_ambiguity",
        "detected_keyword": "services",
        "ambiguity_reason": "Multiple services",
        "available_options": ["tcm-api", "tcm-web", "pg-api", "pg-web"],
        "services_metadata": {},
        "suggested_question": "Which services?",
        "weight": 80,
        "allow_multiple": False,
    }
    analysis = {
        "needs_clarification": True,
        "ambiguity_score": 80,
        "ambiguity_points": [service_point],
        "suggested_questions": ["Which services?"],
    }

    generated = engine_with_mock_context.generate_questions(analysis)
    options = generated["question_config"]["questions"][0]["options"]

    # Three specific choices plus one catch-all.
    assert len(options) == 4
    last_label = options[-1]["label"]
    assert "Todos" in last_label or "🌐" in last_label
211
+
212
+
213
def test_enrich_prompt(engine_with_mock_context):
    """enrich_prompt keeps the original text and appends the chosen answer.

    The enriched prompt must contain the original prompt, the selected
    service name, and a "[Clarification" marker.
    """
    context = {
        "ambiguities": [
            {
                "pattern": "service_ambiguity",
                "suggested_question": "Which API?",
                "available_options": ["tcm-api", "pg-api"],
            }
        ]
    }
    answers = {"question_1": "📦 tcm-api"}

    enriched = engine_with_mock_context.enrich_prompt(
        "Check the API",
        answers,
        context,
    )

    for fragment in ("Check the API", "tcm-api", "[Clarification"):
        assert fragment in enriched
236
+
237
+
238
def test_clean_answer(engine_with_mock_context):
    """_clean_answer strips a leading emoji and leaves plain text untouched."""
    cases = (
        ("📦 tcm-api", "tcm-api"),
        ("🎯 tcm-non-prod", "tcm-non-prod"),
        ("plain text", "plain text"),
    )
    for raw, cleaned in cases:
        assert engine_with_mock_context._clean_answer(raw) == cleaned
243
+
244
+
245
def test_validate_answer_exact_match(engine_with_mock_context):
    """An answer equal to one of the available options is returned unchanged."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    validated = engine_with_mock_context._validate_answer("tcm-api", options)

    assert validated == "tcm-api"
252
+
253
+
254
def test_validate_answer_fuzzy_match(engine_with_mock_context):
    """A near-miss answer ("tcm api") is either matched to tcm-api or kept as typed."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    # The user typed a space where the option has a hyphen.
    validated = engine_with_mock_context._validate_answer("tcm api", options)

    matched_option = "tcm-api" in validated.lower()
    kept_verbatim = validated == "tcm api"
    assert matched_option or kept_verbatim
263
+
264
+
265
def test_validate_answer_all_keyword(engine_with_mock_context):
    """The "todos" answer yields a result containing "Todos" or "tcm-api"."""
    options = {"available_options": ["tcm-api", "pg-api"]}

    validated = engine_with_mock_context._validate_answer("todos", options)

    assert any(token in validated for token in ("Todos", "tcm-api"))
273
+
274
+
275
def test_convenience_function_request_clarification():
    """request_clarification returns a dict with the documented keys.

    No mock context is supplied here, so the outcome depends on whatever
    project context the function finds by default. The extra keys are only
    required when clarification is actually needed.
    """
    outcome = request_clarification("Check the API")

    assert "needs_clarification" in outcome

    if not outcome["needs_clarification"]:
        return

    for key in ("summary", "question_config", "engine_instance"):
        assert key in outcome
287
+
288
+
289
def test_command_context_filtering(engine_with_mock_context):
    """A command rule restricted to namespace patterns suppresses service ones."""
    engine = engine_with_mock_context
    # Register a rule for "test_command" that only allows namespace_ambiguity.
    engine.config["command_rules"]["test_command"] = {
        "enabled": True,
        "patterns": ["namespace_ambiguity"],
    }

    analysis = engine.detect_ambiguity(
        "Check the API",
        command_context={"command": "test_command"},
    )

    points = analysis["ambiguity_points"]
    if points:
        # No reported point may be a service ambiguity.
        assert not any(point["pattern"] == "service_ambiguity" for point in points)
305
+
306
+
307
def test_disabled_command(engine_with_mock_context):
    """A command whose rule has ``enabled: False`` never needs clarification."""
    engine_with_mock_context.config["command_rules"]["disabled_command"] = {
        "enabled": False
    }

    result = engine_with_mock_context.detect_ambiguity(
        "Check the API",
        command_context={"command": "disabled_command"}
    )

    # Falsiness check instead of `== False` (PEP 8).
    assert not result["needs_clarification"]
319
+
320
+
321
def test_multiple_ambiguities_sorted_by_weight(engine_with_mock_context):
    """When several ambiguities are reported they come in descending weight order."""
    analysis = engine_with_mock_context.detect_ambiguity(
        "Deploy the API to cluster in production"
    )

    points = analysis["ambiguity_points"]
    if len(points) <= 1:
        # Ordering is only meaningful with at least two points.
        return

    weights = [point["weight"] for point in points]
    assert weights == sorted(weights, reverse=True)
331
+
332
+
333
def test_get_option_metadata_service(engine_with_mock_context):
    """Service metadata text includes tech stack, namespace, port and a status mark."""
    service_info = {
        "tech_stack": "NestJS",
        "namespace": "tcm-non-prod",
        "port": 3001,
        "status": "running",
    }
    ambiguity = {
        "pattern": "service_ambiguity",
        "services_metadata": {"tcm-api": service_info},
    }

    metadata = engine_with_mock_context._get_option_metadata("tcm-api", ambiguity)

    # The check mark is the emoji expected for a running service.
    for fragment in ("NestJS", "tcm-non-prod", "3001", "✅"):
        assert fragment in metadata
353
+
354
+
355
def test_get_option_metadata_namespace(engine_with_mock_context):
    """Namespace metadata text lists a service name and the service count.

    The count may be rendered in Spanish ("2 servicios") or English
    ("2 services").
    """
    namespace_info = {
        "services": ["tcm-api", "tcm-web"],
        "service_count": 2,
    }
    ambiguity = {
        "pattern": "namespace_ambiguity",
        "namespace_metadata": {"tcm-non-prod": namespace_info},
    }

    metadata = engine_with_mock_context._get_option_metadata("tcm-non-prod", ambiguity)

    assert "tcm-api" in metadata
    assert any(count in metadata for count in ("2 servicios", "2 services"))
371
+
372
+
373
def test_spanish_keywords_detected(engine_with_mock_context):
    """A Spanish prompt ("Chequea el servicio") is flagged as a service ambiguity."""
    result = engine_with_mock_context.detect_ambiguity("Chequea el servicio")

    # Truthiness check instead of `== True` (PEP 8).
    assert result["needs_clarification"]
    assert any("service" in a["pattern"] for a in result["ambiguity_points"])
380
+
381
+
382
def test_log_clarification(engine_with_mock_context, tmp_path):
    """log_clarification writes a JSON entry holding both prompts and the score.

    The engine's log path is redirected to a file in the per-test temporary
    directory, and the written content is parsed back as a single JSON
    document.
    """
    engine = engine_with_mock_context
    log_path = tmp_path / "clarifications.jsonl"
    engine.clarification_log_path = str(log_path)
    os.makedirs(tmp_path, exist_ok=True)

    analysis = {
        "ambiguity_score": 80,
        "ambiguity_points": [{"pattern": "service_ambiguity"}],
    }
    answers = {"question_1": "tcm-api"}

    engine.log_clarification(
        "Original prompt",
        "Enriched prompt",
        analysis,
        answers,
    )

    # The log file must exist before its content is inspected.
    assert log_path.exists()
    entry = json.loads(log_path.read_text())

    expected_fields = {
        "original_prompt": "Original prompt",
        "enriched_prompt": "Enriched prompt",
        "ambiguity_score": 80,
    }
    for field, value in expected_fields.items():
        assert entry[field] == value
410
+
411
+
412
if __name__ == "__main__":
    # pytest.main returns the run's exit code; hand it to sys.exit so that
    # running this file directly reports failure when any test fails.
    sys.exit(pytest.main([__file__, "-v"]))