@jaguilar87/gaia-ops 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +137 -1
  2. package/README.en.md +29 -23
  3. package/README.md +24 -17
  4. package/agents/{claude-architect.md → gaia.md} +6 -6
  5. package/commands/{architect.md → gaia.md} +6 -6
  6. package/config/AGENTS.md +5 -5
  7. package/config/agent-catalog.md +14 -14
  8. package/config/context-contracts.md +4 -4
  9. package/config/embeddings_info.json +14 -0
  10. package/config/intent_embeddings.json +2002 -0
  11. package/config/intent_embeddings.npy +0 -0
  12. package/index.js +3 -1
  13. package/package.json +3 -2
  14. package/speckit/README.en.md +20 -69
  15. package/templates/CLAUDE.template.md +5 -13
  16. package/tests/README.en.md +224 -0
  17. package/tests/README.md +338 -0
  18. package/tests/fixtures/project-context.aws.json +53 -0
  19. package/tests/fixtures/project-context.gcp.json +53 -0
  20. package/tests/integration/RUN_TESTS.md +185 -0
  21. package/tests/integration/__init__.py +0 -0
  22. package/tests/integration/test_hooks_integration.py +473 -0
  23. package/tests/integration/test_hooks_workflow.py +397 -0
  24. package/tests/permissions-validation/MANUAL_VALIDATION.md +434 -0
  25. package/tests/permissions-validation/test_permissions_validation.py +527 -0
  26. package/tests/system/__init__.py +0 -0
  27. package/tests/system/permissions_helpers.py +318 -0
  28. package/tests/system/test_agent_definitions.py +166 -0
  29. package/tests/system/test_configuration_files.py +121 -0
  30. package/tests/system/test_directory_structure.py +231 -0
  31. package/tests/system/test_permissions_system.py +1006 -0
  32. package/tests/tools/__init__.py +0 -0
  33. package/tests/tools/test_agent_router.py +266 -0
  34. package/tests/tools/test_clarify_engine.py +413 -0
  35. package/tests/tools/test_context_provider.py +157 -0
  36. package/tests/validators/__init__.py +0 -0
  37. package/tests/validators/test_approval_gate.py +415 -0
  38. package/tests/validators/test_commit_validator.py +446 -0
  39. package/tools/context_provider.py +28 -7
  40. package/tools/generate_embeddings.py +3 -3
  41. package/tools/semantic_matcher.py +2 -2
@@ -0,0 +1,397 @@
1
+ """
2
+ Workflow integration tests for hooks system.
3
+
4
+ Tests complete workflows:
5
+ - Pre-hook validation → Command execution → Post-hook audit
6
+ - Settings merge → Permission resolution → Hook enforcement
7
+ - GitOps workflow validation
8
+ - Error handling and recovery
9
+ - Tier escalation scenarios
10
+ """
11
+
12
+ import pytest
13
+ import sys
14
+ import json
15
+ import tempfile
16
+ import os
17
+ from pathlib import Path
18
+ from typing import Dict, Any
19
+
20
# Put the hook implementations and the shared permission helpers on the
# import path, resolved relative to this file rather than the working
# directory.
sys.path.insert(0, str(Path(__file__).resolve().parents[2].joinpath("hooks")))
sys.path.insert(0, str(Path(__file__).resolve().parents[2].joinpath("tests", "system")))

# The hook modules are optional. Record whether each one imported so that the
# tests depending on it can be skipped instead of breaking collection.
try:
    from pre_tool_use import PolicyEngine, SecurityTier, pre_tool_use_hook
except ImportError:
    PRE_HOOK_AVAILABLE = False
else:
    PRE_HOOK_AVAILABLE = True

try:
    from post_tool_use import post_tool_use_hook, AuditLogger
except ImportError:
    POST_HOOK_AVAILABLE = False
else:
    POST_HOOK_AVAILABLE = True

# The permission helpers are required: a missing module is a hard error.
from permissions_helpers import (
    get_permission_decision,
    load_project_settings,
    load_shared_settings,
    merge_settings,
)
42
+
43
+
44
class TestCompleteWorkflow:
    """End-to-end scenarios: pre-hook check, simulated run, post-hook audit."""

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE or not POST_HOOK_AVAILABLE,
        reason="Hooks not available",
    )
    def test_read_operation_complete_flow(self):
        """A read-only command clears the pre-hook and produces an audit file."""
        read_command = "kubectl get pods"

        # Step 1: the pre-hook returns None when it lets the call through.
        verdict = pre_tool_use_hook("bash", {"command": read_command})
        assert verdict is None, "Read operation should pass pre-hook"

        # Step 2: stand-in values for the command's output and runtime.
        simulated_output = "pod/test-pod 1/1 Running"
        elapsed = 0.5

        # Step 3: record the execution and check that a .jsonl file appears.
        with tempfile.TemporaryDirectory() as tmpdir:
            recorder = AuditLogger(log_dir=tmpdir)
            recorder.log_execution(
                "bash",
                {"command": read_command},
                simulated_output,
                elapsed,
                0,
            )

            written = list(Path(tmpdir).glob("*.jsonl"))
            assert len(written) > 0

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_blocked_operation_stops_at_pre_hook(self):
        """A blocked command gets a non-None verdict from the pre-hook."""
        verdict = pre_tool_use_hook("bash", {"command": "terraform apply"})
        assert verdict is not None, "Write operation should be blocked"

        # Nothing further is exercised here: execution and the post-hook are
        # expected not to happen after a block, which this test only documents.

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_validation_operation_workflow(self):
        """Validation-style commands (T1) are let through by the pre-hook."""
        allowed_commands = (
            "terraform validate",
            "terraform plan",
            "helm template myapp ./chart",
            "kubectl apply -f test.yaml --dry-run=client",
        )

        for command in allowed_commands:
            verdict = pre_tool_use_hook("bash", {"command": command})
            assert verdict is None, f"Validation command should be allowed: {command}"

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_tier_escalation_blocked(self):
        """Moving from a T1 command to its T3 counterpart is rejected."""
        # `terraform plan` (T1) is let through.
        plan_verdict = pre_tool_use_hook("bash", {"command": "terraform plan"})
        assert plan_verdict is None

        # `terraform apply` (T3) yields a message mentioning "blocked".
        apply_verdict = pre_tool_use_hook("bash", {"command": "terraform apply"})
        assert apply_verdict is not None
        assert "blocked" in apply_verdict.lower()
113
+
114
+
115
class TestErrorHandlingWorkflow:
    """How the hooks react to bad input and other edge cases."""

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_invalid_command_handling(self):
        """An empty command string is rejected with an explanatory message."""
        verdict = pre_tool_use_hook("bash", {"command": ""})
        assert verdict is not None

        message = verdict.lower()
        assert any(keyword in message for keyword in ("error", "empty"))

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_malformed_parameters_handling(self):
        """A bash call without a ``command`` key is rejected."""
        verdict = pre_tool_use_hook("bash", {})
        assert verdict is not None

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_non_bash_tool_passes_through(self):
        """Tools other than bash are not intercepted by the pre-hook."""
        verdict = pre_tool_use_hook("read", {"file_path": "/tmp/test.txt"})
        assert verdict is None, "Non-bash tools should pass through"

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="PolicyEngine not available",
    )
    def test_policy_engine_error_handling(self):
        """``validate_command`` refuses wrongly typed arguments."""
        policy = PolicyEngine()

        # A non-string tool name is refused and the reason says "invalid".
        allowed, _tier, why = policy.validate_command(123, "test")
        assert allowed is False
        assert "invalid" in why.lower()

        # A None command is refused as well.
        allowed, _tier, _why = policy.validate_command("bash", None)
        assert allowed is False

    @pytest.mark.skipif(
        not POST_HOOK_AVAILABLE,
        reason="Post-hook not available",
    )
    def test_audit_logger_creates_directories(self):
        """Constructing an ``AuditLogger`` creates a missing, nested log dir."""
        with tempfile.TemporaryDirectory() as tmpdir:
            target_dir = Path(tmpdir).joinpath("nested", "logs")

            # Kept bound so the logger stays alive while the check runs.
            _logger = AuditLogger(log_dir=str(target_dir))

            assert target_dir.exists(), "AuditLogger should create log directory"
160
+
161
+
162
class TestSettingsMergeWorkflow:
    """Merging project and shared settings, then resolving permissions."""

    @pytest.fixture
    def project_settings(self):
        """Settings as a project would declare them."""
        bash_rules = {
            "deny": ["rm -rf"],
            "allow": ["ls", "cat"],
        }
        return {
            "permissions": {"bash": bash_rules},
            "environment": "production",
        }

    @pytest.fixture
    def shared_settings(self):
        """Settings shared across projects."""
        bash_rules = {
            "deny": ["terraform destroy"],
            "allow": ["kubectl get", "kubectl describe"],
            "ask": {"terraform apply": "Confirm?"},
        }
        return {
            "permissions": {"bash": bash_rules},
            "environment": "development",
        }

    def test_merge_combines_permissions(self, project_settings, shared_settings):
        """Lists come from the project; dicts only in shared are carried over."""
        bash_rules = merge_settings(project_settings, shared_settings)["permissions"]["bash"]

        # The project's deny list replaces the shared one wholesale.
        assert "rm -rf" in bash_rules["deny"]
        assert "terraform destroy" not in bash_rules["deny"]

        # The project has no "ask" mapping, so the shared one survives.
        assert "terraform apply" in bash_rules["ask"]

    def test_merge_project_overrides_shared(self, project_settings, shared_settings):
        """A scalar present on both sides takes the project's value."""
        combined = merge_settings(project_settings, shared_settings)

        assert combined["environment"] == "production"

    def test_merged_settings_permission_resolution(self, project_settings, shared_settings):
        """Permission decisions reflect the merged (project-first) rules."""
        combined = merge_settings(project_settings, shared_settings)

        # (command, expected decision), checked in this order.
        expectations = (
            # Denied by the project's deny list.
            ("rm -rf /tmp", "deny"),
            # Shared deny entry was replaced, so it matches no list.
            ("terraform destroy", "default_deny"),
            # Allowed by the project's allow list.
            ("ls", "allow"),
            # Shared allow entry was replaced, so it matches no list.
            ("kubectl get pods", "default_deny"),
            # "ask" mapping inherited from the shared settings.
            ("terraform apply", "ask"),
        )

        for command, expected in expectations:
            decision = get_permission_decision(command, "bash", combined)
            assert decision == expected
237
+
238
+
239
class TestGitOpsWorkflow:
    """Pre-hook behaviour for typical GitOps commands."""

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_gitops_read_workflow(self):
        """Inspection commands are let through."""
        inspection_commands = (
            "kubectl get pods -n production",
            "kubectl describe deployment myapp",
            "helm list -n production",
            "flux get kustomizations",
        )

        for command in inspection_commands:
            verdict = pre_tool_use_hook("bash", {"command": command})
            assert verdict is None, f"GitOps read should be allowed: {command}"

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_gitops_write_blocked(self):
        """Cluster-mutating commands are rejected."""
        mutating_commands = (
            "kubectl apply -f deployment.yaml",
            "kubectl delete pod test-pod",
            "helm install myapp ./chart",
            "flux reconcile helmrelease myapp",
        )

        for command in mutating_commands:
            verdict = pre_tool_use_hook("bash", {"command": command})
            assert verdict is not None, f"GitOps write should be blocked: {command}"

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="Pre-hook not available",
    )
    def test_gitops_validation_workflow(self):
        """Dry-run and template renderings are let through."""
        rehearsal_commands = (
            "kubectl apply -f deployment.yaml --dry-run=client",
            "helm template myapp ./chart",
            "helm install myapp ./chart --dry-run",
        )

        for command in rehearsal_commands:
            verdict = pre_tool_use_hook("bash", {"command": command})
            assert verdict is None, f"GitOps validation should be allowed: {command}"
282
+
283
+
284
class TestTierEscalationWorkflow:
    """How commands map onto security tiers T0 through T3."""

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="PolicyEngine not available",
    )
    def test_tier_progression(self):
        """One representative command per tier classifies as expected."""
        policy = PolicyEngine()

        # (command, expected tier), checked from lowest tier to highest.
        ladder = (
            # T0: read only
            ("kubectl get pods", SecurityTier.T0_READ_ONLY),
            # T1: validation
            ("terraform validate", SecurityTier.T1_VALIDATION),
            # T2: dry-run
            ("kubectl apply -f test.yaml --dry-run=client", SecurityTier.T2_DRY_RUN),
            # T3: blocked
            ("terraform apply", SecurityTier.T3_BLOCKED),
        )

        for command, expected_tier in ladder:
            assert policy.classify_command_tier(command) == expected_tier

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="PolicyEngine not available",
    )
    def test_cannot_skip_tiers(self):
        """A read is allowed at T0 while a delete is refused at T3."""
        policy = PolicyEngine()

        # Read operation: allowed, classified T0.
        allowed, level, _ = policy.validate_command("bash", "kubectl get pods")
        assert allowed is True
        assert level == SecurityTier.T0_READ_ONLY

        # Write operation: refused, classified T3.
        allowed, level, _ = policy.validate_command("bash", "kubectl delete pod test")
        assert allowed is False
        assert level == SecurityTier.T3_BLOCKED

    @pytest.mark.skipif(
        not PRE_HOOK_AVAILABLE,
        reason="PolicyEngine not available",
    )
    def test_dry_run_bridges_validation_to_realization(self):
        """Illustrates the T1 -> T2 -> T3 path conceptually; nothing is executed."""
        policy = PolicyEngine()

        # T1: validation.
        assert policy.classify_command_tier("terraform plan") == SecurityTier.T1_VALIDATION

        # Intermediate step: smoke call only. The tier returned for the
        # --help variant is never asserted; it just has to classify without
        # raising.
        # NOTE(review): decide which tier "terraform apply --help" should map
        # to and assert it here.
        policy.classify_command_tier("terraform apply --help")

        # T3: realization (requires explicit approval).
        assert policy.classify_command_tier("terraform apply") == SecurityTier.T3_BLOCKED
339
+
340
+
341
class TestAuditTrailWorkflow:
    """Test audit trail creation and integrity"""

    @pytest.mark.skipif(not POST_HOOK_AVAILABLE, reason="Post-hook not available")
    def test_audit_trail_captures_all_fields(self):
        """Test that a logged execution is persisted with all required fields.

        The audit file is read as JSON Lines (one JSON document per line)
        with an explicit UTF-8 encoding, so the check does not depend on the
        platform's default encoding and does not break if the file holds
        more than one record.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            audit_logger = AuditLogger(log_dir=tmpdir)

            audit_logger.log_execution(
                tool_name="bash",
                parameters={"command": "kubectl get pods"},
                result="pod/test-pod 1/1 Running",
                duration=0.5,
                exit_code=0,
            )

            # Locate the audit log
            log_files = list(Path(tmpdir).glob("*.jsonl"))
            assert len(log_files) > 0

            # Parse line by line: json.loads() on the whole file would raise
            # "Extra data" as soon as a second record is present. Iterating
            # the file object splits on newlines only, so line-separator
            # characters inside JSON strings cannot split a record.
            with open(log_files[0], "r", encoding="utf-8") as f:
                entries = [json.loads(line) for line in f if line.strip()]

            assert entries, "Audit log file should contain at least one entry"
            # Records are taken in file order; the last one is inspected.
            log_entry = entries[-1]

            # Verify all required fields
            assert "timestamp" in log_entry
            assert "tool_name" in log_entry
            assert "command" in log_entry
            assert "duration_ms" in log_entry
            assert "exit_code" in log_entry
            assert log_entry["tool_name"] == "bash"
            assert log_entry["command"] == "kubectl get pods"

    @pytest.mark.skipif(not POST_HOOK_AVAILABLE, reason="Post-hook not available")
    def test_audit_trail_handles_large_output(self):
        """Test that logging a very large command output still produces a log file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            audit_logger = AuditLogger(log_dir=tmpdir)

            # Simulate large output: 10,000 short lines
            large_output = "line\n" * 10000

            audit_logger.log_execution(
                tool_name="bash",
                parameters={"command": "kubectl get all"},
                result=large_output,
                duration=2.5,
                exit_code=0,
            )

            # Only the existence of a log file is checked, not its content
            log_files = list(Path(tmpdir).glob("*.jsonl"))
            assert len(log_files) > 0
394
+
395
+
396
# Allow running this file directly: hand it to pytest with verbose output
# and short tracebacks.
if __name__ == "__main__":
    pytest.main([__file__, "-v", "--tb=short"])