@jaguilar87/gaia-ops 3.10.2 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/pre_tool_use.py +73 -1
- package/package.json +7 -2
- package/tests/conftest.py +166 -0
- package/tests/integration/test_subagent_lifecycle.py +676 -0
- package/tests/layer1_prompt_regression/test_agent_frontmatter.py +152 -0
- package/tests/layer1_prompt_regression/test_agent_prompt_content.py +171 -0
- package/tests/layer1_prompt_regression/test_context_contracts.py +139 -0
- package/tests/layer1_prompt_regression/test_routing_table.py +95 -0
- package/tests/layer1_prompt_regression/test_security_tier_consistency.py +117 -0
- package/tests/layer1_prompt_regression/test_skill_content_rules.py +147 -0
- package/tests/layer1_prompt_regression/test_skills_cross_reference.py +168 -0
- package/tests/layer2_llm_evaluation/conftest.py +6 -0
- package/tests/layer2_llm_evaluation/helpers/promptfoo_runner.py +132 -0
- package/tests/layer2_llm_evaluation/test_agent_behavior.py +198 -0
- package/tests/layer3_e2e/conftest.py +6 -0
- package/tests/layer3_e2e/helpers/claude_headless.py +169 -0
- package/tests/layer3_e2e/test_hook_lifecycle.py +160 -0
- package/tests/layer3_e2e/test_installation_smoke.py +117 -0
- package/tests/promptfoo.yaml +126 -0
package/hooks/pre_tool_use.py
CHANGED
|
@@ -152,6 +152,75 @@ def _load_agent_skills(subagent_type: str) -> str:
|
|
|
152
152
|
return "\n\n---\n\n".join(parts) if parts else ""
|
|
153
153
|
|
|
154
154
|
|
|
155
|
+
def _build_context_update_reminder(subagent_type: str) -> str:
    """
    Build a reminder naming this agent's writable context sections that are empty.

    Looks up the agent's ``write`` list in the first context-contracts file
    that provides a non-empty one (the ``.claude/config`` files relative to
    the working directory are tried before the ``config`` directory two
    levels above this file). Then reads ``sections`` from the first usable
    project-context.json and collects every writable section whose value is
    missing or falsy. If no project-context.json can be read, every writable
    section counts as empty.

    Missing, unreadable, malformed, or unexpectedly shaped JSON is skipped
    instead of raising, so a broken config file cannot break prompt injection.

    Args:
        subagent_type: Agent name; only members of PROJECT_AGENTS get a reminder.

    Returns:
        Reminder string, or empty string if the agent is not a project agent,
        has no writable sections, or none of them are empty.
    """
    if subagent_type not in PROJECT_AGENTS:
        return ""

    def _read_json_object(path):
        """Return the JSON object stored at *path*, or None if it is unusable."""
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable path. ValueError covers both invalid
            # JSON (JSONDecodeError) and undecodable bytes (UnicodeDecodeError).
            return None
        return loaded if isinstance(loaded, dict) else None

    # Load contracts to find writable sections
    bundled_config = Path(__file__).parent.parent / "config"
    contracts_paths = [
        Path(".claude/config/context-contracts.gcp.json"),
        Path(".claude/config/context-contracts.aws.json"),
        bundled_config / "context-contracts.gcp.json",
        bundled_config / "context-contracts.aws.json",
    ]

    writable = []
    for cp in contracts_paths:
        data = _read_json_object(cp)
        if data is None:
            continue
        agents = data.get("agents")
        agent_perms = agents.get(subagent_type) if isinstance(agents, dict) else None
        declared = agent_perms.get("write") if isinstance(agent_perms, dict) else None
        # Keep only string names: anything else cannot be a section key and
        # would fail (or mislead) in the lookup below.
        writable = (
            [name for name in declared if isinstance(name, str)]
            if isinstance(declared, list)
            else []
        )
        if writable:
            break

    if not writable:
        return ""

    # Load project-context.json to find empty sections
    pc_paths = [
        Path(".claude/project-context/project-context.json"),
        Path("project-context.json"),
    ]

    sections = {}
    for pp in pc_paths:
        pc = _read_json_object(pp)
        if pc is None:
            continue
        found = pc.get("sections")
        # A non-object "sections" value is treated as "no sections" rather
        # than crashing on .get() further down.
        if isinstance(found, dict):
            sections = found
        break

    # Find empty writable sections (missing key or falsy value)
    empty = [name for name in writable if not sections.get(name)]

    if not empty:
        return ""

    empty_list = ", ".join(f"`{s}`" for s in empty)
    return (
        f"\n**CONTEXT_UPDATE REQUIRED:** Your writable sections {empty_list} "
        f"are currently EMPTY. After completing your task, you MUST emit a "
        f"CONTEXT_UPDATE block with any data you discovered. "
        f"See \"Context Updater Protocol\" above for the format.\n\n"
    )
|
|
155
224
|
|
|
156
225
|
|
|
157
226
|
def _should_inject_on_resume(parameters: dict) -> bool:
|
|
@@ -339,13 +408,16 @@ def _inject_project_context(parameters: dict) -> dict:
|
|
|
339
408
|
skills_content = _load_agent_skills(subagent_type)
|
|
340
409
|
skills_section = f"\n\n---\n\n# Agent Skills (Auto-Injected)\n\n{skills_content}" if skills_content else ""
|
|
341
410
|
|
|
411
|
+
# Build context update reminder for empty writable sections
|
|
412
|
+
update_reminder = _build_context_update_reminder(subagent_type)
|
|
413
|
+
|
|
342
414
|
# Inject context and skills into prompt
|
|
343
415
|
enriched_prompt = f"""# Project Context (Auto-Injected)
|
|
344
416
|
|
|
345
417
|
{json.dumps(context_payload, indent=2)}
|
|
346
418
|
|
|
347
419
|
{pending_warning}---{skills_section}
|
|
348
|
-
|
|
420
|
+
{update_reminder}
|
|
349
421
|
# User Task
|
|
350
422
|
|
|
351
423
|
{prompt}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jaguilar87/gaia-ops",
|
|
3
|
-
"version": "3.10.2",
|
|
3
|
+
"version": "3.11.0",
|
|
4
4
|
"description": "Multi-agent orchestration system for Claude Code - DevOps automation toolkit",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -54,7 +54,12 @@
|
|
|
54
54
|
"index.js"
|
|
55
55
|
],
|
|
56
56
|
"scripts": {
|
|
57
|
-
"test": "python3 -m pytest tests/ -v",
|
|
57
|
+
"test": "python3 -m pytest tests/ -v --ignore=tests/layer2_llm_evaluation --ignore=tests/layer3_e2e",
|
|
58
|
+
"test:layer1": "python3 -m pytest tests/ -v --ignore=tests/layer2_llm_evaluation --ignore=tests/layer3_e2e",
|
|
59
|
+
"test:layer2": "python3 -m pytest tests/layer2_llm_evaluation/ -v -m llm",
|
|
60
|
+
"test:layer3": "python3 -m pytest tests/layer3_e2e/ -v -m e2e",
|
|
61
|
+
"test:all": "python3 -m pytest tests/ -v -m ''",
|
|
62
|
+
"test:promptfoo": "npx promptfoo eval --config tests/promptfoo.yaml",
|
|
58
63
|
"lint": "eslint .",
|
|
59
64
|
"clean": "find . -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null || true",
|
|
60
65
|
"pre-publish": "node bin/pre-publish-validate.js",
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Root conftest.py - Shared test infrastructure for gaia-ops.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- Custom markers: llm, e2e (auto-skipped in default test runs)
|
|
6
|
+
- Session fixtures: package_root, agents_dir, skills_dir, config_dir, hooks_dir
|
|
7
|
+
- Frontmatter parser (manual, no PyYAML dependency)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ============================================================================
|
|
15
|
+
# MARKERS
|
|
16
|
+
# ============================================================================
|
|
17
|
+
|
|
18
|
+
def pytest_configure(config):
    """Register the custom ``llm`` and ``e2e`` markers with pytest."""
    marker_lines = (
        "llm: LLM evaluation tests (require ANTHROPIC_API_KEY)",
        "e2e: E2E headless tests (require claude CLI)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def pytest_collection_modifyitems(config, items):
    """
    Auto-skip llm and e2e tests unless a -m expression was passed explicitly.

    Args:
        config: pytest config object for the current run.
        items: collected test items; skip markers are added in place.
    """
    # A non-empty mark expression means the user chose what to run.
    if config.getoption("-m", default=""):
        return

    # `-m ''` (used by the package.json "test:all" script to run everything)
    # produces an empty expression, which getoption cannot tell apart from
    # "no -m at all". Check the raw command-line arguments instead.
    invocation = getattr(config, "invocation_params", None)
    if invocation is not None and "-m" in tuple(invocation.args):
        return

    skip_llm = pytest.mark.skip(reason="LLM tests skipped by default (use -m llm)")
    skip_e2e = pytest.mark.skip(reason="E2E tests skipped by default (use -m e2e)")

    for item in items:
        if "llm" in item.keywords:
            item.add_marker(skip_llm)
        if "e2e" in item.keywords:
            item.add_marker(skip_e2e)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ============================================================================
|
|
42
|
+
# SESSION FIXTURES
|
|
43
|
+
# ============================================================================
|
|
44
|
+
|
|
45
|
+
@pytest.fixture(scope="session")
def package_root():
    """Return the gaia-ops package root: two levels above this resolved file."""
    candidate = Path(__file__).resolve().parents[1]
    if candidate.is_symlink():
        return candidate.resolve()
    return candidate
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@pytest.fixture(scope="session")
def agents_dir(package_root):
    """Return the ``agents`` directory (agent definition .md files), following a symlink if it is one."""
    agents_path = package_root / "agents"
    if agents_path.is_symlink():
        return agents_path.resolve()
    return agents_path
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@pytest.fixture(scope="session")
def skills_dir(package_root):
    """Return the ``skills`` directory (skill directories with SKILL.md files), following a symlink if it is one."""
    skills_path = package_root / "skills"
    if skills_path.is_symlink():
        return skills_path.resolve()
    return skills_path
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.fixture(scope="session")
def config_dir(package_root):
    """Return the ``config`` directory (context-contracts, etc), following a symlink if it is one."""
    config_path = package_root / "config"
    if config_path.is_symlink():
        return config_path.resolve()
    return config_path
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@pytest.fixture(scope="session")
def hooks_dir(package_root):
    """Return the ``hooks`` directory (hook scripts), following a symlink if it is one."""
    hooks_path = package_root / "hooks"
    if hooks_path.is_symlink():
        return hooks_path.resolve()
    return hooks_path
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@pytest.fixture(scope="session")
def claude_md_content(package_root):
    """
    Content of the orchestrator CLAUDE.md.

    Read explicitly as UTF-8 so the result does not depend on the platform's
    default locale encoding.
    """
    return (package_root / "CLAUDE.md").read_text(encoding="utf-8")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.fixture(scope="session")
def all_agent_files(agents_dir):
    """
    All agent .md files (excluding READMEs), sorted by path.

    Sorting keeps the order stable across runs and platforms, since the
    order in which the filesystem lists entries is not guaranteed.
    """
    return sorted(
        f for f in agents_dir.glob("*.md") if "README" not in f.name.upper()
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@pytest.fixture(scope="session")
def all_skill_dirs(skills_dir):
    """
    All skill directories that contain a SKILL.md, sorted by path.

    ``iterdir()`` yields entries in arbitrary order; sorting keeps the
    result stable across runs and platforms.
    """
    return sorted(
        d for d in skills_dir.iterdir() if d.is_dir() and (d / "SKILL.md").exists()
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ============================================================================
|
|
99
|
+
# FRONTMATTER PARSER (manual, no PyYAML)
|
|
100
|
+
# ============================================================================
|
|
101
|
+
|
|
102
|
+
def parse_frontmatter(text):
    """
    Parse YAML-style frontmatter from markdown text (manual parser, no PyYAML).

    Supports simple ``key: value`` pairs and block lists (``key:`` followed
    by ``- item`` lines). Values are kept as raw strings; blank lines and
    ``#`` comment lines are ignored. The frontmatter ends at the first line
    that begins with ``---``, so a ``---`` inside a value does not cut the
    frontmatter short.

    Args:
        text: Full markdown text starting with ---

    Returns:
        dict with parsed frontmatter fields, or empty dict if there is no
        frontmatter or no closing ``---`` line
    """
    if not text.startswith("---"):
        return {}

    # Gather lines up to the closing delimiter. Anything after the opening
    # "---" on the first line is treated as frontmatter content.
    fm_lines = []
    for line in text[3:].splitlines():
        if line.strip().startswith("---"):
            break
        fm_lines.append(line)
    else:
        # Ran out of text without finding a closing delimiter line.
        return {}

    result = {}
    current_list = None  # list currently receiving "- item" lines, if any

    for line in fm_lines:
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # List item under the current key
        if current_list is not None and stripped.startswith("- "):
            current_list.append(stripped[2:].strip())
            continue

        # Any other line ends the list in progress.
        current_list = None

        if ":" not in stripped:
            continue

        # Split on the first colon only, so values such as URLs stay intact.
        key, _, value = stripped.partition(":")
        key = key.strip()
        value = value.strip()

        if value:
            result[key] = value
        elif key:
            # "key:" with no value starts a (possibly empty) list.
            current_list = result[key] = []

    return result
|