agentv 4.26.1 → 4.27.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/{chunk-JA4WQNE6.js → chunk-47JX7NNZ.js} +10 -2
  2. package/dist/chunk-47JX7NNZ.js.map +1 -0
  3. package/dist/{chunk-XBUHMRX2.js → chunk-V3LWJB5X.js} +431 -49
  4. package/dist/chunk-V3LWJB5X.js.map +1 -0
  5. package/dist/cli.js +2 -2
  6. package/dist/index.js +2 -2
  7. package/dist/{interactive-YMKWKPD7.js → interactive-L6PIIFNQ.js} +2 -2
  8. package/dist/skills/agentv-bench/LICENSE.txt +202 -0
  9. package/dist/skills/agentv-bench/SKILL.md +459 -0
  10. package/dist/skills/agentv-bench/agents/analyzer.md +177 -0
  11. package/dist/skills/agentv-bench/agents/comparator.md +247 -0
  12. package/dist/skills/agentv-bench/agents/executor.md +30 -0
  13. package/dist/skills/agentv-bench/agents/grader.md +238 -0
  14. package/dist/skills/agentv-bench/agents/mutator.md +172 -0
  15. package/dist/skills/agentv-bench/references/autoresearch.md +309 -0
  16. package/dist/skills/agentv-bench/references/description-optimization.md +66 -0
  17. package/dist/skills/agentv-bench/references/environment-adaptation.md +82 -0
  18. package/dist/skills/agentv-bench/references/eval-yaml-spec.md +338 -0
  19. package/dist/skills/agentv-bench/references/migrating-from-skill-creator.md +103 -0
  20. package/dist/skills/agentv-bench/references/schemas.md +432 -0
  21. package/dist/skills/agentv-bench/references/subagent-pipeline.md +181 -0
  22. package/dist/skills/agentv-bench/scripts/trajectory.html +462 -0
  23. package/dist/skills/agentv-eval-review/SKILL.md +53 -0
  24. package/dist/skills/agentv-eval-review/scripts/lint_eval.py +239 -0
  25. package/dist/skills/agentv-eval-writer/SKILL.md +707 -0
  26. package/dist/skills/agentv-eval-writer/references/config-schema.json +63 -0
  27. package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +119 -0
  28. package/dist/skills/agentv-eval-writer/references/eval-schema.json +19077 -0
  29. package/dist/skills/agentv-eval-writer/references/rubric-evaluator.md +114 -0
  30. package/dist/skills/agentv-governance/SKILL.md +79 -0
  31. package/dist/skills/agentv-governance/references/eu-ai-act-risk-tiers.md +37 -0
  32. package/dist/skills/agentv-governance/references/governance-yaml-shape.md +125 -0
  33. package/dist/skills/agentv-governance/references/iso-42001-controls.md +46 -0
  34. package/dist/skills/agentv-governance/references/lint-rules.md +169 -0
  35. package/dist/skills/agentv-governance/references/mitre-atlas.md +38 -0
  36. package/dist/skills/agentv-governance/references/owasp-agentic-top-10-2025.md +28 -0
  37. package/dist/skills/agentv-governance/references/owasp-llm-top-10-2025.md +25 -0
  38. package/dist/skills/agentv-trace-analyst/SKILL.md +161 -0
  39. package/package.json +1 -1
  40. package/dist/chunk-JA4WQNE6.js.map +0 -1
  41. package/dist/chunk-XBUHMRX2.js.map +0 -1
  42. /package/dist/{interactive-YMKWKPD7.js.map → interactive-L6PIIFNQ.js.map} +0 -0
@@ -0,0 +1,239 @@
1
+ #!/usr/bin/env python3
2
+ """Lint AgentV eval YAML files for common issues.
3
+
4
+ Usage: python lint_eval.py <path-to-eval-dir-or-file> [--json]
5
+
6
+ Checks:
7
+ - File uses .eval.yaml extension
8
+ - description field present
9
+ - Each test has id, input, criteria
10
+ - File paths in type:file use leading /
11
+ - assertions blocks present (not relying solely on expected_output)
12
+ - expected_output does not contain evaluation criteria prose
13
+ - Repeated file inputs across tests (should use top-level input)
14
+ - Naming prefix consistency across eval files in same directory
15
+
16
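+ Exit code: 0 if no error-severity issues were found (warnings and info do not fail the run), 1 if any error-severity issue was found, 2 on usage errors (missing or nonexistent path).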
17
+ """
18
+
19
+ import json
20
+ import os
21
+ import re
22
+ import sys
23
+ from pathlib import Path
24
+
25
+ try:
26
+ import yaml
27
+ except ImportError:
28
+ # Fall back to basic YAML parsing if PyYAML not available
29
+ yaml = None
30
+
31
+
32
def parse_yaml_basic(text: str) -> dict:
    """Best-effort stand-in for ``yaml.safe_load`` when PyYAML is unavailable.

    Only documents that are also valid JSON can be decoded. Anything else
    yields an empty dict, so installing PyYAML is recommended for real use.

    Args:
        text: Raw contents of the file being linted.

    Returns:
        The decoded JSON value (normally a mapping), or ``{}`` when ``text``
        is not valid JSON.
    """
    try:
        return json.loads(text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Only a decode
        # failure means "not JSON"; other errors should surface to the caller.
        return {}
42
+
43
+
44
def load_yaml(path: Path) -> dict:
    """Read ``path`` as UTF-8 text and parse it into Python data.

    Uses PyYAML's ``safe_load`` when the module was importable; otherwise
    defers to the JSON-only fallback parser. A falsy parse result (for
    example an empty document) is normalised to an empty dict.
    """
    raw = path.read_text(encoding="utf-8")
    if not yaml:
        return parse_yaml_basic(raw)
    parsed = yaml.safe_load(raw)
    return parsed or {}
49
+
50
+
51
def lint_file(path: Path) -> list[dict]:
    """Lint a single eval YAML file and return the issues found.

    Checks performed: ``.eval.yaml`` extension, parseability, a top-level
    ``description``, a non-empty ``tests`` list, per-test ``id`` / ``input`` /
    grading fields, criteria-style prose inside ``expected_output``, file
    paths without a leading ``/``, and file inputs repeated in every test.

    Args:
        path: Location of the eval file to check.

    Returns:
        A list of issue dicts with the keys ``file``, ``severity``
        (``"error"``, ``"warning"`` or ``"info"``), ``message`` and ``line``
        (``None`` for every check in this function).
    """
    issues = []

    def issue(severity: str, msg: str, line: int | None = None):
        issues.append({"file": str(path), "severity": severity, "message": msg, "line": line})

    # Check extension
    if not path.name.endswith(".eval.yaml"):
        issue("error", f"File should use .eval.yaml extension, got: {path.name}")

    try:
        data = load_yaml(path)
    except Exception as e:
        # Any read or parse failure becomes a lint error instead of a crash.
        issue("error", f"Failed to parse YAML: {e}")
        return issues

    if not isinstance(data, dict):
        issue("error", "Root element is not a mapping")
        return issues

    # Check description
    if "description" not in data:
        issue("warning", "Missing top-level 'description' field")

    tests = data.get("tests", [])
    if not isinstance(tests, list):
        issue("error", "'tests' is not a list")
        return issues

    if not tests:
        issue("warning", "No tests defined")
        return issues

    # Top-level input holds shared references that individual tests may omit.
    top_level_input = data.get("input")

    # Phrases that read like grading criteria rather than a sample answer.
    # Defined once here instead of being rebuilt for every test.
    prose_patterns = (
        r"[Tt]he agent should",
        r"[Ss]hould identify",
        r"[Ss]hould flag",
        r"[Ss]hould recommend",
        r"[Ss]hould produce",
        r"[Ss]hould detect",
        r"[Ss]hould load",
        r"[Ss]hould run",
    )

    # Collect file values across tests to detect repetition
    file_values_per_test: list[list[str]] = []

    for i, test in enumerate(tests):
        if not isinstance(test, dict):
            # A scalar or list entry has no fields to inspect; report it
            # rather than failing on ``test.get``.
            issue("error", f"Test at index {i} is not a mapping")
            file_values_per_test.append([])
            continue

        test_id = test.get("id", f"test-{i}")

        if "id" not in test:
            issue("error", f"Test at index {i} missing 'id'")

        if "input" not in test and top_level_input is None:
            issue("error", f"Test '{test_id}' missing 'input' and no top-level input defined")

        has_criteria = "criteria" in test
        has_expected = "expected_output" in test
        has_assertions = "assertions" in test

        if not has_criteria and not has_expected and not has_assertions:
            issue("error", f"Test '{test_id}' needs at least one of: criteria, expected_output, assertions")

        # Check assertions present
        if not has_assertions and has_expected:
            issue("warning", f"Test '{test_id}' has expected_output but no assertions — add deterministic assertions where possible")

        # Check expected_output for prose patterns
        if has_expected:
            expected = test["expected_output"]
            if isinstance(expected, str):
                expected_text = expected
            elif isinstance(expected, list):
                # Newline-join message contents so a phrase cannot match
                # across the boundary between two separate messages.
                expected_text = "\n".join(
                    msg["content"]
                    for msg in expected
                    if isinstance(msg, dict) and isinstance(msg.get("content"), str)
                )
            else:
                expected_text = ""

            for pat in prose_patterns:
                match = re.search(pat, expected_text)
                if match:
                    # Quote the text that matched. str.lstrip() strips a
                    # character set, not a prefix, so it cannot be used to
                    # turn the pattern back into a readable phrase.
                    issue("warning", f"Test '{test_id}' expected_output contains evaluation criteria prose ('{match.group(0)}...') — use criteria or assertions instead")
                    break

        # Collect file values from input. Non-string values are dropped
        # because the checks below need str methods and hashable items.
        test_files = [
            fv
            for fv in extract_file_values(test.get("input", []))
            if isinstance(fv, str)
        ]
        file_values_per_test.append(test_files)

        # Check file paths for leading /
        for fv in test_files:
            if not fv.startswith("/"):
                issue("warning", f"Test '{test_id}' file path missing leading '/': {fv}")

    # Check for repeated file inputs
    if len(file_values_per_test) >= 2 and not top_level_input:
        common_files = set(file_values_per_test[0])
        for fvs in file_values_per_test[1:]:
            common_files &= set(fvs)
        if common_files:
            issue("info", f"File input repeated in every test: {', '.join(sorted(common_files))} — consider using top-level input")

    return issues
156
+
157
+
158
def extract_file_values(input_data) -> list[str]:
    """Extract ``type: file`` values from an input structure.

    The structure walked is a list of message dicts whose ``content`` is a
    list of part dicts. Any element that does not have that shape is skipped.

    Args:
        input_data: The ``input`` value of a test; may be any type.

    Returns:
        The non-empty string ``value`` of every part whose ``type`` is
        ``"file"``, in document order. Non-string values are ignored so the
        result matches the declared ``list[str]`` return type.
    """
    if not isinstance(input_data, list):
        return []

    files = []
    for item in input_data:
        if not isinstance(item, dict):
            continue
        content = item.get("content", [])
        if not isinstance(content, list):
            continue
        for part in content:
            if not isinstance(part, dict) or part.get("type") != "file":
                continue
            value = part.get("value", "")
            # Keep only real, non-empty strings; callers use str methods and
            # set membership on the result.
            if isinstance(value, str) and value:
                files.append(value)
    return files
172
+
173
+
174
def lint_directory(path: Path) -> list[dict]:
    """Lint every ``*.yaml`` / ``*.yml`` file found recursively under ``path``.

    Besides the per-file checks, emits an ``info`` issue for each directory
    whose files use more than one naming prefix (the text before the first
    ``-`` in the file name, with the ``.eval`` suffix removed).

    Args:
        path: Directory to search.

    Returns:
        A list of issue dicts (``file``, ``severity``, ``message``, ``line``).
        A single warning is returned when no YAML files are found.
    """
    issues = []
    eval_files = sorted(path.rglob("*.yaml")) + sorted(path.rglob("*.yml"))

    if not eval_files:
        issues.append({"file": str(path), "severity": "warning", "message": "No eval files found", "line": None})
        return issues

    # Check naming prefix consistency per directory. Files in different
    # subdirectories are separate groups and are not compared to each other.
    prefixes_by_dir: dict[Path, set[str]] = {}
    for f in eval_files:
        # Drop only a trailing ".eval"; replace() would also remove the text
        # from the middle of a name.
        name = f.stem.removesuffix(".eval")
        parts = name.split("-")
        if len(parts) >= 2:
            prefixes_by_dir.setdefault(f.parent, set()).add(parts[0])

    for directory in sorted(prefixes_by_dir):
        prefixes = prefixes_by_dir[directory]
        if len(prefixes) > 1:
            issues.append({
                "file": str(directory),
                "severity": "info",
                "message": f"Inconsistent naming prefixes: {', '.join(sorted(prefixes))}",
                "line": None,
            })

    for f in eval_files:
        issues.extend(lint_file(f))

    return issues
202
+
203
+
204
def main():
    """Command-line entry point: lint a file or directory and report issues.

    Usage: ``lint_eval.py <path> [--json]``. The ``--json`` flag may appear
    before or after the path.

    Prints the issues as JSON when ``--json`` is given, otherwise as one line
    per issue followed by a per-severity summary.

    Exits with status 2 on usage errors (no path given, or the path does not
    exist), 1 when at least one error-severity issue was found, 0 otherwise.
    """
    args = sys.argv[1:]
    output_json = "--json" in args
    # The target is the first argument that is not the --json flag, so the
    # flag can be placed on either side of the path.
    positional = [arg for arg in args if arg != "--json"]

    if not positional:
        print(f"Usage: {sys.argv[0]} <path> [--json]", file=sys.stderr)
        sys.exit(2)

    target = Path(positional[0])

    if target.is_file():
        issues = lint_file(target)
    elif target.is_dir():
        issues = lint_directory(target)
    else:
        print(f"Error: {target} not found", file=sys.stderr)
        sys.exit(2)

    if output_json:
        print(json.dumps(issues, indent=2))
    else:
        for iss in issues:
            line = f":{iss['line']}" if iss.get("line") else ""
            print(f"[{iss['severity'].upper()}] {iss['file']}{line}: {iss['message']}")

        # Tally issues per severity for the summary line.
        counts = {}
        for iss in issues:
            counts[iss["severity"]] = counts.get(iss["severity"], 0) + 1
        if issues:
            print(f"\n{len(issues)} issues: {', '.join(f'{v} {k}' for k, v in sorted(counts.items()))}")
        else:
            print("No issues found.")

    # Only error-severity issues fail the run; warnings and info do not.
    sys.exit(1 if any(i["severity"] == "error" for i in issues) else 0)
236
+
237
+
238
# Entry-point guard: run the CLI only when this file is executed as a script, not when it is imported.
+ if __name__ == "__main__":
239
+ main()