agentv 3.10.2 → 3.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/{chunk-6UE665XI.js → chunk-7LC3VNOC.js} +4 -4
  2. package/dist/{chunk-KGK5NUFG.js → chunk-JUQCB3ZW.js} +56 -15
  3. package/dist/chunk-JUQCB3ZW.js.map +1 -0
  4. package/dist/{chunk-F7LAJMTO.js → chunk-U556GRI3.js} +4 -4
  5. package/dist/{chunk-F7LAJMTO.js.map → chunk-U556GRI3.js.map} +1 -1
  6. package/dist/cli.js +3 -3
  7. package/dist/{dist-3QUJEJUT.js → dist-2X7A3TTC.js} +2 -2
  8. package/dist/index.js +3 -3
  9. package/dist/{interactive-EO6AR2R3.js → interactive-CSA4KIND.js} +3 -3
  10. package/dist/templates/.agentv/.env.example +9 -11
  11. package/dist/templates/.agentv/config.yaml +13 -4
  12. package/dist/templates/.agentv/targets.yaml +16 -0
  13. package/package.json +1 -1
  14. package/dist/chunk-KGK5NUFG.js.map +0 -1
  15. package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +0 -84
  16. package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +0 -144
  17. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +0 -67
  18. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +0 -101
  19. package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +0 -458
  20. package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +0 -36
  21. package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +0 -118
  22. package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +0 -12753
  23. package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -77
  24. package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +0 -50
  25. package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  26. package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +0 -177
  27. package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +0 -316
  28. package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +0 -137
  29. package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +0 -215
  30. package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +0 -27
  31. package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +0 -115
  32. package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +0 -278
  33. package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +0 -333
  34. package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -79
  35. package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +0 -121
  36. package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +0 -298
  37. package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  38. package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +0 -5
  39. package/dist/templates/.github/prompts/agentv-optimize.prompt.md +0 -4
  40. /package/dist/{chunk-6UE665XI.js.map → chunk-7LC3VNOC.js.map} +0 -0
  41. /package/dist/{dist-3QUJEJUT.js.map → dist-2X7A3TTC.js.map} +0 -0
  42. /package/dist/{interactive-EO6AR2R3.js.map → interactive-CSA4KIND.js.map} +0 -0
@@ -1,278 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "title": "AgentV Eval Schema",
4
- "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
- "type": "object",
6
- "properties": {
7
- "description": {
8
- "type": "string",
9
- "description": "Description of what this eval suite covers"
10
- },
11
- "target": {
12
- "type": "string",
13
- "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
14
- },
15
- "execution": {
16
- "type": "object",
17
- "description": "Default execution configuration for all eval cases (can be overridden per case)",
18
- "properties": {
19
- "target": {
20
- "type": "string",
21
- "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
22
- },
23
- "evaluators": {
24
- "type": "array",
25
- "description": "Default evaluators for all eval cases (code-based and LLM judges)",
26
- "items": {
27
- "type": "object",
28
- "properties": {
29
- "name": {
30
- "type": "string",
31
- "description": "Evaluator name/identifier"
32
- },
33
- "type": {
34
- "type": "string",
35
- "enum": [
36
- "code",
37
- "llm_judge",
38
- "composite",
39
- "tool_trajectory",
40
- "field_accuracy",
41
- "latency",
42
- "cost",
43
- "token_usage"
44
- ],
45
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
46
- },
47
- "script": {
48
- "type": "string",
49
- "description": "Path to evaluator script (for type: code)"
50
- },
51
- "prompt": {
52
- "type": "string",
53
- "description": "Path to judge prompt file (for type: llm_judge)"
54
- }
55
- },
56
- "required": ["name", "type"],
57
- "additionalProperties": true
58
- }
59
- }
60
- },
61
- "additionalProperties": true
62
- },
63
- "evalcases": {
64
- "type": "array",
65
- "description": "Array of evaluation cases",
66
- "minItems": 1,
67
- "items": {
68
- "type": "object",
69
- "properties": {
70
- "id": {
71
- "type": "string",
72
- "description": "Unique identifier for the eval case"
73
- },
74
- "conversation_id": {
75
- "type": "string",
76
- "description": "Optional conversation identifier for threading multiple eval cases together"
77
- },
78
- "expected_outcome": {
79
- "type": "string",
80
- "description": "Description of what the AI should accomplish in this eval"
81
- },
82
- "note": {
83
- "type": "string",
84
- "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
85
- },
86
- "input_messages": {
87
- "type": "array",
88
- "description": "Input messages for the conversation",
89
- "minItems": 1,
90
- "items": {
91
- "type": "object",
92
- "properties": {
93
- "role": {
94
- "type": "string",
95
- "enum": ["system", "user", "assistant", "tool"],
96
- "description": "Message role"
97
- },
98
- "content": {
99
- "oneOf": [
100
- {
101
- "type": "string",
102
- "description": "Simple text content"
103
- },
104
- {
105
- "type": "array",
106
- "description": "Mixed content items (text and file references)",
107
- "items": {
108
- "type": "object",
109
- "properties": {
110
- "type": {
111
- "type": "string",
112
- "enum": ["text", "file"],
113
- "description": "Content type: 'text' for inline content, 'file' for file references"
114
- },
115
- "value": {
116
- "type": "string",
117
- "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
118
- }
119
- },
120
- "required": ["type", "value"],
121
- "additionalProperties": false
122
- }
123
- }
124
- ]
125
- }
126
- },
127
- "required": ["role", "content"],
128
- "additionalProperties": false
129
- }
130
- },
131
- "input": {
132
- "description": "Alias for input_messages with shorthand support. String expands to single user message, array of messages passes through.",
133
- "oneOf": [
134
- {
135
- "type": "string",
136
- "description": "Shorthand: single user message content"
137
- },
138
- {
139
- "type": "array",
140
- "description": "Array of messages (same format as input_messages)",
141
- "items": {
142
- "type": "object",
143
- "properties": {
144
- "role": {
145
- "type": "string",
146
- "enum": ["system", "user", "assistant", "tool"]
147
- },
148
- "content": {
149
- "oneOf": [{ "type": "string" }, { "type": "array" }]
150
- }
151
- },
152
- "required": ["role", "content"]
153
- }
154
- }
155
- ]
156
- },
157
- "expected_messages": {
158
- "type": "array",
159
- "description": "Expected response messages. Canonical form — use this or expected_output (alias). The content of the last entry is derived as the template variable 'reference_answer' for evaluator prompts.",
160
- "minItems": 1,
161
- "items": {
162
- "type": "object",
163
- "properties": {
164
- "role": {
165
- "type": "string",
166
- "enum": ["system", "user", "assistant", "tool"],
167
- "description": "Message role"
168
- },
169
- "content": {
170
- "oneOf": [
171
- {
172
- "type": "string",
173
- "description": "Simple text content"
174
- },
175
- {
176
- "type": "array",
177
- "description": "Mixed content items",
178
- "items": {
179
- "type": "object",
180
- "properties": {
181
- "type": {
182
- "type": "string",
183
- "enum": ["text", "file"]
184
- },
185
- "value": {
186
- "type": "string"
187
- }
188
- },
189
- "required": ["type", "value"],
190
- "additionalProperties": false
191
- }
192
- }
193
- ]
194
- }
195
- },
196
- "required": ["role", "content"],
197
- "additionalProperties": false
198
- }
199
- },
200
- "expected_output": {
201
- "description": "Alias for expected_messages with shorthand support. String expands to single assistant message, object wraps as assistant message content. Resolves to expected_messages internally — the content of the last resolved entry becomes the template variable 'reference_answer'.",
202
- "oneOf": [
203
- {
204
- "type": "string",
205
- "description": "Shorthand: single assistant message content"
206
- },
207
- {
208
- "type": "object",
209
- "description": "Shorthand: structured content wraps as assistant message"
210
- },
211
- {
212
- "type": "array",
213
- "description": "Array of messages (same format as expected_messages)",
214
- "items": {
215
- "type": "object",
216
- "properties": {
217
- "role": {
218
- "type": "string",
219
- "enum": ["system", "user", "assistant", "tool"]
220
- },
221
- "content": {
222
- "oneOf": [{ "type": "string" }, { "type": "object" }, { "type": "array" }]
223
- }
224
- },
225
- "required": ["role", "content"]
226
- }
227
- }
228
- ]
229
- },
230
- "execution": {
231
- "type": "object",
232
- "description": "Per-case execution configuration",
233
- "properties": {
234
- "target": {
235
- "type": "string",
236
- "description": "Override target for this specific eval case"
237
- },
238
- "evaluators": {
239
- "type": "array",
240
- "description": "Multiple evaluators (code-based and LLM judges)",
241
- "items": {
242
- "type": "object",
243
- "properties": {
244
- "name": {
245
- "type": "string",
246
- "description": "Evaluator name/identifier"
247
- },
248
- "type": {
249
- "type": "string",
250
- "enum": ["code", "llm_judge"],
251
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
252
- },
253
- "script": {
254
- "type": "string",
255
- "description": "Path to evaluator script (for type: code)"
256
- },
257
- "prompt": {
258
- "type": "string",
259
- "description": "Path to judge prompt file (for type: llm_judge)"
260
- }
261
- },
262
- "required": ["name", "type"],
263
- "additionalProperties": true
264
- }
265
- }
266
- },
267
- "additionalProperties": true
268
- }
269
- },
270
- "required": ["id", "expected_outcome"],
271
- "anyOf": [{ "required": ["input_messages"] }, { "required": ["input"] }],
272
- "additionalProperties": true
273
- }
274
- }
275
- },
276
- "required": ["evalcases"],
277
- "additionalProperties": false
278
- }
@@ -1,333 +0,0 @@
1
- # Example Eval Files
2
-
3
- This document contains complete examples of well-structured eval files demonstrating various AgentV patterns and best practices.
4
-
5
- ## Basic Example: Simple Q&A Eval
6
-
7
- ```yaml
8
- description: Basic arithmetic evaluation
9
- execution:
10
- target: default
11
-
12
- evalcases:
13
- - id: simple-addition
14
- expected_outcome: Correctly calculates 2+2
15
-
16
- input_messages:
17
- - role: user
18
- content: What is 2 + 2?
19
-
20
- expected_messages:
21
- - role: assistant
22
- content: "4"
23
- ```
24
-
25
- ## Code Review with File References
26
-
27
- ```yaml
28
- description: Code review with guidelines
29
- execution:
30
- target: azure_base
31
-
32
- evalcases:
33
- - id: code-review-basic
34
- expected_outcome: Assistant provides helpful code analysis with security considerations
35
-
36
- input_messages:
37
- - role: system
38
- content: You are an expert code reviewer.
39
- - role: user
40
- content:
41
- - type: text
42
- value: |-
43
- Review this function for security issues:
44
-
45
- ```python
46
- def get_user(user_id):
47
- query = f"SELECT * FROM users WHERE id = {user_id}"
48
- return db.execute(query)
49
- ```
50
- - type: file
51
- value: /prompts/security-guidelines.md
52
-
53
- expected_messages:
54
- - role: assistant
55
- content: |-
56
- This code has a critical SQL injection vulnerability. The user_id is directly
57
- interpolated into the query string without sanitization.
58
-
59
- Recommended fix:
60
- ```python
61
- def get_user(user_id):
62
- query = "SELECT * FROM users WHERE id = ?"
63
- return db.execute(query, (user_id,))
64
- ```
65
- ```
66
-
67
- ## Multi-Evaluator Configuration
68
-
69
- ```yaml
70
- description: JSON generation with validation
71
- execution:
72
- target: default
73
-
74
- evalcases:
75
- - id: json-generation-with-validation
76
- expected_outcome: Generates valid JSON with required fields
77
-
78
- execution:
79
- evaluators:
80
- - name: json_format_validator
81
- type: code_judge
82
- script: uv run validate_json.py
83
- cwd: ./evaluators
84
- - name: content_evaluator
85
- type: llm_judge
86
- prompt: ./judges/semantic_correctness.md
87
-
88
- input_messages:
89
- - role: user
90
- content: |-
91
- Generate a JSON object for a user with name "Alice",
92
- email "alice@example.com", and role "admin".
93
-
94
- expected_messages:
95
- - role: assistant
96
- content: |-
97
- {
98
- "name": "Alice",
99
- "email": "alice@example.com",
100
- "role": "admin"
101
- }
102
- ```
103
-
104
- ## Tool Trajectory Evaluation
105
-
106
- Validate that an agent uses specific tools during execution.
107
-
108
- ```yaml
109
- description: Tool usage validation
110
- execution:
111
- target: mock_agent
112
-
113
- evalcases:
114
- # Validate minimum tool usage (order doesn't matter)
115
- - id: research-depth
116
- expected_outcome: Agent researches thoroughly
117
- input_messages:
118
- - role: user
119
- content: Research REST vs GraphQL
120
- execution:
121
- evaluators:
122
- - name: research-check
123
- type: tool_trajectory
124
- mode: any_order
125
- minimums:
126
- knowledgeSearch: 2
127
- documentRetrieve: 1
128
-
129
- # Validate exact tool sequence
130
- - id: auth-flow
131
- expected_outcome: Agent follows auth sequence
132
- input_messages:
133
- - role: user
134
- content: Authenticate user
135
- execution:
136
- evaluators:
137
- - name: auth-sequence
138
- type: tool_trajectory
139
- mode: exact
140
- expected:
141
- - tool: checkCredentials
142
- - tool: generateToken
143
- ```
144
-
145
- ## Static Trace Evaluation
146
-
147
- Evaluate pre-existing trace files without running an agent.
148
-
149
- ```yaml
150
- description: Static trace evaluation
151
- execution:
152
- target: static_trace
153
-
154
- evalcases:
155
- - id: validate-trace-file
156
- expected_outcome: Trace contains required steps
157
- input_messages:
158
- - role: user
159
- content: Analyze trace
160
- execution:
161
- evaluators:
162
- - name: trace-check
163
- type: tool_trajectory
164
- mode: in_order
165
- expected:
166
- - tool: webSearch
167
- - tool: readFile
168
- ```
169
-
170
- ## Multi-Turn Conversation (Single Eval Case)
171
-
172
- ```yaml
173
- description: Multi-turn debugging session with clarifying questions
174
- execution:
175
- target: default
176
-
177
- evalcases:
178
- - id: debug-with-clarification
179
- expected_outcome: |-
180
- Assistant conducts a multi-turn debugging session, asking clarification
181
- questions when needed, correctly diagnosing the bug, and proposing a clear
182
- fix with rationale.
183
-
184
- input_messages:
185
- - role: system
186
- content: You are an expert debugging assistant who reasons step by step, asks clarifying questions, and explains fixes clearly.
187
- - role: user
188
- content: |-
189
- I'm getting an off-by-one error in this function, but I can't see why:
190
-
191
- ```python
192
- def get_items(items):
193
- result = []
194
- for i in range(len(items) - 1):
195
- result.append(items[i])
196
- return result
197
- ```
198
-
199
- Sometimes the last element is missing. Can you help debug this?
200
- - role: assistant
201
- content: |-
202
- I can help debug this. Before I propose a fix, could you tell me:
203
- - What output you expect for an example input list
204
- - What output you actually get
205
- - role: user
206
- content: |-
207
- For `[1, 2, 3, 4]` I expect `[1, 2, 3, 4]`, but I get `[1, 2, 3]`.
208
-
209
- expected_messages:
210
- - role: assistant
211
- content: |-
212
- You have an off-by-one error in your loop bounds.
213
- You're iterating with `range(len(items) - 1)`, which stops before the last index.
214
- To include all items, you can either:
215
- - Use `range(len(items))`, or
216
- - Iterate directly over the list: `for item in items:`
217
-
218
- Here's a corrected version:
219
-
220
- ```python
221
- def get_items(items):
222
- result = []
223
- for item in items:
224
- result.append(item)
225
- return result
226
- ```
227
- ```
228
-
229
- ## Batch CLI Evaluation
230
-
231
- Evaluate external batch runners that process all evalcases in one invocation.
232
-
233
- ```yaml
234
- description: Batch CLI demo (AML screening)
235
- execution:
236
- target: batch_cli
237
-
238
- evalcases:
239
- - id: aml-001
240
- expected_outcome: |-
241
- Batch runner returns JSON with decision=CLEAR.
242
-
243
- expected_messages:
244
- - role: assistant
245
- content:
246
- decision: CLEAR
247
-
248
- input_messages:
249
- - role: system
250
- content: You are a deterministic AML screening batch checker.
251
- - role: user
252
- content:
253
- request:
254
- type: aml_screening_check
255
- jurisdiction: AU
256
- effective_date: 2025-01-01
257
- row:
258
- id: aml-001
259
- customer_name: Example Customer A
260
- origin_country: NZ
261
- destination_country: AU
262
- transaction_type: INTERNATIONAL_TRANSFER
263
- amount: 5000
264
- currency: USD
265
-
266
- execution:
267
- evaluators:
268
- - name: decision-check
269
- type: code_judge
270
- script: bun run ./scripts/check-batch-cli-output.ts
271
- cwd: .
272
-
273
- - id: aml-002
274
- expected_outcome: |-
275
- Batch runner returns JSON with decision=REVIEW.
276
-
277
- expected_messages:
278
- - role: assistant
279
- content:
280
- decision: REVIEW
281
-
282
- input_messages:
283
- - role: system
284
- content: You are a deterministic AML screening batch checker.
285
- - role: user
286
- content:
287
- request:
288
- type: aml_screening_check
289
- jurisdiction: AU
290
- effective_date: 2025-01-01
291
- row:
292
- id: aml-002
293
- customer_name: Example Customer B
294
- origin_country: IR
295
- destination_country: AU
296
- transaction_type: INTERNATIONAL_TRANSFER
297
- amount: 2000
298
- currency: USD
299
-
300
- execution:
301
- evaluators:
302
- - name: decision-check
303
- type: code_judge
304
- script: bun run ./scripts/check-batch-cli-output.ts
305
- cwd: .
306
- ```
307
-
308
- ### Batch CLI Pattern Notes
309
- - **execution.target: batch_cli** - Configure CLI provider with `provider_batching: true`
310
- - **Batch runner** - Reads eval YAML via `--eval` flag, outputs JSONL keyed by `id`
311
- - **Structured input** - Put data in `user.content` as objects for runner to extract
312
- - **Structured expected** - Use `expected_messages.content` with object fields
313
- - **Per-case evaluators** - Each evalcase has its own evaluator to validate output
314
-
315
- ## Notes on Examples
316
-
317
- ### File Path Conventions
318
- - **Absolute paths** (start with `/`): Resolved from repository root
319
- - Example: `/prompts/guidelines.md` → `<repo_root>/prompts/guidelines.md`
320
- - **Relative paths** (start with `./` or `../`): Resolved from eval file directory
321
- - Example: `../../prompts/file.md` → Two directories up, then into prompts/
322
-
323
- ### expected_outcome Writing Tips
324
- - Be specific about what success looks like
325
- - Mention key elements that must be present
326
- - For classification tasks, specify the expected category
327
- - For reasoning tasks, describe the thought process expected
328
-
329
- ### Expected Messages
330
- - Show the pattern, not rigid templates
331
- - Allow for natural language variation
332
- - Focus on semantic correctness over exact matching
333
- - Evaluators will handle the actual validation
@@ -1,79 +0,0 @@
1
- # Rubric Evaluator
2
-
3
- ## Field Reference
4
-
5
- | Field | Type | Default | Description |
6
- |-------|------|---------|-------------|
7
- | `id` | string | auto-generated | Unique identifier |
8
- | `expected_outcome` | string | required* | Criterion being evaluated (*optional if `score_ranges` used) |
9
- | `weight` | number | 1.0 | Relative importance |
10
- | `required` | boolean | true | Failing forces verdict to 'fail' (checklist mode) |
11
- | `required_min_score` | integer | - | Minimum 0-10 score to pass (score-range mode) |
12
- | `score_ranges` | map or array | - | Score range definitions for analytic scoring |
13
-
14
- `description` is a backward-compatible alias for `expected_outcome`.
15
-
16
- ## Checklist Mode
17
-
18
- ```yaml
19
- rubrics:
20
- - Mentions divide-and-conquer approach
21
- - id: complexity
22
- expected_outcome: States time complexity correctly
23
- weight: 2.0
24
- required: true
25
- - id: examples
26
- expected_outcome: Includes code examples
27
- weight: 1.0
28
- required: false
29
- ```
30
-
31
- ## Score-Range Mode
32
-
33
- Shorthand map format (recommended):
34
-
35
- ```yaml
36
- rubrics:
37
- - id: correctness
38
- weight: 2.0
39
- required_min_score: 7
40
- score_ranges:
41
- 0: Critical bugs
42
- 3: Minor bugs
43
- 6: Correct with minor issues
44
- 9: Fully correct
45
- ```
46
-
47
- Map keys are lower bounds (0-10), and the lowest key must be 0. Each range extends from its key to (next key - 1), with the last range extending to 10.
48
-
49
- Array format is also accepted:
50
-
51
- ```yaml
52
- score_ranges:
53
- - score_range: [0, 2]
54
- expected_outcome: Critical bugs
55
- - score_range: [3, 5]
56
- expected_outcome: Minor bugs
57
- - score_range: [6, 8]
58
- expected_outcome: Correct with minor issues
59
- - score_range: [9, 10]
60
- expected_outcome: Fully correct
61
- ```
62
-
63
- Ranges must be integers 0-10, non-overlapping, covering all values 0-10.
64
-
65
- ## Scoring
66
-
67
- **Checklist:** `score = sum(satisfied weights) / sum(all weights)`
68
-
69
- **Score-range:** `score = weighted_average(raw_score / 10)` per criterion
70
-
71
- ## Verdicts
72
-
73
- | Verdict | Condition |
74
- |---------|-----------|
75
- | `pass` | score >= 0.8 AND all gating criteria satisfied |
76
- | `borderline` | 0.6 <= score < 0.8 AND all gating criteria satisfied |
77
- | `fail` | score < 0.6 OR any gating criterion failed |
78
-
79
- Gating: checklist uses `required: true`, score-range uses `required_min_score: N`.