agentv 1.2.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runCli
4
- } from "./chunk-IVIT4U6S.js";
4
+ } from "./chunk-3RYQPI4H.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
 
7
7
  // src/cli.ts
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  app,
3
3
  runCli
4
- } from "./chunk-IVIT4U6S.js";
4
+ } from "./chunk-3RYQPI4H.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
  export {
7
7
  app,
@@ -1,23 +1,23 @@
1
- # Copy this file to .env and fill in your credentials
2
-
3
- # Azure OpenAI Configuration
4
- AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
5
- AZURE_OPENAI_API_KEY=your-openai-api-key-here
6
- AZURE_DEPLOYMENT_NAME=gpt-5-chat
7
- AZURE_OPENAI_API_VERSION=2024-12-01-preview
8
-
9
- # Google Gemini
10
- GOOGLE_GENERATIVE_AI_API_KEY=your-gemini-api-key-here
11
- GEMINI_MODEL_NAME=gemini-2.5-flash
12
-
13
- # Anthropic
14
- ANTHROPIC_API_KEY=your-anthropic-api-key-here
15
-
16
- # VS Code Workspace Paths for Execution Targets
17
- # Note: Using forward slashes is recommended for paths in .env files
18
- # to avoid issues with escape characters.
19
- PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
20
-
21
- # CLI provider sample (used by the local_cli target)
22
- CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
23
- LOCAL_AGENT_TOKEN=dummytoken
1
+ # Copy this file to .env and fill in your credentials
2
+
3
+ # Azure OpenAI Configuration
4
+ AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
5
+ AZURE_OPENAI_API_KEY=your-openai-api-key-here
6
+ AZURE_DEPLOYMENT_NAME=gpt-5-chat
7
+ AZURE_OPENAI_API_VERSION=2024-12-01-preview
8
+
9
+ # Google Gemini
10
+ GOOGLE_GENERATIVE_AI_API_KEY=your-gemini-api-key-here
11
+ GEMINI_MODEL_NAME=gemini-2.5-flash
12
+
13
+ # Anthropic
14
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
15
+
16
+ # VS Code Workspace Paths for Execution Targets
17
+ # Note: Using forward slashes is recommended for paths in .env files
18
+ # to avoid issues with escape characters.
19
+ PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
20
+
21
+ # CLI provider sample (used by the local_cli target)
22
+ CLI_EVALS_DIR=./docs/examples/simple/evals/local-cli
23
+ LOCAL_AGENT_TOKEN=dummytoken
@@ -1,15 +1,15 @@
1
- $schema: agentv-config-v2
2
-
3
- # Customize which files are treated as guidelines vs regular file content
4
-
5
- # Custom guideline patterns:
6
- guideline_patterns:
7
- - "**/*.instructions.md"
8
- - "**/*.prompt.md"
9
- - "**/SKILL.md"
10
-
11
- # Notes:
12
- # - Patterns use standard glob syntax (via micromatch library)
13
- # - Paths are normalized to forward slashes for cross-platform compatibility
14
- # - Only files matching these patterns are loaded as guidelines
15
- # - All other files referenced in eval cases are treated as regular file content
1
+ $schema: agentv-config-v2
2
+
3
+ # Customize which files are treated as guidelines vs regular file content
4
+
5
+ # Custom guideline patterns:
6
+ guideline_patterns:
7
+ - "**/*.instructions.md"
8
+ - "**/*.prompt.md"
9
+ - "**/SKILL.md"
10
+
11
+ # Notes:
12
+ # - Patterns use standard glob syntax (via micromatch library)
13
+ # - Paths are normalized to forward slashes for cross-platform compatibility
14
+ # - Only files matching these patterns are loaded as guidelines
15
+ # - All other files referenced in eval cases are treated as regular file content
@@ -1,73 +1,71 @@
1
- $schema: agentv-targets-v2.2
2
-
3
- # A list of all supported evaluation targets for the project.
4
- # Each target defines a provider and its specific configuration.
5
- # Actual values for paths/keys are stored in the local .env file.
6
-
7
- targets:
8
- - name: default
9
- provider: azure
10
- endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
11
- api_key: ${{ AZURE_OPENAI_API_KEY }}
12
- model: ${{ AZURE_DEPLOYMENT_NAME }}
13
- # version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (2024-12-01-preview)
14
-
15
- - name: vscode
16
- provider: vscode
17
- judge_target: azure_base
18
-
19
- - name: codex
20
- provider: codex
21
- judge_target: azure_base
22
- # Uses the Codex CLI (defaults to `codex` on PATH)
23
- # executable: ${{ CODEX_CLI_PATH }} # Optional: override executable path
24
- # args: # Optional additional CLI arguments
25
- # - --profile
26
- # - ${{ CODEX_PROFILE }}
27
- # - --model
28
- # - ${{ CODEX_MODEL }}
29
- # - --ask-for-approval
30
- # - ${{ CODEX_APPROVAL_PRESET }}
31
- timeout_seconds: 180
32
- cwd: ${{ CODEX_WORKSPACE_DIR }} # Where scratch workspaces are created
33
- log_dir: ${{ CODEX_LOG_DIR }} # Optional: where Codex CLI stream logs are stored (defaults to ./.agentv/logs/codex)
34
- log_format: json # Optional: 'summary' (default) or 'json' for raw event logs
35
-
36
- - name: vscode_projectx
37
- provider: vscode
38
- workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
39
- provider_batching: false
40
- judge_target: azure_base
41
-
42
- - name: vscode_insiders_projectx
43
- provider: vscode-insiders
44
- workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
45
- provider_batching: false
46
- judge_target: azure_base
47
-
48
- - name: azure_base
49
- provider: azure
50
- endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
51
- api_key: ${{ AZURE_OPENAI_API_KEY }}
52
- model: ${{ AZURE_DEPLOYMENT_NAME }}
53
- version: ${{ AZURE_OPENAI_API_VERSION }}
54
-
55
- - name: gemini_base
56
- provider: gemini
57
- api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
58
- model: ${{ GEMINI_MODEL_NAME }}
59
-
60
- - name: local_cli
61
- provider: cli
62
- judge_target: azure_base
63
- # Passes the fully rendered prompt and any attached files to a local Python script
64
- # NOTE: Do not add quotes around {PROMPT} or {FILES} - they are already shell-escaped
65
- command_template: uv run ./mock_cli.py --prompt {PROMPT} {FILES} --output {OUTPUT_FILE}
66
- # Format for each file in {FILES}. {path} and {basename} are automatically shell-escaped, so no quotes needed
67
- files_format: --file {path}
68
- # Optional working directory resolved from .env
69
- cwd: ${{ CLI_EVALS_DIR }}
70
- timeout_seconds: 30
71
- healthcheck:
72
- type: command
73
- command_template: uv run ./mock_cli.py --healthcheck
1
+ # A list of all supported evaluation targets for the project.
2
+ # Each target defines a provider and its specific configuration.
3
+ # Actual values for paths/keys are stored in the local .env file.
4
+
5
+ targets:
6
+ - name: default
7
+ provider: azure
8
+ endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
9
+ api_key: ${{ AZURE_OPENAI_API_KEY }}
10
+ model: ${{ AZURE_DEPLOYMENT_NAME }}
11
+ # version: ${{ AZURE_OPENAI_API_VERSION }} # Optional: uncomment to override default (2024-12-01-preview)
12
+
13
+ - name: vscode
14
+ provider: vscode
15
+ judge_target: azure_base
16
+
17
+ - name: codex
18
+ provider: codex
19
+ judge_target: azure_base
20
+ # Uses the Codex CLI (defaults to `codex` on PATH)
21
+ # executable: ${{ CODEX_CLI_PATH }} # Optional: override executable path
22
+ # args: # Optional additional CLI arguments
23
+ # - --profile
24
+ # - ${{ CODEX_PROFILE }}
25
+ # - --model
26
+ # - ${{ CODEX_MODEL }}
27
+ # - --ask-for-approval
28
+ # - ${{ CODEX_APPROVAL_PRESET }}
29
+ timeout_seconds: 180
30
+ cwd: ${{ CODEX_WORKSPACE_DIR }} # Where scratch workspaces are created
31
+ log_dir: ${{ CODEX_LOG_DIR }} # Optional: where Codex CLI stream logs are stored (defaults to ./.agentv/logs/codex)
32
+ log_format: json # Optional: 'summary' (default) or 'json' for raw event logs
33
+
34
+ - name: vscode_projectx
35
+ provider: vscode
36
+ workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
37
+ provider_batching: false
38
+ judge_target: azure_base
39
+
40
+ - name: vscode_insiders_projectx
41
+ provider: vscode-insiders
42
+ workspace_template: ${{ PROJECTX_WORKSPACE_PATH }}
43
+ provider_batching: false
44
+ judge_target: azure_base
45
+
46
+ - name: azure_base
47
+ provider: azure
48
+ endpoint: ${{ AZURE_OPENAI_ENDPOINT }}
49
+ api_key: ${{ AZURE_OPENAI_API_KEY }}
50
+ model: ${{ AZURE_DEPLOYMENT_NAME }}
51
+ version: ${{ AZURE_OPENAI_API_VERSION }}
52
+
53
+ - name: gemini_base
54
+ provider: gemini
55
+ api_key: ${{ GOOGLE_GENERATIVE_AI_API_KEY }}
56
+ model: ${{ GEMINI_MODEL_NAME }}
57
+
58
+ - name: local_cli
59
+ provider: cli
60
+ judge_target: azure_base
61
+ # Passes the fully rendered prompt and any attached files to a local Python script
62
+ # NOTE: Do not add quotes around {PROMPT} or {FILES} - they are already shell-escaped
63
+ command_template: uv run ./mock_cli.py --prompt {PROMPT} {FILES} --output {OUTPUT_FILE}
64
+ # Format for each file in {FILES}. {path} and {basename} are automatically shell-escaped, so no quotes needed
65
+ files_format: --file {path}
66
+ # Optional working directory resolved from .env
67
+ cwd: ${{ CLI_EVALS_DIR }}
68
+ timeout_seconds: 30
69
+ healthcheck:
70
+ type: command
71
+ command_template: uv run ./mock_cli.py --healthcheck
@@ -1,174 +1,212 @@
1
- ---
2
- name: agentv-eval-builder
3
- description: Create and maintain AgentV YAML evaluation files for testing AI agent performance. Use this skill when creating new eval files, adding eval cases, or configuring custom evaluators (code validators or LLM judges) for agent testing workflows.
4
- ---
5
-
6
- # AgentV Eval Builder
7
-
8
- ## Schema Reference
9
- - Schema: `references/eval-schema.json` (JSON Schema for validation and tooling)
10
- - Format: YAML with structured content arrays
11
- - Examples: `references/example-evals.md`
12
-
13
- ## Feature Reference
14
- - Rubrics: `references/rubric-evaluator.md` - Structured criteria-based evaluation
15
- - Composite Evaluators: `references/composite-evaluator.md` - Combine multiple evaluators
16
- - Tool Trajectory: `references/tool-trajectory-evaluator.md` - Validate agent tool usage
17
- - Custom Evaluators: `references/custom-evaluators.md` - Code and LLM judge templates
18
-
19
- ## Structure Requirements
20
- - Root level: `description` (optional), `target` (optional), `execution` (optional), `evalcases` (required)
21
- - Eval case fields: `id` (required), `expected_outcome` (required), `input_messages` (required)
22
- - Optional fields: `expected_messages`, `conversation_id`, `rubrics`, `execution`
23
- - `expected_messages` is optional - omit for outcome-only evaluation where the LLM judge evaluates based on `expected_outcome` criteria alone
24
- - Message fields: `role` (required), `content` (required)
25
- - Message roles: `system`, `user`, `assistant`, `tool`
26
- - Content types: `text` (inline), `file` (relative or absolute path)
27
- - Attachments (type: `file`) should default to the `user` role
28
- - File paths: Relative (from eval file dir) or absolute with "/" prefix (from repo root)
29
-
30
- ## Custom Evaluators
31
-
32
- Configure multiple evaluators per eval case via `execution.evaluators` array.
33
-
34
- ### Code Evaluators
35
- Scripts that validate output programmatically:
36
-
37
- ```yaml
38
- execution:
39
- evaluators:
40
- - name: json_format_validator
41
- type: code_judge
42
- script: uv run validate_output.py
43
- cwd: ../../evaluators/scripts
44
- ```
45
-
46
- **Contract:**
47
- - Input (stdin): JSON with `question`, `expected_outcome`, `reference_answer`, `candidate_answer`, `guideline_files` (file paths), `input_files` (file paths, excludes guidelines), `input_messages`
48
- - Output (stdout): JSON with `score` (0.0-1.0), `hits`, `misses`, `reasoning`
49
-
50
- **Template:** See `references/custom-evaluators.md` for Python code evaluator template
51
-
52
- ### LLM Judges
53
- Language models evaluate response quality:
54
-
55
- ```yaml
56
- execution:
57
- evaluators:
58
- - name: content_evaluator
59
- type: llm_judge
60
- prompt: /evaluators/prompts/correctness.md
61
- model: gpt-5-chat
62
- ```
63
-
64
- ### Tool Trajectory Evaluators
65
- Validate agent tool usage patterns (requires trace data from provider):
66
-
67
- ```yaml
68
- execution:
69
- evaluators:
70
- - name: research_check
71
- type: tool_trajectory
72
- mode: any_order # Options: any_order, in_order, exact
73
- minimums: # For any_order mode
74
- knowledgeSearch: 2
75
- expected: # For in_order/exact modes
76
- - tool: knowledgeSearch
77
- - tool: documentRetrieve
78
- ```
79
-
80
- See `references/tool-trajectory-evaluator.md` for modes and configuration.
81
-
82
- ### Multiple Evaluators
83
- Define multiple evaluators to run sequentially. The final score is a weighted average of all results.
84
-
85
- ```yaml
86
- execution:
87
- evaluators:
88
- - name: format_check # Runs first
89
- type: code_judge
90
- script: uv run validate_json.py
91
- - name: content_check # Runs second
92
- type: llm_judge
93
- ```
94
-
95
- ### Rubric Evaluator
96
- Inline rubrics for structured criteria-based evaluation:
97
-
98
- ```yaml
99
- evalcases:
100
- - id: explanation-task
101
- expected_outcome: Clear explanation of quicksort
102
- input_messages:
103
- - role: user
104
- content: Explain quicksort
105
- rubrics:
106
- - Mentions divide-and-conquer approach
107
- - Explains the partition step
108
- - id: complexity
109
- description: States time complexity correctly
110
- weight: 2.0
111
- required: true
112
- ```
113
-
114
- See `references/rubric-evaluator.md` for detailed rubric configuration.
115
-
116
- ### Composite Evaluator
117
- Combine multiple evaluators with aggregation:
118
-
119
- ```yaml
120
- execution:
121
- evaluators:
122
- - name: release_gate
123
- type: composite
124
- evaluators:
125
- - name: safety
126
- type: llm_judge
127
- prompt: ./prompts/safety.md
128
- - name: quality
129
- type: llm_judge
130
- prompt: ./prompts/quality.md
131
- aggregator:
132
- type: weighted_average
133
- weights:
134
- safety: 0.3
135
- quality: 0.7
136
- ```
137
-
138
- See `references/composite-evaluator.md` for aggregation types and patterns.
139
-
140
- ## Example
141
- ```yaml
142
- $schema: agentv-eval-v2
143
- description: Example showing basic features and conversation threading
144
- execution:
145
- target: default
146
-
147
- evalcases:
148
- - id: code-review-basic
149
- expected_outcome: Assistant provides helpful code analysis
150
-
151
- input_messages:
152
- - role: system
153
- content: You are an expert code reviewer.
154
- - role: user
155
- content:
156
- - type: text
157
- value: |-
158
- Review this function:
159
-
160
- ```python
161
- def add(a, b):
162
- return a + b
163
- ```
164
- - type: file
165
- value: /prompts/python.instructions.md
166
-
167
- expected_messages:
168
- - role: assistant
169
- content: |-
170
- The function is simple and correct. Suggestions:
171
- - Add type hints: `def add(a: int, b: int) -> int:`
172
- - Add docstring
173
- - Consider validation for edge cases
174
- ```
1
+ ---
2
+ name: agentv-eval-builder
3
+ description: Create and maintain AgentV YAML evaluation files for testing AI agent performance. Use this skill when creating new eval files, adding eval cases, or configuring custom evaluators (code validators or LLM judges) for agent testing workflows.
4
+ ---
5
+
6
+ # AgentV Eval Builder
7
+
8
+ ## Schema Reference
9
+ - Schema: `references/eval-schema.json` (JSON Schema for validation and tooling)
10
+ - Format: YAML with structured content arrays
11
+ - Examples: `references/example-evals.md`
12
+
13
+ ## Feature Reference
14
+ - Rubrics: `references/rubric-evaluator.md` - Structured criteria-based evaluation
15
+ - Composite Evaluators: `references/composite-evaluator.md` - Combine multiple evaluators
16
+ - Tool Trajectory: `references/tool-trajectory-evaluator.md` - Validate agent tool usage
17
+ - Custom Evaluators: `references/custom-evaluators.md` - Code and LLM judge templates
18
+ - Batch CLI: `references/batch-cli-evaluator.md` - Evaluate batch runner output (JSONL)
19
+
20
+ ## Structure Requirements
21
+ - Root level: `description` (optional), `execution` (optional; `target` is nested inside it), `evalcases` (required)
22
+ - Eval case fields: `id` (required), `expected_outcome` (required), `input_messages` (required)
23
+ - Optional fields: `expected_messages`, `conversation_id`, `rubrics`, `execution`
24
+ - `expected_messages` is optional - omit for outcome-only evaluation where the LLM judge evaluates based on `expected_outcome` criteria alone
25
+ - Message fields: `role` (required), `content` (required)
26
+ - Message roles: `system`, `user`, `assistant`, `tool`
27
+ - Content types: `text` (inline), `file` (relative or absolute path)
28
+ - Attachments (type: `file`) should default to the `user` role
29
+ - File paths: Relative (from eval file dir) or absolute with "/" prefix (from repo root)
30
+
31
+ ## Custom Evaluators
32
+
33
+ Configure multiple evaluators per eval case via `execution.evaluators` array.
34
+
35
+ ### Code Evaluators
36
+ Scripts that validate output programmatically:
37
+
38
+ ```yaml
39
+ execution:
40
+ evaluators:
41
+ - name: json_format_validator
42
+ type: code_judge
43
+ script: uv run validate_output.py
44
+ cwd: ../../evaluators/scripts
45
+ ```
46
+
47
+ **Contract:**
48
+ - Input (stdin): JSON with `question`, `expected_outcome`, `reference_answer`, `candidate_answer`, `guideline_files` (file paths), `input_files` (file paths, excludes guidelines), `input_messages`
49
+ - Output (stdout): JSON with `score` (0.0-1.0), `hits`, `misses`, `reasoning`
50
+
51
+ **Template:** See `references/custom-evaluators.md` for Python code evaluator template
52
+
53
+ ### LLM Judges
54
+ Language models evaluate response quality:
55
+
56
+ ```yaml
57
+ execution:
58
+ evaluators:
59
+ - name: content_evaluator
60
+ type: llm_judge
61
+ prompt: /evaluators/prompts/correctness.md
62
+ model: gpt-5-chat
63
+ ```
64
+
65
+ ### Tool Trajectory Evaluators
66
+ Validate agent tool usage patterns (requires `output_messages` with `tool_calls` from the provider):
67
+
68
+ ```yaml
69
+ execution:
70
+ evaluators:
71
+ - name: research_check
72
+ type: tool_trajectory
73
+ mode: any_order # Options: any_order, in_order, exact
74
+ minimums: # For any_order mode
75
+ knowledgeSearch: 2
76
+ expected: # For in_order/exact modes
77
+ - tool: knowledgeSearch
78
+ - tool: documentRetrieve
79
+ ```
80
+
81
+ See `references/tool-trajectory-evaluator.md` for modes and configuration.
82
+
83
+ ### Multiple Evaluators
84
+ Define multiple evaluators to run sequentially. The final score is a weighted average of all results.
85
+
86
+ ```yaml
87
+ execution:
88
+ evaluators:
89
+ - name: format_check # Runs first
90
+ type: code_judge
91
+ script: uv run validate_json.py
92
+ - name: content_check # Runs second
93
+ type: llm_judge
94
+ ```
95
+
96
+ ### Rubric Evaluator
97
+ Inline rubrics for structured criteria-based evaluation:
98
+
99
+ ```yaml
100
+ evalcases:
101
+ - id: explanation-task
102
+ expected_outcome: Clear explanation of quicksort
103
+ input_messages:
104
+ - role: user
105
+ content: Explain quicksort
106
+ rubrics:
107
+ - Mentions divide-and-conquer approach
108
+ - Explains the partition step
109
+ - id: complexity
110
+ description: States time complexity correctly
111
+ weight: 2.0
112
+ required: true
113
+ ```
114
+
115
+ See `references/rubric-evaluator.md` for detailed rubric configuration.
116
+
117
+ ### Composite Evaluator
118
+ Combine multiple evaluators with aggregation:
119
+
120
+ ```yaml
121
+ execution:
122
+ evaluators:
123
+ - name: release_gate
124
+ type: composite
125
+ evaluators:
126
+ - name: safety
127
+ type: llm_judge
128
+ prompt: ./prompts/safety.md
129
+ - name: quality
130
+ type: llm_judge
131
+ prompt: ./prompts/quality.md
132
+ aggregator:
133
+ type: weighted_average
134
+ weights:
135
+ safety: 0.3
136
+ quality: 0.7
137
+ ```
138
+
139
+ See `references/composite-evaluator.md` for aggregation types and patterns.
140
+
141
+ ### Batch CLI Evaluation
142
+ Evaluate external batch runners that process all evalcases in one invocation:
143
+
144
+ ```yaml
145
+ $schema: agentv-eval-v2
146
+ description: Batch CLI evaluation
147
+ execution:
148
+ target: batch_cli
149
+
150
+ evalcases:
151
+ - id: case-001
152
+ expected_outcome: Returns decision=CLEAR
153
+ expected_messages:
154
+ - role: assistant
155
+ content:
156
+ decision: CLEAR
157
+ input_messages:
158
+ - role: user
159
+ content:
160
+ row:
161
+ id: case-001
162
+ amount: 5000
163
+ execution:
164
+ evaluators:
165
+ - name: decision-check
166
+ type: code_judge
167
+ script: bun run ./scripts/check-output.ts
168
+ cwd: .
169
+ ```
170
+
171
+ **Key pattern:**
172
+ - The batch runner reads the eval YAML via the `--eval` flag and outputs JSONL keyed by `id`
173
+ - Each evalcase has its own evaluator to validate its corresponding output
174
+ - Use structured `expected_messages.content` for expected output fields
175
+
176
+ See `references/batch-cli-evaluator.md` for full implementation guide.
177
+
178
+ ## Example
179
+ ```yaml
180
+ $schema: agentv-eval-v2
181
+ description: Example showing basic features and conversation threading
182
+ execution:
183
+ target: default
184
+
185
+ evalcases:
186
+ - id: code-review-basic
187
+ expected_outcome: Assistant provides helpful code analysis
188
+
189
+ input_messages:
190
+ - role: system
191
+ content: You are an expert code reviewer.
192
+ - role: user
193
+ content:
194
+ - type: text
195
+ value: |-
196
+ Review this function:
197
+
198
+ ```python
199
+ def add(a, b):
200
+ return a + b
201
+ ```
202
+ - type: file
203
+ value: /prompts/python.instructions.md
204
+
205
+ expected_messages:
206
+ - role: assistant
207
+ content: |-
208
+ The function is simple and correct. Suggestions:
209
+ - Add type hints: `def add(a: int, b: int) -> int:`
210
+ - Add docstring
211
+ - Consider validation for edge cases
212
+ ```