agentv 0.22.0 → 0.23.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,217 +1,217 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "title": "AgentV Eval Schema",
4
- "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
- "type": "object",
6
- "properties": {
7
- "$schema": {
8
- "type": "string",
9
- "description": "Schema identifier",
10
- "enum": ["agentv-eval-v2"]
11
- },
12
- "description": {
13
- "type": "string",
14
- "description": "Description of what this eval suite covers"
15
- },
16
- "target": {
17
- "type": "string",
18
- "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
19
- },
20
- "execution": {
21
- "type": "object",
22
- "description": "Default execution configuration for all eval cases (can be overridden per case)",
23
- "properties": {
24
- "target": {
25
- "type": "string",
26
- "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
27
- },
28
- "evaluators": {
29
- "type": "array",
30
- "description": "Default evaluators for all eval cases (code-based and LLM judges)",
31
- "items": {
32
- "type": "object",
33
- "properties": {
34
- "name": {
35
- "type": "string",
36
- "description": "Evaluator name/identifier"
37
- },
38
- "type": {
39
- "type": "string",
40
- "enum": ["code", "llm_judge"],
41
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
42
- },
43
- "script": {
44
- "type": "string",
45
- "description": "Path to evaluator script (for type: code)"
46
- },
47
- "prompt": {
48
- "type": "string",
49
- "description": "Path to judge prompt file (for type: llm_judge)"
50
- }
51
- },
52
- "required": ["name", "type"],
53
- "additionalProperties": true
54
- }
55
- }
56
- },
57
- "additionalProperties": true
58
- },
59
- "evalcases": {
60
- "type": "array",
61
- "description": "Array of evaluation cases",
62
- "minItems": 1,
63
- "items": {
64
- "type": "object",
65
- "properties": {
66
- "id": {
67
- "type": "string",
68
- "description": "Unique identifier for the eval case"
69
- },
70
- "conversation_id": {
71
- "type": "string",
72
- "description": "Optional conversation identifier for threading multiple eval cases together"
73
- },
74
- "outcome": {
75
- "type": "string",
76
- "description": "Description of what the AI should accomplish in this eval"
77
- },
78
- "note": {
79
- "type": "string",
80
- "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
81
- },
82
- "input_messages": {
83
- "type": "array",
84
- "description": "Input messages for the conversation",
85
- "minItems": 1,
86
- "items": {
87
- "type": "object",
88
- "properties": {
89
- "role": {
90
- "type": "string",
91
- "enum": ["system", "user", "assistant", "tool"],
92
- "description": "Message role"
93
- },
94
- "content": {
95
- "oneOf": [
96
- {
97
- "type": "string",
98
- "description": "Simple text content"
99
- },
100
- {
101
- "type": "array",
102
- "description": "Mixed content items (text and file references)",
103
- "items": {
104
- "type": "object",
105
- "properties": {
106
- "type": {
107
- "type": "string",
108
- "enum": ["text", "file"],
109
- "description": "Content type: 'text' for inline content, 'file' for file references"
110
- },
111
- "value": {
112
- "type": "string",
113
- "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
114
- }
115
- },
116
- "required": ["type", "value"],
117
- "additionalProperties": false
118
- }
119
- }
120
- ]
121
- }
122
- },
123
- "required": ["role", "content"],
124
- "additionalProperties": false
125
- }
126
- },
127
- "expected_messages": {
128
- "type": "array",
129
- "description": "Expected response messages",
130
- "minItems": 1,
131
- "items": {
132
- "type": "object",
133
- "properties": {
134
- "role": {
135
- "type": "string",
136
- "enum": ["system", "user", "assistant", "tool"],
137
- "description": "Message role"
138
- },
139
- "content": {
140
- "oneOf": [
141
- {
142
- "type": "string",
143
- "description": "Simple text content"
144
- },
145
- {
146
- "type": "array",
147
- "description": "Mixed content items",
148
- "items": {
149
- "type": "object",
150
- "properties": {
151
- "type": {
152
- "type": "string",
153
- "enum": ["text", "file"]
154
- },
155
- "value": {
156
- "type": "string"
157
- }
158
- },
159
- "required": ["type", "value"],
160
- "additionalProperties": false
161
- }
162
- }
163
- ]
164
- }
165
- },
166
- "required": ["role", "content"],
167
- "additionalProperties": false
168
- }
169
- },
170
- "execution": {
171
- "type": "object",
172
- "description": "Per-case execution configuration",
173
- "properties": {
174
- "target": {
175
- "type": "string",
176
- "description": "Override target for this specific eval case"
177
- },
178
- "evaluators": {
179
- "type": "array",
180
- "description": "Multiple evaluators (code-based and LLM judges)",
181
- "items": {
182
- "type": "object",
183
- "properties": {
184
- "name": {
185
- "type": "string",
186
- "description": "Evaluator name/identifier"
187
- },
188
- "type": {
189
- "type": "string",
190
- "enum": ["code", "llm_judge"],
191
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
192
- },
193
- "script": {
194
- "type": "string",
195
- "description": "Path to evaluator script (for type: code)"
196
- },
197
- "prompt": {
198
- "type": "string",
199
- "description": "Path to judge prompt file (for type: llm_judge)"
200
- }
201
- },
202
- "required": ["name", "type"],
203
- "additionalProperties": true
204
- }
205
- }
206
- },
207
- "additionalProperties": true
208
- }
209
- },
210
- "required": ["id", "outcome", "input_messages", "expected_messages"],
211
- "additionalProperties": false
212
- }
213
- }
214
- },
215
- "required": ["evalcases"],
216
- "additionalProperties": false
217
- }
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "AgentV Eval Schema",
4
+ "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
+ "type": "object",
6
+ "properties": {
7
+ "$schema": {
8
+ "type": "string",
9
+ "description": "Schema identifier",
10
+ "enum": ["agentv-eval-v2"]
11
+ },
12
+ "description": {
13
+ "type": "string",
14
+ "description": "Description of what this eval suite covers"
15
+ },
16
+ "target": {
17
+ "type": "string",
18
+ "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
19
+ },
20
+ "execution": {
21
+ "type": "object",
22
+ "description": "Default execution configuration for all eval cases (can be overridden per case)",
23
+ "properties": {
24
+ "target": {
25
+ "type": "string",
26
+ "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
27
+ },
28
+ "evaluators": {
29
+ "type": "array",
30
+ "description": "Default evaluators for all eval cases (code-based and LLM judges)",
31
+ "items": {
32
+ "type": "object",
33
+ "properties": {
34
+ "name": {
35
+ "type": "string",
36
+ "description": "Evaluator name/identifier"
37
+ },
38
+ "type": {
39
+ "type": "string",
40
+ "enum": ["code", "llm_judge"],
41
+ "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
42
+ },
43
+ "script": {
44
+ "type": "string",
45
+ "description": "Path to evaluator script (for type: code)"
46
+ },
47
+ "prompt": {
48
+ "type": "string",
49
+ "description": "Path to judge prompt file (for type: llm_judge)"
50
+ }
51
+ },
52
+ "required": ["name", "type"],
53
+ "additionalProperties": true
54
+ }
55
+ }
56
+ },
57
+ "additionalProperties": true
58
+ },
59
+ "evalcases": {
60
+ "type": "array",
61
+ "description": "Array of evaluation cases",
62
+ "minItems": 1,
63
+ "items": {
64
+ "type": "object",
65
+ "properties": {
66
+ "id": {
67
+ "type": "string",
68
+ "description": "Unique identifier for the eval case"
69
+ },
70
+ "conversation_id": {
71
+ "type": "string",
72
+ "description": "Optional conversation identifier for threading multiple eval cases together"
73
+ },
74
+ "expected_outcome": {
75
+ "type": "string",
76
+ "description": "Description of what the AI should accomplish in this eval"
77
+ },
78
+ "note": {
79
+ "type": "string",
80
+ "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
81
+ },
82
+ "input_messages": {
83
+ "type": "array",
84
+ "description": "Input messages for the conversation",
85
+ "minItems": 1,
86
+ "items": {
87
+ "type": "object",
88
+ "properties": {
89
+ "role": {
90
+ "type": "string",
91
+ "enum": ["system", "user", "assistant", "tool"],
92
+ "description": "Message role"
93
+ },
94
+ "content": {
95
+ "oneOf": [
96
+ {
97
+ "type": "string",
98
+ "description": "Simple text content"
99
+ },
100
+ {
101
+ "type": "array",
102
+ "description": "Mixed content items (text and file references)",
103
+ "items": {
104
+ "type": "object",
105
+ "properties": {
106
+ "type": {
107
+ "type": "string",
108
+ "enum": ["text", "file"],
109
+ "description": "Content type: 'text' for inline content, 'file' for file references"
110
+ },
111
+ "value": {
112
+ "type": "string",
113
+ "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
114
+ }
115
+ },
116
+ "required": ["type", "value"],
117
+ "additionalProperties": false
118
+ }
119
+ }
120
+ ]
121
+ }
122
+ },
123
+ "required": ["role", "content"],
124
+ "additionalProperties": false
125
+ }
126
+ },
127
+ "expected_messages": {
128
+ "type": "array",
129
+ "description": "Expected response messages",
130
+ "minItems": 1,
131
+ "items": {
132
+ "type": "object",
133
+ "properties": {
134
+ "role": {
135
+ "type": "string",
136
+ "enum": ["system", "user", "assistant", "tool"],
137
+ "description": "Message role"
138
+ },
139
+ "content": {
140
+ "oneOf": [
141
+ {
142
+ "type": "string",
143
+ "description": "Simple text content"
144
+ },
145
+ {
146
+ "type": "array",
147
+ "description": "Mixed content items",
148
+ "items": {
149
+ "type": "object",
150
+ "properties": {
151
+ "type": {
152
+ "type": "string",
153
+ "enum": ["text", "file"]
154
+ },
155
+ "value": {
156
+ "type": "string"
157
+ }
158
+ },
159
+ "required": ["type", "value"],
160
+ "additionalProperties": false
161
+ }
162
+ }
163
+ ]
164
+ }
165
+ },
166
+ "required": ["role", "content"],
167
+ "additionalProperties": false
168
+ }
169
+ },
170
+ "execution": {
171
+ "type": "object",
172
+ "description": "Per-case execution configuration",
173
+ "properties": {
174
+ "target": {
175
+ "type": "string",
176
+ "description": "Override target for this specific eval case"
177
+ },
178
+ "evaluators": {
179
+ "type": "array",
180
+ "description": "Multiple evaluators (code-based and LLM judges)",
181
+ "items": {
182
+ "type": "object",
183
+ "properties": {
184
+ "name": {
185
+ "type": "string",
186
+ "description": "Evaluator name/identifier"
187
+ },
188
+ "type": {
189
+ "type": "string",
190
+ "enum": ["code", "llm_judge"],
191
+ "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
192
+ },
193
+ "script": {
194
+ "type": "string",
195
+ "description": "Path to evaluator script (for type: code)"
196
+ },
197
+ "prompt": {
198
+ "type": "string",
199
+ "description": "Path to judge prompt file (for type: llm_judge)"
200
+ }
201
+ },
202
+ "required": ["name", "type"],
203
+ "additionalProperties": true
204
+ }
205
+ }
206
+ },
207
+ "additionalProperties": true
208
+ }
209
+ },
210
+ "required": ["id", "expected_outcome", "input_messages", "expected_messages"],
211
+ "additionalProperties": false
212
+ }
213
+ }
214
+ },
215
+ "required": ["evalcases"],
216
+ "additionalProperties": false
217
+ }
@@ -11,7 +11,7 @@ target: default
11
11
 
12
12
  evalcases:
13
13
  - id: simple-addition
14
- outcome: Correctly calculates 2+2
14
+ expected_outcome: Correctly calculates 2+2
15
15
 
16
16
  input_messages:
17
17
  - role: user
@@ -31,7 +31,7 @@ target: azure_base
31
31
 
32
32
  evalcases:
33
33
  - id: code-review-basic
34
- outcome: Assistant provides helpful code analysis with security considerations
34
+ expected_outcome: Assistant provides helpful code analysis with security considerations
35
35
 
36
36
  input_messages:
37
37
  - role: system
@@ -73,7 +73,7 @@ target: default
73
73
 
74
74
  evalcases:
75
75
  - id: json-generation-with-validation
76
- outcome: Generates valid JSON with required fields
76
+ expected_outcome: Generates valid JSON with required fields
77
77
 
78
78
  execution:
79
79
  evaluators:
@@ -111,7 +111,7 @@ target: default
111
111
 
112
112
  evalcases:
113
113
  - id: debug-with-clarification
114
- outcome: |-
114
+ expected_outcome: |-
115
115
  Assistant conducts a multi-turn debugging session, asking clarification
116
116
  questions when needed, correctly diagnosing the bug, and proposing a clear
117
117
  fix with rationale.
@@ -169,7 +169,7 @@ evalcases:
169
169
  - **Relative paths** (start with `./` or `../`): Resolved from eval file directory
170
170
  - Example: `../../prompts/file.md` → Two directories up, then into prompts/
171
171
 
172
- ### Outcome Writing Tips
172
+ ### expected_outcome Writing Tips
173
173
  - Be specific about what success looks like
174
174
  - Mention key elements that must be present
175
175
  - For classification tasks, specify the expected category
@@ -0,0 +1,23 @@
1
+ # Example environment configuration for AgentV
2
+ # Copy this file to .env and fill in your credentials
3
+
4
+ # Model Provider Selection (Optional - can be configured via targets.yaml)
5
+ PROVIDER=azure
6
+
7
+ # Azure OpenAI Configuration
8
+ # These are the default environment variable names used in the provided targets.yaml
9
+ AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
10
+ AZURE_OPENAI_API_KEY=your-api-key-here
11
+ AZURE_DEPLOYMENT_NAME=gpt-4o
12
+
13
+ # Anthropic Configuration (if using Anthropic provider)
14
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
15
+
16
+ # VS Code Workspace Paths for Execution Targets
17
+ # Note: Using forward slashes is recommended for paths in .env files
18
+ # to avoid issues with escape characters.
19
+ PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
20
+
21
+ # CLI provider sample (used by the local_cli target)
22
+ PROJECT_ROOT=D:/GitHub/your-username/agentv/docs/examples/simple
23
+ LOCAL_AGENT_TOKEN=your-cli-token
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentv",
3
- "version": "0.22.0",
3
+ "version": "0.23.0",
4
4
  "description": "CLI entry point for AgentV",
5
5
  "type": "module",
6
6
  "repository": {
@@ -14,7 +14,10 @@
14
14
  "bin": {
15
15
  "agentv": "./dist/cli.js"
16
16
  },
17
- "files": ["dist", "README.md"],
17
+ "files": [
18
+ "dist",
19
+ "README.md"
20
+ ],
18
21
  "scripts": {
19
22
  "dev": "bun --watch src/index.ts",
20
23
  "build": "tsup && bun run copy-readme",