agentv 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  runCli
4
- } from "./chunk-ZVSFP6NK.js";
4
+ } from "./chunk-6ZM7WVSC.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
 
7
7
  // src/cli.ts
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli().catch((error) => {\n console.error(error);\n process.exit(1);\n});\n"],"mappings":";;;;;;;AAGA,OAAO,EAAE,MAAM,CAAC,UAAU;AACxB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { runCli } from './index.js';\r\n\r\nrunCli().catch((error) => {\r\n console.error(error);\r\n process.exit(1);\r\n});\r\n"],"mappings":";;;;;;;AAGA,OAAO,EAAE,MAAM,CAAC,UAAU;AACxB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  app,
3
3
  runCli
4
- } from "./chunk-ZVSFP6NK.js";
4
+ } from "./chunk-6ZM7WVSC.js";
5
5
  import "./chunk-UE4GLFVL.js";
6
6
  export {
7
7
  app,
@@ -44,7 +44,7 @@ execution:
44
44
  ```
45
45
 
46
46
  **Contract:**
47
- - Input (stdin): JSON with `question`, `expected_outcome`, `reference_answer`, `candidate_answer`, `guideline_paths`, `input_files`, `input_segments`
47
+ - Input (stdin): JSON with `question`, `expected_outcome`, `reference_answer`, `candidate_answer`, `guideline_files` (file paths), `input_files` (file paths, excludes guidelines), `input_messages`
48
48
  - Output (stdout): JSON with `score` (0.0-1.0), `hits`, `misses`, `reasoning`
49
49
 
50
50
  **Template:** See `references/custom-evaluators.md` for Python code evaluator template
@@ -1,217 +1,217 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "title": "AgentV Eval Schema",
4
- "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
- "type": "object",
6
- "properties": {
7
- "$schema": {
8
- "type": "string",
9
- "description": "Schema identifier",
10
- "enum": ["agentv-eval-v2"]
11
- },
12
- "description": {
13
- "type": "string",
14
- "description": "Description of what this eval suite covers"
15
- },
16
- "target": {
17
- "type": "string",
18
- "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
19
- },
20
- "execution": {
21
- "type": "object",
22
- "description": "Default execution configuration for all eval cases (can be overridden per case)",
23
- "properties": {
24
- "target": {
25
- "type": "string",
26
- "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
27
- },
28
- "evaluators": {
29
- "type": "array",
30
- "description": "Default evaluators for all eval cases (code-based and LLM judges)",
31
- "items": {
32
- "type": "object",
33
- "properties": {
34
- "name": {
35
- "type": "string",
36
- "description": "Evaluator name/identifier"
37
- },
38
- "type": {
39
- "type": "string",
40
- "enum": ["code", "llm_judge"],
41
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
42
- },
43
- "script": {
44
- "type": "string",
45
- "description": "Path to evaluator script (for type: code)"
46
- },
47
- "prompt": {
48
- "type": "string",
49
- "description": "Path to judge prompt file (for type: llm_judge)"
50
- }
51
- },
52
- "required": ["name", "type"],
53
- "additionalProperties": true
54
- }
55
- }
56
- },
57
- "additionalProperties": true
58
- },
59
- "evalcases": {
60
- "type": "array",
61
- "description": "Array of evaluation cases",
62
- "minItems": 1,
63
- "items": {
64
- "type": "object",
65
- "properties": {
66
- "id": {
67
- "type": "string",
68
- "description": "Unique identifier for the eval case"
69
- },
70
- "conversation_id": {
71
- "type": "string",
72
- "description": "Optional conversation identifier for threading multiple eval cases together"
73
- },
74
- "expected_outcome": {
75
- "type": "string",
76
- "description": "Description of what the AI should accomplish in this eval"
77
- },
78
- "note": {
79
- "type": "string",
80
- "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
81
- },
82
- "input_messages": {
83
- "type": "array",
84
- "description": "Input messages for the conversation",
85
- "minItems": 1,
86
- "items": {
87
- "type": "object",
88
- "properties": {
89
- "role": {
90
- "type": "string",
91
- "enum": ["system", "user", "assistant", "tool"],
92
- "description": "Message role"
93
- },
94
- "content": {
95
- "oneOf": [
96
- {
97
- "type": "string",
98
- "description": "Simple text content"
99
- },
100
- {
101
- "type": "array",
102
- "description": "Mixed content items (text and file references)",
103
- "items": {
104
- "type": "object",
105
- "properties": {
106
- "type": {
107
- "type": "string",
108
- "enum": ["text", "file"],
109
- "description": "Content type: 'text' for inline content, 'file' for file references"
110
- },
111
- "value": {
112
- "type": "string",
113
- "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
114
- }
115
- },
116
- "required": ["type", "value"],
117
- "additionalProperties": false
118
- }
119
- }
120
- ]
121
- }
122
- },
123
- "required": ["role", "content"],
124
- "additionalProperties": false
125
- }
126
- },
127
- "expected_messages": {
128
- "type": "array",
129
- "description": "Expected response messages",
130
- "minItems": 1,
131
- "items": {
132
- "type": "object",
133
- "properties": {
134
- "role": {
135
- "type": "string",
136
- "enum": ["system", "user", "assistant", "tool"],
137
- "description": "Message role"
138
- },
139
- "content": {
140
- "oneOf": [
141
- {
142
- "type": "string",
143
- "description": "Simple text content"
144
- },
145
- {
146
- "type": "array",
147
- "description": "Mixed content items",
148
- "items": {
149
- "type": "object",
150
- "properties": {
151
- "type": {
152
- "type": "string",
153
- "enum": ["text", "file"]
154
- },
155
- "value": {
156
- "type": "string"
157
- }
158
- },
159
- "required": ["type", "value"],
160
- "additionalProperties": false
161
- }
162
- }
163
- ]
164
- }
165
- },
166
- "required": ["role", "content"],
167
- "additionalProperties": false
168
- }
169
- },
170
- "execution": {
171
- "type": "object",
172
- "description": "Per-case execution configuration",
173
- "properties": {
174
- "target": {
175
- "type": "string",
176
- "description": "Override target for this specific eval case"
177
- },
178
- "evaluators": {
179
- "type": "array",
180
- "description": "Multiple evaluators (code-based and LLM judges)",
181
- "items": {
182
- "type": "object",
183
- "properties": {
184
- "name": {
185
- "type": "string",
186
- "description": "Evaluator name/identifier"
187
- },
188
- "type": {
189
- "type": "string",
190
- "enum": ["code", "llm_judge"],
191
- "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
192
- },
193
- "script": {
194
- "type": "string",
195
- "description": "Path to evaluator script (for type: code)"
196
- },
197
- "prompt": {
198
- "type": "string",
199
- "description": "Path to judge prompt file (for type: llm_judge)"
200
- }
201
- },
202
- "required": ["name", "type"],
203
- "additionalProperties": true
204
- }
205
- }
206
- },
207
- "additionalProperties": true
208
- }
209
- },
210
- "required": ["id", "expected_outcome", "input_messages", "expected_messages"],
211
- "additionalProperties": false
212
- }
213
- }
214
- },
215
- "required": ["evalcases"],
216
- "additionalProperties": false
217
- }
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "AgentV Eval Schema",
4
+ "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
5
+ "type": "object",
6
+ "properties": {
7
+ "$schema": {
8
+ "type": "string",
9
+ "description": "Schema identifier",
10
+ "enum": ["agentv-eval-v2"]
11
+ },
12
+ "description": {
13
+ "type": "string",
14
+ "description": "Description of what this eval suite covers"
15
+ },
16
+ "target": {
17
+ "type": "string",
18
+ "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
19
+ },
20
+ "execution": {
21
+ "type": "object",
22
+ "description": "Default execution configuration for all eval cases (can be overridden per case)",
23
+ "properties": {
24
+ "target": {
25
+ "type": "string",
26
+ "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
27
+ },
28
+ "evaluators": {
29
+ "type": "array",
30
+ "description": "Default evaluators for all eval cases (code-based and LLM judges)",
31
+ "items": {
32
+ "type": "object",
33
+ "properties": {
34
+ "name": {
35
+ "type": "string",
36
+ "description": "Evaluator name/identifier"
37
+ },
38
+ "type": {
39
+ "type": "string",
40
+ "enum": ["code", "llm_judge"],
41
+ "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
42
+ },
43
+ "script": {
44
+ "type": "string",
45
+ "description": "Path to evaluator script (for type: code)"
46
+ },
47
+ "prompt": {
48
+ "type": "string",
49
+ "description": "Path to judge prompt file (for type: llm_judge)"
50
+ }
51
+ },
52
+ "required": ["name", "type"],
53
+ "additionalProperties": true
54
+ }
55
+ }
56
+ },
57
+ "additionalProperties": true
58
+ },
59
+ "evalcases": {
60
+ "type": "array",
61
+ "description": "Array of evaluation cases",
62
+ "minItems": 1,
63
+ "items": {
64
+ "type": "object",
65
+ "properties": {
66
+ "id": {
67
+ "type": "string",
68
+ "description": "Unique identifier for the eval case"
69
+ },
70
+ "conversation_id": {
71
+ "type": "string",
72
+ "description": "Optional conversation identifier for threading multiple eval cases together"
73
+ },
74
+ "expected_outcome": {
75
+ "type": "string",
76
+ "description": "Description of what the AI should accomplish in this eval"
77
+ },
78
+ "note": {
79
+ "type": "string",
80
+ "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
81
+ },
82
+ "input_messages": {
83
+ "type": "array",
84
+ "description": "Input messages for the conversation",
85
+ "minItems": 1,
86
+ "items": {
87
+ "type": "object",
88
+ "properties": {
89
+ "role": {
90
+ "type": "string",
91
+ "enum": ["system", "user", "assistant", "tool"],
92
+ "description": "Message role"
93
+ },
94
+ "content": {
95
+ "oneOf": [
96
+ {
97
+ "type": "string",
98
+ "description": "Simple text content"
99
+ },
100
+ {
101
+ "type": "array",
102
+ "description": "Mixed content items (text and file references)",
103
+ "items": {
104
+ "type": "object",
105
+ "properties": {
106
+ "type": {
107
+ "type": "string",
108
+ "enum": ["text", "file"],
109
+ "description": "Content type: 'text' for inline content, 'file' for file references"
110
+ },
111
+ "value": {
112
+ "type": "string",
113
+ "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
114
+ }
115
+ },
116
+ "required": ["type", "value"],
117
+ "additionalProperties": false
118
+ }
119
+ }
120
+ ]
121
+ }
122
+ },
123
+ "required": ["role", "content"],
124
+ "additionalProperties": false
125
+ }
126
+ },
127
+ "expected_messages": {
128
+ "type": "array",
129
+ "description": "Expected response messages",
130
+ "minItems": 1,
131
+ "items": {
132
+ "type": "object",
133
+ "properties": {
134
+ "role": {
135
+ "type": "string",
136
+ "enum": ["system", "user", "assistant", "tool"],
137
+ "description": "Message role"
138
+ },
139
+ "content": {
140
+ "oneOf": [
141
+ {
142
+ "type": "string",
143
+ "description": "Simple text content"
144
+ },
145
+ {
146
+ "type": "array",
147
+ "description": "Mixed content items",
148
+ "items": {
149
+ "type": "object",
150
+ "properties": {
151
+ "type": {
152
+ "type": "string",
153
+ "enum": ["text", "file"]
154
+ },
155
+ "value": {
156
+ "type": "string"
157
+ }
158
+ },
159
+ "required": ["type", "value"],
160
+ "additionalProperties": false
161
+ }
162
+ }
163
+ ]
164
+ }
165
+ },
166
+ "required": ["role", "content"],
167
+ "additionalProperties": false
168
+ }
169
+ },
170
+ "execution": {
171
+ "type": "object",
172
+ "description": "Per-case execution configuration",
173
+ "properties": {
174
+ "target": {
175
+ "type": "string",
176
+ "description": "Override target for this specific eval case"
177
+ },
178
+ "evaluators": {
179
+ "type": "array",
180
+ "description": "Multiple evaluators (code-based and LLM judges)",
181
+ "items": {
182
+ "type": "object",
183
+ "properties": {
184
+ "name": {
185
+ "type": "string",
186
+ "description": "Evaluator name/identifier"
187
+ },
188
+ "type": {
189
+ "type": "string",
190
+ "enum": ["code", "llm_judge"],
191
+ "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
192
+ },
193
+ "script": {
194
+ "type": "string",
195
+ "description": "Path to evaluator script (for type: code)"
196
+ },
197
+ "prompt": {
198
+ "type": "string",
199
+ "description": "Path to judge prompt file (for type: llm_judge)"
200
+ }
201
+ },
202
+ "required": ["name", "type"],
203
+ "additionalProperties": true
204
+ }
205
+ }
206
+ },
207
+ "additionalProperties": true
208
+ }
209
+ },
210
+ "required": ["id", "expected_outcome", "input_messages", "expected_messages"],
211
+ "additionalProperties": false
212
+ }
213
+ }
214
+ },
215
+ "required": ["evalcases"],
216
+ "additionalProperties": false
217
+ }
@@ -0,0 +1,23 @@
1
+ # Example environment configuration for AgentV
2
+ # Copy this file to .env and fill in your credentials
3
+
4
+ # Model Provider Selection (Optional - can be configured via targets.yaml)
5
+ PROVIDER=azure
6
+
7
+ # Azure OpenAI Configuration
8
+ # These are the default environment variable names used in the provided targets.yaml
9
+ AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
10
+ AZURE_OPENAI_API_KEY=your-api-key-here
11
+ AZURE_DEPLOYMENT_NAME=gpt-4o
12
+
13
+ # Anthropic Configuration (if using Anthropic provider)
14
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
15
+
16
+ # VS Code Workspace Paths for Execution Targets
17
+ # Note: Using forward slashes is recommended for paths in .env files
18
+ # to avoid issues with escape characters.
19
+ PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
20
+
21
+ # CLI provider sample (used by the local_cli target)
22
+ PROJECT_ROOT=D:/GitHub/your-username/agentv/docs/examples/simple
23
+ LOCAL_AGENT_TOKEN=your-cli-token
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentv",
3
- "version": "0.25.0",
3
+ "version": "0.26.0",
4
4
  "description": "CLI entry point for AgentV",
5
5
  "type": "module",
6
6
  "repository": {