@agentv/core 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -77
- package/dist/{chunk-KPHTMTZ3.js → chunk-IBTKEEOT.js} +337 -83
- package/dist/chunk-IBTKEEOT.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +83 -71
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -72
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +4137 -1182
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +445 -40
- package/dist/index.d.ts +445 -40
- package/dist/index.js +3822 -1130
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
- package/dist/chunk-KPHTMTZ3.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,77 +1,77 @@
|
|
|
1
|
-
# @agentv/core
|
|
2
|
-
|
|
3
|
-
Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
|
|
4
|
-
|
|
5
|
-
## Overview
|
|
6
|
-
|
|
7
|
-
This package provides the foundational components for building and evaluating AI agents:
|
|
8
|
-
|
|
9
|
-
- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
|
|
10
|
-
- **Evaluation Engine**: YAML-based test specification and execution
|
|
11
|
-
- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
|
|
12
|
-
- **Target Management**: Flexible configuration for different execution environments
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
npm install @agentv/core
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Usage
|
|
21
|
-
|
|
22
|
-
This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
|
|
23
|
-
|
|
24
|
-
```bash
|
|
25
|
-
npm install -g agentv
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
For programmatic usage or custom integrations, you can import core components:
|
|
29
|
-
|
|
30
|
-
```typescript
|
|
31
|
-
import { createProvider, runEvaluation } from '@agentv/core';
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
## Features
|
|
35
|
-
|
|
36
|
-
### Multi-Provider Support
|
|
37
|
-
|
|
38
|
-
- **Azure OpenAI**: Enterprise-grade deployment support
|
|
39
|
-
- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
|
|
40
|
-
- **Google Gemini**: Gemini 2.0 Flash and other models
|
|
41
|
-
- **VS Code Copilot**: Programmatic integration via subagent
|
|
42
|
-
- **Mock Provider**: Testing without API calls
|
|
43
|
-
|
|
44
|
-
### Evaluation Framework
|
|
45
|
-
|
|
46
|
-
- YAML-based test specifications
|
|
47
|
-
- Code block extraction and structured prompting
|
|
48
|
-
- Automatic retry handling for timeouts
|
|
49
|
-
- Detailed scoring with hit/miss analysis
|
|
50
|
-
- Multiple output formats (JSONL, YAML)
|
|
51
|
-
|
|
52
|
-
### Quality Grading
|
|
53
|
-
|
|
54
|
-
- AI-powered aspect extraction and comparison
|
|
55
|
-
- Normalized scoring (0.0 to 1.0)
|
|
56
|
-
- Detailed reasoning and analysis
|
|
57
|
-
- Configurable grading models
|
|
58
|
-
|
|
59
|
-
## Architecture
|
|
60
|
-
|
|
61
|
-
Built on modern TypeScript tooling:
|
|
62
|
-
|
|
63
|
-
- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
|
|
64
|
-
- **Zod**: Runtime type validation
|
|
65
|
-
- **YAML**: Configuration and test specifications
|
|
66
|
-
|
|
67
|
-
## Documentation
|
|
68
|
-
|
|
69
|
-
For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
|
|
70
|
-
|
|
71
|
-
## Repository
|
|
72
|
-
|
|
73
|
-
[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
|
|
74
|
-
|
|
75
|
-
## License
|
|
76
|
-
|
|
77
|
-
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
1
|
+
# @agentv/core
|
|
2
|
+
|
|
3
|
+
Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This package provides the foundational components for building and evaluating AI agents:
|
|
8
|
+
|
|
9
|
+
- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
|
|
10
|
+
- **Evaluation Engine**: YAML-based test specification and execution
|
|
11
|
+
- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
|
|
12
|
+
- **Target Management**: Flexible configuration for different execution environments
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @agentv/core
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install -g agentv
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For programmatic usage or custom integrations, you can import core components:
|
|
29
|
+
|
|
30
|
+
```typescript
|
|
31
|
+
import { createProvider, runEvaluation } from '@agentv/core';
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
### Multi-Provider Support
|
|
37
|
+
|
|
38
|
+
- **Azure OpenAI**: Enterprise-grade deployment support
|
|
39
|
+
- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
|
|
40
|
+
- **Google Gemini**: Gemini 2.0 Flash and other models
|
|
41
|
+
- **VS Code Copilot**: Programmatic integration via subagent
|
|
42
|
+
- **Mock Provider**: Testing without API calls
|
|
43
|
+
|
|
44
|
+
### Evaluation Framework
|
|
45
|
+
|
|
46
|
+
- YAML-based test specifications
|
|
47
|
+
- Code block extraction and structured prompting
|
|
48
|
+
- Automatic retry handling for timeouts
|
|
49
|
+
- Detailed scoring with hit/miss analysis
|
|
50
|
+
- Multiple output formats (JSONL, YAML)
|
|
51
|
+
|
|
52
|
+
### Quality Grading
|
|
53
|
+
|
|
54
|
+
- AI-powered aspect extraction and comparison
|
|
55
|
+
- Normalized scoring (0.0 to 1.0)
|
|
56
|
+
- Detailed reasoning and analysis
|
|
57
|
+
- Configurable grading models
|
|
58
|
+
|
|
59
|
+
## Architecture
|
|
60
|
+
|
|
61
|
+
Built on modern TypeScript tooling:
|
|
62
|
+
|
|
63
|
+
- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
|
|
64
|
+
- **Zod**: Runtime type validation
|
|
65
|
+
- **YAML**: Configuration and test specifications
|
|
66
|
+
|
|
67
|
+
## Documentation
|
|
68
|
+
|
|
69
|
+
For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
|
|
70
|
+
|
|
71
|
+
## Repository
|
|
72
|
+
|
|
73
|
+
[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
@@ -116,6 +116,164 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
116
116
|
// src/evaluation/providers/targets.ts
|
|
117
117
|
import path2 from "node:path";
|
|
118
118
|
import { z } from "zod";
|
|
119
|
+
var CliHealthcheckHttpInputSchema = z.object({
|
|
120
|
+
type: z.literal("http"),
|
|
121
|
+
url: z.string().min(1, "healthcheck URL is required"),
|
|
122
|
+
timeout_seconds: z.number().positive().optional(),
|
|
123
|
+
timeoutSeconds: z.number().positive().optional()
|
|
124
|
+
});
|
|
125
|
+
var CliHealthcheckCommandInputSchema = z.object({
|
|
126
|
+
type: z.literal("command"),
|
|
127
|
+
command_template: z.string().optional(),
|
|
128
|
+
commandTemplate: z.string().optional(),
|
|
129
|
+
cwd: z.string().optional(),
|
|
130
|
+
timeout_seconds: z.number().positive().optional(),
|
|
131
|
+
timeoutSeconds: z.number().positive().optional()
|
|
132
|
+
});
|
|
133
|
+
var CliHealthcheckInputSchema = z.discriminatedUnion("type", [
|
|
134
|
+
CliHealthcheckHttpInputSchema,
|
|
135
|
+
CliHealthcheckCommandInputSchema
|
|
136
|
+
]);
|
|
137
|
+
var CliTargetInputSchema = z.object({
|
|
138
|
+
name: z.string().min(1, "target name is required"),
|
|
139
|
+
provider: z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
|
|
140
|
+
// Command template - required (accept both naming conventions)
|
|
141
|
+
command_template: z.string().optional(),
|
|
142
|
+
commandTemplate: z.string().optional(),
|
|
143
|
+
// Files format - optional
|
|
144
|
+
files_format: z.string().optional(),
|
|
145
|
+
filesFormat: z.string().optional(),
|
|
146
|
+
attachments_format: z.string().optional(),
|
|
147
|
+
attachmentsFormat: z.string().optional(),
|
|
148
|
+
// Working directory - optional
|
|
149
|
+
cwd: z.string().optional(),
|
|
150
|
+
// Timeout in seconds - optional
|
|
151
|
+
timeout_seconds: z.number().positive().optional(),
|
|
152
|
+
timeoutSeconds: z.number().positive().optional(),
|
|
153
|
+
// Healthcheck configuration - optional
|
|
154
|
+
healthcheck: CliHealthcheckInputSchema.optional(),
|
|
155
|
+
// Verbose mode - optional
|
|
156
|
+
verbose: z.boolean().optional(),
|
|
157
|
+
cli_verbose: z.boolean().optional(),
|
|
158
|
+
cliVerbose: z.boolean().optional(),
|
|
159
|
+
// Keep temp files - optional
|
|
160
|
+
keep_temp_files: z.boolean().optional(),
|
|
161
|
+
keepTempFiles: z.boolean().optional(),
|
|
162
|
+
keep_output_files: z.boolean().optional(),
|
|
163
|
+
keepOutputFiles: z.boolean().optional(),
|
|
164
|
+
// Common target fields
|
|
165
|
+
judge_target: z.string().optional(),
|
|
166
|
+
workers: z.number().int().min(1).optional(),
|
|
167
|
+
provider_batching: z.boolean().optional(),
|
|
168
|
+
providerBatching: z.boolean().optional()
|
|
169
|
+
}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
|
|
170
|
+
message: "Either command_template or commandTemplate is required"
|
|
171
|
+
});
|
|
172
|
+
var CliHealthcheckHttpSchema = z.object({
|
|
173
|
+
type: z.literal("http"),
|
|
174
|
+
url: z.string().min(1),
|
|
175
|
+
timeoutMs: z.number().positive().optional()
|
|
176
|
+
}).strict();
|
|
177
|
+
var CliHealthcheckCommandSchema = z.object({
|
|
178
|
+
type: z.literal("command"),
|
|
179
|
+
commandTemplate: z.string().min(1),
|
|
180
|
+
cwd: z.string().optional(),
|
|
181
|
+
timeoutMs: z.number().positive().optional()
|
|
182
|
+
}).strict();
|
|
183
|
+
var CliHealthcheckSchema = z.discriminatedUnion("type", [
|
|
184
|
+
CliHealthcheckHttpSchema,
|
|
185
|
+
CliHealthcheckCommandSchema
|
|
186
|
+
]);
|
|
187
|
+
var CliTargetConfigSchema = z.object({
|
|
188
|
+
commandTemplate: z.string().min(1),
|
|
189
|
+
filesFormat: z.string().optional(),
|
|
190
|
+
cwd: z.string().optional(),
|
|
191
|
+
timeoutMs: z.number().positive().optional(),
|
|
192
|
+
healthcheck: CliHealthcheckSchema.optional(),
|
|
193
|
+
verbose: z.boolean().optional(),
|
|
194
|
+
keepTempFiles: z.boolean().optional()
|
|
195
|
+
}).strict();
|
|
196
|
+
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
197
|
+
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
198
|
+
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
199
|
+
if (input.type === "http") {
|
|
200
|
+
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
201
|
+
return {
|
|
202
|
+
type: "http",
|
|
203
|
+
url,
|
|
204
|
+
timeoutMs
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
const commandTemplateSource = input.command_template ?? input.commandTemplate;
|
|
208
|
+
if (commandTemplateSource === void 0) {
|
|
209
|
+
throw new Error(
|
|
210
|
+
`${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
|
|
211
|
+
);
|
|
212
|
+
}
|
|
213
|
+
const commandTemplate = resolveString(
|
|
214
|
+
commandTemplateSource,
|
|
215
|
+
env,
|
|
216
|
+
`${targetName} healthcheck command template`,
|
|
217
|
+
true
|
|
218
|
+
);
|
|
219
|
+
let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
|
|
220
|
+
allowLiteral: true,
|
|
221
|
+
optionalEnv: true
|
|
222
|
+
});
|
|
223
|
+
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
224
|
+
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
225
|
+
}
|
|
226
|
+
if (!cwd && evalFilePath) {
|
|
227
|
+
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
228
|
+
}
|
|
229
|
+
return {
|
|
230
|
+
type: "command",
|
|
231
|
+
commandTemplate,
|
|
232
|
+
cwd,
|
|
233
|
+
timeoutMs
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
237
|
+
const targetName = input.name;
|
|
238
|
+
const commandTemplateSource = input.command_template ?? input.commandTemplate;
|
|
239
|
+
if (commandTemplateSource === void 0) {
|
|
240
|
+
throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
|
|
241
|
+
}
|
|
242
|
+
const commandTemplate = resolveString(
|
|
243
|
+
commandTemplateSource,
|
|
244
|
+
env,
|
|
245
|
+
`${targetName} CLI command template`,
|
|
246
|
+
true
|
|
247
|
+
);
|
|
248
|
+
const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
|
|
249
|
+
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
250
|
+
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
251
|
+
allowLiteral: true,
|
|
252
|
+
optionalEnv: true
|
|
253
|
+
});
|
|
254
|
+
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
255
|
+
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
256
|
+
}
|
|
257
|
+
if (!cwd && evalFilePath) {
|
|
258
|
+
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
259
|
+
}
|
|
260
|
+
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
261
|
+
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
262
|
+
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
|
|
263
|
+
const keepTempFiles = resolveOptionalBoolean(
|
|
264
|
+
input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
|
|
265
|
+
);
|
|
266
|
+
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
267
|
+
return {
|
|
268
|
+
commandTemplate,
|
|
269
|
+
filesFormat,
|
|
270
|
+
cwd,
|
|
271
|
+
timeoutMs,
|
|
272
|
+
healthcheck,
|
|
273
|
+
verbose,
|
|
274
|
+
keepTempFiles
|
|
275
|
+
};
|
|
276
|
+
}
|
|
119
277
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
120
278
|
"PROMPT",
|
|
121
279
|
"GUIDELINES",
|
|
@@ -221,6 +379,25 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
221
379
|
providerBatching,
|
|
222
380
|
config: resolveCodexConfig(parsed, env)
|
|
223
381
|
};
|
|
382
|
+
case "pi":
|
|
383
|
+
case "pi-coding-agent":
|
|
384
|
+
return {
|
|
385
|
+
kind: "pi-coding-agent",
|
|
386
|
+
name: parsed.name,
|
|
387
|
+
judgeTarget: parsed.judge_target,
|
|
388
|
+
workers: parsed.workers,
|
|
389
|
+
providerBatching,
|
|
390
|
+
config: resolvePiCodingAgentConfig(parsed, env)
|
|
391
|
+
};
|
|
392
|
+
case "claude-code":
|
|
393
|
+
return {
|
|
394
|
+
kind: "claude-code",
|
|
395
|
+
name: parsed.name,
|
|
396
|
+
judgeTarget: parsed.judge_target,
|
|
397
|
+
workers: parsed.workers,
|
|
398
|
+
providerBatching,
|
|
399
|
+
config: resolveClaudeCodeConfig(parsed, env)
|
|
400
|
+
};
|
|
224
401
|
case "mock":
|
|
225
402
|
return {
|
|
226
403
|
kind: "mock",
|
|
@@ -329,6 +506,7 @@ function resolveCodexConfig(target, env) {
|
|
|
329
506
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
330
507
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
331
508
|
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
509
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
332
510
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
333
511
|
allowLiteral: true,
|
|
334
512
|
optionalEnv: true
|
|
@@ -344,13 +522,15 @@ function resolveCodexConfig(target, env) {
|
|
|
344
522
|
optionalEnv: true
|
|
345
523
|
});
|
|
346
524
|
const logFormat = normalizeCodexLogFormat(logFormatSource);
|
|
525
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
347
526
|
return {
|
|
348
527
|
executable,
|
|
349
528
|
args,
|
|
350
529
|
cwd,
|
|
351
530
|
timeoutMs,
|
|
352
531
|
logDir,
|
|
353
|
-
logFormat
|
|
532
|
+
logFormat,
|
|
533
|
+
systemPrompt
|
|
354
534
|
};
|
|
355
535
|
}
|
|
356
536
|
function normalizeCodexLogFormat(value) {
|
|
@@ -366,6 +546,128 @@ function normalizeCodexLogFormat(value) {
|
|
|
366
546
|
}
|
|
367
547
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
368
548
|
}
|
|
549
|
+
function resolvePiCodingAgentConfig(target, env) {
|
|
550
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
551
|
+
const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
|
|
552
|
+
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
553
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
554
|
+
const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
|
|
555
|
+
const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
|
|
556
|
+
const argsSource = target.args ?? target.arguments;
|
|
557
|
+
const cwdSource = target.cwd;
|
|
558
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
559
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
560
|
+
const logFormatSource = target.log_format ?? target.logFormat;
|
|
561
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
562
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
|
|
563
|
+
allowLiteral: true,
|
|
564
|
+
optionalEnv: true
|
|
565
|
+
}) ?? "pi";
|
|
566
|
+
const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
|
|
567
|
+
allowLiteral: true,
|
|
568
|
+
optionalEnv: true
|
|
569
|
+
});
|
|
570
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
|
|
571
|
+
allowLiteral: true,
|
|
572
|
+
optionalEnv: true
|
|
573
|
+
});
|
|
574
|
+
const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
|
|
575
|
+
allowLiteral: false,
|
|
576
|
+
optionalEnv: true
|
|
577
|
+
});
|
|
578
|
+
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
|
|
579
|
+
allowLiteral: true,
|
|
580
|
+
optionalEnv: true
|
|
581
|
+
});
|
|
582
|
+
const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
|
|
583
|
+
allowLiteral: true,
|
|
584
|
+
optionalEnv: true
|
|
585
|
+
});
|
|
586
|
+
const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
|
|
587
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
|
|
588
|
+
allowLiteral: true,
|
|
589
|
+
optionalEnv: true
|
|
590
|
+
});
|
|
591
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
|
|
592
|
+
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
|
|
593
|
+
allowLiteral: true,
|
|
594
|
+
optionalEnv: true
|
|
595
|
+
});
|
|
596
|
+
const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
|
|
597
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
598
|
+
return {
|
|
599
|
+
executable,
|
|
600
|
+
provider,
|
|
601
|
+
model,
|
|
602
|
+
apiKey,
|
|
603
|
+
tools,
|
|
604
|
+
thinking,
|
|
605
|
+
args,
|
|
606
|
+
cwd,
|
|
607
|
+
timeoutMs,
|
|
608
|
+
logDir,
|
|
609
|
+
logFormat,
|
|
610
|
+
systemPrompt
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
function resolveClaudeCodeConfig(target, env) {
|
|
614
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
615
|
+
const modelSource = target.model;
|
|
616
|
+
const argsSource = target.args ?? target.arguments;
|
|
617
|
+
const cwdSource = target.cwd;
|
|
618
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
619
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
620
|
+
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_CODE_LOG_FORMAT;
|
|
621
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
622
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} claude-code executable`, {
|
|
623
|
+
allowLiteral: true,
|
|
624
|
+
optionalEnv: true
|
|
625
|
+
}) ?? "claude";
|
|
626
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} claude-code model`, {
|
|
627
|
+
allowLiteral: true,
|
|
628
|
+
optionalEnv: true
|
|
629
|
+
});
|
|
630
|
+
const args = resolveOptionalStringArray(argsSource, env, `${target.name} claude-code args`);
|
|
631
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} claude-code cwd`, {
|
|
632
|
+
allowLiteral: true,
|
|
633
|
+
optionalEnv: true
|
|
634
|
+
});
|
|
635
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude-code timeout`);
|
|
636
|
+
const logDir = resolveOptionalString(
|
|
637
|
+
logDirSource,
|
|
638
|
+
env,
|
|
639
|
+
`${target.name} claude-code log directory`,
|
|
640
|
+
{
|
|
641
|
+
allowLiteral: true,
|
|
642
|
+
optionalEnv: true
|
|
643
|
+
}
|
|
644
|
+
);
|
|
645
|
+
const logFormat = normalizeClaudeCodeLogFormat(logFormatSource);
|
|
646
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
647
|
+
return {
|
|
648
|
+
executable,
|
|
649
|
+
model,
|
|
650
|
+
systemPrompt,
|
|
651
|
+
args,
|
|
652
|
+
cwd,
|
|
653
|
+
timeoutMs,
|
|
654
|
+
logDir,
|
|
655
|
+
logFormat
|
|
656
|
+
};
|
|
657
|
+
}
|
|
658
|
+
function normalizeClaudeCodeLogFormat(value) {
|
|
659
|
+
if (value === void 0 || value === null) {
|
|
660
|
+
return void 0;
|
|
661
|
+
}
|
|
662
|
+
if (typeof value !== "string") {
|
|
663
|
+
throw new Error("claude-code log format must be 'summary' or 'json'");
|
|
664
|
+
}
|
|
665
|
+
const normalized = value.trim().toLowerCase();
|
|
666
|
+
if (normalized === "json" || normalized === "summary") {
|
|
667
|
+
return normalized;
|
|
668
|
+
}
|
|
669
|
+
throw new Error("claude-code log format must be 'summary' or 'json'");
|
|
670
|
+
}
|
|
369
671
|
function resolveMockConfig(target) {
|
|
370
672
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
371
673
|
return { response };
|
|
@@ -400,46 +702,35 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
400
702
|
workspaceTemplate
|
|
401
703
|
};
|
|
402
704
|
}
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
407
|
-
);
|
|
408
|
-
const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
|
|
409
|
-
const keepTempFiles = resolveOptionalBoolean(
|
|
410
|
-
target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
|
|
411
|
-
);
|
|
412
|
-
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
413
|
-
allowLiteral: true,
|
|
414
|
-
optionalEnv: true
|
|
415
|
-
});
|
|
416
|
-
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
417
|
-
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
705
|
+
var cliErrorMap = (issue, ctx) => {
|
|
706
|
+
if (issue.code === z.ZodIssueCode.unrecognized_keys) {
|
|
707
|
+
return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
|
|
418
708
|
}
|
|
419
|
-
if (
|
|
420
|
-
|
|
709
|
+
if (issue.code === z.ZodIssueCode.invalid_union_discriminator) {
|
|
710
|
+
return { message: "healthcheck type must be 'http' or 'command'" };
|
|
421
711
|
}
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
}
|
|
712
|
+
if (issue.code === z.ZodIssueCode.invalid_type && issue.expected === "string") {
|
|
713
|
+
return { message: `${ctx.defaultError} (expected a string value)` };
|
|
714
|
+
}
|
|
715
|
+
return { message: ctx.defaultError };
|
|
716
|
+
};
|
|
717
|
+
function resolveCliConfig(target, env, evalFilePath) {
|
|
718
|
+
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
719
|
+
if (!parseResult.success) {
|
|
720
|
+
const firstError = parseResult.error.errors[0];
|
|
721
|
+
const path3 = firstError?.path.join(".") || "";
|
|
722
|
+
const prefix = path3 ? `${target.name} ${path3}: ` : `${target.name}: `;
|
|
723
|
+
throw new Error(`${prefix}${firstError?.message}`);
|
|
724
|
+
}
|
|
725
|
+
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
726
|
+
assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
|
|
727
|
+
if (normalized.healthcheck?.type === "command") {
|
|
728
|
+
assertSupportedCliPlaceholders(
|
|
729
|
+
normalized.healthcheck.commandTemplate,
|
|
730
|
+
`${target.name} healthcheck command template`
|
|
731
|
+
);
|
|
732
|
+
}
|
|
733
|
+
return normalized;
|
|
443
734
|
}
|
|
444
735
|
function resolveTimeoutMs(source, description) {
|
|
445
736
|
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
@@ -451,49 +742,6 @@ function resolveTimeoutMs(source, description) {
|
|
|
451
742
|
}
|
|
452
743
|
return Math.floor(seconds * 1e3);
|
|
453
744
|
}
|
|
454
|
-
function resolveCliHealthcheck(source, env, targetName, evalFilePath) {
|
|
455
|
-
if (source === void 0 || source === null) {
|
|
456
|
-
return void 0;
|
|
457
|
-
}
|
|
458
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
459
|
-
throw new Error(`${targetName} healthcheck must be an object`);
|
|
460
|
-
}
|
|
461
|
-
const candidate = source;
|
|
462
|
-
const type = candidate.type;
|
|
463
|
-
const timeoutMs = resolveTimeoutMs(
|
|
464
|
-
candidate.timeout_seconds ?? candidate.timeoutSeconds,
|
|
465
|
-
`${targetName} healthcheck timeout`
|
|
466
|
-
);
|
|
467
|
-
if (type === "http") {
|
|
468
|
-
const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
|
|
469
|
-
return {
|
|
470
|
-
type: "http",
|
|
471
|
-
url,
|
|
472
|
-
timeoutMs
|
|
473
|
-
};
|
|
474
|
-
}
|
|
475
|
-
if (type === "command") {
|
|
476
|
-
const commandTemplate = resolveString(
|
|
477
|
-
candidate.command_template ?? candidate.commandTemplate,
|
|
478
|
-
env,
|
|
479
|
-
`${targetName} healthcheck command template`,
|
|
480
|
-
true
|
|
481
|
-
);
|
|
482
|
-
assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
|
|
483
|
-
const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
|
|
484
|
-
allowLiteral: true,
|
|
485
|
-
optionalEnv: true
|
|
486
|
-
});
|
|
487
|
-
const resolvedCwd = cwd && evalFilePath && !path2.isAbsolute(cwd) ? path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd) : cwd;
|
|
488
|
-
return {
|
|
489
|
-
type: "command",
|
|
490
|
-
commandTemplate,
|
|
491
|
-
timeoutMs,
|
|
492
|
-
cwd: resolvedCwd
|
|
493
|
-
};
|
|
494
|
-
}
|
|
495
|
-
throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
|
|
496
|
-
}
|
|
497
745
|
function assertSupportedCliPlaceholders(template, description) {
|
|
498
746
|
const placeholders = extractCliPlaceholders(template);
|
|
499
747
|
for (const placeholder of placeholders) {
|
|
@@ -661,6 +909,8 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
661
909
|
// src/evaluation/providers/types.ts
|
|
662
910
|
var AGENT_PROVIDER_KINDS = [
|
|
663
911
|
"codex",
|
|
912
|
+
"pi-coding-agent",
|
|
913
|
+
"claude-code",
|
|
664
914
|
"vscode",
|
|
665
915
|
"vscode-insiders"
|
|
666
916
|
];
|
|
@@ -669,6 +919,8 @@ var KNOWN_PROVIDERS = [
|
|
|
669
919
|
"anthropic",
|
|
670
920
|
"gemini",
|
|
671
921
|
"codex",
|
|
922
|
+
"pi-coding-agent",
|
|
923
|
+
"claude-code",
|
|
672
924
|
"cli",
|
|
673
925
|
"mock",
|
|
674
926
|
"vscode",
|
|
@@ -683,6 +935,8 @@ var PROVIDER_ALIASES = [
|
|
|
683
935
|
// alias for "gemini"
|
|
684
936
|
"codex-cli",
|
|
685
937
|
// alias for "codex"
|
|
938
|
+
"pi",
|
|
939
|
+
// alias for "pi-coding-agent"
|
|
686
940
|
"openai",
|
|
687
941
|
// legacy/future support
|
|
688
942
|
"bedrock",
|
|
@@ -725,4 +979,4 @@ export {
|
|
|
725
979
|
extractLastAssistantContent,
|
|
726
980
|
isAgentProvider
|
|
727
981
|
};
|
|
728
|
-
//# sourceMappingURL=chunk-
|
|
982
|
+
//# sourceMappingURL=chunk-IBTKEEOT.js.map
|