@agentv/core 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -77
- package/dist/{chunk-4A6L2F6L.js → chunk-E2VSU4WZ.js} +282 -81
- package/dist/chunk-E2VSU4WZ.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +82 -67
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -68
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +1668 -489
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +279 -77
- package/dist/index.d.ts +279 -77
- package/dist/index.js +1334 -356
- package/dist/index.js.map +1 -1
- package/package.json +2 -5
- package/dist/chunk-4A6L2F6L.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,77 +1,77 @@
|
|
|
1
|
-
# @agentv/core
|
|
2
|
-
|
|
3
|
-
Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
|
|
4
|
-
|
|
5
|
-
## Overview
|
|
6
|
-
|
|
7
|
-
This package provides the foundational components for building and evaluating AI agents:
|
|
8
|
-
|
|
9
|
-
- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
|
|
10
|
-
- **Evaluation Engine**: YAML-based test specification and execution
|
|
11
|
-
- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
|
|
12
|
-
- **Target Management**: Flexible configuration for different execution environments
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
```bash
|
|
17
|
-
npm install @agentv/core
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Usage
|
|
21
|
-
|
|
22
|
-
This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
|
|
23
|
-
|
|
24
|
-
```bash
|
|
25
|
-
npm install -g agentv
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
For programmatic usage or custom integrations, you can import core components:
|
|
29
|
-
|
|
30
|
-
```typescript
|
|
31
|
-
import { createProvider, runEvaluation } from '@agentv/core';
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
## Features
|
|
35
|
-
|
|
36
|
-
### Multi-Provider Support
|
|
37
|
-
|
|
38
|
-
- **Azure OpenAI**: Enterprise-grade deployment support
|
|
39
|
-
- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
|
|
40
|
-
- **Google Gemini**: Gemini 2.0 Flash and other models
|
|
41
|
-
- **VS Code Copilot**: Programmatic integration via subagent
|
|
42
|
-
- **Mock Provider**: Testing without API calls
|
|
43
|
-
|
|
44
|
-
### Evaluation Framework
|
|
45
|
-
|
|
46
|
-
- YAML-based test specifications
|
|
47
|
-
- Code block extraction and structured prompting
|
|
48
|
-
- Automatic retry handling for timeouts
|
|
49
|
-
- Detailed scoring with hit/miss analysis
|
|
50
|
-
- Multiple output formats (JSONL, YAML)
|
|
51
|
-
|
|
52
|
-
### Quality Grading
|
|
53
|
-
|
|
54
|
-
- AI-powered aspect extraction and comparison
|
|
55
|
-
- Normalized scoring (0.0 to 1.0)
|
|
56
|
-
- Detailed reasoning and analysis
|
|
57
|
-
- Configurable grading models
|
|
58
|
-
|
|
59
|
-
## Architecture
|
|
60
|
-
|
|
61
|
-
Built on modern TypeScript tooling:
|
|
62
|
-
|
|
63
|
-
- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
|
|
64
|
-
- **Zod**: Runtime type validation
|
|
65
|
-
- **YAML**: Configuration and test specifications
|
|
66
|
-
|
|
67
|
-
## Documentation
|
|
68
|
-
|
|
69
|
-
For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
|
|
70
|
-
|
|
71
|
-
## Repository
|
|
72
|
-
|
|
73
|
-
[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
|
|
74
|
-
|
|
75
|
-
## License
|
|
76
|
-
|
|
77
|
-
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
1
|
+
# @agentv/core
|
|
2
|
+
|
|
3
|
+
Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This package provides the foundational components for building and evaluating AI agents:
|
|
8
|
+
|
|
9
|
+
- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
|
|
10
|
+
- **Evaluation Engine**: YAML-based test specification and execution
|
|
11
|
+
- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
|
|
12
|
+
- **Target Management**: Flexible configuration for different execution environments
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @agentv/core
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install -g agentv
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For programmatic usage or custom integrations, you can import core components:
|
|
29
|
+
|
|
30
|
+
```typescript
|
|
31
|
+
import { createProvider, runEvaluation } from '@agentv/core';
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
|
|
36
|
+
### Multi-Provider Support
|
|
37
|
+
|
|
38
|
+
- **Azure OpenAI**: Enterprise-grade deployment support
|
|
39
|
+
- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
|
|
40
|
+
- **Google Gemini**: Gemini 2.0 Flash and other models
|
|
41
|
+
- **VS Code Copilot**: Programmatic integration via subagent
|
|
42
|
+
- **Mock Provider**: Testing without API calls
|
|
43
|
+
|
|
44
|
+
### Evaluation Framework
|
|
45
|
+
|
|
46
|
+
- YAML-based test specifications
|
|
47
|
+
- Code block extraction and structured prompting
|
|
48
|
+
- Automatic retry handling for timeouts
|
|
49
|
+
- Detailed scoring with hit/miss analysis
|
|
50
|
+
- Multiple output formats (JSONL, YAML)
|
|
51
|
+
|
|
52
|
+
### Quality Grading
|
|
53
|
+
|
|
54
|
+
- AI-powered aspect extraction and comparison
|
|
55
|
+
- Normalized scoring (0.0 to 1.0)
|
|
56
|
+
- Detailed reasoning and analysis
|
|
57
|
+
- Configurable grading models
|
|
58
|
+
|
|
59
|
+
## Architecture
|
|
60
|
+
|
|
61
|
+
Built on modern TypeScript tooling:
|
|
62
|
+
|
|
63
|
+
- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
|
|
64
|
+
- **Zod**: Runtime type validation
|
|
65
|
+
- **YAML**: Configuration and test specifications
|
|
66
|
+
|
|
67
|
+
## Documentation
|
|
68
|
+
|
|
69
|
+
For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
|
|
70
|
+
|
|
71
|
+
## Repository
|
|
72
|
+
|
|
73
|
+
[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
@@ -116,6 +116,161 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
116
116
|
// src/evaluation/providers/targets.ts
|
|
117
117
|
import path2 from "node:path";
|
|
118
118
|
import { z } from "zod";
|
|
119
|
+
var CliHealthcheckHttpInputSchema = z.object({
|
|
120
|
+
type: z.literal("http"),
|
|
121
|
+
url: z.string().min(1, "healthcheck URL is required"),
|
|
122
|
+
timeout_seconds: z.number().positive().optional(),
|
|
123
|
+
timeoutSeconds: z.number().positive().optional()
|
|
124
|
+
});
|
|
125
|
+
var CliHealthcheckCommandInputSchema = z.object({
|
|
126
|
+
type: z.literal("command"),
|
|
127
|
+
command_template: z.string().optional(),
|
|
128
|
+
commandTemplate: z.string().optional(),
|
|
129
|
+
cwd: z.string().optional(),
|
|
130
|
+
timeout_seconds: z.number().positive().optional(),
|
|
131
|
+
timeoutSeconds: z.number().positive().optional()
|
|
132
|
+
});
|
|
133
|
+
var CliHealthcheckInputSchema = z.discriminatedUnion("type", [
|
|
134
|
+
CliHealthcheckHttpInputSchema,
|
|
135
|
+
CliHealthcheckCommandInputSchema
|
|
136
|
+
]);
|
|
137
|
+
var CliTargetInputSchema = z.object({
|
|
138
|
+
name: z.string().min(1, "target name is required"),
|
|
139
|
+
provider: z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
|
|
140
|
+
// Command template - required (accept both naming conventions)
|
|
141
|
+
command_template: z.string().optional(),
|
|
142
|
+
commandTemplate: z.string().optional(),
|
|
143
|
+
// Files format - optional
|
|
144
|
+
files_format: z.string().optional(),
|
|
145
|
+
filesFormat: z.string().optional(),
|
|
146
|
+
attachments_format: z.string().optional(),
|
|
147
|
+
attachmentsFormat: z.string().optional(),
|
|
148
|
+
// Working directory - optional
|
|
149
|
+
cwd: z.string().optional(),
|
|
150
|
+
// Timeout in seconds - optional
|
|
151
|
+
timeout_seconds: z.number().positive().optional(),
|
|
152
|
+
timeoutSeconds: z.number().positive().optional(),
|
|
153
|
+
// Healthcheck configuration - optional
|
|
154
|
+
healthcheck: CliHealthcheckInputSchema.optional(),
|
|
155
|
+
// Verbose mode - optional
|
|
156
|
+
verbose: z.boolean().optional(),
|
|
157
|
+
cli_verbose: z.boolean().optional(),
|
|
158
|
+
cliVerbose: z.boolean().optional(),
|
|
159
|
+
// Keep temp files - optional
|
|
160
|
+
keep_temp_files: z.boolean().optional(),
|
|
161
|
+
keepTempFiles: z.boolean().optional(),
|
|
162
|
+
keep_output_files: z.boolean().optional(),
|
|
163
|
+
keepOutputFiles: z.boolean().optional(),
|
|
164
|
+
// Common target fields
|
|
165
|
+
judge_target: z.string().optional(),
|
|
166
|
+
workers: z.number().int().min(1).optional(),
|
|
167
|
+
provider_batching: z.boolean().optional(),
|
|
168
|
+
providerBatching: z.boolean().optional()
|
|
169
|
+
}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
|
|
170
|
+
message: "Either command_template or commandTemplate is required"
|
|
171
|
+
});
|
|
172
|
+
var CliHealthcheckHttpSchema = z.object({
|
|
173
|
+
type: z.literal("http"),
|
|
174
|
+
url: z.string().min(1),
|
|
175
|
+
timeoutMs: z.number().positive().optional()
|
|
176
|
+
}).strict();
|
|
177
|
+
var CliHealthcheckCommandSchema = z.object({
|
|
178
|
+
type: z.literal("command"),
|
|
179
|
+
commandTemplate: z.string().min(1),
|
|
180
|
+
cwd: z.string().optional(),
|
|
181
|
+
timeoutMs: z.number().positive().optional()
|
|
182
|
+
}).strict();
|
|
183
|
+
var CliHealthcheckSchema = z.discriminatedUnion("type", [
|
|
184
|
+
CliHealthcheckHttpSchema,
|
|
185
|
+
CliHealthcheckCommandSchema
|
|
186
|
+
]);
|
|
187
|
+
var CliTargetConfigSchema = z.object({
|
|
188
|
+
commandTemplate: z.string().min(1),
|
|
189
|
+
filesFormat: z.string().optional(),
|
|
190
|
+
cwd: z.string().optional(),
|
|
191
|
+
timeoutMs: z.number().positive().optional(),
|
|
192
|
+
healthcheck: CliHealthcheckSchema.optional(),
|
|
193
|
+
verbose: z.boolean().optional(),
|
|
194
|
+
keepTempFiles: z.boolean().optional()
|
|
195
|
+
}).strict();
|
|
196
|
+
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
197
|
+
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
198
|
+
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
199
|
+
if (input.type === "http") {
|
|
200
|
+
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
201
|
+
return {
|
|
202
|
+
type: "http",
|
|
203
|
+
url,
|
|
204
|
+
timeoutMs
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
const commandTemplateSource = input.command_template ?? input.commandTemplate;
|
|
208
|
+
if (commandTemplateSource === void 0) {
|
|
209
|
+
throw new Error(
|
|
210
|
+
`${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
|
|
211
|
+
);
|
|
212
|
+
}
|
|
213
|
+
const commandTemplate = resolveString(
|
|
214
|
+
commandTemplateSource,
|
|
215
|
+
env,
|
|
216
|
+
`${targetName} healthcheck command template`,
|
|
217
|
+
true
|
|
218
|
+
);
|
|
219
|
+
let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
|
|
220
|
+
allowLiteral: true,
|
|
221
|
+
optionalEnv: true
|
|
222
|
+
});
|
|
223
|
+
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
224
|
+
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
225
|
+
}
|
|
226
|
+
return {
|
|
227
|
+
type: "command",
|
|
228
|
+
commandTemplate,
|
|
229
|
+
cwd,
|
|
230
|
+
timeoutMs
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
234
|
+
const targetName = input.name;
|
|
235
|
+
const commandTemplateSource = input.command_template ?? input.commandTemplate;
|
|
236
|
+
if (commandTemplateSource === void 0) {
|
|
237
|
+
throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
|
|
238
|
+
}
|
|
239
|
+
const commandTemplate = resolveString(
|
|
240
|
+
commandTemplateSource,
|
|
241
|
+
env,
|
|
242
|
+
`${targetName} CLI command template`,
|
|
243
|
+
true
|
|
244
|
+
);
|
|
245
|
+
const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
|
|
246
|
+
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
247
|
+
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
248
|
+
allowLiteral: true,
|
|
249
|
+
optionalEnv: true
|
|
250
|
+
});
|
|
251
|
+
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
252
|
+
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
253
|
+
}
|
|
254
|
+
if (!cwd && evalFilePath) {
|
|
255
|
+
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
256
|
+
}
|
|
257
|
+
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
258
|
+
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
259
|
+
const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
|
|
260
|
+
const keepTempFiles = resolveOptionalBoolean(
|
|
261
|
+
input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
|
|
262
|
+
);
|
|
263
|
+
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
264
|
+
return {
|
|
265
|
+
commandTemplate,
|
|
266
|
+
filesFormat,
|
|
267
|
+
cwd,
|
|
268
|
+
timeoutMs,
|
|
269
|
+
healthcheck,
|
|
270
|
+
verbose,
|
|
271
|
+
keepTempFiles
|
|
272
|
+
};
|
|
273
|
+
}
|
|
119
274
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
120
275
|
"PROMPT",
|
|
121
276
|
"GUIDELINES",
|
|
@@ -221,6 +376,16 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
221
376
|
providerBatching,
|
|
222
377
|
config: resolveCodexConfig(parsed, env)
|
|
223
378
|
};
|
|
379
|
+
case "pi":
|
|
380
|
+
case "pi-coding-agent":
|
|
381
|
+
return {
|
|
382
|
+
kind: "pi-coding-agent",
|
|
383
|
+
name: parsed.name,
|
|
384
|
+
judgeTarget: parsed.judge_target,
|
|
385
|
+
workers: parsed.workers,
|
|
386
|
+
providerBatching,
|
|
387
|
+
config: resolvePiCodingAgentConfig(parsed, env)
|
|
388
|
+
};
|
|
224
389
|
case "mock":
|
|
225
390
|
return {
|
|
226
391
|
kind: "mock",
|
|
@@ -329,6 +494,7 @@ function resolveCodexConfig(target, env) {
|
|
|
329
494
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
330
495
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
331
496
|
const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
497
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
332
498
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
333
499
|
allowLiteral: true,
|
|
334
500
|
optionalEnv: true
|
|
@@ -344,13 +510,15 @@ function resolveCodexConfig(target, env) {
|
|
|
344
510
|
optionalEnv: true
|
|
345
511
|
});
|
|
346
512
|
const logFormat = normalizeCodexLogFormat(logFormatSource);
|
|
513
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
347
514
|
return {
|
|
348
515
|
executable,
|
|
349
516
|
args,
|
|
350
517
|
cwd,
|
|
351
518
|
timeoutMs,
|
|
352
519
|
logDir,
|
|
353
|
-
logFormat
|
|
520
|
+
logFormat,
|
|
521
|
+
systemPrompt
|
|
354
522
|
};
|
|
355
523
|
}
|
|
356
524
|
function normalizeCodexLogFormat(value) {
|
|
@@ -366,10 +534,73 @@ function normalizeCodexLogFormat(value) {
|
|
|
366
534
|
}
|
|
367
535
|
throw new Error("codex log format must be 'summary' or 'json'");
|
|
368
536
|
}
|
|
537
|
+
function resolvePiCodingAgentConfig(target, env) {
|
|
538
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
539
|
+
const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
|
|
540
|
+
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
541
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
542
|
+
const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
|
|
543
|
+
const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
|
|
544
|
+
const argsSource = target.args ?? target.arguments;
|
|
545
|
+
const cwdSource = target.cwd;
|
|
546
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
547
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
548
|
+
const logFormatSource = target.log_format ?? target.logFormat;
|
|
549
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
550
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
|
|
551
|
+
allowLiteral: true,
|
|
552
|
+
optionalEnv: true
|
|
553
|
+
}) ?? "pi";
|
|
554
|
+
const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
|
|
555
|
+
allowLiteral: true,
|
|
556
|
+
optionalEnv: true
|
|
557
|
+
});
|
|
558
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
|
|
559
|
+
allowLiteral: true,
|
|
560
|
+
optionalEnv: true
|
|
561
|
+
});
|
|
562
|
+
const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
|
|
563
|
+
allowLiteral: false,
|
|
564
|
+
optionalEnv: true
|
|
565
|
+
});
|
|
566
|
+
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
|
|
567
|
+
allowLiteral: true,
|
|
568
|
+
optionalEnv: true
|
|
569
|
+
});
|
|
570
|
+
const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
|
|
571
|
+
allowLiteral: true,
|
|
572
|
+
optionalEnv: true
|
|
573
|
+
});
|
|
574
|
+
const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
|
|
575
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
|
|
576
|
+
allowLiteral: true,
|
|
577
|
+
optionalEnv: true
|
|
578
|
+
});
|
|
579
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
|
|
580
|
+
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
|
|
581
|
+
allowLiteral: true,
|
|
582
|
+
optionalEnv: true
|
|
583
|
+
});
|
|
584
|
+
const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
|
|
585
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
586
|
+
return {
|
|
587
|
+
executable,
|
|
588
|
+
provider,
|
|
589
|
+
model,
|
|
590
|
+
apiKey,
|
|
591
|
+
tools,
|
|
592
|
+
thinking,
|
|
593
|
+
args,
|
|
594
|
+
cwd,
|
|
595
|
+
timeoutMs,
|
|
596
|
+
logDir,
|
|
597
|
+
logFormat,
|
|
598
|
+
systemPrompt
|
|
599
|
+
};
|
|
600
|
+
}
|
|
369
601
|
function resolveMockConfig(target) {
|
|
370
602
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
371
|
-
|
|
372
|
-
return { response, trace };
|
|
603
|
+
return { response };
|
|
373
604
|
}
|
|
374
605
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
375
606
|
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
@@ -401,42 +632,35 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
401
632
|
workspaceTemplate
|
|
402
633
|
};
|
|
403
634
|
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
408
|
-
);
|
|
409
|
-
const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
|
|
410
|
-
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
411
|
-
allowLiteral: true,
|
|
412
|
-
optionalEnv: true
|
|
413
|
-
});
|
|
414
|
-
if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
|
|
415
|
-
cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
|
|
635
|
+
var cliErrorMap = (issue, ctx) => {
|
|
636
|
+
if (issue.code === z.ZodIssueCode.unrecognized_keys) {
|
|
637
|
+
return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
|
|
416
638
|
}
|
|
417
|
-
if (
|
|
418
|
-
|
|
639
|
+
if (issue.code === z.ZodIssueCode.invalid_union_discriminator) {
|
|
640
|
+
return { message: "healthcheck type must be 'http' or 'command'" };
|
|
419
641
|
}
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
642
|
+
if (issue.code === z.ZodIssueCode.invalid_type && issue.expected === "string") {
|
|
643
|
+
return { message: `${ctx.defaultError} (expected a string value)` };
|
|
644
|
+
}
|
|
645
|
+
return { message: ctx.defaultError };
|
|
646
|
+
};
|
|
647
|
+
function resolveCliConfig(target, env, evalFilePath) {
|
|
648
|
+
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
649
|
+
if (!parseResult.success) {
|
|
650
|
+
const firstError = parseResult.error.errors[0];
|
|
651
|
+
const path3 = firstError?.path.join(".") || "";
|
|
652
|
+
const prefix = path3 ? `${target.name} ${path3}: ` : `${target.name}: `;
|
|
653
|
+
throw new Error(`${prefix}${firstError?.message}`);
|
|
654
|
+
}
|
|
655
|
+
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
656
|
+
assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
|
|
657
|
+
if (normalized.healthcheck?.type === "command") {
|
|
658
|
+
assertSupportedCliPlaceholders(
|
|
659
|
+
normalized.healthcheck.commandTemplate,
|
|
660
|
+
`${target.name} healthcheck command template`
|
|
661
|
+
);
|
|
662
|
+
}
|
|
663
|
+
return normalized;
|
|
440
664
|
}
|
|
441
665
|
function resolveTimeoutMs(source, description) {
|
|
442
666
|
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
@@ -448,49 +672,6 @@ function resolveTimeoutMs(source, description) {
|
|
|
448
672
|
}
|
|
449
673
|
return Math.floor(seconds * 1e3);
|
|
450
674
|
}
|
|
451
|
-
function resolveCliHealthcheck(source, env, targetName, evalFilePath) {
|
|
452
|
-
if (source === void 0 || source === null) {
|
|
453
|
-
return void 0;
|
|
454
|
-
}
|
|
455
|
-
if (typeof source !== "object" || Array.isArray(source)) {
|
|
456
|
-
throw new Error(`${targetName} healthcheck must be an object`);
|
|
457
|
-
}
|
|
458
|
-
const candidate = source;
|
|
459
|
-
const type = candidate.type;
|
|
460
|
-
const timeoutMs = resolveTimeoutMs(
|
|
461
|
-
candidate.timeout_seconds ?? candidate.timeoutSeconds,
|
|
462
|
-
`${targetName} healthcheck timeout`
|
|
463
|
-
);
|
|
464
|
-
if (type === "http") {
|
|
465
|
-
const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
|
|
466
|
-
return {
|
|
467
|
-
type: "http",
|
|
468
|
-
url,
|
|
469
|
-
timeoutMs
|
|
470
|
-
};
|
|
471
|
-
}
|
|
472
|
-
if (type === "command") {
|
|
473
|
-
const commandTemplate = resolveString(
|
|
474
|
-
candidate.command_template ?? candidate.commandTemplate,
|
|
475
|
-
env,
|
|
476
|
-
`${targetName} healthcheck command template`,
|
|
477
|
-
true
|
|
478
|
-
);
|
|
479
|
-
assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
|
|
480
|
-
const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
|
|
481
|
-
allowLiteral: true,
|
|
482
|
-
optionalEnv: true
|
|
483
|
-
});
|
|
484
|
-
const resolvedCwd = cwd && evalFilePath && !path2.isAbsolute(cwd) ? path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd) : cwd;
|
|
485
|
-
return {
|
|
486
|
-
type: "command",
|
|
487
|
-
commandTemplate,
|
|
488
|
-
timeoutMs,
|
|
489
|
-
cwd: resolvedCwd
|
|
490
|
-
};
|
|
491
|
-
}
|
|
492
|
-
throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
|
|
493
|
-
}
|
|
494
675
|
function assertSupportedCliPlaceholders(template, description) {
|
|
495
676
|
const placeholders = extractCliPlaceholders(template);
|
|
496
677
|
for (const placeholder of placeholders) {
|
|
@@ -658,6 +839,7 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
658
839
|
// src/evaluation/providers/types.ts
|
|
659
840
|
var AGENT_PROVIDER_KINDS = [
|
|
660
841
|
"codex",
|
|
842
|
+
"pi-coding-agent",
|
|
661
843
|
"vscode",
|
|
662
844
|
"vscode-insiders"
|
|
663
845
|
];
|
|
@@ -666,6 +848,7 @@ var KNOWN_PROVIDERS = [
|
|
|
666
848
|
"anthropic",
|
|
667
849
|
"gemini",
|
|
668
850
|
"codex",
|
|
851
|
+
"pi-coding-agent",
|
|
669
852
|
"cli",
|
|
670
853
|
"mock",
|
|
671
854
|
"vscode",
|
|
@@ -680,6 +863,8 @@ var PROVIDER_ALIASES = [
|
|
|
680
863
|
// alias for "gemini"
|
|
681
864
|
"codex-cli",
|
|
682
865
|
// alias for "codex"
|
|
866
|
+
"pi",
|
|
867
|
+
// alias for "pi-coding-agent"
|
|
683
868
|
"openai",
|
|
684
869
|
// legacy/future support
|
|
685
870
|
"bedrock",
|
|
@@ -687,6 +872,21 @@ var PROVIDER_ALIASES = [
|
|
|
687
872
|
"vertex"
|
|
688
873
|
// legacy/future support
|
|
689
874
|
];
|
|
875
|
+
function extractLastAssistantContent(messages) {
|
|
876
|
+
if (!messages || messages.length === 0) {
|
|
877
|
+
return "";
|
|
878
|
+
}
|
|
879
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
880
|
+
const msg = messages[i];
|
|
881
|
+
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
882
|
+
if (typeof msg.content === "string") {
|
|
883
|
+
return msg.content;
|
|
884
|
+
}
|
|
885
|
+
return JSON.stringify(msg.content);
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
return "";
|
|
889
|
+
}
|
|
690
890
|
function isAgentProvider(provider) {
|
|
691
891
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
692
892
|
}
|
|
@@ -704,6 +904,7 @@ export {
|
|
|
704
904
|
resolveTargetDefinition,
|
|
705
905
|
KNOWN_PROVIDERS,
|
|
706
906
|
PROVIDER_ALIASES,
|
|
907
|
+
extractLastAssistantContent,
|
|
707
908
|
isAgentProvider
|
|
708
909
|
};
|
|
709
|
-
//# sourceMappingURL=chunk-
|
|
910
|
+
//# sourceMappingURL=chunk-E2VSU4WZ.js.map
|