agentv 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -4
- package/dist/{chunk-6SHT2QS6.js → chunk-5AJ7DFUO.js} +211 -7
- package/dist/chunk-5AJ7DFUO.js.map +1 -0
- package/dist/cli.js +4 -2
- package/dist/cli.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +45 -43
- package/package.json +4 -2
- package/dist/chunk-6SHT2QS6.js.map +0 -1
package/README.md
CHANGED
|
@@ -59,6 +59,14 @@ bun run build
|
|
|
59
59
|
bun test
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
+
5. (Optional) Install example dependencies:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
bun run examples:install
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
This step is required if you want to run the examples in the `examples/` directory, as they are self-contained packages with their own dependencies.
|
|
69
|
+
|
|
62
70
|
You are now ready to start development. The monorepo contains:
|
|
63
71
|
|
|
64
72
|
- `packages/core/` - Core evaluation engine
|
|
@@ -77,9 +85,8 @@ You are now ready to start development. The monorepo contains:
|
|
|
77
85
|
|
|
78
86
|
## Quick Start
|
|
79
87
|
|
|
80
|
-
You can use the following examples as a starting point
|
|
81
|
-
- [
|
|
82
|
-
- [Showcase](docs/examples/showcase/README.md): A collection of advanced use cases and real-world agent evaluation scenarios.
|
|
88
|
+
You can use the following examples as a starting point:
|
|
89
|
+
- [Examples](examples/README.md): Feature demonstrations and real-world showcase examples
|
|
83
90
|
|
|
84
91
|
### Validating Eval Files
|
|
85
92
|
|
|
@@ -373,7 +380,7 @@ agentv generate rubrics evals/my-eval.yaml --target openai:gpt-4o
|
|
|
373
380
|
- `borderline`: Score ≥ 0.6 and all required rubrics met
|
|
374
381
|
- `fail`: Score < 0.6 or any required rubric failed
|
|
375
382
|
|
|
376
|
-
For complete examples and detailed patterns, see [examples/features/
|
|
383
|
+
For complete examples and detailed patterns, see [examples/features/rubric/](examples/features/rubric/).
|
|
377
384
|
|
|
378
385
|
## Advanced Configuration
|
|
379
386
|
|
|
@@ -375,7 +375,7 @@ var compareCommand = command({
|
|
|
375
375
|
import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
|
|
376
376
|
import path15 from "node:path";
|
|
377
377
|
|
|
378
|
-
// ../../packages/core/dist/chunk-
|
|
378
|
+
// ../../packages/core/dist/chunk-KDEP4I7G.js
|
|
379
379
|
import { constants } from "node:fs";
|
|
380
380
|
import { access, readFile } from "node:fs/promises";
|
|
381
381
|
import path from "node:path";
|
|
@@ -4422,7 +4422,7 @@ var coerce = {
|
|
|
4422
4422
|
};
|
|
4423
4423
|
var NEVER = INVALID;
|
|
4424
4424
|
|
|
4425
|
-
// ../../packages/core/dist/chunk-
|
|
4425
|
+
// ../../packages/core/dist/chunk-KDEP4I7G.js
|
|
4426
4426
|
async function fileExists(filePath) {
|
|
4427
4427
|
try {
|
|
4428
4428
|
await access(filePath, constants.F_OK);
|
|
@@ -4802,6 +4802,15 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
4802
4802
|
providerBatching,
|
|
4803
4803
|
config: resolvePiCodingAgentConfig(parsed, env)
|
|
4804
4804
|
};
|
|
4805
|
+
case "pi-agent-sdk":
|
|
4806
|
+
return {
|
|
4807
|
+
kind: "pi-agent-sdk",
|
|
4808
|
+
name: parsed.name,
|
|
4809
|
+
judgeTarget: parsed.judge_target,
|
|
4810
|
+
workers: parsed.workers,
|
|
4811
|
+
providerBatching,
|
|
4812
|
+
config: resolvePiAgentSdkConfig(parsed, env)
|
|
4813
|
+
};
|
|
4805
4814
|
case "claude-code":
|
|
4806
4815
|
return {
|
|
4807
4816
|
kind: "claude-code",
|
|
@@ -5023,6 +5032,39 @@ function resolvePiCodingAgentConfig(target, env) {
|
|
|
5023
5032
|
systemPrompt
|
|
5024
5033
|
};
|
|
5025
5034
|
}
|
|
5035
|
+
function resolvePiAgentSdkConfig(target, env) {
|
|
5036
|
+
const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
|
|
5037
|
+
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
5038
|
+
const apiKeySource = target.api_key ?? target.apiKey;
|
|
5039
|
+
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
5040
|
+
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
5041
|
+
const provider = resolveOptionalString(
|
|
5042
|
+
providerSource,
|
|
5043
|
+
env,
|
|
5044
|
+
`${target.name} pi-agent-sdk provider`,
|
|
5045
|
+
{
|
|
5046
|
+
allowLiteral: true,
|
|
5047
|
+
optionalEnv: true
|
|
5048
|
+
}
|
|
5049
|
+
);
|
|
5050
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} pi-agent-sdk model`, {
|
|
5051
|
+
allowLiteral: true,
|
|
5052
|
+
optionalEnv: true
|
|
5053
|
+
});
|
|
5054
|
+
const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi-agent-sdk api key`, {
|
|
5055
|
+
allowLiteral: false,
|
|
5056
|
+
optionalEnv: true
|
|
5057
|
+
});
|
|
5058
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-agent-sdk timeout`);
|
|
5059
|
+
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
5060
|
+
return {
|
|
5061
|
+
provider,
|
|
5062
|
+
model,
|
|
5063
|
+
apiKey,
|
|
5064
|
+
timeoutMs,
|
|
5065
|
+
systemPrompt
|
|
5066
|
+
};
|
|
5067
|
+
}
|
|
5026
5068
|
function resolveClaudeCodeConfig(target, env) {
|
|
5027
5069
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
5028
5070
|
const modelSource = target.model;
|
|
@@ -5331,6 +5373,7 @@ var KNOWN_PROVIDERS = [
|
|
|
5331
5373
|
"gemini",
|
|
5332
5374
|
"codex",
|
|
5333
5375
|
"pi-coding-agent",
|
|
5376
|
+
"pi-agent-sdk",
|
|
5334
5377
|
"claude-code",
|
|
5335
5378
|
"cli",
|
|
5336
5379
|
"mock",
|
|
@@ -38948,6 +38991,165 @@ var MockProvider = class {
|
|
|
38948
38991
|
return this.delayMs;
|
|
38949
38992
|
}
|
|
38950
38993
|
};
|
|
38994
|
+
var piAgentModule = null;
|
|
38995
|
+
var piAiModule = null;
|
|
38996
|
+
async function loadPiModules() {
|
|
38997
|
+
if (!piAgentModule || !piAiModule) {
|
|
38998
|
+
try {
|
|
38999
|
+
[piAgentModule, piAiModule] = await Promise.all([
|
|
39000
|
+
import("@mariozechner/pi-agent"),
|
|
39001
|
+
import("@mariozechner/pi-ai")
|
|
39002
|
+
]);
|
|
39003
|
+
} catch (error40) {
|
|
39004
|
+
throw new Error(
|
|
39005
|
+
`Failed to load pi-agent-sdk dependencies. Please install them:
|
|
39006
|
+
npm install @mariozechner/pi-agent @mariozechner/pi-ai
|
|
39007
|
+
|
|
39008
|
+
Original error: ${error40 instanceof Error ? error40.message : String(error40)}`
|
|
39009
|
+
);
|
|
39010
|
+
}
|
|
39011
|
+
}
|
|
39012
|
+
return {
|
|
39013
|
+
Agent: piAgentModule.Agent,
|
|
39014
|
+
ProviderTransport: piAgentModule.ProviderTransport,
|
|
39015
|
+
getModel: piAiModule.getModel,
|
|
39016
|
+
getEnvApiKey: piAiModule.getEnvApiKey
|
|
39017
|
+
};
|
|
39018
|
+
}
|
|
39019
|
+
var PiAgentSdkProvider = class {
|
|
39020
|
+
id;
|
|
39021
|
+
kind = "pi-agent-sdk";
|
|
39022
|
+
targetName;
|
|
39023
|
+
supportsBatch = false;
|
|
39024
|
+
config;
|
|
39025
|
+
constructor(targetName, config2) {
|
|
39026
|
+
this.id = `pi-agent-sdk:${targetName}`;
|
|
39027
|
+
this.targetName = targetName;
|
|
39028
|
+
this.config = config2;
|
|
39029
|
+
}
|
|
39030
|
+
async invoke(request) {
|
|
39031
|
+
if (request.signal?.aborted) {
|
|
39032
|
+
throw new Error("Pi agent SDK request was aborted before execution");
|
|
39033
|
+
}
|
|
39034
|
+
const { Agent, ProviderTransport, getModel, getEnvApiKey } = await loadPiModules();
|
|
39035
|
+
const startTime = Date.now();
|
|
39036
|
+
const providerName = this.config.provider ?? "anthropic";
|
|
39037
|
+
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
39038
|
+
const model = getModel(providerName, modelId);
|
|
39039
|
+
const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
|
|
39040
|
+
const transport = new ProviderTransport({
|
|
39041
|
+
getApiKey: async (provider) => {
|
|
39042
|
+
return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
|
|
39043
|
+
}
|
|
39044
|
+
});
|
|
39045
|
+
const agent = new Agent({
|
|
39046
|
+
initialState: {
|
|
39047
|
+
systemPrompt,
|
|
39048
|
+
model,
|
|
39049
|
+
tools: [],
|
|
39050
|
+
// No tools for simple Q&A
|
|
39051
|
+
messages: []
|
|
39052
|
+
},
|
|
39053
|
+
transport
|
|
39054
|
+
});
|
|
39055
|
+
const outputMessages = [];
|
|
39056
|
+
let finalAssistantContent = "";
|
|
39057
|
+
const unsubscribe = agent.subscribe((event) => {
|
|
39058
|
+
if (event.type === "message_end") {
|
|
39059
|
+
const msg = event.message;
|
|
39060
|
+
if (msg.role === "assistant") {
|
|
39061
|
+
const content = extractTextContent22(msg.content);
|
|
39062
|
+
if (content) {
|
|
39063
|
+
finalAssistantContent = content;
|
|
39064
|
+
}
|
|
39065
|
+
}
|
|
39066
|
+
}
|
|
39067
|
+
});
|
|
39068
|
+
try {
|
|
39069
|
+
const timeoutMs = this.config.timeoutMs ?? 12e4;
|
|
39070
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
39071
|
+
setTimeout(
|
|
39072
|
+
() => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
|
|
39073
|
+
timeoutMs
|
|
39074
|
+
);
|
|
39075
|
+
});
|
|
39076
|
+
await Promise.race([agent.prompt(request.question), timeoutPromise]);
|
|
39077
|
+
await agent.waitForIdle();
|
|
39078
|
+
const agentMessages = agent.state.messages;
|
|
39079
|
+
for (const msg of agentMessages) {
|
|
39080
|
+
outputMessages.push(convertAgentMessage(msg));
|
|
39081
|
+
}
|
|
39082
|
+
const durationMs = Date.now() - startTime;
|
|
39083
|
+
return {
|
|
39084
|
+
raw: {
|
|
39085
|
+
messages: agentMessages,
|
|
39086
|
+
systemPrompt,
|
|
39087
|
+
model: this.config.model,
|
|
39088
|
+
provider: this.config.provider
|
|
39089
|
+
},
|
|
39090
|
+
outputMessages,
|
|
39091
|
+
durationMs
|
|
39092
|
+
};
|
|
39093
|
+
} finally {
|
|
39094
|
+
unsubscribe();
|
|
39095
|
+
}
|
|
39096
|
+
}
|
|
39097
|
+
};
|
|
39098
|
+
function extractTextContent22(content) {
|
|
39099
|
+
if (typeof content === "string") {
|
|
39100
|
+
return content;
|
|
39101
|
+
}
|
|
39102
|
+
if (!Array.isArray(content)) {
|
|
39103
|
+
return void 0;
|
|
39104
|
+
}
|
|
39105
|
+
const textParts = [];
|
|
39106
|
+
for (const part of content) {
|
|
39107
|
+
if (!part || typeof part !== "object") {
|
|
39108
|
+
continue;
|
|
39109
|
+
}
|
|
39110
|
+
const p = part;
|
|
39111
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
39112
|
+
textParts.push(p.text);
|
|
39113
|
+
}
|
|
39114
|
+
}
|
|
39115
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
39116
|
+
}
|
|
39117
|
+
function convertAgentMessage(message) {
|
|
39118
|
+
if (!message || typeof message !== "object") {
|
|
39119
|
+
return { role: "unknown", content: String(message) };
|
|
39120
|
+
}
|
|
39121
|
+
const msg = message;
|
|
39122
|
+
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
39123
|
+
const content = extractTextContent22(msg.content);
|
|
39124
|
+
const toolCalls = extractToolCalls2(msg.content);
|
|
39125
|
+
const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
39126
|
+
return {
|
|
39127
|
+
role,
|
|
39128
|
+
content,
|
|
39129
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
39130
|
+
timestamp
|
|
39131
|
+
};
|
|
39132
|
+
}
|
|
39133
|
+
function extractToolCalls2(content) {
|
|
39134
|
+
if (!Array.isArray(content)) {
|
|
39135
|
+
return [];
|
|
39136
|
+
}
|
|
39137
|
+
const toolCalls = [];
|
|
39138
|
+
for (const part of content) {
|
|
39139
|
+
if (!part || typeof part !== "object") {
|
|
39140
|
+
continue;
|
|
39141
|
+
}
|
|
39142
|
+
const p = part;
|
|
39143
|
+
if (p.type === "tool_use" && typeof p.name === "string") {
|
|
39144
|
+
toolCalls.push({
|
|
39145
|
+
tool: p.name,
|
|
39146
|
+
input: p.input,
|
|
39147
|
+
id: typeof p.id === "string" ? p.id : void 0
|
|
39148
|
+
});
|
|
39149
|
+
}
|
|
39150
|
+
}
|
|
39151
|
+
return toolCalls;
|
|
39152
|
+
}
|
|
38951
39153
|
var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
|
|
38952
39154
|
var GLOBAL_SUBSCRIBERS_KEY3 = Symbol.for("agentv.piLogSubscribers");
|
|
38953
39155
|
function getPiLogStore() {
|
|
@@ -39445,8 +39647,8 @@ function convertPiMessage(message) {
|
|
|
39445
39647
|
if (typeof role !== "string") {
|
|
39446
39648
|
return void 0;
|
|
39447
39649
|
}
|
|
39448
|
-
const content =
|
|
39449
|
-
const toolCalls =
|
|
39650
|
+
const content = extractTextContent3(msg.content);
|
|
39651
|
+
const toolCalls = extractToolCalls3(msg.content);
|
|
39450
39652
|
const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
39451
39653
|
const metadata = {};
|
|
39452
39654
|
if (msg.api) metadata.api = msg.api;
|
|
@@ -39462,7 +39664,7 @@ function convertPiMessage(message) {
|
|
|
39462
39664
|
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
39463
39665
|
};
|
|
39464
39666
|
}
|
|
39465
|
-
function
|
|
39667
|
+
function extractTextContent3(content) {
|
|
39466
39668
|
if (typeof content === "string") {
|
|
39467
39669
|
return content;
|
|
39468
39670
|
}
|
|
@@ -39481,7 +39683,7 @@ function extractTextContent22(content) {
|
|
|
39481
39683
|
}
|
|
39482
39684
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
39483
39685
|
}
|
|
39484
|
-
function
|
|
39686
|
+
function extractToolCalls3(content) {
|
|
39485
39687
|
if (!Array.isArray(content)) {
|
|
39486
39688
|
return [];
|
|
39487
39689
|
}
|
|
@@ -39955,6 +40157,8 @@ function createProvider(target) {
|
|
|
39955
40157
|
return new CodexProvider(target.name, target.config);
|
|
39956
40158
|
case "pi-coding-agent":
|
|
39957
40159
|
return new PiCodingAgentProvider(target.name, target.config);
|
|
40160
|
+
case "pi-agent-sdk":
|
|
40161
|
+
return new PiAgentSdkProvider(target.name, target.config);
|
|
39958
40162
|
case "claude-code":
|
|
39959
40163
|
return new ClaudeCodeProvider(target.name, target.config);
|
|
39960
40164
|
case "mock":
|
|
@@ -45694,4 +45898,4 @@ export {
|
|
|
45694
45898
|
app,
|
|
45695
45899
|
runCli
|
|
45696
45900
|
};
|
|
45697
|
-
//# sourceMappingURL=chunk-
|
|
45901
|
+
//# sourceMappingURL=chunk-5AJ7DFUO.js.map
|