@agentv/core 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-IBTKEEOT.js → chunk-KDEP4I7G.js} +44 -1
- package/dist/chunk-KDEP4I7G.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +209 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -30
- package/dist/index.d.ts +16 -30
- package/dist/index.js +168 -41
- package/dist/index.js.map +1 -1
- package/package.json +4 -1
- package/dist/chunk-IBTKEEOT.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -67,8 +67,6 @@ __export(index_exports, {
|
|
|
67
67
|
loadEvalCases: () => loadEvalCases,
|
|
68
68
|
mergeExecutionMetrics: () => mergeExecutionMetrics,
|
|
69
69
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
70
|
-
parseCodeJudgePayload: () => parseCodeJudgePayload,
|
|
71
|
-
readCodeJudgePayload: () => readCodeJudgePayload,
|
|
72
70
|
readJsonFile: () => readJsonFile,
|
|
73
71
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
74
72
|
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
@@ -4272,6 +4270,167 @@ var MockProvider = class {
|
|
|
4272
4270
|
}
|
|
4273
4271
|
};
|
|
4274
4272
|
|
|
4273
|
+
// src/evaluation/providers/pi-agent-sdk.ts
|
|
4274
|
+
// Module-level cache for the optional pi SDK packages; both stay null until
// the first successful call to loadPiModules().
var piAgentModule = null;
var piAiModule = null;

/**
 * Lazily loads the optional `@mariozechner/pi-agent` and `@mariozechner/pi-ai`
 * packages via dynamic import and caches both modules at module level.
 *
 * The two imports run in parallel. The cache variables are only assigned when
 * both imports succeed, so a failed attempt is retried on the next call.
 *
 * @returns {Promise<{Agent: any, ProviderTransport: any, getModel: Function, getEnvApiKey: Function}>}
 *   The four SDK entry points the provider needs.
 * @throws {Error} With installation instructions when either package cannot be
 *   imported. The underlying import failure is preserved on `error.cause`.
 */
async function loadPiModules() {
  if (!piAgentModule || !piAiModule) {
    try {
      [piAgentModule, piAiModule] = await Promise.all([
        import("@mariozechner/pi-agent"),
        import("@mariozechner/pi-ai")
      ]);
    } catch (error) {
      // Keep the original failure attached so callers can inspect its code/stack.
      throw new Error(
        `Failed to load pi-agent-sdk dependencies. Please install them:
npm install @mariozechner/pi-agent @mariozechner/pi-ai

Original error: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error }
      );
    }
  }
  return {
    Agent: piAgentModule.Agent,
    ProviderTransport: piAgentModule.ProviderTransport,
    getModel: piAiModule.getModel,
    getEnvApiKey: piAiModule.getEnvApiKey
  };
}
|
|
4299
|
+
/**
 * Evaluation provider that answers a single question through the pi agent SDK.
 *
 * The SDK is loaded lazily through `loadPiModules()`. Each `invoke` builds a
 * fresh agent with no tools and an empty message history.
 */
var PiAgentSdkProvider = class {
  id;
  kind = "pi-agent-sdk";
  targetName;
  supportsBatch = false;
  config;

  /**
   * @param {string} targetName - Target name; also used to build `id`.
   * @param {object} config - Resolved settings. This class reads `provider`,
   *   `model`, `apiKey`, `timeoutMs` and `systemPrompt` from it.
   */
  constructor(targetName, config) {
    this.id = `pi-agent-sdk:${targetName}`;
    this.targetName = targetName;
    this.config = config;
  }

  /**
   * Sends `request.question` to a new agent and returns the conversation.
   *
   * @param {object} request - Must carry `question`; an already-aborted
   *   `signal` makes the call fail before any SDK work starts.
   * @returns {Promise<{raw: object, outputMessages: object[], durationMs: number}>}
   * @throws {Error} When the signal is already aborted, when the SDK cannot be
   *   loaded, or when the prompt does not settle within the timeout.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("Pi agent SDK request was aborted before execution");
    }
    const { Agent, ProviderTransport, getModel, getEnvApiKey } = await loadPiModules();
    const startTime = Date.now();
    const providerName = this.config.provider ?? "anthropic";
    const modelId = this.config.model ?? "claude-sonnet-4-20250514";
    const model = getModel(providerName, modelId);
    const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
    const transport = new ProviderTransport({
      // An explicitly configured key wins over the SDK's environment lookup.
      getApiKey: async (provider) => {
        return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
      }
    });
    const agent = new Agent({
      initialState: {
        systemPrompt,
        model,
        tools: [],
        // No tools for simple Q&A
        messages: []
      },
      transport
    });
    const outputMessages = [];
    // NOTE(review): this value is tracked but never read below; the result is
    // built from agent.state.messages instead. Kept to preserve the subscription.
    let finalAssistantContent = "";
    const unsubscribe = agent.subscribe((event) => {
      if (event.type === "message_end") {
        const msg = event.message;
        if (msg.role === "assistant") {
          const content = extractTextContent2(msg.content);
          if (content) {
            finalAssistantContent = content;
          }
        }
      }
    });
    // Held outside the try block so `finally` can always cancel the timer.
    let timeoutHandle;
    try {
      const timeoutMs = this.config.timeoutMs ?? 12e4;
      const timeoutPromise = new Promise((_, reject) => {
        timeoutHandle = setTimeout(
          () => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
          timeoutMs
        );
      });
      await Promise.race([agent.prompt(request.question), timeoutPromise]);
      await agent.waitForIdle();
      const agentMessages = agent.state.messages;
      for (const msg of agentMessages) {
        outputMessages.push(convertAgentMessage(msg));
      }
      const durationMs = Date.now() - startTime;
      return {
        raw: {
          messages: agentMessages,
          systemPrompt,
          model: this.config.model,
          provider: this.config.provider
        },
        outputMessages,
        durationMs
      };
    } finally {
      // Without this the pending timer keeps the event loop alive for the full
      // timeout after every call, including successful ones.
      clearTimeout(timeoutHandle);
      unsubscribe();
    }
  }
};
|
|
4378
|
+
/**
 * Collapses message content into a single text string.
 *
 * A string is returned unchanged. For an array, every entry shaped like
 * `{ type: "text", text: <string> }` contributes its text, joined with
 * newlines. Anything else yields `undefined`, including an array without
 * text entries.
 *
 * @param {unknown} content - Raw message content.
 * @returns {string | undefined} The extracted text, if any.
 */
function extractTextContent2(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return void 0;
  }
  const pieces = content
    .filter(
      (entry) => Boolean(entry) && typeof entry === "object" && entry.type === "text" && typeof entry.text === "string"
    )
    .map((entry) => entry.text);
  return pieces.length === 0 ? void 0 : pieces.join("\n");
}
|
|
4397
|
+
/**
 * Normalizes one agent message into `{ role, content, toolCalls, timestamp }`.
 *
 * @param {unknown} message - A message taken from the agent state.
 * @returns {{role: string, content: (string|undefined), toolCalls?: object[], timestamp?: string}}
 *   A non-object input becomes `{ role: "unknown", content: String(message) }`.
 *   `toolCalls` is `undefined` when no tool calls are found. A numeric
 *   `timestamp` is converted to an ISO-8601 string and a string one is passed
 *   through unchanged.
 */
function convertAgentMessage(message) {
  if (!message || typeof message !== "object") {
    return { role: "unknown", content: String(message) };
  }
  const msg = message;
  const role = typeof msg.role === "string" ? msg.role : "unknown";
  const content = extractTextContent2(msg.content);
  const toolCalls = extractToolCalls2(msg.content);
  let timestamp;
  if (typeof msg.timestamp === "number") {
    // toISOString() throws RangeError on an invalid date (NaN, Infinity or an
    // out-of-range value); drop the timestamp rather than fail the whole run.
    const parsed = new Date(msg.timestamp);
    timestamp = Number.isNaN(parsed.getTime()) ? void 0 : parsed.toISOString();
  } else if (typeof msg.timestamp === "string") {
    timestamp = msg.timestamp;
  }
  return {
    role,
    content,
    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
    timestamp
  };
}
|
|
4413
|
+
/**
 * Pulls tool invocations out of array-shaped message content.
 *
 * Only entries shaped like `{ type: "tool_use", name: <string> }` are kept.
 * Each becomes `{ tool, input, id }`, where `id` is `undefined` unless the
 * entry's `id` is a string.
 *
 * @param {unknown} content - Raw message content.
 * @returns {Array<{tool: string, input: unknown, id: (string|undefined)}>}
 *   Empty when `content` is not an array or has no tool-use entries.
 */
function extractToolCalls2(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  const isToolUse = (entry) =>
    Boolean(entry) && typeof entry === "object" && entry.type === "tool_use" && typeof entry.name === "string";
  return content.filter(isToolUse).map((entry) => ({
    tool: entry.name,
    input: entry.input,
    id: typeof entry.id === "string" ? entry.id : void 0
  }));
}
|
|
4433
|
+
|
|
4275
4434
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
4276
4435
|
var import_node_child_process4 = require("child_process");
|
|
4277
4436
|
var import_node_crypto3 = require("crypto");
|
|
@@ -4787,8 +4946,8 @@ function convertPiMessage(message) {
|
|
|
4787
4946
|
if (typeof role !== "string") {
|
|
4788
4947
|
return void 0;
|
|
4789
4948
|
}
|
|
4790
|
-
const content =
|
|
4791
|
-
const toolCalls =
|
|
4949
|
+
const content = extractTextContent3(msg.content);
|
|
4950
|
+
const toolCalls = extractToolCalls3(msg.content);
|
|
4792
4951
|
const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
4793
4952
|
const metadata = {};
|
|
4794
4953
|
if (msg.api) metadata.api = msg.api;
|
|
@@ -4804,7 +4963,7 @@ function convertPiMessage(message) {
|
|
|
4804
4963
|
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
4805
4964
|
};
|
|
4806
4965
|
}
|
|
4807
|
-
function
|
|
4966
|
+
function extractTextContent3(content) {
|
|
4808
4967
|
if (typeof content === "string") {
|
|
4809
4968
|
return content;
|
|
4810
4969
|
}
|
|
@@ -4823,7 +4982,7 @@ function extractTextContent2(content) {
|
|
|
4823
4982
|
}
|
|
4824
4983
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4825
4984
|
}
|
|
4826
|
-
function
|
|
4985
|
+
function extractToolCalls3(content) {
|
|
4827
4986
|
if (!Array.isArray(content)) {
|
|
4828
4987
|
return [];
|
|
4829
4988
|
}
|
|
@@ -5227,6 +5386,15 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
5227
5386
|
providerBatching,
|
|
5228
5387
|
config: resolvePiCodingAgentConfig(parsed, env)
|
|
5229
5388
|
};
|
|
5389
|
+
case "pi-agent-sdk":
|
|
5390
|
+
return {
|
|
5391
|
+
kind: "pi-agent-sdk",
|
|
5392
|
+
name: parsed.name,
|
|
5393
|
+
judgeTarget: parsed.judge_target,
|
|
5394
|
+
workers: parsed.workers,
|
|
5395
|
+
providerBatching,
|
|
5396
|
+
config: resolvePiAgentSdkConfig(parsed, env)
|
|
5397
|
+
};
|
|
5230
5398
|
case "claude-code":
|
|
5231
5399
|
return {
|
|
5232
5400
|
kind: "claude-code",
|
|
@@ -5448,6 +5616,39 @@ function resolvePiCodingAgentConfig(target, env) {
|
|
|
5448
5616
|
systemPrompt
|
|
5449
5617
|
};
|
|
5450
5618
|
}
|
|
5619
|
+
/**
 * Builds the pi-agent-sdk provider config from a parsed target definition.
 *
 * Each setting accepts several spellings; the first one that is not
 * null/undefined wins:
 *   - provider: `pi_provider`, `piProvider`, `llm_provider`
 *   - model: `model`, `pi_model`, `piModel`
 *   - api key: `api_key`, `apiKey`
 *   - timeout: `timeout_seconds`, `timeoutSeconds`
 *   - system prompt: `system_prompt`, `systemPrompt`
 *
 * Provider and model are resolved with `allowLiteral: true`, the API key with
 * `allowLiteral: false`, and all three with `optionalEnv: true`. The system
 * prompt is trimmed and dropped when it is not a non-blank string.
 *
 * @param {object} target - Parsed target definition; `name` is used in the
 *   descriptions passed to the resolver helpers.
 * @param {object} env - Environment map handed to `resolveOptionalString`.
 * @returns {{provider: *, model: *, apiKey: *, timeoutMs: *, systemPrompt: (string|undefined)}}
 */
function resolvePiAgentSdkConfig(target, env) {
  const label = (suffix) => `${target.name} pi-agent-sdk ${suffix}`;

  const provider = resolveOptionalString(
    target.pi_provider ?? target.piProvider ?? target.llm_provider,
    env,
    label("provider"),
    { allowLiteral: true, optionalEnv: true }
  );
  const model = resolveOptionalString(
    target.model ?? target.pi_model ?? target.piModel,
    env,
    label("model"),
    { allowLiteral: true, optionalEnv: true }
  );
  const apiKey = resolveOptionalString(
    target.api_key ?? target.apiKey,
    env,
    label("api key"),
    { allowLiteral: false, optionalEnv: true }
  );
  const timeoutMs = resolveTimeoutMs(
    target.timeout_seconds ?? target.timeoutSeconds,
    label("timeout")
  );

  const rawPrompt = target.system_prompt ?? target.systemPrompt;
  const trimmedPrompt = typeof rawPrompt === "string" ? rawPrompt.trim() : "";
  const systemPrompt = trimmedPrompt.length > 0 ? trimmedPrompt : void 0;

  return { provider, model, apiKey, timeoutMs, systemPrompt };
}
|
|
5451
5652
|
function resolveClaudeCodeConfig(target, env) {
|
|
5452
5653
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
5453
5654
|
const modelSource = target.model;
|
|
@@ -6106,6 +6307,8 @@ function createProvider(target) {
|
|
|
6106
6307
|
return new CodexProvider(target.name, target.config);
|
|
6107
6308
|
case "pi-coding-agent":
|
|
6108
6309
|
return new PiCodingAgentProvider(target.name, target.config);
|
|
6310
|
+
case "pi-agent-sdk":
|
|
6311
|
+
return new PiAgentSdkProvider(target.name, target.config);
|
|
6109
6312
|
case "claude-code":
|
|
6110
6313
|
return new ClaudeCodeProvider(target.name, target.config);
|
|
6111
6314
|
case "mock":
|
|
@@ -6273,12 +6476,6 @@ function toSnakeCase(str) {
|
|
|
6273
6476
|
}
|
|
6274
6477
|
return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
6275
6478
|
}
|
|
6276
|
-
/**
 * Converts a snake_case identifier to camelCase.
 *
 * A string that starts with an uppercase ASCII letter is returned untouched.
 * Otherwise every `_x` pair (x being a lowercase letter or digit) is replaced
 * by the uppercased `x`.
 *
 * @param {string} str - Identifier to convert.
 * @returns {string} The converted identifier.
 */
function toCamelCase(str) {
  const startsUppercase = /^[A-Z]/.test(str);
  return startsUppercase ? str : str.replace(/_([a-z0-9])/g, (_match, ch) => ch.toUpperCase());
}
|
|
6282
6479
|
function toSnakeCaseDeep(obj) {
|
|
6283
6480
|
if (obj === null || obj === void 0) {
|
|
6284
6481
|
return obj;
|
|
@@ -6296,23 +6493,6 @@ function toSnakeCaseDeep(obj) {
|
|
|
6296
6493
|
}
|
|
6297
6494
|
return obj;
|
|
6298
6495
|
}
|
|
6299
|
-
/**
 * Recursively renames object keys with `toCamelCase`.
 *
 * Arrays are mapped element by element, objects are rebuilt with converted
 * keys and converted values, and everything else (primitives, null,
 * undefined) is returned as-is.
 *
 * @param {unknown} obj - Value to convert.
 * @returns {unknown} A converted copy for arrays/objects, otherwise the input.
 */
function toCamelCaseDeep(obj) {
  if (obj === null || obj === void 0 || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((element) => toCamelCaseDeep(element));
  }
  const converted = {};
  Object.entries(obj).forEach(([rawKey, rawValue]) => {
    converted[toCamelCase(rawKey)] = toCamelCaseDeep(rawValue);
  });
  return converted;
}
|
|
6316
6496
|
|
|
6317
6497
|
// src/evaluation/providers/types.ts
|
|
6318
6498
|
var AGENT_PROVIDER_KINDS = [
|
|
@@ -9084,17 +9264,6 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
|
9084
9264
|
return parts.join("\n");
|
|
9085
9265
|
}
|
|
9086
9266
|
|
|
9087
|
-
// src/evaluation/code-judge-sdk.ts
|
|
9088
|
-
var import_node_fs7 = require("fs");
|
|
9089
|
-
/**
 * Parses a JSON payload and converts its keys with `toCamelCaseDeep`.
 *
 * @param {string} payload - JSON text.
 * @returns {unknown} The parsed value after key conversion.
 * @throws {SyntaxError} When `payload` is not valid JSON (from `JSON.parse`).
 */
function parseCodeJudgePayload(payload) {
  return toCamelCaseDeep(JSON.parse(payload));
}
|
|
9093
|
-
/**
 * Reads file descriptor 0 (stdin) synchronously as UTF-8 and hands the text to
 * `parseCodeJudgePayload`.
 *
 * @returns {unknown} Whatever `parseCodeJudgePayload` returns for the input.
 */
function readCodeJudgePayload() {
  const { readFileSync } = import_node_fs7;
  const rawInput = readFileSync(0, "utf8");
  return parseCodeJudgePayload(rawInput);
}
|
|
9097
|
-
|
|
9098
9267
|
// src/index.ts
|
|
9099
9268
|
function createAgentKernel() {
|
|
9100
9269
|
return { status: "stub" };
|
|
@@ -9138,8 +9307,6 @@ function createAgentKernel() {
|
|
|
9138
9307
|
loadEvalCases,
|
|
9139
9308
|
mergeExecutionMetrics,
|
|
9140
9309
|
normalizeLineEndings,
|
|
9141
|
-
parseCodeJudgePayload,
|
|
9142
|
-
readCodeJudgePayload,
|
|
9143
9310
|
readJsonFile,
|
|
9144
9311
|
readTargetDefinitions,
|
|
9145
9312
|
readTestSuiteMetadata,
|