@agentv/core 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -67,8 +67,6 @@ __export(index_exports, {
67
67
  loadEvalCases: () => loadEvalCases,
68
68
  mergeExecutionMetrics: () => mergeExecutionMetrics,
69
69
  normalizeLineEndings: () => normalizeLineEndings,
70
- parseCodeJudgePayload: () => parseCodeJudgePayload,
71
- readCodeJudgePayload: () => readCodeJudgePayload,
72
70
  readJsonFile: () => readJsonFile,
73
71
  readTargetDefinitions: () => readTargetDefinitions,
74
72
  readTestSuiteMetadata: () => readTestSuiteMetadata,
@@ -4272,6 +4270,167 @@ var MockProvider = class {
4272
4270
  }
4273
4271
  };
4274
4272
 
4273
+ // src/evaluation/providers/pi-agent-sdk.ts
4274
// Cached handles to the optional pi SDK packages. Both stay null until the
// first successful load, after which later calls reuse the loaded modules.
var piAgentModule = null;
var piAiModule = null;

/**
 * Loads `@mariozechner/pi-agent` and `@mariozechner/pi-ai` on first use and
 * returns the four entry points the pi-agent-sdk provider needs.
 *
 * The packages are imported dynamically so they are only required when this
 * provider is actually used.
 *
 * @returns {Promise<{Agent: *, ProviderTransport: *, getModel: *, getEnvApiKey: *}>}
 * @throws {Error} When either package cannot be imported. The message carries
 *   install instructions and the underlying failure is attached as `cause`
 *   so its stack and error code are not lost.
 */
async function loadPiModules() {
  if (!piAgentModule || !piAiModule) {
    try {
      [piAgentModule, piAiModule] = await Promise.all([
        import("@mariozechner/pi-agent"),
        import("@mariozechner/pi-ai")
      ]);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(
        [
          "Failed to load pi-agent-sdk dependencies. Please install them:",
          "npm install @mariozechner/pi-agent @mariozechner/pi-ai",
          "",
          `Original error: ${reason}`
        ].join("\n"),
        { cause: error }
      );
    }
  }
  return {
    Agent: piAgentModule.Agent,
    ProviderTransport: piAgentModule.ProviderTransport,
    getModel: piAiModule.getModel,
    getEnvApiKey: piAiModule.getEnvApiKey
  };
}
4299
/**
 * Evaluation provider that sends a single question to an in-process pi agent
 * (built from the modules returned by `loadPiModules`) and returns the
 * resulting conversation.
 */
var PiAgentSdkProvider = class {
  id;
  kind = "pi-agent-sdk";
  targetName;
  supportsBatch = false;
  config;

  /**
   * @param {string} targetName - Target name; also used to build `id`.
   * @param {object} config - Resolved settings. This class reads `provider`,
   *   `model`, `apiKey`, `timeoutMs` and `systemPrompt`; each one is optional.
   */
  constructor(targetName, config) {
    this.id = `pi-agent-sdk:${targetName}`;
    this.targetName = targetName;
    this.config = config;
  }

  /**
   * Runs one prompt through a freshly constructed agent.
   *
   * @param {object} request - Reads `request.question` and, if present,
   *   `request.signal.aborted` (checked once, before any work starts).
   * @returns {Promise<{raw: object, outputMessages: Array, durationMs: number}>}
   * @throws {Error} If the request was already aborted, if the pi modules
   *   cannot be loaded, if the run exceeds the timeout, or if the agent fails.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("Pi agent SDK request was aborted before execution");
    }
    const { Agent, ProviderTransport, getModel, getEnvApiKey } = await loadPiModules();
    const startTime = Date.now();
    const providerName = this.config.provider ?? "anthropic";
    const modelId = this.config.model ?? "claude-sonnet-4-20250514";
    const model = getModel(providerName, modelId);
    const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
    const transport = new ProviderTransport({
      // An explicitly configured key wins over the environment lookup.
      getApiKey: async (provider) => {
        return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
      }
    });
    const agent = new Agent({
      initialState: {
        systemPrompt,
        model,
        tools: [],
        // No tools for simple Q&A
        messages: []
      },
      transport
    });
    const outputMessages = [];
    // NOTE(review): the last assistant text is captured here but is not part
    // of the returned payload; the result is built from agent.state.messages.
    let finalAssistantContent = "";
    const unsubscribe = agent.subscribe((event) => {
      if (event.type === "message_end") {
        const msg = event.message;
        if (msg.role === "assistant") {
          const content = extractTextContent2(msg.content);
          if (content) {
            finalAssistantContent = content;
          }
        }
      }
    });
    let timeoutHandle;
    try {
      const timeoutMs = this.config.timeoutMs ?? 12e4;
      const timeoutPromise = new Promise((_, reject) => {
        timeoutHandle = setTimeout(
          () => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
          timeoutMs
        );
      });
      // The timeout bounds the whole run, including the wait for the agent to
      // go idle, so a stalled agent cannot hang the caller indefinitely.
      const run = (async () => {
        await agent.prompt(request.question);
        await agent.waitForIdle();
      })();
      await Promise.race([run, timeoutPromise]);
      const agentMessages = agent.state.messages;
      for (const msg of agentMessages) {
        outputMessages.push(convertAgentMessage(msg));
      }
      const durationMs = Date.now() - startTime;
      return {
        raw: {
          messages: agentMessages,
          systemPrompt,
          // Reports the configured values, which are undefined when the
          // built-in defaults above were used.
          model: this.config.model,
          provider: this.config.provider
        },
        outputMessages,
        durationMs
      };
    } finally {
      // Always release the timer; otherwise every completed call leaves a live
      // timer that keeps the event loop (and the process) alive until it fires.
      clearTimeout(timeoutHandle);
      unsubscribe();
    }
  }
};
4378
/**
 * Pulls the plain text out of a message `content` value.
 *
 * @param {*} content - A string, or an array of content parts.
 * @returns {string|undefined} The string itself; or the `text` of every
 *   `{ type: "text", text: string }` part joined with newlines; or undefined
 *   when the value is neither a string nor an array, or holds no text parts.
 */
function extractTextContent2(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return void 0;
  }
  const isTextPart = (part) =>
    Boolean(part) &&
    typeof part === "object" &&
    part.type === "text" &&
    typeof part.text === "string";
  const pieces = content.filter(isTextPart).map((part) => part.text);
  if (pieces.length === 0) {
    return void 0;
  }
  return pieces.join("\n");
}
4397
/**
 * Converts one agent message into the provider's output-message shape.
 *
 * @param {*} message - A message object. Anything that is not an object is
 *   stringified into the content of a message with role "unknown".
 * @returns {{role: string, content: (string|undefined), toolCalls: (Array|undefined), timestamp: (string|undefined)}}
 *   `toolCalls` is undefined when the message has none. `timestamp` is an ISO
 *   string for a valid numeric timestamp, the value itself for a string
 *   timestamp, and undefined otherwise.
 */
function convertAgentMessage(message) {
  if (!message || typeof message !== "object") {
    return { role: "unknown", content: String(message) };
  }
  const msg = message;
  const role = typeof msg.role === "string" ? msg.role : "unknown";
  const content = extractTextContent2(msg.content);
  const toolCalls = extractToolCalls2(msg.content);
  let timestamp;
  if (typeof msg.timestamp === "number") {
    // toISOString() throws a RangeError for NaN, Infinity and out-of-range
    // values; drop such a timestamp rather than fail the whole conversion.
    const parsed = new Date(msg.timestamp);
    timestamp = Number.isNaN(parsed.getTime()) ? void 0 : parsed.toISOString();
  } else if (typeof msg.timestamp === "string") {
    timestamp = msg.timestamp;
  }
  return {
    role,
    content,
    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
    timestamp
  };
}
4413
/**
 * Collects the tool invocations found in a message `content` array.
 *
 * @param {*} content - Message content; anything that is not an array yields
 *   an empty list.
 * @returns {Array<{tool: string, input: *, id: (string|undefined)}>} One entry
 *   per `{ type: "tool_use", name: string }` part, in order. `id` is kept only
 *   when it is a string.
 */
function extractToolCalls2(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  const isToolUse = (part) =>
    Boolean(part) &&
    typeof part === "object" &&
    part.type === "tool_use" &&
    typeof part.name === "string";
  return content.filter(isToolUse).map((part) => ({
    tool: part.name,
    input: part.input,
    id: typeof part.id === "string" ? part.id : void 0
  }));
}
4433
+
4275
4434
  // src/evaluation/providers/pi-coding-agent.ts
4276
4435
  var import_node_child_process4 = require("child_process");
4277
4436
  var import_node_crypto3 = require("crypto");
@@ -4787,8 +4946,8 @@ function convertPiMessage(message) {
4787
4946
  if (typeof role !== "string") {
4788
4947
  return void 0;
4789
4948
  }
4790
- const content = extractTextContent2(msg.content);
4791
- const toolCalls = extractToolCalls2(msg.content);
4949
+ const content = extractTextContent3(msg.content);
4950
+ const toolCalls = extractToolCalls3(msg.content);
4792
4951
  const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
4793
4952
  const metadata = {};
4794
4953
  if (msg.api) metadata.api = msg.api;
@@ -4804,7 +4963,7 @@ function convertPiMessage(message) {
4804
4963
  metadata: Object.keys(metadata).length > 0 ? metadata : void 0
4805
4964
  };
4806
4965
  }
4807
- function extractTextContent2(content) {
4966
+ function extractTextContent3(content) {
4808
4967
  if (typeof content === "string") {
4809
4968
  return content;
4810
4969
  }
@@ -4823,7 +4982,7 @@ function extractTextContent2(content) {
4823
4982
  }
4824
4983
  return textParts.length > 0 ? textParts.join("\n") : void 0;
4825
4984
  }
4826
- function extractToolCalls2(content) {
4985
+ function extractToolCalls3(content) {
4827
4986
  if (!Array.isArray(content)) {
4828
4987
  return [];
4829
4988
  }
@@ -5227,6 +5386,15 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
5227
5386
  providerBatching,
5228
5387
  config: resolvePiCodingAgentConfig(parsed, env)
5229
5388
  };
5389
+ case "pi-agent-sdk":
5390
+ return {
5391
+ kind: "pi-agent-sdk",
5392
+ name: parsed.name,
5393
+ judgeTarget: parsed.judge_target,
5394
+ workers: parsed.workers,
5395
+ providerBatching,
5396
+ config: resolvePiAgentSdkConfig(parsed, env)
5397
+ };
5230
5398
  case "claude-code":
5231
5399
  return {
5232
5400
  kind: "claude-code",
@@ -5448,6 +5616,39 @@ function resolvePiCodingAgentConfig(target, env) {
5448
5616
  systemPrompt
5449
5617
  };
5450
5618
  }
5619
/**
 * Builds the pi-agent-sdk provider config from a parsed target definition.
 *
 * Each setting accepts several key spellings; the first non-nullish one wins:
 * provider (`pi_provider`, `piProvider`, `llm_provider`), model (`model`,
 * `pi_model`, `piModel`), API key (`api_key`, `apiKey`), timeout
 * (`timeout_seconds`, `timeoutSeconds`) and system prompt (`system_prompt`,
 * `systemPrompt`).
 *
 * @param {object} target - Parsed target definition; `target.name` is used in
 *   the descriptions handed to the resolvers.
 * @param {object} env - Environment map passed through to `resolveOptionalString`.
 * @returns {{provider: *, model: *, apiKey: *, timeoutMs: *, systemPrompt: (string|undefined)}}
 *   `systemPrompt` is the trimmed prompt, or undefined when it is missing,
 *   not a string, or blank.
 */
function resolvePiAgentSdkConfig(target, env) {
  const rawProvider = target.pi_provider ?? target.piProvider ?? target.llm_provider;
  const rawModel = target.model ?? target.pi_model ?? target.piModel;
  const rawApiKey = target.api_key ?? target.apiKey;
  const rawTimeout = target.timeout_seconds ?? target.timeoutSeconds;
  const rawPrompt = target.system_prompt ?? target.systemPrompt;

  const config = {};
  config.provider = resolveOptionalString(rawProvider, env, `${target.name} pi-agent-sdk provider`, {
    allowLiteral: true,
    optionalEnv: true
  });
  config.model = resolveOptionalString(rawModel, env, `${target.name} pi-agent-sdk model`, {
    allowLiteral: true,
    optionalEnv: true
  });
  // The API key is resolved with allowLiteral disabled, unlike provider/model.
  config.apiKey = resolveOptionalString(rawApiKey, env, `${target.name} pi-agent-sdk api key`, {
    allowLiteral: false,
    optionalEnv: true
  });
  config.timeoutMs = resolveTimeoutMs(rawTimeout, `${target.name} pi-agent-sdk timeout`);

  const trimmedPrompt = typeof rawPrompt === "string" ? rawPrompt.trim() : "";
  config.systemPrompt = trimmedPrompt.length > 0 ? trimmedPrompt : void 0;
  return config;
}
5451
5652
  function resolveClaudeCodeConfig(target, env) {
5452
5653
  const executableSource = target.executable ?? target.command ?? target.binary;
5453
5654
  const modelSource = target.model;
@@ -6106,6 +6307,8 @@ function createProvider(target) {
6106
6307
  return new CodexProvider(target.name, target.config);
6107
6308
  case "pi-coding-agent":
6108
6309
  return new PiCodingAgentProvider(target.name, target.config);
6310
+ case "pi-agent-sdk":
6311
+ return new PiAgentSdkProvider(target.name, target.config);
6109
6312
  case "claude-code":
6110
6313
  return new ClaudeCodeProvider(target.name, target.config);
6111
6314
  case "mock":
@@ -6273,12 +6476,6 @@ function toSnakeCase(str) {
6273
6476
  }
6274
6477
  return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
6275
6478
  }
6276
- function toCamelCase(str) {
6277
- if (/^[A-Z]/.test(str)) {
6278
- return str;
6279
- }
6280
- return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());
6281
- }
6282
6479
  function toSnakeCaseDeep(obj) {
6283
6480
  if (obj === null || obj === void 0) {
6284
6481
  return obj;
@@ -6296,23 +6493,6 @@ function toSnakeCaseDeep(obj) {
6296
6493
  }
6297
6494
  return obj;
6298
6495
  }
6299
- function toCamelCaseDeep(obj) {
6300
- if (obj === null || obj === void 0) {
6301
- return obj;
6302
- }
6303
- if (Array.isArray(obj)) {
6304
- return obj.map((item) => toCamelCaseDeep(item));
6305
- }
6306
- if (typeof obj === "object") {
6307
- const result = {};
6308
- for (const [key, value] of Object.entries(obj)) {
6309
- const camelKey = toCamelCase(key);
6310
- result[camelKey] = toCamelCaseDeep(value);
6311
- }
6312
- return result;
6313
- }
6314
- return obj;
6315
- }
6316
6496
 
6317
6497
  // src/evaluation/providers/types.ts
6318
6498
  var AGENT_PROVIDER_KINDS = [
@@ -9084,17 +9264,6 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
9084
9264
  return parts.join("\n");
9085
9265
  }
9086
9266
 
9087
- // src/evaluation/code-judge-sdk.ts
9088
- var import_node_fs7 = require("fs");
9089
- function parseCodeJudgePayload(payload) {
9090
- const parsed = JSON.parse(payload);
9091
- return toCamelCaseDeep(parsed);
9092
- }
9093
- function readCodeJudgePayload() {
9094
- const stdin = (0, import_node_fs7.readFileSync)(0, "utf8");
9095
- return parseCodeJudgePayload(stdin);
9096
- }
9097
-
9098
9267
  // src/index.ts
9099
9268
  function createAgentKernel() {
9100
9269
  return { status: "stub" };
@@ -9138,8 +9307,6 @@ function createAgentKernel() {
9138
9307
  loadEvalCases,
9139
9308
  mergeExecutionMetrics,
9140
9309
  normalizeLineEndings,
9141
- parseCodeJudgePayload,
9142
- readCodeJudgePayload,
9143
9310
  readJsonFile,
9144
9311
  readTargetDefinitions,
9145
9312
  readTestSuiteMetadata,