agentv 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -59,6 +59,14 @@ bun run build
59
59
  bun test
60
60
  ```
61
61
 
62
+ 5. (Optional) Install example dependencies:
63
+
64
+ ```bash
65
+ bun run examples:install
66
+ ```
67
+
68
+ This step is required if you want to run the examples in the `examples/` directory, as they are self-contained packages with their own dependencies.
69
+
62
70
  You are now ready to start development. The monorepo contains:
63
71
 
64
72
  - `packages/core/` - Core evaluation engine
@@ -77,9 +85,8 @@ You are now ready to start development. The monorepo contains:
77
85
 
78
86
  ## Quick Start
79
87
 
80
- You can use the following examples as a starting point.
81
- - [Simple Example](docs/examples/simple/README.md): A minimal working example to help you get started fast.
82
- - [Showcase](docs/examples/showcase/README.md): A collection of advanced use cases and real-world agent evaluation scenarios.
88
+ You can use the following examples as a starting point:
89
+ - [Examples](examples/README.md): Feature demonstrations and real-world showcase examples
83
90
 
84
91
  ### Validating Eval Files
85
92
 
@@ -373,7 +380,7 @@ agentv generate rubrics evals/my-eval.yaml --target openai:gpt-4o
373
380
  - `borderline`: Score ≥ 0.6 and all required rubrics met
374
381
  - `fail`: Score < 0.6 or any required rubric failed
375
382
 
376
- For complete examples and detailed patterns, see [examples/features/evals/rubric/](examples/features/evals/rubric/).
383
+ For complete examples and detailed patterns, see [examples/features/rubric/](examples/features/rubric/).
377
384
 
378
385
  ## Advanced Configuration
379
386
 
@@ -375,7 +375,7 @@ var compareCommand = command({
375
375
  import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
376
376
  import path15 from "node:path";
377
377
 
378
- // ../../packages/core/dist/chunk-IBTKEEOT.js
378
+ // ../../packages/core/dist/chunk-KDEP4I7G.js
379
379
  import { constants } from "node:fs";
380
380
  import { access, readFile } from "node:fs/promises";
381
381
  import path from "node:path";
@@ -4422,7 +4422,7 @@ var coerce = {
4422
4422
  };
4423
4423
  var NEVER = INVALID;
4424
4424
 
4425
- // ../../packages/core/dist/chunk-IBTKEEOT.js
4425
+ // ../../packages/core/dist/chunk-KDEP4I7G.js
4426
4426
  async function fileExists(filePath) {
4427
4427
  try {
4428
4428
  await access(filePath, constants.F_OK);
@@ -4802,6 +4802,15 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
4802
4802
  providerBatching,
4803
4803
  config: resolvePiCodingAgentConfig(parsed, env)
4804
4804
  };
4805
+ case "pi-agent-sdk":
4806
+ return {
4807
+ kind: "pi-agent-sdk",
4808
+ name: parsed.name,
4809
+ judgeTarget: parsed.judge_target,
4810
+ workers: parsed.workers,
4811
+ providerBatching,
4812
+ config: resolvePiAgentSdkConfig(parsed, env)
4813
+ };
4805
4814
  case "claude-code":
4806
4815
  return {
4807
4816
  kind: "claude-code",
@@ -5023,6 +5032,39 @@ function resolvePiCodingAgentConfig(target, env) {
5023
5032
  systemPrompt
5024
5033
  };
5025
5034
  }
5035
/**
 * Builds the runtime configuration for a "pi-agent-sdk" target.
 *
 * Every setting is taken from the first alias on `target` that is not
 * null/undefined and then handed to the shared resolver helpers.
 *
 * @param {object} target - Parsed target definition; snake_case and camelCase keys are both read.
 * @param {object} env - Environment map forwarded to `resolveOptionalString`.
 * @returns {{provider: *, model: *, apiKey: *, timeoutMs: *, systemPrompt: (string|undefined)}}
 *   Resolved settings; `systemPrompt` is the trimmed prompt, or undefined when blank or not a string.
 */
function resolvePiAgentSdkConfig(target, env) {
  const rawProvider = target.pi_provider ?? target.piProvider ?? target.llm_provider;
  const rawModel = target.model ?? target.pi_model ?? target.piModel;
  const rawApiKey = target.api_key ?? target.apiKey;
  const rawTimeout = target.timeout_seconds ?? target.timeoutSeconds;
  const rawSystemPrompt = target.system_prompt ?? target.systemPrompt;

  // Description string passed to the resolvers alongside each value.
  const describe = (setting) => `${target.name} pi-agent-sdk ${setting}`;

  const provider = resolveOptionalString(rawProvider, env, describe("provider"), {
    allowLiteral: true,
    optionalEnv: true
  });
  const model = resolveOptionalString(rawModel, env, describe("model"), {
    allowLiteral: true,
    optionalEnv: true
  });
  // Unlike provider and model, the API key is resolved with allowLiteral disabled.
  const apiKey = resolveOptionalString(rawApiKey, env, describe("api key"), {
    allowLiteral: false,
    optionalEnv: true
  });
  const timeoutMs = resolveTimeoutMs(rawTimeout, describe("timeout"));

  let systemPrompt;
  if (typeof rawSystemPrompt === "string") {
    const trimmedPrompt = rawSystemPrompt.trim();
    if (trimmedPrompt.length > 0) {
      systemPrompt = trimmedPrompt;
    }
  }

  return { provider, model, apiKey, timeoutMs, systemPrompt };
}
5026
5068
  function resolveClaudeCodeConfig(target, env) {
5027
5069
  const executableSource = target.executable ?? target.command ?? target.binary;
5028
5070
  const modelSource = target.model;
@@ -5331,6 +5373,7 @@ var KNOWN_PROVIDERS = [
5331
5373
  "gemini",
5332
5374
  "codex",
5333
5375
  "pi-coding-agent",
5376
+ "pi-agent-sdk",
5334
5377
  "claude-code",
5335
5378
  "cli",
5336
5379
  "mock",
@@ -38948,6 +38991,165 @@ var MockProvider = class {
38948
38991
  return this.delayMs;
38949
38992
  }
38950
38993
  };
38994
// Lazily imported SDK modules; both stay null until one import attempt succeeds for both.
var piAgentModule = null;
var piAiModule = null;

/**
 * Dynamically imports the optional pi agent SDK packages and returns the
 * pieces the provider needs.
 *
 * The two modules are imported together on first use and reused afterwards.
 * If either import fails, nothing is cached, so a later call tries again.
 *
 * @returns {Promise<{Agent: *, ProviderTransport: *, getModel: *, getEnvApiKey: *}>}
 *   `Agent` and `ProviderTransport` from "@mariozechner/pi-agent";
 *   `getModel` and `getEnvApiKey` from "@mariozechner/pi-ai".
 * @throws {Error} When either package cannot be imported. The message carries
 *   install instructions and the underlying failure is attached as `cause`.
 */
async function loadPiModules() {
  if (!piAgentModule || !piAiModule) {
    try {
      [piAgentModule, piAiModule] = await Promise.all([
        import("@mariozechner/pi-agent"),
        import("@mariozechner/pi-ai")
      ]);
    } catch (error40) {
      // Keep the original failure reachable (stack, error code) instead of
      // flattening it into the message only.
      throw new Error(
        `Failed to load pi-agent-sdk dependencies. Please install them:
npm install @mariozechner/pi-agent @mariozechner/pi-ai

Original error: ${error40 instanceof Error ? error40.message : String(error40)}`,
        { cause: error40 }
      );
    }
  }
  return {
    Agent: piAgentModule.Agent,
    ProviderTransport: piAgentModule.ProviderTransport,
    getModel: piAiModule.getModel,
    getEnvApiKey: piAiModule.getEnvApiKey
  };
}
39019
/**
 * Provider that sends a single question to an in-process pi agent (created
 * from the lazily loaded SDK modules) and returns the resulting conversation.
 */
var PiAgentSdkProvider = class {
  id;
  kind = "pi-agent-sdk";
  targetName;
  supportsBatch = false;
  config;

  /**
   * @param {string} targetName - Target name; also used to build `id`.
   * @param {object} config2 - Resolved settings read by `invoke`
   *   (`provider`, `model`, `apiKey`, `timeoutMs`, `systemPrompt`).
   */
  constructor(targetName, config2) {
    this.id = `pi-agent-sdk:${targetName}`;
    this.targetName = targetName;
    this.config = config2;
  }

  /**
   * Runs `request.question` through a fresh agent with no tools.
   *
   * @param {object} request - Must carry `question`; an optional `signal`
   *   is checked once, before any work starts.
   * @returns {Promise<{raw: object, outputMessages: Array, durationMs: number}>}
   *   `raw` holds the agent's messages plus the system prompt and the
   *   configured (not defaulted) model and provider; `outputMessages` holds
   *   each agent message passed through `convertAgentMessage`.
   * @throws {Error} If the signal is already aborted, if the SDK cannot be
   *   loaded, if the agent fails, or if the run exceeds the timeout.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("Pi agent SDK request was aborted before execution");
    }
    const { Agent, ProviderTransport, getModel, getEnvApiKey } = await loadPiModules();
    const startTime = Date.now();
    const providerName = this.config.provider ?? "anthropic";
    const modelId = this.config.model ?? "claude-sonnet-4-20250514";
    const model = getModel(providerName, modelId);
    const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
    const transport = new ProviderTransport({
      // An explicitly configured key wins; otherwise defer to the SDK's env lookup.
      getApiKey: async (provider) => {
        return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
      }
    });
    const agent = new Agent({
      initialState: {
        systemPrompt,
        model,
        tools: [],
        // No tools for simple Q&A
        messages: []
      },
      transport
    });

    const timeoutMs = this.config.timeoutMs ?? 120000;
    let timeoutHandle;
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(
        () => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
        timeoutMs
      );
    });
    // The time limit covers the whole run, including the wait for idle.
    const run = (async () => {
      await agent.prompt(request.question);
      await agent.waitForIdle();
    })();

    try {
      await Promise.race([run, timeoutPromise]);
      const agentMessages = agent.state.messages;
      const outputMessages = [];
      for (const msg of agentMessages) {
        outputMessages.push(convertAgentMessage(msg));
      }
      const durationMs = Date.now() - startTime;
      return {
        raw: {
          messages: agentMessages,
          systemPrompt,
          model: this.config.model,
          provider: this.config.provider
        },
        outputMessages,
        durationMs
      };
    } finally {
      // Always cancel the timer; a pending timer would otherwise keep the
      // process alive for up to timeoutMs after the invocation has settled.
      clearTimeout(timeoutHandle);
    }
  }
};
39098
/**
 * Collapses message content into a single string.
 *
 * @param {*} content - A plain string, or an array of content parts.
 * @returns {string|undefined} The string itself; for arrays, the `text` of
 *   every `{ type: "text", text: string }` part joined with newlines;
 *   undefined when the array has no such part or the input is neither shape.
 */
function extractTextContent22(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return void 0;
  }
  const textParts = content
    .filter(
      (part) => part && typeof part === "object" && part.type === "text" && typeof part.text === "string"
    )
    .map((part) => part.text);
  return textParts.length > 0 ? textParts.join("\n") : void 0;
}
39117
/**
 * Normalizes one agent message into `{ role, content, toolCalls, timestamp }`.
 *
 * @param {*} message - Agent message; non-object values are tolerated.
 * @returns {{role: string, content: (string|undefined), toolCalls: (Array|undefined), timestamp: (string|undefined)}}
 *   For non-objects: `{ role: "unknown", content: String(message) }`.
 *   Otherwise `role` falls back to "unknown" when it is not a string,
 *   `toolCalls` is undefined when none are found, and `timestamp` is an ISO
 *   string for a valid numeric timestamp, the value itself for a string
 *   timestamp, and undefined in every other case.
 */
function convertAgentMessage(message) {
  if (!message || typeof message !== "object") {
    return { role: "unknown", content: String(message) };
  }
  const role = typeof message.role === "string" ? message.role : "unknown";
  const content = extractTextContent22(message.content);
  const toolCalls = extractToolCalls2(message.content);

  let timestamp;
  if (typeof message.timestamp === "number") {
    const date = new Date(message.timestamp);
    // toISOString() throws a RangeError on an invalid date (NaN, Infinity,
    // out-of-range values); drop such timestamps instead of failing the
    // whole conversion.
    timestamp = Number.isNaN(date.getTime()) ? void 0 : date.toISOString();
  } else if (typeof message.timestamp === "string") {
    timestamp = message.timestamp;
  }

  return {
    role,
    content,
    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
    timestamp
  };
}
39133
/**
 * Collects tool invocations from array-shaped message content.
 *
 * @param {*} content - Message content; anything other than an array yields no calls.
 * @returns {Array<{tool: string, input: *, id: (string|undefined)}>} One entry
 *   per `{ type: "tool_use", name: string }` part, in order; `id` is kept
 *   only when it is a string.
 */
function extractToolCalls2(content) {
  if (!Array.isArray(content)) {
    return [];
  }
  // NOTE(review): only parts typed "tool_use" are recognised; confirm this matches
  // the part type the pi SDK actually emits for tool calls.
  return content
    .filter(
      (part) => part && typeof part === "object" && part.type === "tool_use" && typeof part.name === "string"
    )
    .map((part) => ({
      tool: part.name,
      input: part.input,
      id: typeof part.id === "string" ? part.id : void 0
    }));
}
38951
39153
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
38952
39154
  var GLOBAL_SUBSCRIBERS_KEY3 = Symbol.for("agentv.piLogSubscribers");
38953
39155
  function getPiLogStore() {
@@ -39445,8 +39647,8 @@ function convertPiMessage(message) {
39445
39647
  if (typeof role !== "string") {
39446
39648
  return void 0;
39447
39649
  }
39448
- const content = extractTextContent22(msg.content);
39449
- const toolCalls = extractToolCalls2(msg.content);
39650
+ const content = extractTextContent3(msg.content);
39651
+ const toolCalls = extractToolCalls3(msg.content);
39450
39652
  const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
39451
39653
  const metadata = {};
39452
39654
  if (msg.api) metadata.api = msg.api;
@@ -39462,7 +39664,7 @@ function convertPiMessage(message) {
39462
39664
  metadata: Object.keys(metadata).length > 0 ? metadata : void 0
39463
39665
  };
39464
39666
  }
39465
- function extractTextContent22(content) {
39667
+ function extractTextContent3(content) {
39466
39668
  if (typeof content === "string") {
39467
39669
  return content;
39468
39670
  }
@@ -39481,7 +39683,7 @@ function extractTextContent22(content) {
39481
39683
  }
39482
39684
  return textParts.length > 0 ? textParts.join("\n") : void 0;
39483
39685
  }
39484
- function extractToolCalls2(content) {
39686
+ function extractToolCalls3(content) {
39485
39687
  if (!Array.isArray(content)) {
39486
39688
  return [];
39487
39689
  }
@@ -39955,6 +40157,8 @@ function createProvider(target) {
39955
40157
  return new CodexProvider(target.name, target.config);
39956
40158
  case "pi-coding-agent":
39957
40159
  return new PiCodingAgentProvider(target.name, target.config);
40160
+ case "pi-agent-sdk":
40161
+ return new PiAgentSdkProvider(target.name, target.config);
39958
40162
  case "claude-code":
39959
40163
  return new ClaudeCodeProvider(target.name, target.config);
39960
40164
  case "mock":
@@ -45694,4 +45898,4 @@ export {
45694
45898
  app,
45695
45899
  runCli
45696
45900
  };
45697
- //# sourceMappingURL=chunk-6SHT2QS6.js.map
45901
+ //# sourceMappingURL=chunk-5AJ7DFUO.js.map