npm - @agentv/core - Versions diffs - 1.3.1 → 1.5.0 - Mend

@agentv/core 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +77 -77
package/dist/{chunk-4A6L2F6L.js → chunk-E2VSU4WZ.js} +282 -81
package/dist/chunk-E2VSU4WZ.js.map +1 -0
package/dist/evaluation/validation/index.cjs +82 -67
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +3 -68
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +1668 -489
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +279 -77
package/dist/index.d.ts +279 -77
package/dist/index.js +1334 -356
package/dist/index.js.map +1 -1
package/package.json +2 -5
package/dist/chunk-4A6L2F6L.js.map +0 -1

package/README.md CHANGED Viewed

@@ -1,77 +1,77 @@
-# @agentv/core
-Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
-## Overview
-This package provides the foundational components for building and evaluating AI agents:
-- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
-- **Evaluation Engine**: YAML-based test specification and execution
-- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
-- **Target Management**: Flexible configuration for different execution environments
-## Installation
-```bash
-npm install @agentv/core
-```
-## Usage
-This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
-```bash
-npm install -g agentv
-```
-For programmatic usage or custom integrations, you can import core components:
-```typescript
-import { createProvider, runEvaluation } from '@agentv/core';
-```
-## Features
-### Multi-Provider Support
-- **Azure OpenAI**: Enterprise-grade deployment support
-- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
-- **Google Gemini**: Gemini 2.0 Flash and other models
-- **VS Code Copilot**: Programmatic integration via subagent
-- **Mock Provider**: Testing without API calls
-### Evaluation Framework
-- YAML-based test specifications
-- Code block extraction and structured prompting
-- Automatic retry handling for timeouts
-- Detailed scoring with hit/miss analysis
-- Multiple output formats (JSONL, YAML)
-### Quality Grading
-- AI-powered aspect extraction and comparison
-- Normalized scoring (0.0 to 1.0)
-- Detailed reasoning and analysis
-- Configurable grading models
-## Architecture
-Built on modern TypeScript tooling:
-- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
-- **Zod**: Runtime type validation
-- **YAML**: Configuration and test specifications
-## Documentation
-For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
-## Repository
-[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
-## License
-MIT License - see [LICENSE](../../LICENSE) for details.
+# @agentv/core
+Core evaluation engine and runtime primitives for AgentV - a TypeScript-based AI agent evaluation and optimization framework.
+## Overview
+This package provides the foundational components for building and evaluating AI agents:
+- **Provider Abstraction**: Unified interface for Azure OpenAI, Anthropic, Google Gemini, VS Code Copilot, and mock providers
+- **Evaluation Engine**: YAML-based test specification and execution
+- **Quality Grading**: AI-powered scoring system for comparing expected vs. actual outputs
+- **Target Management**: Flexible configuration for different execution environments
+## Installation
+```bash
+npm install @agentv/core
+```
+## Usage
+This is a low-level package primarily used by the [agentv](https://www.npmjs.com/package/agentv) CLI. Most users should install the CLI package instead:
+```bash
+npm install -g agentv
+```
+For programmatic usage or custom integrations, you can import core components:
+```typescript
+import { createProvider, runEvaluation } from '@agentv/core';
+```
+## Features
+### Multi-Provider Support
+- **Azure OpenAI**: Enterprise-grade deployment support
+- **Anthropic Claude**: Latest Claude models including Sonnet 4.5
+- **Google Gemini**: Gemini 2.0 Flash and other models
+- **VS Code Copilot**: Programmatic integration via subagent
+- **Mock Provider**: Testing without API calls
+### Evaluation Framework
+- YAML-based test specifications
+- Code block extraction and structured prompting
+- Automatic retry handling for timeouts
+- Detailed scoring with hit/miss analysis
+- Multiple output formats (JSONL, YAML)
+### Quality Grading
+- AI-powered aspect extraction and comparison
+- Normalized scoring (0.0 to 1.0)
+- Detailed reasoning and analysis
+- Configurable grading models
+## Architecture
+Built on modern TypeScript tooling:
+- **Vercel AI SDK**: Direct Azure OpenAI, Anthropic, and Google Gemini integrations
+- **Zod**: Runtime type validation
+- **YAML**: Configuration and test specifications
+## Documentation
+For complete documentation, examples, and CLI usage, see the [agentv](https://www.npmjs.com/package/agentv) package.
+## Repository
+[https://github.com/EntityProcess/agentv](https://github.com/EntityProcess/agentv)
+## License
+MIT License - see [LICENSE](../../LICENSE) for details.

package/dist/{chunk-4A6L2F6L.js → chunk-E2VSU4WZ.js} RENAMED Viewed

@@ -116,6 +116,161 @@ async function resolveFileReference(rawValue, searchRoots) {
 // src/evaluation/providers/targets.ts
 import path2 from "node:path";
 import { z } from "zod";
+var CliHealthcheckHttpInputSchema = z.object({
+  type: z.literal("http"),
+  url: z.string().min(1, "healthcheck URL is required"),
+  timeout_seconds: z.number().positive().optional(),
+  timeoutSeconds: z.number().positive().optional()
+});
+var CliHealthcheckCommandInputSchema = z.object({
+  type: z.literal("command"),
+  command_template: z.string().optional(),
+  commandTemplate: z.string().optional(),
+  cwd: z.string().optional(),
+  timeout_seconds: z.number().positive().optional(),
+  timeoutSeconds: z.number().positive().optional()
+});
+var CliHealthcheckInputSchema = z.discriminatedUnion("type", [
+  CliHealthcheckHttpInputSchema,
+  CliHealthcheckCommandInputSchema
+]);
+var CliTargetInputSchema = z.object({
+  name: z.string().min(1, "target name is required"),
+  provider: z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
+  // Command template - required (accept both naming conventions)
+  command_template: z.string().optional(),
+  commandTemplate: z.string().optional(),
+  // Files format - optional
+  files_format: z.string().optional(),
+  filesFormat: z.string().optional(),
+  attachments_format: z.string().optional(),
+  attachmentsFormat: z.string().optional(),
+  // Working directory - optional
+  cwd: z.string().optional(),
+  // Timeout in seconds - optional
+  timeout_seconds: z.number().positive().optional(),
+  timeoutSeconds: z.number().positive().optional(),
+  // Healthcheck configuration - optional
+  healthcheck: CliHealthcheckInputSchema.optional(),
+  // Verbose mode - optional
+  verbose: z.boolean().optional(),
+  cli_verbose: z.boolean().optional(),
+  cliVerbose: z.boolean().optional(),
+  // Keep temp files - optional
+  keep_temp_files: z.boolean().optional(),
+  keepTempFiles: z.boolean().optional(),
+  keep_output_files: z.boolean().optional(),
+  keepOutputFiles: z.boolean().optional(),
+  // Common target fields
+  judge_target: z.string().optional(),
+  workers: z.number().int().min(1).optional(),
+  provider_batching: z.boolean().optional(),
+  providerBatching: z.boolean().optional()
+}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
+  message: "Either command_template or commandTemplate is required"
+});
+var CliHealthcheckHttpSchema = z.object({
+  type: z.literal("http"),
+  url: z.string().min(1),
+  timeoutMs: z.number().positive().optional()
+}).strict();
+var CliHealthcheckCommandSchema = z.object({
+  type: z.literal("command"),
+  commandTemplate: z.string().min(1),
+  cwd: z.string().optional(),
+  timeoutMs: z.number().positive().optional()
+}).strict();
+var CliHealthcheckSchema = z.discriminatedUnion("type", [
+  CliHealthcheckHttpSchema,
+  CliHealthcheckCommandSchema
+]);
+var CliTargetConfigSchema = z.object({
+  commandTemplate: z.string().min(1),
+  filesFormat: z.string().optional(),
+  cwd: z.string().optional(),
+  timeoutMs: z.number().positive().optional(),
+  healthcheck: CliHealthcheckSchema.optional(),
+  verbose: z.boolean().optional(),
+  keepTempFiles: z.boolean().optional()
+}).strict();
+function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  if (input.type === "http") {
+    const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
+    return {
+      type: "http",
+      url,
+      timeoutMs
+    };
+  }
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(
+      `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
+    );
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} healthcheck command template`,
+    true
+  );
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
+    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  }
+  return {
+    type: "command",
+    commandTemplate,
+    cwd,
+    timeoutMs
+  };
+}
+function normalizeCliTargetInput(input, env, evalFilePath) {
+  const targetName = input.name;
+  const commandTemplateSource = input.command_template ?? input.commandTemplate;
+  if (commandTemplateSource === void 0) {
+    throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
+  }
+  const commandTemplate = resolveString(
+    commandTemplateSource,
+    env,
+    `${targetName} CLI command template`,
+    true
+  );
+  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
+  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
+  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
+    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  }
+  if (!cwd && evalFilePath) {
+    cwd = path2.dirname(path2.resolve(evalFilePath));
+  }
+  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
+  const keepTempFiles = resolveOptionalBoolean(
+    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
+  );
+  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
+  return {
+    commandTemplate,
+    filesFormat,
+    cwd,
+    timeoutMs,
+    healthcheck,
+    verbose,
+    keepTempFiles
+  };
+}
 var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
   "PROMPT",
   "GUIDELINES",
@@ -221,6 +376,16 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
         providerBatching,
         config: resolveCodexConfig(parsed, env)
       };
+    case "pi":
+    case "pi-coding-agent":
+      return {
+        kind: "pi-coding-agent",
+        name: parsed.name,
+        judgeTarget: parsed.judge_target,
+        workers: parsed.workers,
+        providerBatching,
+        config: resolvePiCodingAgentConfig(parsed, env)
+      };
     case "mock":
       return {
         kind: "mock",
@@ -329,6 +494,7 @@ function resolveCodexConfig(target, env) {
   const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
   const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
   const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -344,13 +510,15 @@ function resolveCodexConfig(target, env) {
     optionalEnv: true
   });
   const logFormat = normalizeCodexLogFormat(logFormatSource);
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     executable,
     args,
     cwd,
     timeoutMs,
     logDir,
-    logFormat
+    logFormat,
+    systemPrompt
   };
 }
 function normalizeCodexLogFormat(value) {
@@ -366,10 +534,73 @@ function normalizeCodexLogFormat(value) {
   }
   throw new Error("codex log format must be 'summary' or 'json'");
 }
+function resolvePiCodingAgentConfig(target, env) {
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const modelSource = target.model ?? target.pi_model ?? target.piModel;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
+  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
+  const logFormatSource = target.log_format ?? target.logFormat;
+  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
+    allowLiteral: true,
+    optionalEnv: true
+  }) ?? "pi";
+  const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi api key`, {
+    allowLiteral: false,
+    optionalEnv: true
+  });
+  const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi thinking`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
+  const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
+  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
+  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
+  return {
+    executable,
+    provider,
+    model,
+    apiKey,
+    tools,
+    thinking,
+    args,
+    cwd,
+    timeoutMs,
+    logDir,
+    logFormat,
+    systemPrompt
+  };
+}
 function resolveMockConfig(target) {
   const response = typeof target.response === "string" ? target.response : void 0;
-  const trace = Array.isArray(target.trace) ? target.trace : void 0;
-  return { response, trace };
+  return { response };
 }
 function resolveVSCodeConfig(target, env, insiders) {
   const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -401,42 +632,35 @@ function resolveVSCodeConfig(target, env, insiders) {
     workspaceTemplate
   };
 }
-function resolveCliConfig(target, env, evalFilePath) {
-  const commandTemplateSource = target.command_template ?? target.commandTemplate;
-  const filesFormat = resolveOptionalLiteralString(
-    target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
-  );
-  const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
-  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
-    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+var cliErrorMap = (issue, ctx) => {
+  if (issue.code === z.ZodIssueCode.unrecognized_keys) {
+    return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
   }
-  if (!cwd && evalFilePath) {
-    cwd = path2.dirname(path2.resolve(evalFilePath));
+  if (issue.code === z.ZodIssueCode.invalid_union_discriminator) {
+    return { message: "healthcheck type must be 'http' or 'command'" };
   }
-  const timeoutMs = resolveTimeoutMs(
-    target.timeout_seconds ?? target.timeoutSeconds,
-    `${target.name} timeout`
-  );
-  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name, evalFilePath);
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${target.name} CLI command template`,
-    true
-  );
-  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
-  return {
-    commandTemplate,
-    filesFormat,
-    cwd,
-    timeoutMs,
-    healthcheck,
-    verbose
-  };
+  if (issue.code === z.ZodIssueCode.invalid_type && issue.expected === "string") {
+    return { message: `${ctx.defaultError} (expected a string value)` };
+  }
+  return { message: ctx.defaultError };
+};
+function resolveCliConfig(target, env, evalFilePath) {
+  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
+  if (!parseResult.success) {
+    const firstError = parseResult.error.errors[0];
+    const path3 = firstError?.path.join(".") || "";
+    const prefix = path3 ? `${target.name} ${path3}: ` : `${target.name}: `;
+    throw new Error(`${prefix}${firstError?.message}`);
+  }
+  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
+  assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
+  if (normalized.healthcheck?.type === "command") {
+    assertSupportedCliPlaceholders(
+      normalized.healthcheck.commandTemplate,
+      `${target.name} healthcheck command template`
+    );
+  }
+  return normalized;
 }
 function resolveTimeoutMs(source, description) {
   const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
@@ -448,49 +672,6 @@ function resolveTimeoutMs(source, description) {
   }
   return Math.floor(seconds * 1e3);
 }
-function resolveCliHealthcheck(source, env, targetName, evalFilePath) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} healthcheck must be an object`);
-  }
-  const candidate = source;
-  const type = candidate.type;
-  const timeoutMs = resolveTimeoutMs(
-    candidate.timeout_seconds ?? candidate.timeoutSeconds,
-    `${targetName} healthcheck timeout`
-  );
-  if (type === "http") {
-    const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
-    return {
-      type: "http",
-      url,
-      timeoutMs
-    };
-  }
-  if (type === "command") {
-    const commandTemplate = resolveString(
-      candidate.command_template ?? candidate.commandTemplate,
-      env,
-      `${targetName} healthcheck command template`,
-      true
-    );
-    assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
-    const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
-      allowLiteral: true,
-      optionalEnv: true
-    });
-    const resolvedCwd = cwd && evalFilePath && !path2.isAbsolute(cwd) ? path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd) : cwd;
-    return {
-      type: "command",
-      commandTemplate,
-      timeoutMs,
-      cwd: resolvedCwd
-    };
-  }
-  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
-}
 function assertSupportedCliPlaceholders(template, description) {
   const placeholders = extractCliPlaceholders(template);
   for (const placeholder of placeholders) {
@@ -658,6 +839,7 @@ function resolveOptionalNumberArray(source, description) {
 // src/evaluation/providers/types.ts
 var AGENT_PROVIDER_KINDS = [
   "codex",
+  "pi-coding-agent",
   "vscode",
   "vscode-insiders"
 ];
@@ -666,6 +848,7 @@ var KNOWN_PROVIDERS = [
   "anthropic",
   "gemini",
   "codex",
+  "pi-coding-agent",
   "cli",
   "mock",
   "vscode",
@@ -680,6 +863,8 @@ var PROVIDER_ALIASES = [
   // alias for "gemini"
   "codex-cli",
   // alias for "codex"
+  "pi",
+  // alias for "pi-coding-agent"
   "openai",
   // legacy/future support
   "bedrock",
@@ -687,6 +872,21 @@ var PROVIDER_ALIASES = [
   "vertex"
   // legacy/future support
 ];
+function extractLastAssistantContent(messages) {
+  if (!messages || messages.length === 0) {
+    return "";
+  }
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.role === "assistant" && msg.content !== void 0) {
+      if (typeof msg.content === "string") {
+        return msg.content;
+      }
+      return JSON.stringify(msg.content);
+    }
+  }
+  return "";
+}
 function isAgentProvider(provider) {
   return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
 }
@@ -704,6 +904,7 @@ export {
   resolveTargetDefinition,
   KNOWN_PROVIDERS,
   PROVIDER_ALIASES,
+  extractLastAssistantContent,
   isAgentProvider
 };
-//# sourceMappingURL=chunk-4A6L2F6L.js.map
+//# sourceMappingURL=chunk-E2VSU4WZ.js.map