npm - @agentv/core - Versions diffs - 0.7.2 → 0.7.4 - Mend

@agentv/core 0.7.2 → 0.7.4

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/chunk-L6RCDZ4Z.js +641 -0
package/dist/chunk-L6RCDZ4Z.js.map +1 -0
package/dist/evaluation/validation/index.cjs +11 -1
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +2 -2
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +76 -74
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +3 -3
package/dist/index.d.ts +3 -3
package/dist/index.js +63 -541
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-UQLHF3T7.js +0 -158
package/dist/chunk-UQLHF3T7.js.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -6,8 +6,9 @@ import {
   findGitRoot,
   isAgentProvider,
   readTextFile,
-  resolveFileReference
-} from "./chunk-UQLHF3T7.js";
+  resolveFileReference,
+  resolveTargetDefinition
+} from "./chunk-L6RCDZ4Z.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -218,6 +219,7 @@ async function processMessages(options) {
 }
 async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
   const absoluteTestPath = path.resolve(evalFilePath);
   if (!await fileExists2(absoluteTestPath)) {
     throw new Error(`Test file not found: ${evalFilePath}`);
@@ -249,62 +251,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
-      logWarning("Skipping invalid test case entry (expected object)");
+      logWarning("Skipping invalid eval case entry (expected object)");
       continue;
     }
     const evalcase = rawEvalcase;
     const id = asString(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
     const conversationId = asString(evalcase.conversation_id);
     const outcome = asString(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
+      logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
       continue;
     }
     if (!Array.isArray(expectedMessagesValue)) {
-      logWarning(`Test case '${id}' missing expected_messages array`);
+      logWarning(`Eval case '${id}' missing expected_messages array`);
       continue;
     }
     const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
     const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
-    const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
-    const userMessages = inputMessages.filter((message) => message.role === "user");
-    const systemMessages = inputMessages.filter((message) => message.role === "system");
-    if (assistantMessages.length === 0) {
-      logWarning(`No assistant message found for test case: ${id}`);
+    if (expectedMessages.length === 0) {
+      logWarning(`No expected message found for eval case: ${id}`);
       continue;
     }
-    if (assistantMessages.length > 1) {
-      logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
-    }
-    if (systemMessages.length > 1) {
-      logWarning(`Multiple system messages found for test case: ${id}, using first`);
-    }
-    let systemMessageContent;
-    if (systemMessages.length > 0) {
-      const content = systemMessages[0]?.content;
-      if (typeof content === "string") {
-        systemMessageContent = content;
-      } else if (Array.isArray(content)) {
-        const textParts = [];
-        for (const segment of content) {
-          if (isJsonObject(segment)) {
-            const value = segment.value;
-            if (typeof value === "string") {
-              textParts.push(value);
-            }
-          }
-        }
-        if (textParts.length > 0) {
-          systemMessageContent = textParts.join("\n\n");
-        }
-      }
+    if (expectedMessages.length > 1) {
+      logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
     }
     const guidelinePaths = [];
     const inputTextParts = [];
     const inputSegments = await processMessages({
-      messages: userMessages,
+      messages: inputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -314,7 +293,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const outputSegments = await processMessages({
-      messages: assistantMessages,
+      messages: expectedMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -322,10 +301,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       verbose
     });
     const codeSnippets = extractCodeBlocks(inputSegments);
-    const assistantContent = assistantMessages[0]?.content;
-    const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
+    const expectedContent = expectedMessages[0]?.content;
+    const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
     const userFilePaths = [];
     for (const segment of inputSegments) {
@@ -344,19 +323,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_segments: inputSegments,
       output_segments: outputSegments,
-      system_message: systemMessageContent,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       code_snippets: codeSnippets,
       expected_outcome: outcome,
-      evaluator: testCaseEvaluatorKind,
+      evaluator: evalCaseEvaluatorKind,
       evaluators
     };
     if (verbose) {
       console.log(`
-[Test Case: ${id}]`);
+[Eval Case: ${id}]`);
       if (testCase.guideline_paths.length > 0) {
         console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
         for (const guidelinePath of testCase.guideline_paths) {
@@ -415,7 +393,7 @@ ${body}`);
   }
   const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { question, guidelines, systemMessage: testCase.system_message };
+  return { question, guidelines };
 }
 async function fileExists2(absolutePath) {
   try {
@@ -801,6 +779,8 @@ var GeminiProvider = class {
 // src/evaluation/providers/cli.ts
 import { exec as execWithCallback } from "node:child_process";
+import fs from "node:fs/promises";
+import os from "node:os";
 import path2 from "node:path";
 import { promisify } from "node:util";
 var execAsync = promisify(execWithCallback);
@@ -816,6 +796,7 @@ async function defaultCommandRunner(command, options) {
   };
   try {
     const { stdout, stderr } = await execAsync(command, execOptions);
+    console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
     return {
       stdout,
       stderr,
@@ -826,6 +807,8 @@ async function defaultCommandRunner(command, options) {
     };
   } catch (error) {
     const execError = error;
+    console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
+    console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
     return {
       stdout: execError.stdout ?? "",
       stderr: execError.stderr ?? "",
@@ -855,7 +838,8 @@ var CliProvider = class {
       throw new Error("CLI provider request was aborted before execution");
     }
     await this.ensureHealthy(request.signal);
-    const templateValues = buildTemplateValues(request, this.config);
+    const outputFilePath = generateOutputFilePath(request.evalCaseId);
+    const templateValues = buildTemplateValues(request, this.config, outputFilePath);
     const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
     const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
@@ -878,16 +862,30 @@ var CliProvider = class {
       const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
       throw new Error(message);
     }
+    const responseText = await this.readAndCleanupOutputFile(outputFilePath);
     return {
-      text: result.stdout,
+      text: responseText,
       raw: {
         command: renderedCommand,
         stderr: result.stderr,
         exitCode: result.exitCode ?? 0,
-        cwd: this.config.cwd
+        cwd: this.config.cwd,
+        outputFile: outputFilePath
       }
     };
   }
+  async readAndCleanupOutputFile(filePath) {
+    try {
+      const content = await fs.readFile(filePath, "utf-8");
+      return content;
+    } catch (error) {
+      const errorMsg = error instanceof Error ? error.message : String(error);
+      throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
+    } finally {
+      await fs.unlink(filePath).catch(() => {
+      });
+    }
+  }
   async ensureHealthy(signal) {
     if (!this.config.healthcheck) {
       return;
@@ -928,10 +926,11 @@ var CliProvider = class {
           question: "",
           guidelines: "",
           inputFiles: [],
-          evalCaseId: "",
+          evalCaseId: "healthcheck",
           attempt: 0
         },
-        this.config
+        this.config,
+        generateOutputFilePath("healthcheck")
       )
     );
     const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
@@ -949,14 +948,15 @@ var CliProvider = class {
     }
   }
 };
-function buildTemplateValues(request, config) {
+function buildTemplateValues(request, config, outputFilePath) {
   const inputFiles = normalizeInputFiles(request.inputFiles);
   return {
     PROMPT: shellEscape(request.question ?? ""),
     GUIDELINES: shellEscape(request.guidelines ?? ""),
     EVAL_ID: shellEscape(request.evalCaseId ?? ""),
     ATTEMPT: shellEscape(String(request.attempt ?? 0)),
-    FILES: formatFileList(inputFiles, config.filesFormat)
+    FILES: formatFileList(inputFiles, config.filesFormat),
+    OUTPUT_FILE: shellEscape(outputFilePath)
   };
 }
 function normalizeInputFiles(inputFiles) {
@@ -994,11 +994,17 @@ function shellEscape(value) {
     return "''";
   }
   if (process.platform === "win32") {
-    const escaped = value.replace(/"/g, '\\"');
-    return `"${escaped}"`;
+    const escaped = value.replace(/'/g, "''");
+    return `'${escaped}'`;
   }
   return `'${value.replace(/'/g, `'"'"'`)}'`;
 }
+function generateOutputFilePath(evalCaseId) {
+  const safeEvalId = evalCaseId || "unknown";
+  const timestamp = Date.now();
+  const random = Math.random().toString(36).substring(2, 9);
+  return path2.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
+}
 function formatTimeoutSuffix(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) {
     return "";
@@ -1875,487 +1881,6 @@ var MockProvider = class {
   }
 };
-// src/evaluation/providers/targets.ts
-import { z } from "zod";
-var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
-var BASE_TARGET_SCHEMA = z.object({
-  name: z.string().min(1, "target name is required"),
-  provider: z.string().min(1, "provider is required"),
-  settings: z.record(z.unknown()).optional(),
-  judge_target: z.string().optional(),
-  workers: z.number().int().min(1).optional()
-});
-var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
-function normalizeAzureApiVersion(value) {
-  if (!value) {
-    return DEFAULT_AZURE_API_VERSION;
-  }
-  const trimmed = value.trim();
-  if (trimmed.length === 0) {
-    return DEFAULT_AZURE_API_VERSION;
-  }
-  const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
-  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
-}
-function resolveTargetDefinition(definition, env = process.env) {
-  const parsed = BASE_TARGET_SCHEMA.parse(definition);
-  const provider = parsed.provider.toLowerCase();
-  const providerBatching = resolveOptionalBoolean(
-    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
-  );
-  switch (provider) {
-    case "azure":
-    case "azure-openai":
-      return {
-        kind: "azure",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAzureConfig(parsed, env)
-      };
-    case "anthropic":
-      return {
-        kind: "anthropic",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveAnthropicConfig(parsed, env)
-      };
-    case "gemini":
-    case "google":
-    case "google-gemini":
-      return {
-        kind: "gemini",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveGeminiConfig(parsed, env)
-      };
-    case "codex":
-    case "codex-cli":
-      return {
-        kind: "codex",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCodexConfig(parsed, env)
-      };
-    case "mock":
-      return {
-        kind: "mock",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveMockConfig(parsed)
-      };
-    case "vscode":
-    case "vscode-insiders":
-      return {
-        kind: provider,
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
-      };
-    case "cli":
-      return {
-        kind: "cli",
-        name: parsed.name,
-        judgeTarget: parsed.judge_target,
-        workers: parsed.workers,
-        providerBatching,
-        config: resolveCliConfig(parsed, env)
-      };
-    default:
-      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
-  }
-}
-function resolveAzureConfig(target, env) {
-  const settings = target.settings ?? {};
-  const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
-  const versionSource = settings.version ?? settings.api_version;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
-  const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
-  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
-  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
-  const version = normalizeAzureApiVersion(
-    resolveOptionalString(versionSource, env, `${target.name} api version`)
-  );
-  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
-  const maxOutputTokens = resolveOptionalNumber(
-    maxTokensSource,
-    `${target.name} max output tokens`
-  );
-  return {
-    resourceName,
-    deploymentName,
-    apiKey,
-    version,
-    temperature,
-    maxOutputTokens
-  };
-}
-function resolveAnthropicConfig(target, env) {
-  const settings = target.settings ?? {};
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const modelSource = settings.model ?? settings.deployment ?? settings.variant;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
-  const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
-  const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
-  const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
-  return {
-    apiKey,
-    model,
-    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
-    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
-    thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
-  };
-}
-function resolveGeminiConfig(target, env) {
-  const settings = target.settings ?? {};
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const modelSource = settings.model ?? settings.deployment ?? settings.variant;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
-  const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
-  const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
-    allowLiteral: true,
-    optionalEnv: true
-  }) ?? "gemini-2.5-flash";
-  return {
-    apiKey,
-    model,
-    temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
-    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
-  };
-}
-function resolveCodexConfig(target, env) {
-  const settings = target.settings ?? {};
-  const executableSource = settings.executable ?? settings.command ?? settings.binary;
-  const argsSource = settings.args ?? settings.arguments;
-  const cwdSource = settings.cwd;
-  const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
-  const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
-  const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
-  const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
-    allowLiteral: true,
-    optionalEnv: true
-  }) ?? "codex";
-  const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
-  const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
-  const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  const logFormat = normalizeCodexLogFormat(logFormatSource);
-  return {
-    executable,
-    args,
-    cwd,
-    timeoutMs,
-    logDir,
-    logFormat
-  };
-}
-function normalizeCodexLogFormat(value) {
-  if (value === void 0 || value === null) {
-    return void 0;
-  }
-  if (typeof value !== "string") {
-    throw new Error("codex log format must be 'summary' or 'json'");
-  }
-  const normalized = value.trim().toLowerCase();
-  if (normalized === "json" || normalized === "summary") {
-    return normalized;
-  }
-  throw new Error("codex log format must be 'summary' or 'json'");
-}
-function resolveMockConfig(target) {
-  const settings = target.settings ?? {};
-  const response = typeof settings.response === "string" ? settings.response : void 0;
-  return { response };
-}
-function resolveVSCodeConfig(target, env, insiders) {
-  const settings = target.settings ?? {};
-  const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
-  const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
-    allowLiteral: false,
-    optionalEnv: true
-  }) : void 0;
-  const commandSource = settings.vscode_cmd ?? settings.command;
-  const waitSource = settings.wait;
-  const dryRunSource = settings.dry_run ?? settings.dryRun;
-  const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
-  const defaultCommand = insiders ? "code-insiders" : "code";
-  const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
-  return {
-    command,
-    waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
-    dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
-    subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
-      allowLiteral: true,
-      optionalEnv: true
-    }),
-    workspaceTemplate
-  };
-}
-function resolveCliConfig(target, env) {
-  const settings = target.settings ?? {};
-  const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
-  const filesFormat = resolveOptionalLiteralString(
-    settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
-  );
-  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
-  const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
-  const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
-  const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
-  const commandTemplate = resolveString(
-    commandTemplateSource,
-    env,
-    `${target.name} CLI command template`,
-    true
-  );
-  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
-  return {
-    commandTemplate,
-    filesFormat,
-    cwd,
-    env: envOverrides,
-    timeoutMs,
-    healthcheck
-  };
-}
-function resolveEnvOverrides(source, env, targetName) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} env overrides must be an object map of strings`);
-  }
-  const entries = Object.entries(source);
-  const resolved = {};
-  for (const [key, value] of entries) {
-    if (typeof value !== "string") {
-      throw new Error(`${targetName} env override '${key}' must be a string`);
-    }
-    const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
-    resolved[key] = resolvedValue;
-  }
-  return Object.keys(resolved).length > 0 ? resolved : void 0;
-}
-function resolveTimeoutMs(source, description) {
-  const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
-  if (seconds === void 0) {
-    return void 0;
-  }
-  if (seconds <= 0) {
-    throw new Error(`${description} must be greater than zero seconds`);
-  }
-  return Math.floor(seconds * 1e3);
-}
-function resolveCliHealthcheck(source, env, targetName) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} healthcheck must be an object`);
-  }
-  const candidate = source;
-  const type = candidate.type;
-  const timeoutMs = resolveTimeoutMs(
-    candidate.timeout_seconds ?? candidate.timeoutSeconds,
-    `${targetName} healthcheck timeout`
-  );
-  if (type === "http") {
-    const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
-    return {
-      type: "http",
-      url,
-      timeoutMs
-    };
-  }
-  if (type === "command") {
-    const commandTemplate = resolveString(
-      candidate.command_template ?? candidate.commandTemplate,
-      env,
-      `${targetName} healthcheck command template`,
-      true
-    );
-    assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
-    const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
-      allowLiteral: true,
-      optionalEnv: true
-    });
-    return {
-      type: "command",
-      commandTemplate,
-      timeoutMs,
-      cwd
-    };
-  }
-  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
-}
-function assertSupportedCliPlaceholders(template, description) {
-  const placeholders = extractCliPlaceholders(template);
-  for (const placeholder of placeholders) {
-    if (!CLI_PLACEHOLDERS.has(placeholder)) {
-      throw new Error(
-        `${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
-      );
-    }
-  }
-}
-function extractCliPlaceholders(template) {
-  const matches = template.matchAll(/\{([A-Z_]+)\}/g);
-  const results = [];
-  for (const match of matches) {
-    if (match[1]) {
-      results.push(match[1]);
-    }
-  }
-  return results;
-}
-function resolveString(source, env, description, allowLiteral = false) {
-  const value = resolveOptionalString(source, env, description, {
-    allowLiteral,
-    optionalEnv: false
-  });
-  if (value === void 0) {
-    throw new Error(`${description} is required`);
-  }
-  return value;
-}
-function resolveOptionalString(source, env, description, options) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "string") {
-    throw new Error(`${description} must be a string`);
-  }
-  const trimmed = source.trim();
-  if (trimmed.length === 0) {
-    return void 0;
-  }
-  const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
-  if (envVarMatch) {
-    const varName = envVarMatch[1];
-    const envValue = env[varName];
-    if (envValue !== void 0) {
-      if (envValue.trim().length === 0) {
-        throw new Error(`Environment variable '${varName}' for ${description} is empty`);
-      }
-      return envValue;
-    }
-    const optionalEnv = options?.optionalEnv ?? false;
-    if (optionalEnv) {
-      return void 0;
-    }
-    throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
-  }
-  const allowLiteral = options?.allowLiteral ?? false;
-  if (!allowLiteral) {
-    throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
-  }
-  return trimmed;
-}
-function resolveOptionalLiteralString(source) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "string") {
-    throw new Error("expected string value");
-  }
-  const trimmed = source.trim();
-  return trimmed.length > 0 ? trimmed : void 0;
-}
-function resolveOptionalNumber(source, description) {
-  if (source === void 0 || source === null || source === "") {
-    return void 0;
-  }
-  if (typeof source === "number") {
-    return Number.isFinite(source) ? source : void 0;
-  }
-  if (typeof source === "string") {
-    const numeric = Number(source);
-    if (Number.isFinite(numeric)) {
-      return numeric;
-    }
-  }
-  throw new Error(`${description} must be a number`);
-}
-function resolveOptionalBoolean(source) {
-  if (source === void 0 || source === null || source === "") {
-    return void 0;
-  }
-  if (typeof source === "boolean") {
-    return source;
-  }
-  if (typeof source === "string") {
-    const lowered = source.trim().toLowerCase();
-    if (lowered === "true" || lowered === "1") {
-      return true;
-    }
-    if (lowered === "false" || lowered === "0") {
-      return false;
-    }
-  }
-  throw new Error("expected boolean value");
-}
-function resolveOptionalStringArray(source, env, description) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (!Array.isArray(source)) {
-    throw new Error(`${description} must be an array of strings`);
-  }
-  if (source.length === 0) {
-    return void 0;
-  }
-  const resolved = [];
-  for (let i = 0; i < source.length; i++) {
-    const item = source[i];
-    if (typeof item !== "string") {
-      throw new Error(`${description}[${i}] must be a string`);
-    }
-    const trimmed = item.trim();
-    if (trimmed.length === 0) {
-      throw new Error(`${description}[${i}] cannot be empty`);
-    }
-    const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
-    if (envVarMatch) {
-      const varName = envVarMatch[1];
-      const envValue = env[varName];
-      if (envValue !== void 0) {
-        if (envValue.trim().length === 0) {
-          throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
-        }
-        resolved.push(envValue);
-        continue;
-      }
-      throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
-    }
-    resolved.push(trimmed);
-  }
-  return resolved.length > 0 ? resolved : void 0;
-}
 // src/evaluation/providers/vscode.ts
 import path5 from "node:path";
 import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
@@ -2918,7 +2443,6 @@ var CodeEvaluator = class {
         expected_outcome: context.evalCase.expected_outcome,
         reference_answer: context.evalCase.reference_answer,
         candidate_answer: context.candidate,
-        system_message: context.promptInputs.systemMessage ?? "",
         guideline_paths: context.evalCase.guideline_paths,
         input_files: context.evalCase.file_paths,
         input_segments: context.evalCase.input_segments
@@ -3160,7 +2684,7 @@ function validateConcurrency(concurrency) {
 // src/evaluation/orchestrator.ts
 async function runEvaluation(options) {
   const {
-    testFilePath,
+    testFilePath: evalFilePath,
     repoRoot,
     target,
     targets,
@@ -3179,11 +2703,11 @@ async function runEvaluation(options) {
     onProgress
   } = options;
   const load = loadEvalCases;
-  const evalCases = await load(testFilePath, repoRoot, { verbose });
+  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
-      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
+      throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
     }
     return [];
   }
@@ -3562,8 +3086,7 @@ async function evaluateCandidate(options) {
   const rawRequest = {
     question: promptInputs.question,
     ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
-    guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? ""
+    guideline_paths: evalCase.guideline_paths
   };
   return {
     eval_id: evalCase.id,
@@ -3827,7 +3350,6 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     question: promptInputs.question,
     ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
     guideline_paths: evalCase.guideline_paths,
-    system_message: promptInputs.systemMessage ?? "",
     error: message
   };
   return {