agentv 0.2.11 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
585
585
  import { Command } from "commander";
586
586
  import { readFileSync as readFileSync2 } from "node:fs";
587
587
 
588
- // ../../packages/core/dist/chunk-P4GOYWYH.js
588
+ // ../../packages/core/dist/chunk-NL7K4CAK.js
589
589
  import { constants } from "node:fs";
590
590
  import { access } from "node:fs/promises";
591
591
  import path from "node:path";
@@ -692,6 +692,8 @@ var KNOWN_PROVIDERS = [
692
692
  "azure",
693
693
  "anthropic",
694
694
  "gemini",
695
+ "codex",
696
+ "cli",
695
697
  "mock",
696
698
  "vscode",
697
699
  "vscode-insiders"
@@ -703,6 +705,8 @@ var PROVIDER_ALIASES = [
703
705
  // alias for "gemini"
704
706
  "google-gemini",
705
707
  // alias for "gemini"
708
+ "codex-cli",
709
+ // alias for "codex"
706
710
  "openai",
707
711
  // legacy/future support
708
712
  "bedrock",
@@ -5031,6 +5035,18 @@ var _c = pr();
5031
5035
  var ya = new Error("Agent description must be at least 20 characters (explain in detail what the agent does)");
5032
5036
  var ba = new Error("Agent definition is the prompt you give to the LLM for the agent. It must be detailed and at least 100 characters");
5033
5037
 
5038
+ // ../../packages/core/dist/index.js
5039
+ import { exec as execWithCallback } from "node:child_process";
5040
+ import path22 from "node:path";
5041
+ import { promisify as promisify2 } from "node:util";
5042
+ import { exec as execCallback, spawn as spawn2 } from "node:child_process";
5043
+ import { constants as constants22 } from "node:fs";
5044
+ import { access as access22, copyFile as copyFile2, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
5045
+ import { tmpdir } from "node:os";
5046
+ import path42 from "node:path";
5047
+ import { promisify as promisify22 } from "node:util";
5048
+ import path32 from "node:path";
5049
+
5034
5050
  // ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/external.js
5035
5051
  var external_exports = {};
5036
5052
  __export(external_exports, {
@@ -9074,7 +9090,7 @@ var NEVER = INVALID;
9074
9090
 
9075
9091
  // ../../packages/core/dist/index.js
9076
9092
  import { readFile as readFile22 } from "node:fs/promises";
9077
- import path22 from "node:path";
9093
+ import path52 from "node:path";
9078
9094
 
9079
9095
  // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/agentDispatch.js
9080
9096
  import { exec, spawn } from "child_process";
@@ -11012,14 +11028,14 @@ async function provisionSubagents(options) {
11012
11028
  }
11013
11029
 
11014
11030
  // ../../packages/core/dist/index.js
11015
- import { constants as constants22 } from "node:fs";
11016
- import { access as access22, readFile as readFile3 } from "node:fs/promises";
11017
- import path32 from "node:path";
11031
+ import { constants as constants32 } from "node:fs";
11032
+ import { access as access32, readFile as readFile3 } from "node:fs/promises";
11033
+ import path62 from "node:path";
11018
11034
  import { parse as parse22 } from "yaml";
11019
11035
  import { randomUUID } from "node:crypto";
11020
11036
  import { createHash, randomUUID as randomUUID2 } from "node:crypto";
11021
- import { mkdir as mkdir3, writeFile as writeFile22 } from "node:fs/promises";
11022
- import path42 from "node:path";
11037
+ import { mkdir as mkdir22, readFile as readFile4, writeFile as writeFile22 } from "node:fs/promises";
11038
+ import path72 from "node:path";
11023
11039
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
11024
11040
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
11025
11041
  function isTestMessageRole(value) {
@@ -11059,10 +11075,10 @@ function isTestMessage(value) {
11059
11075
  }
11060
11076
  return candidate.content.every(isJsonObject);
11061
11077
  }
11062
- var GRADER_KIND_VALUES = ["heuristic", "llm_judge"];
11063
- var GRADER_KIND_SET = new Set(GRADER_KIND_VALUES);
11064
- function isGraderKind(value) {
11065
- return typeof value === "string" && GRADER_KIND_SET.has(value);
11078
+ var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
11079
+ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
11080
+ function isEvaluatorKind(value) {
11081
+ return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
11066
11082
  }
11067
11083
  var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
11068
11084
  var ANSI_YELLOW = "\x1B[33m";
@@ -11159,7 +11175,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11159
11175
  if (!Array.isArray(rawTestcases)) {
11160
11176
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
11161
11177
  }
11162
- const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
11178
+ const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
11163
11179
  const results = [];
11164
11180
  for (const rawEvalcase of rawTestcases) {
11165
11181
  if (!isJsonObject(rawEvalcase)) {
@@ -11282,7 +11298,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11282
11298
  const assistantContent = assistantMessages[0]?.content;
11283
11299
  const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
11284
11300
  const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
11285
- const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
11301
+ const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
11302
+ const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
11286
11303
  const userFilePaths = [];
11287
11304
  for (const segment of userSegments) {
11288
11305
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -11305,7 +11322,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11305
11322
  file_paths: allFilePaths,
11306
11323
  code_snippets: codeSnippets,
11307
11324
  outcome,
11308
- grader: testCaseGrader
11325
+ evaluator: testCaseEvaluatorKind,
11326
+ evaluators
11309
11327
  };
11310
11328
  if (verbose) {
11311
11329
  console.log(`
@@ -11466,14 +11484,88 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
11466
11484
  }
11467
11485
  return parts.join(" ");
11468
11486
  }
11469
- function coerceGrader(candidate) {
11487
+ async function parseEvaluators(rawEvalCase, searchRoots, evalId) {
11488
+ const execution = rawEvalCase.execution;
11489
+ const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators;
11490
+ if (candidateEvaluators === void 0) {
11491
+ return void 0;
11492
+ }
11493
+ if (!Array.isArray(candidateEvaluators)) {
11494
+ logWarning(`Skipping evaluators for '${evalId}': expected array`);
11495
+ return void 0;
11496
+ }
11497
+ const evaluators = [];
11498
+ for (const rawEvaluator of candidateEvaluators) {
11499
+ if (!isJsonObject(rawEvaluator)) {
11500
+ logWarning(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
11501
+ continue;
11502
+ }
11503
+ const name = asString(rawEvaluator.name);
11504
+ const typeValue = rawEvaluator.type;
11505
+ if (!name || !isEvaluatorKind(typeValue)) {
11506
+ logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
11507
+ continue;
11508
+ }
11509
+ if (typeValue === "code") {
11510
+ const script = asString(rawEvaluator.script);
11511
+ if (!script) {
11512
+ logWarning(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
11513
+ continue;
11514
+ }
11515
+ const cwd = asString(rawEvaluator.cwd);
11516
+ let resolvedCwd;
11517
+ if (cwd) {
11518
+ const resolved = await resolveFileReference(cwd, searchRoots);
11519
+ if (resolved.resolvedPath) {
11520
+ resolvedCwd = path8.resolve(resolved.resolvedPath);
11521
+ } else {
11522
+ logWarning(
11523
+ `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
11524
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
11525
+ );
11526
+ }
11527
+ }
11528
+ evaluators.push({
11529
+ name,
11530
+ type: "code",
11531
+ script,
11532
+ cwd,
11533
+ resolvedCwd
11534
+ });
11535
+ continue;
11536
+ }
11537
+ const prompt = asString(rawEvaluator.prompt);
11538
+ let promptPath;
11539
+ if (prompt) {
11540
+ const resolved = await resolveFileReference(prompt, searchRoots);
11541
+ if (resolved.resolvedPath) {
11542
+ promptPath = path8.resolve(resolved.resolvedPath);
11543
+ } else {
11544
+ logWarning(
11545
+ `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
11546
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
11547
+ );
11548
+ }
11549
+ }
11550
+ const model = asString(rawEvaluator.model);
11551
+ evaluators.push({
11552
+ name,
11553
+ type: "llm_judge",
11554
+ prompt,
11555
+ promptPath,
11556
+ model
11557
+ });
11558
+ }
11559
+ return evaluators.length > 0 ? evaluators : void 0;
11560
+ }
11561
+ function coerceEvaluator(candidate, contextId) {
11470
11562
  if (typeof candidate !== "string") {
11471
11563
  return void 0;
11472
11564
  }
11473
- if (isGraderKind(candidate)) {
11565
+ if (isEvaluatorKind(candidate)) {
11474
11566
  return candidate;
11475
11567
  }
11476
- logWarning(`Unknown grader '${candidate}', falling back to default`);
11568
+ logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
11477
11569
  return void 0;
11478
11570
  }
11479
11571
  function logWarning(message, details) {
@@ -11665,194 +11757,1003 @@ var GeminiProvider = class {
11665
11757
  return mapResponse(ensureChatResponse(response));
11666
11758
  }
11667
11759
  };
11668
- var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
11669
- var MockProvider = class {
11760
+ var execAsync2 = promisify2(execWithCallback);
11761
+ var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
11762
+ async function defaultCommandRunner(command, options) {
11763
+ const execOptions = {
11764
+ cwd: options.cwd,
11765
+ env: options.env,
11766
+ timeout: options.timeoutMs,
11767
+ signal: options.signal,
11768
+ maxBuffer: DEFAULT_MAX_BUFFER,
11769
+ shell: process.platform === "win32" ? "powershell.exe" : void 0
11770
+ };
11771
+ try {
11772
+ const { stdout, stderr } = await execAsync2(command, execOptions);
11773
+ return {
11774
+ stdout,
11775
+ stderr,
11776
+ exitCode: 0,
11777
+ failed: false,
11778
+ timedOut: false,
11779
+ signal: null
11780
+ };
11781
+ } catch (error) {
11782
+ const execError = error;
11783
+ return {
11784
+ stdout: execError.stdout ?? "",
11785
+ stderr: execError.stderr ?? "",
11786
+ exitCode: typeof execError.code === "number" ? execError.code : null,
11787
+ failed: true,
11788
+ timedOut: execError.timedOut === true || execError.killed === true,
11789
+ signal: execError.signal ?? null
11790
+ };
11791
+ }
11792
+ }
11793
+ var CliProvider = class {
11670
11794
  id;
11671
- kind = "mock";
11795
+ kind = "cli";
11672
11796
  targetName;
11673
- cannedResponse;
11674
- delayMs;
11675
- delayMinMs;
11676
- delayMaxMs;
11677
- constructor(targetName, config) {
11678
- this.id = `mock:${targetName}`;
11797
+ supportsBatch = false;
11798
+ config;
11799
+ runCommand;
11800
+ healthcheckPromise;
11801
+ constructor(targetName, config, runner = defaultCommandRunner) {
11679
11802
  this.targetName = targetName;
11680
- this.cannedResponse = config.response ?? DEFAULT_MOCK_RESPONSE;
11681
- this.delayMs = config.delayMs ?? 0;
11682
- this.delayMinMs = config.delayMinMs ?? 0;
11683
- this.delayMaxMs = config.delayMaxMs ?? 0;
11803
+ this.id = `cli:${targetName}`;
11804
+ this.config = config;
11805
+ this.runCommand = runner;
11684
11806
  }
11685
11807
  async invoke(request) {
11686
- const delay = this.calculateDelay();
11687
- if (delay > 0) {
11688
- await new Promise((resolve) => setTimeout(resolve, delay));
11808
+ if (request.signal?.aborted) {
11809
+ throw new Error("CLI provider request was aborted before execution");
11810
+ }
11811
+ await this.ensureHealthy(request.signal);
11812
+ const templateValues = buildTemplateValues(request, this.config);
11813
+ const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
11814
+ const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
11815
+ const result = await this.runCommand(renderedCommand, {
11816
+ cwd: this.config.cwd,
11817
+ env,
11818
+ timeoutMs: this.config.timeoutMs,
11819
+ signal: request.signal
11820
+ });
11821
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
11822
+ if (request.signal?.aborted) {
11823
+ throw new Error("CLI provider request was aborted");
11824
+ }
11825
+ if (result.timedOut) {
11826
+ throw new Error(
11827
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
11828
+ );
11829
+ }
11830
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
11831
+ const detail = result.stderr.trim() || result.stdout.trim();
11832
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
11833
+ throw new Error(message);
11689
11834
  }
11690
11835
  return {
11691
- text: this.cannedResponse,
11836
+ text: result.stdout,
11692
11837
  raw: {
11693
- prompt: request.prompt,
11694
- guidelines: request.guidelines
11838
+ command: renderedCommand,
11839
+ stderr: result.stderr,
11840
+ exitCode: result.exitCode ?? 0,
11841
+ cwd: this.config.cwd
11695
11842
  }
11696
11843
  };
11697
11844
  }
11698
- calculateDelay() {
11699
- if (this.delayMinMs > 0 || this.delayMaxMs > 0) {
11700
- const min = Math.max(0, this.delayMinMs);
11701
- const max = Math.max(min, this.delayMaxMs);
11702
- return Math.floor(Math.random() * (max - min + 1)) + min;
11845
+ async ensureHealthy(signal) {
11846
+ if (!this.config.healthcheck) {
11847
+ return;
11848
+ }
11849
+ if (!this.healthcheckPromise) {
11850
+ this.healthcheckPromise = this.runHealthcheck(this.config.healthcheck, signal);
11851
+ }
11852
+ return this.healthcheckPromise;
11853
+ }
11854
+ async runHealthcheck(healthcheck, signal) {
11855
+ if (!healthcheck) {
11856
+ return;
11857
+ }
11858
+ const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
11859
+ if (healthcheck.type === "http") {
11860
+ const controller = new AbortController();
11861
+ const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
11862
+ signal?.addEventListener("abort", () => controller.abort(), { once: true });
11863
+ try {
11864
+ const response = await fetch(healthcheck.url, { method: "GET", signal: controller.signal });
11865
+ if (!response.ok) {
11866
+ throw new Error(`HTTP ${response.status} ${response.statusText}`);
11867
+ }
11868
+ } catch (error) {
11869
+ const reason = error instanceof Error ? error.message : String(error);
11870
+ throw new Error(`CLI healthcheck failed for '${this.targetName}': ${reason}`);
11871
+ } finally {
11872
+ if (timer !== void 0) {
11873
+ clearTimeout(timer);
11874
+ }
11875
+ }
11876
+ return;
11877
+ }
11878
+ const renderedCommand = renderTemplate(
11879
+ healthcheck.commandTemplate,
11880
+ buildTemplateValues(
11881
+ {
11882
+ prompt: "",
11883
+ guidelines: "",
11884
+ inputFiles: [],
11885
+ evalCaseId: "",
11886
+ attempt: 0
11887
+ },
11888
+ this.config
11889
+ )
11890
+ );
11891
+ const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
11892
+ const result = await this.runCommand(renderedCommand, {
11893
+ cwd: healthcheck.cwd ?? this.config.cwd,
11894
+ env,
11895
+ timeoutMs,
11896
+ signal
11897
+ });
11898
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
11899
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
11900
+ const detail = result.stderr.trim() || result.stdout.trim();
11901
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
11902
+ throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
11703
11903
  }
11704
- return this.delayMs;
11705
11904
  }
11706
11905
  };
11707
- var BASE_TARGET_SCHEMA = external_exports.object({
11708
- name: external_exports.string().min(1, "target name is required"),
11709
- provider: external_exports.string().min(1, "provider is required"),
11710
- settings: external_exports.record(external_exports.unknown()).optional(),
11711
- judge_target: external_exports.string().optional(),
11712
- workers: external_exports.number().int().min(1).optional()
11713
- });
11714
- var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
11715
- function normalizeAzureApiVersion(value) {
11716
- if (!value) {
11717
- return DEFAULT_AZURE_API_VERSION;
11906
+ function buildTemplateValues(request, config) {
11907
+ const inputFiles = normalizeInputFiles(request.inputFiles);
11908
+ return {
11909
+ PROMPT: shellEscape(request.prompt ?? ""),
11910
+ GUIDELINES: shellEscape(request.guidelines ?? ""),
11911
+ EVAL_ID: shellEscape(request.evalCaseId ?? ""),
11912
+ ATTEMPT: shellEscape(String(request.attempt ?? 0)),
11913
+ FILES: formatFileList(inputFiles, config.filesFormat)
11914
+ };
11915
+ }
11916
+ function normalizeInputFiles(inputFiles) {
11917
+ if (!inputFiles || inputFiles.length === 0) {
11918
+ return void 0;
11718
11919
  }
11719
- const trimmed = value.trim();
11720
- if (trimmed.length === 0) {
11721
- return DEFAULT_AZURE_API_VERSION;
11920
+ const unique = /* @__PURE__ */ new Map();
11921
+ for (const inputFile of inputFiles) {
11922
+ const absolutePath = path22.resolve(inputFile);
11923
+ if (!unique.has(absolutePath)) {
11924
+ unique.set(absolutePath, absolutePath);
11925
+ }
11722
11926
  }
11723
- const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
11724
- return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
11927
+ return Array.from(unique.values());
11725
11928
  }
11726
- function resolveTargetDefinition(definition, env = process.env) {
11727
- const parsed = BASE_TARGET_SCHEMA.parse(definition);
11728
- const provider = parsed.provider.toLowerCase();
11729
- const providerBatching = resolveOptionalBoolean(
11730
- parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
11731
- );
11732
- switch (provider) {
11733
- case "azure":
11734
- case "azure-openai":
11735
- return {
11736
- kind: "azure",
11737
- name: parsed.name,
11738
- judgeTarget: parsed.judge_target,
11739
- workers: parsed.workers,
11740
- providerBatching,
11741
- config: resolveAzureConfig(parsed, env)
11742
- };
11743
- case "anthropic":
11744
- return {
11745
- kind: "anthropic",
11746
- name: parsed.name,
11747
- judgeTarget: parsed.judge_target,
11748
- workers: parsed.workers,
11749
- providerBatching,
11750
- config: resolveAnthropicConfig(parsed, env)
11751
- };
11752
- case "gemini":
11753
- case "google":
11754
- case "google-gemini":
11755
- return {
11756
- kind: "gemini",
11757
- name: parsed.name,
11758
- judgeTarget: parsed.judge_target,
11759
- workers: parsed.workers,
11760
- providerBatching,
11761
- config: resolveGeminiConfig(parsed, env)
11762
- };
11763
- case "mock":
11764
- return {
11765
- kind: "mock",
11766
- name: parsed.name,
11767
- judgeTarget: parsed.judge_target,
11768
- workers: parsed.workers,
11769
- providerBatching,
11770
- config: resolveMockConfig(parsed)
11771
- };
11772
- case "vscode":
11773
- case "vscode-insiders":
11774
- return {
11775
- kind: provider,
11776
- name: parsed.name,
11777
- judgeTarget: parsed.judge_target,
11778
- workers: parsed.workers,
11779
- providerBatching,
11780
- config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
11781
- };
11782
- default:
11783
- throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
11929
+ function formatFileList(files, template) {
11930
+ if (!files || files.length === 0) {
11931
+ return "";
11784
11932
  }
11933
+ const formatter = template ?? "{path}";
11934
+ return files.map((filePath) => {
11935
+ const escapedPath = shellEscape(filePath);
11936
+ const escapedName = shellEscape(path22.basename(filePath));
11937
+ return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
11938
+ }).join(" ");
11785
11939
  }
11786
- function resolveAzureConfig(target, env) {
11787
- const settings = target.settings ?? {};
11788
- const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
11789
- const apiKeySource = settings.api_key ?? settings.apiKey;
11790
- const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
11791
- const versionSource = settings.version ?? settings.api_version;
11792
- const temperatureSource = settings.temperature;
11793
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
11794
- const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
11795
- const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
11796
- const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
11797
- const version = normalizeAzureApiVersion(
11798
- resolveOptionalString(versionSource, env, `${target.name} api version`)
11799
- );
11800
- const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
11801
- const maxOutputTokens = resolveOptionalNumber(
11802
- maxTokensSource,
11803
- `${target.name} max output tokens`
11804
- );
11805
- return {
11806
- resourceName,
11807
- deploymentName,
11808
- apiKey,
11809
- version,
11810
- temperature,
11811
- maxOutputTokens
11812
- };
11940
+ function renderTemplate(template, values) {
11941
+ return template.replace(/\{([A-Z_]+)\}/g, (match, key2) => {
11942
+ const replacement = values[key2];
11943
+ return replacement !== void 0 ? replacement : match;
11944
+ });
11813
11945
  }
11814
- function resolveAnthropicConfig(target, env) {
11815
- const settings = target.settings ?? {};
11816
- const apiKeySource = settings.api_key ?? settings.apiKey;
11817
- const modelSource = settings.model ?? settings.deployment ?? settings.variant;
11818
- const temperatureSource = settings.temperature;
11819
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
11820
- const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
11821
- const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
11822
- const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
11823
- return {
11824
- apiKey,
11825
- model,
11826
- temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
11827
- maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
11828
- thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
11829
- };
11946
+ function shellEscape(value) {
11947
+ if (value.length === 0) {
11948
+ return "''";
11949
+ }
11950
+ if (process.platform === "win32") {
11951
+ const escaped = value.replace(/"/g, '\\"');
11952
+ return `"${escaped}"`;
11953
+ }
11954
+ return `'${value.replace(/'/g, `'"'"'`)}'`;
11830
11955
  }
11831
- function resolveGeminiConfig(target, env) {
11832
- const settings = target.settings ?? {};
11833
- const apiKeySource = settings.api_key ?? settings.apiKey;
11834
- const modelSource = settings.model ?? settings.deployment ?? settings.variant;
11835
- const temperatureSource = settings.temperature;
11836
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
11837
- const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
11838
- const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
11839
- allowLiteral: true,
11840
- optionalEnv: true
11841
- }) ?? "gemini-2.5-flash";
11842
- return {
11843
- apiKey,
11844
- model,
11845
- temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
11846
- maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
11847
- };
11956
+ function formatTimeoutSuffix(timeoutMs) {
11957
+ if (!timeoutMs || timeoutMs <= 0) {
11958
+ return "";
11959
+ }
11960
+ const seconds = Math.ceil(timeoutMs / 1e3);
11961
+ return ` after ${seconds}s`;
11848
11962
  }
11849
- function resolveMockConfig(target) {
11850
- const settings = target.settings ?? {};
11851
- const response = typeof settings.response === "string" ? settings.response : void 0;
11852
- return { response };
11963
+ function buildPromptDocument(request, inputFiles, options) {
11964
+ const parts = [];
11965
+ const guidelineFiles = collectGuidelineFiles(
11966
+ inputFiles,
11967
+ options?.guidelinePatterns ?? request.guideline_patterns,
11968
+ options?.guidelineOverrides
11969
+ );
11970
+ const inputFilesList = collectInputFiles(inputFiles);
11971
+ const nonGuidelineInputFiles = inputFilesList.filter(
11972
+ (file) => !guidelineFiles.includes(file)
11973
+ );
11974
+ const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
11975
+ if (prereadBlock.length > 0) {
11976
+ parts.push("\n", prereadBlock);
11977
+ }
11978
+ parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
11979
+ return parts.join("\n").trim();
11853
11980
  }
11854
- function resolveVSCodeConfig(target, env, insiders) {
11855
- const settings = target.settings ?? {};
11981
+ function normalizeInputFiles2(inputFiles) {
11982
+ if (!inputFiles || inputFiles.length === 0) {
11983
+ return void 0;
11984
+ }
11985
+ const deduped = /* @__PURE__ */ new Map();
11986
+ for (const inputFile of inputFiles) {
11987
+ const absolutePath = path32.resolve(inputFile);
11988
+ if (!deduped.has(absolutePath)) {
11989
+ deduped.set(absolutePath, absolutePath);
11990
+ }
11991
+ }
11992
+ return Array.from(deduped.values());
11993
+ }
11994
+ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
11995
+ if (!inputFiles || inputFiles.length === 0) {
11996
+ return [];
11997
+ }
11998
+ const unique = /* @__PURE__ */ new Map();
11999
+ for (const inputFile of inputFiles) {
12000
+ const absolutePath = path32.resolve(inputFile);
12001
+ if (overrides?.has(absolutePath)) {
12002
+ if (!unique.has(absolutePath)) {
12003
+ unique.set(absolutePath, absolutePath);
12004
+ }
12005
+ continue;
12006
+ }
12007
+ const normalized = absolutePath.split(path32.sep).join("/");
12008
+ if (isGuidelineFile(normalized, guidelinePatterns)) {
12009
+ if (!unique.has(absolutePath)) {
12010
+ unique.set(absolutePath, absolutePath);
12011
+ }
12012
+ }
12013
+ }
12014
+ return Array.from(unique.values());
12015
+ }
12016
+ function collectInputFiles(inputFiles) {
12017
+ if (!inputFiles || inputFiles.length === 0) {
12018
+ return [];
12019
+ }
12020
+ const unique = /* @__PURE__ */ new Map();
12021
+ for (const inputFile of inputFiles) {
12022
+ const absolutePath = path32.resolve(inputFile);
12023
+ if (!unique.has(absolutePath)) {
12024
+ unique.set(absolutePath, absolutePath);
12025
+ }
12026
+ }
12027
+ return Array.from(unique.values());
12028
+ }
12029
+ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
12030
+ if (guidelineFiles.length === 0 && inputFiles.length === 0) {
12031
+ return "";
12032
+ }
12033
+ const buildList = (files) => files.map((absolutePath) => {
12034
+ const fileName = path32.basename(absolutePath);
12035
+ const fileUri = pathToFileUri2(absolutePath);
12036
+ return `* [${fileName}](${fileUri})`;
12037
+ });
12038
+ const sections = [];
12039
+ if (guidelineFiles.length > 0) {
12040
+ sections.push(`Read all guideline files:
12041
+ ${buildList(guidelineFiles).join("\n")}.`);
12042
+ }
12043
+ if (inputFiles.length > 0) {
12044
+ sections.push(`Read all input files:
12045
+ ${buildList(inputFiles).join("\n")}.`);
12046
+ }
12047
+ sections.push(
12048
+ "If any file is missing, fail with ERROR: missing-file <filename> and stop.",
12049
+ "Then apply system_instructions on the user query below."
12050
+ );
12051
+ return sections.join("\n");
12052
+ }
12053
+ function pathToFileUri2(filePath) {
12054
+ const absolutePath = path32.isAbsolute(filePath) ? filePath : path32.resolve(filePath);
12055
+ const normalizedPath = absolutePath.replace(/\\/g, "/");
12056
+ if (/^[a-zA-Z]:\//.test(normalizedPath)) {
12057
+ return `file:///${normalizedPath}`;
12058
+ }
12059
+ return `file://${normalizedPath}`;
12060
+ }
12061
+ var execAsync22 = promisify22(execCallback);
12062
+ var WORKSPACE_PREFIX = "agentv-codex-";
12063
+ var PROMPT_FILENAME = "prompt.md";
12064
+ var FILES_DIR = "files";
12065
+ var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
12066
+ var CodexProvider = class {
12067
+ id;
12068
+ kind = "codex";
12069
+ targetName;
12070
+ supportsBatch = false;
12071
+ config;
12072
+ runCodex;
12073
+ environmentCheck;
12074
+ resolvedExecutable;
12075
+ constructor(targetName, config, runner = defaultCodexRunner) {
12076
+ this.id = `codex:${targetName}`;
12077
+ this.targetName = targetName;
12078
+ this.config = config;
12079
+ this.runCodex = runner;
12080
+ }
12081
+ async invoke(request) {
12082
+ if (request.signal?.aborted) {
12083
+ throw new Error("Codex provider request was aborted before execution");
12084
+ }
12085
+ await this.ensureEnvironmentReady();
12086
+ const inputFiles = normalizeInputFiles2(request.inputFiles);
12087
+ const originalGuidelines = new Set(
12088
+ collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path42.resolve(file))
12089
+ );
12090
+ const workspaceRoot = await this.createWorkspace();
12091
+ try {
12092
+ const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
12093
+ inputFiles,
12094
+ workspaceRoot,
12095
+ originalGuidelines
12096
+ );
12097
+ const promptContent = buildPromptDocument(request, mirroredInputFiles, {
12098
+ guidelinePatterns: request.guideline_patterns,
12099
+ guidelineOverrides: guidelineMirrors
12100
+ });
12101
+ const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
12102
+ await writeFile3(promptFile, promptContent, "utf8");
12103
+ const args = this.buildCodexArgs();
12104
+ const cwd = this.resolveCwd(workspaceRoot);
12105
+ const result = await this.executeCodex(args, cwd, promptContent, request.signal);
12106
+ if (result.timedOut) {
12107
+ throw new Error(
12108
+ `Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
12109
+ );
12110
+ }
12111
+ if (result.exitCode !== 0) {
12112
+ const detail = pickDetail(result.stderr, result.stdout);
12113
+ const prefix = `Codex CLI exited with code ${result.exitCode}`;
12114
+ throw new Error(detail ? `${prefix}: ${detail}` : prefix);
12115
+ }
12116
+ const parsed = parseCodexJson(result.stdout);
12117
+ const assistantText = extractAssistantText(parsed);
12118
+ return {
12119
+ text: assistantText,
12120
+ raw: {
12121
+ response: parsed,
12122
+ stdout: result.stdout,
12123
+ stderr: result.stderr,
12124
+ exitCode: result.exitCode,
12125
+ args,
12126
+ executable: this.resolvedExecutable ?? this.config.executable,
12127
+ promptFile,
12128
+ workspace: workspaceRoot,
12129
+ inputFiles: mirroredInputFiles
12130
+ }
12131
+ };
12132
+ } finally {
12133
+ await this.cleanupWorkspace(workspaceRoot);
12134
+ }
12135
+ }
12136
+ async ensureEnvironmentReady() {
12137
+ if (!this.environmentCheck) {
12138
+ this.environmentCheck = this.validateEnvironment();
12139
+ }
12140
+ await this.environmentCheck;
12141
+ }
12142
+ async validateEnvironment() {
12143
+ this.resolvedExecutable = await locateExecutable(this.config.executable);
12144
+ }
12145
+ resolveCwd(workspaceRoot) {
12146
+ if (!this.config.cwd) {
12147
+ return workspaceRoot;
12148
+ }
12149
+ return path42.resolve(this.config.cwd);
12150
+ }
12151
+ buildCodexArgs() {
12152
+ const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
12153
+ if (this.config.args && this.config.args.length > 0) {
12154
+ args.push(...this.config.args);
12155
+ }
12156
+ args.push("-");
12157
+ return args;
12158
+ }
12159
+ async executeCodex(args, cwd, promptContent, signal) {
12160
+ try {
12161
+ return await this.runCodex({
12162
+ executable: this.resolvedExecutable ?? this.config.executable,
12163
+ args,
12164
+ cwd,
12165
+ prompt: promptContent,
12166
+ timeoutMs: this.config.timeoutMs,
12167
+ env: process.env,
12168
+ signal
12169
+ });
12170
+ } catch (error) {
12171
+ const err = error;
12172
+ if (err.code === "ENOENT") {
12173
+ throw new Error(
12174
+ `Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
12175
+ );
12176
+ }
12177
+ throw error;
12178
+ }
12179
+ }
12180
+ async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
12181
+ if (!inputFiles || inputFiles.length === 0) {
12182
+ return {
12183
+ mirroredInputFiles: void 0,
12184
+ guidelineMirrors: /* @__PURE__ */ new Set()
12185
+ };
12186
+ }
12187
+ const filesRoot = path42.join(workspaceRoot, FILES_DIR);
12188
+ await mkdir3(filesRoot, { recursive: true });
12189
+ const mirrored = [];
12190
+ const guidelineMirrors = /* @__PURE__ */ new Set();
12191
+ const nameCounts = /* @__PURE__ */ new Map();
12192
+ for (const inputFile of inputFiles) {
12193
+ const absoluteSource = path42.resolve(inputFile);
12194
+ const baseName = path42.basename(absoluteSource);
12195
+ const count = nameCounts.get(baseName) ?? 0;
12196
+ nameCounts.set(baseName, count + 1);
12197
+ const finalName = count === 0 ? baseName : `${baseName}.${count}`;
12198
+ const destination = path42.join(filesRoot, finalName);
12199
+ await copyFile2(absoluteSource, destination);
12200
+ const resolvedDestination = path42.resolve(destination);
12201
+ mirrored.push(resolvedDestination);
12202
+ if (guidelineOriginals.has(absoluteSource)) {
12203
+ guidelineMirrors.add(resolvedDestination);
12204
+ }
12205
+ }
12206
+ return {
12207
+ mirroredInputFiles: mirrored,
12208
+ guidelineMirrors
12209
+ };
12210
+ }
12211
+ async createWorkspace() {
12212
+ return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
12213
+ }
12214
+ async cleanupWorkspace(workspaceRoot) {
12215
+ try {
12216
+ await rm2(workspaceRoot, { recursive: true, force: true });
12217
+ } catch {
12218
+ }
12219
+ }
12220
+ };
12221
/**
 * Resolves an executable name or path to a concrete file path.
 *
 * - A candidate containing a path separator is resolved relative to the
 *   current working directory (absolute paths are used as given) and must exist.
 * - A bare name is looked up with `where` (Windows) or `which` (elsewhere).
 *   The name is shell-quoted before being placed on the command line so that
 *   names with spaces or shell metacharacters cannot break or alter the command.
 *
 * @param {string} candidate - Executable name or path from the target settings.
 * @returns {Promise<string>} Path to the executable.
 * @throws {Error} When a bare name cannot be found on PATH (the underlying
 *   lookup failure, if any, is attached as `cause`), or when an explicit path
 *   does not exist (the `access` rejection propagates).
 */
async function locateExecutable(candidate) {
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
  if (includesPathSeparator) {
    const resolved = path42.isAbsolute(candidate) ? candidate : path42.resolve(candidate);
    const executablePath = await ensureWindowsExecutableVariant(resolved);
    await access22(executablePath, constants22.F_OK);
    return executablePath;
  }
  const isWindows = process.platform === "win32";
  const locator = isWindows ? "where" : "which";
  // cmd.exe: wrap in double quotes (a file name cannot contain '"', so drop any).
  // POSIX sh: wrap in single quotes and escape embedded single quotes.
  const quoted = isWindows ? `"${candidate.replace(/"/g, "")}"` : `'${candidate.replace(/'/g, "'\\''")}'`;
  let lookupError;
  try {
    const { stdout } = await execAsync22(`${locator} ${quoted}`);
    const lines = stdout.split(/\r?\n/).map((line2) => line2.trim()).filter((line2) => line2.length > 0);
    const preferred = selectExecutableCandidate(lines);
    if (preferred) {
      const executablePath = await ensureWindowsExecutableVariant(preferred);
      await access22(executablePath, constants22.F_OK);
      return executablePath;
    }
  } catch (error) {
    // A failed lookup means "not found"; keep the reason for diagnostics.
    lookupError = error;
  }
  throw new Error(`Codex executable '${candidate}' was not found on PATH`, { cause: lookupError });
}
12243
/**
 * Chooses one path from the lines printed by `where` / `which`.
 *
 * Off Windows the first entry wins. On Windows the executable extensions are
 * tried in their listed order and the first candidate ending in one of them
 * is returned; if none matches, the first entry is used.
 *
 * @param {string[]} candidates - Candidate paths, in lookup order.
 * @returns {string | undefined} The preferred path, or `undefined` for an empty list.
 */
function selectExecutableCandidate(candidates) {
  if (candidates.length === 0) {
    return void 0;
  }
  const fallback = candidates[0];
  if (process.platform === "win32") {
    for (const ext of getWindowsExecutableExtensions()) {
      const hit = candidates.find((entry) => entry.toLowerCase().endsWith(ext));
      if (hit) {
        return hit;
      }
    }
  }
  return fallback;
}
12259
/**
 * On Windows, appends the first executable extension for which the resulting
 * file exists; everywhere else (or when the path already has such an
 * extension, or no variant exists) the candidate is returned untouched.
 *
 * @param {string} candidate - Path to an executable, possibly without extension.
 * @returns {Promise<string>} The candidate or an existing `candidate + ext` variant.
 */
async function ensureWindowsExecutableVariant(candidate) {
  const needsProbe = process.platform === "win32" && !hasExecutableExtension(candidate);
  if (!needsProbe) {
    return candidate;
  }
  for (const ext of getWindowsExecutableExtensions()) {
    const withExtension = `${candidate}${ext}`;
    try {
      await access22(withExtension, constants22.F_OK);
    } catch {
      // This variant does not exist; try the next extension.
      continue;
    }
    return withExtension;
  }
  return candidate;
}
12277
/**
 * Tells whether the path ends (case-insensitively) in one of the extensions
 * reported by `getWindowsExecutableExtensions()`.
 *
 * @param {string} candidate - Path to inspect.
 * @returns {boolean}
 */
function hasExecutableExtension(candidate) {
  const normalized = candidate.toLowerCase();
  for (const ext of getWindowsExecutableExtensions()) {
    if (normalized.endsWith(ext)) {
      return true;
    }
  }
  return false;
}
12281
// Extensions used on Windows when PATHEXT is unset or yields no entries.
var DEFAULT_WINDOWS_EXTENSIONS = [".com", ".exe", ".bat", ".cmd", ".ps1"];
/**
 * Lists the lower-cased executable extensions for the current platform.
 *
 * @returns {string[]} An empty array off Windows; on Windows the `;`-separated
 *   PATHEXT entries (trimmed, lower-cased, blanks dropped) or the defaults
 *   when PATHEXT provides nothing.
 */
function getWindowsExecutableExtensions() {
  if (process.platform !== "win32") {
    return [];
  }
  const parsed = [];
  for (const piece of (process.env.PATHEXT ?? "").split(";")) {
    const ext = piece.trim().toLowerCase();
    if (ext.length > 0) {
      parsed.push(ext);
    }
  }
  return parsed.length > 0 ? parsed : DEFAULT_WINDOWS_EXTENSIONS;
}
12289
/**
 * Parses the stdout of a `--json` Codex run.
 *
 * Attempts, in order: the whole output as one JSON document; one JSON value
 * per non-empty line (returned as an array); the substring starting at the
 * last "{" as a JSON object.
 *
 * @param {string} output - Raw stdout text.
 * @returns {unknown} The parsed value (an array for line-delimited output).
 * @throws {Error} When the output is blank or none of the strategies succeeds.
 */
function parseCodexJson(output) {
  const text = output.trim();
  if (text.length === 0) {
    throw new Error("Codex CLI produced no output in --json mode");
  }
  try {
    return JSON.parse(text);
  } catch {
    // Not a single JSON document; fall through to the lenient strategies.
  }
  const lineObjects = parseJsonLines(text);
  if (lineObjects) {
    return lineObjects;
  }
  const braceAt = text.lastIndexOf("{");
  if (braceAt >= 0) {
    try {
      return JSON.parse(text.slice(braceAt));
    } catch {
      // Trailing fragment is not valid JSON either.
    }
  }
  const preview = text.slice(0, 200);
  throw new Error(`Codex CLI emitted invalid JSON: ${preview}${text.length > 200 ? "\u2026" : ""}`);
}
12313
/**
 * Pulls the assistant's reply out of a parsed Codex response.
 *
 * Sources are tried in this order and the first non-empty text wins:
 * the event stream (when `parsed` is an array), `parsed` treated as a single
 * event, the last `messages` entry with role "assistant", `response.content`,
 * and finally `output`.
 *
 * @param {unknown} parsed - Value returned by the JSON parser.
 * @returns {string} The assistant text.
 * @throws {Error} When `parsed` is not an object or no source yields text.
 */
function extractAssistantText(parsed) {
  if (Array.isArray(parsed)) {
    const streamed = extractFromEventStream(parsed);
    if (streamed) {
      return streamed;
    }
  }
  if (!parsed || typeof parsed !== "object") {
    throw new Error("Codex CLI JSON response did not include an assistant message");
  }
  const direct = extractFromEvent(parsed);
  if (direct) {
    return direct;
  }
  if (Array.isArray(parsed.messages)) {
    // Newest message first.
    for (const entry of [...parsed.messages].reverse()) {
      if (!entry || typeof entry !== "object" || entry.role !== "assistant") {
        continue;
      }
      const fromMessage = flattenContent(entry.content);
      if (fromMessage) {
        return fromMessage;
      }
    }
  }
  const { response } = parsed;
  if (response && typeof response === "object") {
    const fromResponse = flattenContent(response.content);
    if (fromResponse) {
      return fromResponse;
    }
  }
  const fromOutput = flattenContent(parsed.output);
  if (fromOutput) {
    return fromOutput;
  }
  throw new Error("Codex CLI JSON response did not include an assistant message");
}
12361
/**
 * Walks the events from last to first and returns the first text that
 * `extractFromEvent` finds.
 *
 * @param {unknown[]} events - Parsed events in emission order.
 * @returns {string | undefined} Text of the most recent event carrying any, else `undefined`.
 */
function extractFromEventStream(events) {
  let cursor = events.length;
  while (cursor-- > 0) {
    const text = extractFromEvent(events[cursor]);
    if (text) {
      return text;
    }
  }
  return void 0;
}
12371
/**
 * Extracts text from a single event object.
 *
 * An event whose `type` equals `JSONL_TYPE_ITEM_COMPLETED` is first inspected
 * through its `item`; otherwise (or when the item has no text) the event's
 * `output`, falling back to `content`, is flattened.
 *
 * @param {unknown} event - One parsed event.
 * @returns {string | undefined} Non-empty text, or `undefined`.
 */
function extractFromEvent(event) {
  if (event === null || typeof event !== "object") {
    return void 0;
  }
  const kind = typeof event.type === "string" ? event.type : void 0;
  if (kind === JSONL_TYPE_ITEM_COMPLETED) {
    const fromItem = extractFromItem(event.item);
    if (fromItem) {
      return fromItem;
    }
  }
  // An empty string counts as "no text".
  return flattenContent(event.output ?? event.content) || void 0;
}
12391
/**
 * Extracts text from an event item.
 *
 * Only items typed "agent_message", "response" or "output" are considered;
 * their `text`, then `content`, then `output` field is flattened.
 *
 * @param {unknown} item - The `item` payload of an event.
 * @returns {string | undefined} Non-empty text, or `undefined`.
 */
function extractFromItem(item) {
  if (item === null || typeof item !== "object") {
    return void 0;
  }
  const kind = typeof item.type === "string" ? item.type : void 0;
  if (!["agent_message", "response", "output"].includes(kind)) {
    return void 0;
  }
  // An empty string counts as "no text".
  return flattenContent(item.text ?? item.content ?? item.output) || void 0;
}
12405
/**
 * Reduces a content value to plain text.
 *
 * - A string is returned as is (even when empty).
 * - An array contributes each string element and each object element's string
 *   `text`; empty pieces are dropped and the rest joined with " \n".
 * - Any other object contributes its string `text` property.
 *
 * @param {unknown} value - Content in one of the shapes above.
 * @returns {string | undefined} The text, or `undefined` when none is present.
 */
function flattenContent(value) {
  if (typeof value === "string") {
    return value;
  }
  // Reads a string `text` property from an object-like node.
  const textOf = (node) => {
    if (node && typeof node === "object" && "text" in node) {
      return typeof node.text === "string" ? node.text : void 0;
    }
    return void 0;
  };
  if (!Array.isArray(value)) {
    return textOf(value);
  }
  const pieces = [];
  for (const segment of value) {
    const piece = typeof segment === "string" ? segment : textOf(segment);
    if (typeof piece === "string" && piece.length > 0) {
      pieces.push(piece);
    }
  }
  return pieces.length > 0 ? pieces.join(" \n") : void 0;
}
12428
/**
 * Parses newline-delimited JSON.
 *
 * @param {string} output - Text with one JSON value per line; blank lines are ignored.
 * @returns {unknown[] | undefined} The parsed values in order, or `undefined`
 *   when there are fewer than two non-blank lines or any line is invalid JSON.
 */
function parseJsonLines(output) {
  const rows = [];
  for (const raw of output.split(/\r?\n/)) {
    const row = raw.trim();
    if (row.length > 0) {
      rows.push(row);
    }
  }
  if (rows.length <= 1) {
    return void 0;
  }
  try {
    return rows.map((row) => JSON.parse(row));
  } catch {
    return void 0;
  }
}
12443
/**
 * Chooses the text used to explain a failed run: trimmed stderr when it has
 * content, otherwise trimmed stdout.
 *
 * @param {string} stderr - Captured standard error.
 * @param {string} stdout - Captured standard output.
 * @returns {string | undefined} The chosen text, or `undefined` when both are blank.
 */
function pickDetail(stderr, stdout) {
  for (const stream of [stderr, stdout]) {
    const text = stream.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return void 0;
}
12451
/**
 * Formats the " after Ns" suffix for timeout messages.
 *
 * @param {number | undefined} timeoutMs - Timeout in milliseconds.
 * @returns {string} " after Ns" with the seconds rounded up, or "" when the
 *   timeout is missing, zero or negative.
 */
function formatTimeoutSuffix2(timeoutMs) {
  if (timeoutMs && !(timeoutMs <= 0)) {
    const seconds = Math.ceil(timeoutMs / 1e3);
    return ` after ${seconds}s`;
  }
  return "";
}
12458
/**
 * Spawns the Codex executable, feeds it the prompt on stdin and collects its
 * output.
 *
 * @param {object} options
 * @param {string} options.executable - Program to run.
 * @param {string[]} options.args - Arguments for the program.
 * @param {string} options.cwd - Working directory of the child.
 * @param {object} options.env - Environment for the child.
 * @param {string} options.prompt - Text written to the child's stdin.
 * @param {number} [options.timeoutMs] - When positive, the child is sent SIGTERM after this many ms.
 * @param {AbortSignal} [options.signal] - When aborted, the child is sent SIGTERM.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
 *   Resolves when the child closes; `exitCode` is -1 when no numeric code is reported.
 *   Rejects with the spawn error when the child emits "error".
 */
async function defaultCodexRunner(options) {
  return await new Promise((resolve, reject) => {
    const child = spawn2(options.executable, options.args, {
      cwd: options.cwd,
      env: options.env,
      stdio: ["pipe", "pipe", "pipe"],
      shell: shouldShellExecute(options.executable)
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const onAbort = () => {
      child.kill("SIGTERM");
    };
    if (options.signal) {
      if (options.signal.aborted) {
        onAbort();
      } else {
        options.signal.addEventListener("abort", onAbort, { once: true });
      }
    }
    let timeoutHandle;
    if (options.timeoutMs && options.timeoutMs > 0) {
      timeoutHandle = setTimeout(() => {
        timedOut = true;
        child.kill("SIGTERM");
      }, options.timeoutMs);
      // Do not keep the event loop alive just for the timeout timer.
      timeoutHandle.unref?.();
    }
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    // If the child exits (or fails to spawn) before it has read the prompt,
    // the write below fails with EPIPE on the stdin stream. Without a listener
    // that becomes an unhandled "error" event and crashes the process; the
    // real outcome is still reported through the "error"/"close" handlers below.
    child.stdin.on("error", () => {
    });
    child.stdin.end(options.prompt);
    const cleanup = () => {
      if (timeoutHandle) {
        clearTimeout(timeoutHandle);
      }
      if (options.signal) {
        options.signal.removeEventListener("abort", onAbort);
      }
    };
    child.on("error", (error) => {
      cleanup();
      reject(error);
    });
    child.on("close", (code) => {
      cleanup();
      resolve({
        stdout,
        stderr,
        exitCode: typeof code === "number" ? code : -1,
        timedOut
      });
    });
  });
}
12519
/**
 * Decides whether the executable must be launched through a shell.
 *
 * @param {string} executable - Path or name of the program.
 * @returns {boolean} `true` only on Windows for `.cmd`, `.bat` and `.ps1`
 *   files (matched case-insensitively).
 */
function shouldShellExecute(executable) {
  if (process.platform !== "win32") {
    return false;
  }
  const name = executable.toLowerCase();
  return [".cmd", ".bat", ".ps1"].some((ext) => name.endsWith(ext));
}
12526
// Canned reply used when the mock target configures no `response`.
var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
/**
 * Provider that answers every request with a fixed text, optionally after an
 * artificial delay.
 */
var MockProvider = class {
  id;
  kind = "mock";
  targetName;
  cannedResponse;
  delayMs;
  delayMinMs;
  delayMaxMs;
  /**
   * @param {string} targetName - Target name; also used to build `id` as `mock:<targetName>`.
   * @param {object} config - Optional `response`, `delayMs`, `delayMinMs`, `delayMaxMs`.
   */
  constructor(targetName, config) {
    const { response, delayMs, delayMinMs, delayMaxMs } = config;
    this.id = `mock:${targetName}`;
    this.targetName = targetName;
    this.cannedResponse = response ?? DEFAULT_MOCK_RESPONSE;
    this.delayMs = delayMs ?? 0;
    this.delayMinMs = delayMinMs ?? 0;
    this.delayMaxMs = delayMaxMs ?? 0;
  }
  /**
   * Waits for the computed delay (if positive) and returns the canned text.
   *
   * @param {object} request - Its `prompt` and `guidelines` are echoed back under `raw`.
   * @returns {Promise<{text: string, raw: {prompt: unknown, guidelines: unknown}}>}
   */
  async invoke(request) {
    const waitMs = this.calculateDelay();
    if (waitMs > 0) {
      await new Promise((done) => setTimeout(done, waitMs));
    }
    const { prompt, guidelines } = request;
    return {
      text: this.cannedResponse,
      raw: { prompt, guidelines }
    };
  }
  /**
   * @returns {number} A random integer within [min, max] when either range
   *   bound is positive (max is raised to min if smaller), otherwise the
   *   fixed `delayMs`.
   */
  calculateDelay() {
    const usesRange = this.delayMinMs > 0 || this.delayMaxMs > 0;
    if (!usesRange) {
      return this.delayMs;
    }
    const lower = Math.max(0, this.delayMinMs);
    const upper = Math.max(lower, this.delayMaxMs);
    return Math.floor(Math.random() * (upper - lower + 1)) + lower;
  }
};
12565
// Placeholder names accepted inside `{...}` in CLI command templates; any other
// upper-case placeholder is rejected by assertSupportedCliPlaceholders().
+ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
12566
// Shape every target definition must satisfy before provider-specific settings
// are resolved: non-empty `name` and `provider`, plus optional `settings` map,
// `judge_target` string and `workers` integer >= 1.
// NOTE(review): `external_exports` looks like the bundled schema-validation library — confirm.
+ var BASE_TARGET_SCHEMA = external_exports.object({
12567
+ name: external_exports.string().min(1, "target name is required"),
12568
+ provider: external_exports.string().min(1, "provider is required"),
12569
+ settings: external_exports.record(external_exports.unknown()).optional(),
12570
+ judge_target: external_exports.string().optional(),
12571
+ workers: external_exports.number().int().min(1).optional()
12572
+ });
12573
// API version used when a target does not specify a usable one.
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
/**
 * Normalizes a configured Azure API version.
 *
 * Surrounding whitespace and a leading `api-version=` / `api_version=` /
 * `apiversion=` prefix (case-insensitive) are stripped.
 *
 * @param {string | undefined} value - Raw setting value.
 * @returns {string} The cleaned version, or the default when nothing remains.
 */
function normalizeAzureApiVersion(value) {
  const candidate = (value || "").trim();
  const stripped = candidate.replace(/^api[-_]?version\s*=\s*/i, "").trim();
  return stripped.length > 0 ? stripped : DEFAULT_AZURE_API_VERSION;
}
12585
/**
 * Validates a raw target definition and resolves it into a provider-specific
 * target description.
 *
 * The provider name is matched case-insensitively; several aliases map onto
 * the same kind ("azure-openai", "google", "google-gemini", "codex-cli").
 *
 * @param {unknown} definition - Raw target entry.
 * @param {object} [env=process.env] - Environment used to resolve settings.
 * @returns {{kind: string, name: string, judgeTarget: (string|undefined), workers: (number|undefined), providerBatching: unknown, config: object}}
 * @throws {Error} When the definition fails schema validation, a setting
 *   cannot be resolved, or the provider is not supported.
 */
function resolveTargetDefinition(definition, env = process.env) {
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
  const provider = parsed.provider.toLowerCase();
  const providerBatching = resolveOptionalBoolean(
    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
  );
  // Wraps a resolved provider config with the fields shared by every kind.
  const describeTarget = (kind, config) => ({
    kind,
    name: parsed.name,
    judgeTarget: parsed.judge_target,
    workers: parsed.workers,
    providerBatching,
    config
  });
  switch (provider) {
    case "azure":
    case "azure-openai":
      return describeTarget("azure", resolveAzureConfig(parsed, env));
    case "anthropic":
      return describeTarget("anthropic", resolveAnthropicConfig(parsed, env));
    case "gemini":
    case "google":
    case "google-gemini":
      return describeTarget("gemini", resolveGeminiConfig(parsed, env));
    case "codex":
    case "codex-cli":
      return describeTarget("codex", resolveCodexConfig(parsed, env));
    case "mock":
      return describeTarget("mock", resolveMockConfig(parsed));
    case "vscode":
    case "vscode-insiders":
      return describeTarget(provider, resolveVSCodeConfig(parsed, env, provider === "vscode-insiders"));
    case "cli":
      return describeTarget("cli", resolveCliConfig(parsed, env));
    default:
      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
  }
}
12664
/**
 * Resolves the Azure provider settings of a target.
 *
 * Accepted setting keys: endpoint | resource | resourceName; api_key | apiKey;
 * deployment | deploymentName | model; version | api_version; temperature;
 * max_output_tokens | maxTokens.
 *
 * @param {{name: string, settings?: object}} target - Parsed target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{resourceName: string, deploymentName: string, apiKey: string, version: string, temperature: (number|undefined), maxOutputTokens: (number|undefined)}}
 */
function resolveAzureConfig(target, env) {
  const settings = target.settings ?? {};
  const resourceName = resolveString(
    settings.endpoint ?? settings.resource ?? settings.resourceName,
    env,
    `${target.name} endpoint`
  );
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} api key`);
  const deploymentName = resolveString(
    settings.deployment ?? settings.deploymentName ?? settings.model,
    env,
    `${target.name} deployment`
  );
  const rawVersion = resolveOptionalString(
    settings.version ?? settings.api_version,
    env,
    `${target.name} api version`
  );
  const version = normalizeAzureApiVersion(rawVersion);
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  return { resourceName, deploymentName, apiKey, version, temperature, maxOutputTokens };
}
12692
/**
 * Resolves the Anthropic provider settings of a target.
 *
 * Accepted setting keys: api_key | apiKey; model | deployment | variant;
 * temperature; max_output_tokens | maxTokens; thinking_budget | thinkingBudget.
 *
 * @param {{name: string, settings?: object}} target - Parsed target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined), maxOutputTokens: (number|undefined), thinkingBudget: (number|undefined)}}
 */
function resolveAnthropicConfig(target, env) {
  const settings = target.settings ?? {};
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} Anthropic api key`);
  const model = resolveString(
    settings.model ?? settings.deployment ?? settings.variant,
    env,
    `${target.name} Anthropic model`
  );
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  const thinkingBudget = resolveOptionalNumber(
    settings.thinking_budget ?? settings.thinkingBudget,
    `${target.name} thinking budget`
  );
  return { apiKey, model, temperature, maxOutputTokens, thinkingBudget };
}
12709
/**
 * Resolves the Gemini provider settings of a target.
 *
 * Accepted setting keys: api_key | apiKey; model | deployment | variant
 * (optional, defaults to "gemini-2.5-flash"); temperature;
 * max_output_tokens | maxTokens.
 *
 * @param {{name: string, settings?: object}} target - Parsed target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined), maxOutputTokens: (number|undefined)}}
 */
function resolveGeminiConfig(target, env) {
  const settings = target.settings ?? {};
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} Google API key`);
  const configuredModel = resolveOptionalString(
    settings.model ?? settings.deployment ?? settings.variant,
    env,
    `${target.name} Gemini model`,
    { allowLiteral: true, optionalEnv: true }
  );
  const model = configuredModel ?? "gemini-2.5-flash";
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  return { apiKey, model, temperature, maxOutputTokens };
}
12727
/**
 * Resolves the Codex CLI provider settings of a target.
 *
 * Accepted setting keys: executable | command | binary (defaults to "codex");
 * args | arguments; cwd; timeout_seconds | timeoutSeconds.
 *
 * @param {{name: string, settings?: object}} target - Parsed target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{executable: string, args: (string[]|undefined), cwd: (string|undefined), timeoutMs: (number|undefined)}}
 */
function resolveCodexConfig(target, env) {
  const settings = target.settings ?? {};
  const literalOrEnv = { allowLiteral: true, optionalEnv: true };
  const configuredExecutable = resolveOptionalString(
    settings.executable ?? settings.command ?? settings.binary,
    env,
    `${target.name} codex executable`,
    { ...literalOrEnv }
  );
  const executable = configuredExecutable ?? "codex";
  const args = resolveOptionalStringArray(settings.args ?? settings.arguments, env, `${target.name} codex args`);
  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} codex cwd`, { ...literalOrEnv });
  const timeoutMs = resolveTimeoutMs(
    settings.timeout_seconds ?? settings.timeoutSeconds,
    `${target.name} codex timeout`
  );
  return { executable, args, cwd, timeoutMs };
}
12750
/**
 * Resolves the mock provider settings of a target.
 *
 * @param {{settings?: object}} target - Parsed target definition.
 * @returns {{response: (string|undefined)}} `settings.response` when it is a
 *   string, otherwise `undefined`.
 */
function resolveMockConfig(target) {
  const candidate = (target.settings ?? {}).response;
  return { response: typeof candidate === "string" ? candidate : void 0 };
}
12755
+ function resolveVSCodeConfig(target, env, insiders) {
12756
+ const settings = target.settings ?? {};
11856
12757
  const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
11857
12758
  const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
11858
12759
  allowLiteral: false,
@@ -11875,6 +12776,125 @@ function resolveVSCodeConfig(target, env, insiders) {
11875
12776
  workspaceTemplate
11876
12777
  };
11877
12778
  }
12779
/**
 * Resolves the generic CLI provider settings of a target.
 *
 * Accepted setting keys: command_template | commandTemplate (required);
 * files_format | filesFormat | attachments_format | attachmentsFormat; cwd;
 * env; timeout_seconds | timeoutSeconds; healthcheck.
 *
 * @param {{name: string, settings?: object}} target - Parsed target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{commandTemplate: string, filesFormat: (string|undefined), cwd: (string|undefined), env: (object|undefined), timeoutMs: (number|undefined), healthcheck: (object|undefined)}}
 * @throws {Error} When a setting is invalid or the command template uses an
 *   unsupported placeholder.
 */
function resolveCliConfig(target, env) {
  const settings = target.settings ?? {};
  const templateSetting = settings.command_template ?? settings.commandTemplate;
  const formatSetting = settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat;
  const timeoutSetting = settings.timeout_seconds ?? settings.timeoutSeconds;
  const filesFormat = resolveOptionalLiteralString(formatSetting);
  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
    allowLiteral: true,
    optionalEnv: true
  });
  const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
  const timeoutMs = resolveTimeoutMs(timeoutSetting, `${target.name} timeout`);
  const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
  // The template is validated last, after every optional setting resolved.
  const commandTemplate = resolveString(templateSetting, env, `${target.name} CLI command template`, true);
  assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
  return { commandTemplate, filesFormat, cwd, env: envOverrides, timeoutMs, healthcheck };
}
12808
/**
 * Resolves the `env` map of a CLI target into concrete string values.
 *
 * @param {unknown} source2 - Raw `env` setting; must be a plain object of strings when present.
 * @param {object} env - Environment used to resolve each value.
 * @param {string} targetName - Target name used in error messages.
 * @returns {Object<string, string> | undefined} The resolved map, or
 *   `undefined` when the setting is absent or ends up empty.
 * @throws {Error} When the setting is not an object or a value is not a string.
 */
function resolveEnvOverrides(source2, env, targetName) {
  if (source2 == null) {
    return void 0;
  }
  const isPlainMap = typeof source2 === "object" && !Array.isArray(source2);
  if (!isPlainMap) {
    throw new Error(`${targetName} env overrides must be an object map of strings`);
  }
  const resolved = {};
  Object.entries(source2).forEach(([key2, value]) => {
    if (typeof value !== "string") {
      throw new Error(`${targetName} env override '${key2}' must be a string`);
    }
    resolved[key2] = resolveString(value, env, `${targetName} env override '${key2}'`);
  });
  return Object.keys(resolved).length > 0 ? resolved : void 0;
}
12826
/**
 * Converts a timeout setting expressed in seconds into whole milliseconds.
 *
 * @param {unknown} source2 - Raw timeout setting (seconds).
 * @param {string} description - Label used in error messages.
 * @returns {number | undefined} Milliseconds (at least 1), or `undefined` when
 *   no timeout is configured.
 * @throws {Error} When the configured value is zero or negative.
 */
function resolveTimeoutMs(source2, description) {
  const seconds = resolveOptionalNumber(source2, `${description} (seconds)`);
  if (seconds === void 0) {
    return void 0;
  }
  if (seconds <= 0) {
    throw new Error(`${description} must be greater than zero seconds`);
  }
  // A positive sub-millisecond value would floor to 0, which the runner treats
  // as "no timeout"; clamp so an explicitly configured timeout is never dropped.
  return Math.max(1, Math.floor(seconds * 1e3));
}
12836
/**
 * Resolves the optional `healthcheck` setting of a CLI target.
 *
 * Supported shapes: `{type: "http", url, timeout_seconds?}` and
 * `{type: "command", command_template, cwd?, timeout_seconds?}` (camelCase
 * spellings of the keys are accepted too).
 *
 * @param {unknown} source2 - Raw healthcheck setting.
 * @param {object} env - Environment used to resolve string settings.
 * @param {string} targetName - Target name used in error messages.
 * @returns {object | undefined} The resolved healthcheck, or `undefined` when absent.
 * @throws {Error} When the setting is not an object, its type is unknown, or
 *   one of its fields is invalid.
 */
function resolveCliHealthcheck(source2, env, targetName) {
  if (source2 == null) {
    return void 0;
  }
  if (typeof source2 !== "object" || Array.isArray(source2)) {
    throw new Error(`${targetName} healthcheck must be an object`);
  }
  const { type } = source2;
  const timeoutMs = resolveTimeoutMs(
    source2.timeout_seconds ?? source2.timeoutSeconds,
    `${targetName} healthcheck timeout`
  );
  switch (type) {
    case "http": {
      const url = resolveString(source2.url, env, `${targetName} healthcheck URL`);
      return { type: "http", url, timeoutMs };
    }
    case "command": {
      const commandTemplate = resolveString(
        source2.command_template ?? source2.commandTemplate,
        env,
        `${targetName} healthcheck command template`,
        true
      );
      assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
      const cwd = resolveOptionalString(source2.cwd, env, `${targetName} healthcheck cwd`, {
        allowLiteral: true,
        optionalEnv: true
      });
      return { type: "command", commandTemplate, timeoutMs, cwd };
    }
    default:
      throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
  }
}
12878
/**
 * Verifies that every `{NAME}` placeholder in a command template is one of
 * the supported CLI placeholders.
 *
 * @param {string} template - Command template to check.
 * @param {string} description - Label used in the error message.
 * @throws {Error} For the first placeholder that is not in `CLI_PLACEHOLDERS`.
 */
function assertSupportedCliPlaceholders(template, description) {
  const placeholder = extractCliPlaceholders(template).find((name) => !CLI_PLACEHOLDERS.has(name));
  if (placeholder === void 0) {
    return;
  }
  throw new Error(
    `${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
  );
}
12888
/**
 * Lists the names of all `{NAME}` placeholders in a template, where NAME
 * consists solely of upper-case ASCII letters and underscores.
 *
 * @param {string} template - Command template to scan.
 * @returns {string[]} Placeholder names in order of appearance (duplicates kept).
 */
function extractCliPlaceholders(template) {
  const placeholderPattern = /\{([A-Z_]+)\}/g;
  return Array.from(template.matchAll(placeholderPattern), (match) => match[1]).filter(Boolean);
}
11878
12898
  function resolveString(source2, env, description, allowLiteral = false) {
11879
12899
  const value = resolveOptionalString(source2, env, description, {
11880
12900
  allowLiteral,
@@ -11905,11 +12925,14 @@ function resolveOptionalString(source2, env, description, options) {
11905
12925
  }
11906
12926
  const allowLiteral = options?.allowLiteral ?? false;
11907
12927
  const optionalEnv = options?.optionalEnv ?? false;
11908
- if (!allowLiteral && isLikelyEnvReference(trimmed)) {
12928
+ const looksLikeEnv = isLikelyEnvReference(trimmed);
12929
+ if (looksLikeEnv) {
11909
12930
  if (optionalEnv) {
11910
12931
  return void 0;
11911
12932
  }
11912
- throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
12933
+ if (!allowLiteral) {
12934
+ throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
12935
+ }
11913
12936
  }
11914
12937
  return trimmed;
11915
12938
  }
@@ -11959,6 +12982,38 @@ function resolveOptionalBoolean(source2) {
11959
12982
  function isLikelyEnvReference(value) {
11960
12983
  return /^[A-Z0-9_]+$/.test(value);
11961
12984
  }
12985
/**
 * Resolves an optional array-of-strings setting.
 *
 * Each element is trimmed; when the trimmed text is the name of a variable
 * defined in `env`, the variable's value is used instead, otherwise the
 * trimmed text itself is kept.
 *
 * @param {unknown} source2 - Raw setting value.
 * @param {object} env - Environment consulted for each element.
 * @param {string} description - Label used in error messages.
 * @returns {string[] | undefined} Resolved values, or `undefined` when the
 *   setting is absent or an empty array.
 * @throws {Error} When the setting is not an array, an element is not a
 *   non-blank string, or a referenced variable is blank.
 */
function resolveOptionalStringArray(source2, env, description) {
  if (source2 == null) {
    return void 0;
  }
  if (!Array.isArray(source2)) {
    throw new Error(`${description} must be an array of strings`);
  }
  if (source2.length === 0) {
    return void 0;
  }
  const resolved = [];
  for (const [i6, item] of source2.entries()) {
    if (typeof item !== "string") {
      throw new Error(`${description}[${i6}] must be a string`);
    }
    const trimmed = item.trim();
    if (trimmed.length === 0) {
      throw new Error(`${description}[${i6}] cannot be empty`);
    }
    const envValue = env[trimmed];
    if (envValue === void 0) {
      resolved.push(trimmed);
      continue;
    }
    if (envValue.trim().length === 0) {
      throw new Error(`Environment variable '${trimmed}' for ${description}[${i6}] is empty`);
    }
    resolved.push(envValue);
  }
  return resolved.length > 0 ? resolved : void 0;
}
11962
13017
  var VSCodeProvider = class {
11963
13018
  id;
11964
13019
  kind;
@@ -11975,12 +13030,11 @@ var VSCodeProvider = class {
11975
13030
  if (request.signal?.aborted) {
11976
13031
  throw new Error("VS Code provider request was aborted before dispatch");
11977
13032
  }
11978
- const attachments = normalizeAttachments(request.attachments);
11979
- const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
13033
+ const inputFiles = normalizeAttachments(request.inputFiles);
13034
+ const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
11980
13035
  const session = await dispatchAgentSession({
11981
13036
  userQuery: promptContent,
11982
- // Use full prompt content instead of just request.prompt
11983
- extraAttachments: attachments,
13037
+ extraAttachments: inputFiles,
11984
13038
  wait: this.config.waitForResponse,
11985
13039
  dryRun: this.config.dryRun,
11986
13040
  vscodeCmd: this.config.command,
@@ -11997,7 +13051,7 @@ var VSCodeProvider = class {
11997
13051
  text: "",
11998
13052
  raw: {
11999
13053
  session,
12000
- attachments
13054
+ inputFiles
12001
13055
  }
12002
13056
  };
12003
13057
  }
@@ -12006,7 +13060,7 @@ var VSCodeProvider = class {
12006
13060
  text: responseText,
12007
13061
  raw: {
12008
13062
  session,
12009
- attachments
13063
+ inputFiles
12010
13064
  }
12011
13065
  };
12012
13066
  }
@@ -12016,17 +13070,17 @@ var VSCodeProvider = class {
12016
13070
  }
12017
13071
  const normalizedRequests = requests.map((req) => ({
12018
13072
  request: req,
12019
- attachments: normalizeAttachments(req.attachments)
13073
+ inputFiles: normalizeAttachments(req.inputFiles)
12020
13074
  }));
12021
- const combinedAttachments = mergeAttachments(
12022
- normalizedRequests.map(({ attachments }) => attachments)
13075
+ const combinedInputFiles = mergeAttachments(
13076
+ normalizedRequests.map(({ inputFiles }) => inputFiles)
12023
13077
  );
12024
13078
  const userQueries = normalizedRequests.map(
12025
- ({ request, attachments }) => buildPromptDocument(request, attachments, request.guideline_patterns)
13079
+ ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
12026
13080
  );
12027
13081
  const session = await dispatchBatchAgent({
12028
13082
  userQueries,
12029
- extraAttachments: combinedAttachments,
13083
+ extraAttachments: combinedInputFiles,
12030
13084
  wait: this.config.waitForResponse,
12031
13085
  dryRun: this.config.dryRun,
12032
13086
  vscodeCmd: this.config.command,
@@ -12039,12 +13093,12 @@ var VSCodeProvider = class {
12039
13093
  throw new Error(failure);
12040
13094
  }
12041
13095
  if (this.config.dryRun) {
12042
- return normalizedRequests.map(({ attachments }) => ({
13096
+ return normalizedRequests.map(({ inputFiles }) => ({
12043
13097
  text: "",
12044
13098
  raw: {
12045
13099
  session,
12046
- attachments,
12047
- allAttachments: combinedAttachments
13100
+ inputFiles,
13101
+ allInputFiles: combinedInputFiles
12048
13102
  }
12049
13103
  }));
12050
13104
  }
@@ -12060,8 +13114,8 @@ var VSCodeProvider = class {
12060
13114
  text: responseText,
12061
13115
  raw: {
12062
13116
  session,
12063
- attachments: normalizedRequests[index]?.attachments,
12064
- allAttachments: combinedAttachments,
13117
+ inputFiles: normalizedRequests[index]?.inputFiles,
13118
+ allInputFiles: combinedInputFiles,
12065
13119
  responseFile
12066
13120
  }
12067
13121
  });
@@ -12069,27 +13123,27 @@ var VSCodeProvider = class {
12069
13123
  return responses;
12070
13124
  }
12071
13125
  };
12072
- function buildPromptDocument(request, attachments, guidelinePatterns) {
13126
/**
 * Assembles the prompt text sent to the VS Code agent.
 *
 * The document consists of an optional pre-read block (built from the
 * guideline files and the remaining input files) followed by the
 * `user_query` section holding the trimmed request prompt.
 *
 * @param {{ prompt: string }} request - Provider request; only `prompt` is read here.
 * @param {string[] | undefined} attachments - Input file paths for the request.
 * @param {unknown} guidelinePatterns - Forwarded untouched to `collectGuidelineFiles2`.
 * @returns {string} The assembled, trimmed prompt document.
 */
function buildPromptDocument2(request, attachments, guidelinePatterns) {
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
  const guidelineLookup = new Set(guidelineFiles);
  // Every input file that is not already listed as a guideline.
  const remainingFiles = collectAttachmentFiles(attachments).filter(
    (candidate) => !guidelineLookup.has(candidate)
  );
  const preread = buildMandatoryPrereadBlock2(guidelineFiles, remainingFiles);
  const segments = preread.length > 0 ? ["\n", preread] : [];
  segments.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
  return segments.join("\n").trim();
}
12086
- function buildMandatoryPrereadBlock(guidelineFiles, attachmentFiles) {
13140
+ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
12087
13141
  if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
12088
13142
  return "";
12089
13143
  }
12090
13144
  const buildList = (files) => files.map((absolutePath) => {
12091
- const fileName = path22.basename(absolutePath);
12092
- const fileUri = pathToFileUri2(absolutePath);
13145
+ const fileName = path52.basename(absolutePath);
13146
+ const fileUri = pathToFileUri22(absolutePath);
12093
13147
  return `* [${fileName}](${fileUri})`;
12094
13148
  });
12095
13149
  const sections = [];
@@ -12107,14 +13161,14 @@ ${buildList(attachmentFiles).join("\n")}.`);
12107
13161
  );
12108
13162
  return sections.join("\n");
12109
13163
  }
12110
- function collectGuidelineFiles(attachments, guidelinePatterns) {
13164
+ function collectGuidelineFiles2(attachments, guidelinePatterns) {
12111
13165
  if (!attachments || attachments.length === 0) {
12112
13166
  return [];
12113
13167
  }
12114
13168
  const unique = /* @__PURE__ */ new Map();
12115
13169
  for (const attachment of attachments) {
12116
- const absolutePath = path22.resolve(attachment);
12117
- const normalized = absolutePath.split(path22.sep).join("/");
13170
+ const absolutePath = path52.resolve(attachment);
13171
+ const normalized = absolutePath.split(path52.sep).join("/");
12118
13172
  if (isGuidelineFile(normalized, guidelinePatterns)) {
12119
13173
  if (!unique.has(absolutePath)) {
12120
13174
  unique.set(absolutePath, absolutePath);
@@ -12129,15 +13183,15 @@ function collectAttachmentFiles(attachments) {
12129
13183
  }
12130
13184
  const unique = /* @__PURE__ */ new Map();
12131
13185
  for (const attachment of attachments) {
12132
- const absolutePath = path22.resolve(attachment);
13186
+ const absolutePath = path52.resolve(attachment);
12133
13187
  if (!unique.has(absolutePath)) {
12134
13188
  unique.set(absolutePath, absolutePath);
12135
13189
  }
12136
13190
  }
12137
13191
  return Array.from(unique.values());
12138
13192
  }
12139
- function pathToFileUri2(filePath) {
12140
- const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
13193
+ function pathToFileUri22(filePath) {
13194
+ const absolutePath = path52.isAbsolute(filePath) ? filePath : path52.resolve(filePath);
12141
13195
  const normalizedPath = absolutePath.replace(/\\/g, "/");
12142
13196
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
12143
13197
  return `file:///${normalizedPath}`;
@@ -12150,7 +13204,7 @@ function normalizeAttachments(attachments) {
12150
13204
  }
12151
13205
  const deduped = /* @__PURE__ */ new Set();
12152
13206
  for (const attachment of attachments) {
12153
- deduped.add(path22.resolve(attachment));
13207
+ deduped.add(path52.resolve(attachment));
12154
13208
  }
12155
13209
  return Array.from(deduped);
12156
13210
  }
@@ -12158,8 +13212,8 @@ function mergeAttachments(all) {
12158
13212
  const deduped = /* @__PURE__ */ new Set();
12159
13213
  for (const list of all) {
12160
13214
  if (!list) continue;
12161
- for (const attachment of list) {
12162
- deduped.add(path22.resolve(attachment));
13215
+ for (const inputFile of list) {
13216
+ deduped.add(path52.resolve(inputFile));
12163
13217
  }
12164
13218
  }
12165
13219
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -12256,14 +13310,14 @@ function assertTargetDefinition(value, index, filePath) {
12256
13310
  }
12257
13311
  async function fileExists3(filePath) {
12258
13312
  try {
12259
- await access22(filePath, constants22.F_OK);
13313
+ await access32(filePath, constants32.F_OK);
12260
13314
  return true;
12261
13315
  } catch {
12262
13316
  return false;
12263
13317
  }
12264
13318
  }
12265
13319
  async function readTargetDefinitions(filePath) {
12266
- const absolutePath = path32.resolve(filePath);
13320
+ const absolutePath = path62.resolve(filePath);
12267
13321
  if (!await fileExists3(absolutePath)) {
12268
13322
  throw new Error(`targets.yaml not found at ${absolutePath}`);
12269
13323
  }
@@ -12288,233 +13342,43 @@ function createProvider(target) {
12288
13342
  return new AnthropicProvider(target.name, target.config);
12289
13343
  case "gemini":
12290
13344
  return new GeminiProvider(target.name, target.config);
13345
+ case "cli":
13346
+ return new CliProvider(target.name, target.config);
13347
+ case "codex":
13348
+ return new CodexProvider(target.name, target.config);
12291
13349
  case "mock":
12292
13350
  return new MockProvider(target.name, target.config);
12293
13351
  case "vscode":
12294
- case "vscode-insiders":
12295
- return new VSCodeProvider(target.name, target.config, target.kind);
12296
- default: {
12297
- const neverTarget = target;
12298
- throw new Error(`Unsupported provider kind ${neverTarget.kind}`);
12299
- }
12300
- }
12301
- }
12302
- var KEY_TERM_MATCH_THRESHOLD = 0.5;
12303
- var ACTION_WORDS = /* @__PURE__ */ new Set([
12304
- "use",
12305
- "avoid",
12306
- "prefer",
12307
- "replace",
12308
- "consider",
12309
- "ensure",
12310
- "remove",
12311
- "add"
12312
- ]);
12313
- var STOP_WORDS = /* @__PURE__ */ new Set([
12314
- "the",
12315
- "a",
12316
- "an",
12317
- "and",
12318
- "or",
12319
- "but",
12320
- "in",
12321
- "on",
12322
- "at",
12323
- "to",
12324
- "for",
12325
- "of",
12326
- "with",
12327
- "by",
12328
- "is",
12329
- "are",
12330
- "was",
12331
- "were",
12332
- "be",
12333
- "been",
12334
- "being",
12335
- "have",
12336
- "has",
12337
- "had",
12338
- "do",
12339
- "does",
12340
- "did",
12341
- "will",
12342
- "would",
12343
- "could",
12344
- "should"
12345
- ]);
12346
- var ERROR_PREFIXES = [
12347
- "error:",
12348
- "err:",
12349
- "vs code command failed",
12350
- "exception",
12351
- "traceback",
12352
- "no response file was generated",
12353
- "timed out",
12354
- "cli not found"
12355
- ];
12356
- function extractAspects(expectedResponse) {
12357
- const lines = expectedResponse.split(/\r?\n/).map((line2) => line2.trim());
12358
- const aspects = [];
12359
- for (const line2 of lines) {
12360
- if (line2.length === 0) {
12361
- continue;
12362
- }
12363
- const bulletMatch = /^([-*•]|[0-9]+\.)\s*(.+)$/.exec(line2);
12364
- if (bulletMatch) {
12365
- const normalized = normalizeAspect(bulletMatch[2]);
12366
- if (normalized.length > 0) {
12367
- aspects.push(normalized);
12368
- }
12369
- continue;
12370
- }
12371
- const lowered = line2.toLowerCase();
12372
- if (Array.from(ACTION_WORDS).some((word) => lowered.startsWith(word))) {
12373
- const normalized = normalizeAspect(line2);
12374
- if (normalized.length > 0) {
12375
- aspects.push(normalized);
12376
- }
12377
- }
12378
- }
12379
- return aspects;
12380
- }
12381
- function calculateHits(candidateResponse, expectedAspects) {
12382
- const { normalizedText, words } = normalizeCandidate(candidateResponse);
12383
- const hits = [];
12384
- for (const aspect of expectedAspects) {
12385
- if (matchesAspect(aspect, normalizedText, words)) {
12386
- hits.push(aspect);
12387
- }
12388
- }
12389
- return hits;
12390
- }
12391
- function scoreCandidateResponse(candidateResponse, expectedAspects) {
12392
- if (expectedAspects.length === 0) {
12393
- if (isErrorLike(candidateResponse)) {
12394
- return {
12395
- score: 0,
12396
- hits: [],
12397
- misses: ["Model produced an error instead of an answer."],
12398
- hitCount: 0,
12399
- totalAspects: 0,
12400
- rawAspects: []
12401
- };
12402
- }
12403
- return {
12404
- score: 1,
12405
- hits: [],
12406
- misses: [],
12407
- hitCount: 0,
12408
- totalAspects: 0,
12409
- rawAspects: []
12410
- };
12411
- }
12412
- const hits = calculateHits(candidateResponse, expectedAspects);
12413
- const misses = expectedAspects.filter((aspect) => !hits.includes(aspect));
12414
- const score = expectedAspects.length > 0 ? hits.length / expectedAspects.length : 0;
12415
- return {
12416
- score,
12417
- hits,
12418
- misses,
12419
- hitCount: hits.length,
12420
- totalAspects: expectedAspects.length,
12421
- rawAspects: expectedAspects
12422
- };
12423
- }
12424
- function isErrorLike(text) {
12425
- if (!text) {
12426
- return false;
12427
- }
12428
- const lowered = text.trim().toLowerCase();
12429
- return ERROR_PREFIXES.some((prefix) => lowered.startsWith(prefix));
12430
- }
12431
- function normalizeAspect(aspect) {
12432
- const sanitized = aspect.toLowerCase().replace(/[^\w\s]/g, " ").replace(/\s+/g, " ").trim();
12433
- return sanitized;
12434
- }
12435
- function normalizeCandidate(candidate) {
12436
- const lowered = candidate.toLowerCase();
12437
- const normalizedText = lowered.replace(/[^\w\s]/g, " ");
12438
- const words = new Set(normalizedText.split(/\s+/).filter((word) => word.length > 0));
12439
- return { normalizedText, words };
12440
- }
12441
- function matchesAspect(aspect, candidateNormalized, candidateWords) {
12442
- const keyTerms = extractKeyTerms(aspect);
12443
- if (keyTerms.length === 0) {
12444
- return false;
12445
- }
12446
- const matches = keyTerms.filter((term) => candidateWords.has(term)).length;
12447
- const ratio = matches / keyTerms.length;
12448
- if (ratio >= KEY_TERM_MATCH_THRESHOLD) {
12449
- return true;
12450
- }
12451
- const aspectWords = aspect.split(" ");
12452
- if (aspectWords.length >= 2) {
12453
- for (let index = 0; index < aspectWords.length - 1; index += 1) {
12454
- const phrase = `${aspectWords[index]} ${aspectWords[index + 1]}`;
12455
- if (candidateNormalized.includes(phrase)) {
12456
- return true;
12457
- }
12458
- }
12459
- }
12460
- return false;
12461
- }
12462
- function extractKeyTerms(aspect, maxTerms = 5) {
12463
- const terms = [];
12464
- const words = aspect.split(" ");
12465
- for (const word of words) {
12466
- if (word.length <= 2) {
12467
- continue;
12468
- }
12469
- if (STOP_WORDS.has(word)) {
12470
- continue;
12471
- }
12472
- terms.push(word);
12473
- if (terms.length >= maxTerms) {
12474
- break;
13352
+ case "vscode-insiders":
13353
+ return new VSCodeProvider(target.name, target.config, target.kind);
13354
+ default: {
13355
+ const neverTarget = target;
13356
+ throw new Error(`Unsupported provider kind ${neverTarget.kind}`);
12475
13357
  }
12476
13358
  }
12477
- return terms;
12478
13359
  }
12479
- var HeuristicGrader = class {
12480
- kind = "heuristic";
12481
- grade(context2) {
12482
- const expectedAspects = extractAspects(context2.evalCase.expected_assistant_raw);
12483
- const result = scoreCandidateResponse(context2.candidate, expectedAspects);
12484
- const misses = [...result.misses];
12485
- if (expectedAspects.length === 0 && isErrorLike(context2.candidate)) {
12486
- const firstLine = context2.candidate.split(/\r?\n/)[0]?.trim();
12487
- if (firstLine && !misses.includes(firstLine)) {
12488
- misses.unshift(firstLine);
12489
- }
12490
- }
12491
- return {
12492
- score: result.score,
12493
- hits: result.hits,
12494
- misses,
12495
- expectedAspectCount: result.totalAspects,
12496
- rawAspects: result.rawAspects
12497
- };
12498
- }
12499
- };
12500
- var QualityGrader = class {
13360
+ var LlmJudgeEvaluator = class {
12501
13361
  kind = "llm_judge";
12502
13362
  resolveJudgeProvider;
12503
13363
  maxOutputTokens;
12504
13364
  temperature;
13365
+ customPrompt;
12505
13366
  constructor(options) {
12506
13367
  this.resolveJudgeProvider = options.resolveJudgeProvider;
12507
13368
  this.maxOutputTokens = options.maxOutputTokens;
12508
13369
  this.temperature = options.temperature;
13370
+ this.customPrompt = options.customPrompt;
12509
13371
  }
12510
- async grade(context2) {
13372
+ async evaluate(context2) {
12511
13373
  const judgeProvider = await this.resolveJudgeProvider(context2);
12512
13374
  if (!judgeProvider) {
12513
13375
  throw new Error("No judge provider available for LLM grading");
12514
13376
  }
12515
13377
  const prompt = buildQualityPrompt(context2.evalCase, context2.candidate);
13378
+ const systemPrompt = context2.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
12516
13379
  const metadata = {
12517
- systemPrompt: QUALITY_SYSTEM_PROMPT
13380
+ ...systemPrompt !== void 0 ? { systemPrompt } : {},
13381
+ ...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
12518
13382
  };
12519
13383
  const response = await judgeProvider.invoke({
12520
13384
  prompt,
@@ -12529,12 +13393,13 @@ var QualityGrader = class {
12529
13393
  const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
12530
13394
  const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
12531
13395
  const reasoning = parsed.reasoning ?? response.reasoning;
12532
- const graderRawRequest = {
13396
+ const evaluatorRawRequest = {
12533
13397
  id: randomUUID(),
12534
13398
  provider: judgeProvider.id,
12535
13399
  prompt,
12536
- systemPrompt: QUALITY_SYSTEM_PROMPT,
12537
- target: context2.target.name
13400
+ target: context2.target.name,
13401
+ ...systemPrompt !== void 0 ? { systemPrompt } : {},
13402
+ ...context2.judgeModel !== void 0 ? { model: context2.judgeModel } : {}
12538
13403
  };
12539
13404
  return {
12540
13405
  score,
@@ -12542,7 +13407,7 @@ var QualityGrader = class {
12542
13407
  misses,
12543
13408
  expectedAspectCount: hits.length + misses.length || 1,
12544
13409
  reasoning,
12545
- graderRawRequest
13410
+ evaluatorRawRequest
12546
13411
  };
12547
13412
  }
12548
13413
  };
@@ -12660,6 +13525,112 @@ function extractJsonBlob(text) {
12660
13525
  function isNonEmptyString(value) {
12661
13526
  return typeof value === "string" && value.trim().length > 0;
12662
13527
  }
13528
/**
 * Evaluator that delegates scoring to an external script.
 *
 * The script receives a JSON document on stdin (task, outcome, expected
 * answer, candidate output, system message, guideline paths, attachments
 * and user segments) and its stdout is parsed as JSON with optional
 * `score`, `hits`, `misses` and `reasoning` fields.
 */
var CodeEvaluator = class {
  kind = "code";
  script;
  cwd;
  agentTimeoutMs;
  /**
   * @param {{ script: string, cwd?: string, agentTimeoutMs?: number }} options
   */
  constructor(options) {
    const { script, cwd, agentTimeoutMs } = options;
    this.script = script;
    this.cwd = cwd;
    this.agentTimeoutMs = agentTimeoutMs;
  }
  /**
   * Runs the script for one candidate answer.
   *
   * Script failures never reject: they are reported as a zero score whose
   * `misses` and `reasoning` carry the error message.
   *
   * @param {object} context2 - Evaluation context; reads `evalCase`, `candidate` and `promptInputs`.
   * @returns {Promise<object>} Score record including `evaluatorRawRequest`.
   */
  async evaluate(context2) {
    const { evalCase } = context2;
    const scriptInput = {
      task: evalCase.task,
      outcome: evalCase.outcome,
      expected: evalCase.expected_assistant_raw,
      output: context2.candidate,
      system_message: context2.promptInputs.systemMessage ?? "",
      guideline_paths: evalCase.guideline_paths,
      attachments: evalCase.file_paths,
      user_segments: evalCase.user_segments
    };
    const inputPayload = JSON.stringify(scriptInput, null, 2);
    // Describes the invocation for the result record; `cwd` only appears when set.
    const describeRun = (extra) => ({
      script: this.script,
      ...this.cwd ? { cwd: this.cwd } : {},
      ...extra
    });
    const keepStrings = (value) => Array.isArray(value) ? value.filter(isNonEmptyString) : [];
    try {
      const stdout = await executeScript(this.script, inputPayload, this.agentTimeoutMs, this.cwd);
      const verdict = parseJsonSafe(stdout);
      // A missing or non-numeric score counts as 0 before clamping.
      const score = clampScore(typeof verdict?.score === "number" ? verdict.score : 0);
      const hits = keepStrings(verdict?.hits);
      const misses = keepStrings(verdict?.misses);
      const reasoning = typeof verdict?.reasoning === "string" ? verdict.reasoning : void 0;
      return {
        score,
        hits,
        misses,
        expectedAspectCount: hits.length + misses.length || 1,
        reasoning,
        evaluatorRawRequest: describeRun()
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return {
        score: 0,
        hits: [],
        misses: [`Code evaluator failed: ${message}`],
        expectedAspectCount: 1,
        reasoning: message,
        evaluatorRawRequest: describeRun({ error: message })
      };
    }
  }
};
13588
/**
 * Runs an evaluator script through the shell, writes `input` to its stdin and
 * resolves with the script's trimmed stdout.
 *
 * @param {string} scriptPath - Command line handed to the shell.
 * @param {string} input - Payload written to the script's stdin.
 * @param {number} [agentTimeoutMs] - Optional timeout; a falsy value disables it.
 * @param {string} [cwd] - Optional working directory for the script.
 * @returns {Promise<string>} Trimmed stdout of the script.
 * @throws {Error} When the script cannot be spawned, times out, or exits with a
 *   non-zero code after writing to stderr.
 */
async function executeScript(scriptPath, input, agentTimeoutMs, cwd) {
  const { spawn: spawn22 } = await import("node:child_process");
  return await new Promise((resolve, reject) => {
    const child = spawn22(scriptPath, {
      shell: true,
      cwd
    });
    let stdout = "";
    let stderr = "";
    let settled = false;
    let timeout;
    // Guarantees the promise is settled exactly once and the timer is released.
    const settle = (finish) => {
      if (settled) {
        return;
      }
      settled = true;
      if (timeout !== void 0) {
        clearTimeout(timeout);
      }
      finish();
    };
    if (agentTimeoutMs) {
      timeout = setTimeout(() => {
        settle(() => {
          child.kill();
          // With `shell: true` the kill only reaches the shell; a surviving
          // grandchild could keep the pipes (and this process) alive.
          child.stdout?.destroy();
          child.stderr?.destroy();
          reject(new Error(`Code evaluator timed out after ${agentTimeoutMs}ms`));
        });
      }, agentTimeoutMs);
    }
    // Decode in the stream so multi-byte characters split across chunks survive.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr?.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", (error) => {
      settle(() => reject(error));
    });
    // "close" fires only after the stdio streams have ended, so the collected
    // output is complete; "exit" may fire while data is still buffered.
    child.on("close", (code) => {
      settle(() => {
        if (code !== null && code !== 0 && stderr.length > 0) {
          reject(new Error(`Code evaluator exited with code ${code}: ${stderr.trim()}`));
          return;
        }
        resolve(stdout.trim());
      });
    });
    // A script that exits without reading stdin raises EPIPE on this stream.
    // That is not a failure by itself: the exit code and stdout decide.
    child.stdin?.on("error", () => {
    });
    child.stdin?.end(input);
  });
}
13627
/**
 * Parses `payload` as JSON without throwing.
 *
 * @param {string} payload - Text to parse.
 * @returns {unknown} The parsed value, or `undefined` when parsing fails.
 */
function parseJsonSafe(payload) {
  let decoded;
  try {
    decoded = JSON.parse(payload);
  } catch {
    // Malformed output is reported as "no value" rather than as an error.
    decoded = void 0;
  }
  return decoded;
}
12663
13634
  var Node = class {
12664
13635
  value;
12665
13636
  next;
@@ -12800,7 +13771,7 @@ async function runEvaluation(options) {
12800
13771
  targets,
12801
13772
  env,
12802
13773
  providerFactory,
12803
- graders,
13774
+ evaluators,
12804
13775
  maxRetries,
12805
13776
  agentTimeoutMs,
12806
13777
  promptDumpDir,
@@ -12859,7 +13830,7 @@ async function runEvaluation(options) {
12859
13830
  }
12860
13831
  return getOrCreateProvider(resolvedJudge);
12861
13832
  };
12862
- const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
13833
+ const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
12863
13834
  const primaryProvider = getOrCreateProvider(target);
12864
13835
  const providerSupportsBatch = target.providerBatching === true && primaryProvider.supportsBatch === true && typeof primaryProvider.invokeBatch === "function";
12865
13836
  if (target.providerBatching && !providerSupportsBatch && verbose) {
@@ -12882,13 +13853,14 @@ async function runEvaluation(options) {
12882
13853
  evalCases: filteredEvalCases,
12883
13854
  provider: primaryProvider,
12884
13855
  target,
12885
- graderRegistry,
13856
+ evaluatorRegistry,
12886
13857
  promptDumpDir,
12887
13858
  nowFn: now ?? (() => /* @__PURE__ */ new Date()),
12888
13859
  onProgress,
12889
13860
  onResult,
12890
13861
  verbose,
12891
- resolveJudgeProvider
13862
+ resolveJudgeProvider,
13863
+ agentTimeoutMs
12892
13864
  });
12893
13865
  } catch (error) {
12894
13866
  if (verbose) {
@@ -12919,7 +13891,7 @@ async function runEvaluation(options) {
12919
13891
  evalCase,
12920
13892
  provider: primaryProvider,
12921
13893
  target,
12922
- graders: graderRegistry,
13894
+ evaluators: evaluatorRegistry,
12923
13895
  maxRetries,
12924
13896
  agentTimeoutMs,
12925
13897
  promptDumpDir,
@@ -12985,12 +13957,13 @@ async function runBatchEvaluation(options) {
12985
13957
  evalCases,
12986
13958
  provider,
12987
13959
  target,
12988
- graderRegistry,
13960
+ evaluatorRegistry,
12989
13961
  promptDumpDir,
12990
13962
  nowFn,
12991
13963
  onProgress,
12992
13964
  onResult,
12993
- resolveJudgeProvider
13965
+ resolveJudgeProvider,
13966
+ agentTimeoutMs
12994
13967
  } = options;
12995
13968
  const promptInputsList = [];
12996
13969
  for (const evalCase of evalCases) {
@@ -13006,7 +13979,7 @@ async function runBatchEvaluation(options) {
13006
13979
  prompt: promptInputs.request,
13007
13980
  guidelines: promptInputs.guidelines,
13008
13981
  guideline_patterns: evalCase.guideline_patterns,
13009
- attachments: evalCase.file_paths,
13982
+ inputFiles: evalCase.file_paths,
13010
13983
  evalCaseId: evalCase.id,
13011
13984
  metadata: {
13012
13985
  systemPrompt: promptInputs.systemMessage ?? ""
@@ -13038,23 +14011,19 @@ async function runBatchEvaluation(options) {
13038
14011
  const evalCase = evalCases[i6];
13039
14012
  const promptInputs = promptInputsList[i6];
13040
14013
  const providerResponse = batchResponse[i6];
13041
- const now = nowFn();
13042
- const graderKind = evalCase.grader ?? "heuristic";
13043
- const activeGrader = graderRegistry[graderKind] ?? graderRegistry.heuristic;
13044
- if (!activeGrader) {
13045
- throw new Error(`No grader registered for kind '${graderKind}'`);
13046
- }
13047
- let grade;
14014
+ let result;
13048
14015
  try {
13049
- grade = await activeGrader.grade({
14016
+ result = await evaluateCandidate({
13050
14017
  evalCase,
13051
14018
  candidate: providerResponse.text ?? "",
13052
14019
  target,
13053
14020
  provider,
13054
- attempt: 0,
14021
+ evaluators: evaluatorRegistry,
13055
14022
  promptInputs,
13056
- now,
13057
- judgeProvider: await resolveJudgeProvider(target)
14023
+ nowFn,
14024
+ attempt: 0,
14025
+ judgeProvider: await resolveJudgeProvider(target),
14026
+ agentTimeoutMs
13058
14027
  });
13059
14028
  } catch (error) {
13060
14029
  const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
@@ -13073,28 +14042,6 @@ async function runBatchEvaluation(options) {
13073
14042
  }
13074
14043
  continue;
13075
14044
  }
13076
- const completedAt = nowFn();
13077
- const rawRequest = {
13078
- request: promptInputs.request,
13079
- guidelines: promptInputs.guidelines,
13080
- guideline_paths: evalCase.guideline_paths,
13081
- system_message: promptInputs.systemMessage ?? ""
13082
- };
13083
- const result = {
13084
- eval_id: evalCase.id,
13085
- conversation_id: evalCase.conversation_id,
13086
- score: grade.score,
13087
- hits: grade.hits,
13088
- misses: grade.misses,
13089
- model_answer: providerResponse.text ?? "",
13090
- expected_aspect_count: grade.expectedAspectCount,
13091
- target: target.name,
13092
- timestamp: completedAt.toISOString(),
13093
- reasoning: grade.reasoning,
13094
- raw_aspects: grade.rawAspects,
13095
- raw_request: rawRequest,
13096
- grader_raw_request: grade.graderRawRequest
13097
- };
13098
14045
  results.push(result);
13099
14046
  if (onResult) {
13100
14047
  await onResult(result);
@@ -13116,7 +14063,7 @@ async function runEvalCase(options) {
13116
14063
  evalCase,
13117
14064
  provider,
13118
14065
  target,
13119
- graders,
14066
+ evaluators,
13120
14067
  now,
13121
14068
  maxRetries,
13122
14069
  agentTimeoutMs,
@@ -13171,27 +14118,49 @@ async function runEvalCase(options) {
13171
14118
  if (cacheKey && cache && !cachedResponse) {
13172
14119
  await cache.set(cacheKey, providerResponse);
13173
14120
  }
13174
- const graderKind = evalCase.grader ?? "heuristic";
13175
- const activeGrader = graders[graderKind] ?? graders.heuristic;
13176
- if (!activeGrader) {
13177
- throw new Error(`No grader registered for kind '${graderKind}'`);
13178
- }
13179
- let grade;
13180
14121
  try {
13181
- const gradeTimestamp = nowFn();
13182
- grade = await activeGrader.grade({
14122
+ return await evaluateCandidate({
13183
14123
  evalCase,
13184
14124
  candidate: providerResponse.text ?? "",
13185
14125
  target,
13186
14126
  provider,
13187
- attempt,
14127
+ evaluators,
13188
14128
  promptInputs,
13189
- now: gradeTimestamp,
13190
- judgeProvider
14129
+ nowFn,
14130
+ attempt,
14131
+ judgeProvider,
14132
+ agentTimeoutMs
13191
14133
  });
13192
14134
  } catch (error) {
13193
14135
  return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
13194
14136
  }
14137
+ }
14138
+ async function evaluateCandidate(options) {
14139
+ const {
14140
+ evalCase,
14141
+ candidate,
14142
+ target,
14143
+ provider,
14144
+ evaluators,
14145
+ promptInputs,
14146
+ nowFn,
14147
+ attempt,
14148
+ judgeProvider,
14149
+ agentTimeoutMs
14150
+ } = options;
14151
+ const gradeTimestamp = nowFn();
14152
+ const { score, evaluatorResults } = await runEvaluatorsForCase({
14153
+ evalCase,
14154
+ candidate,
14155
+ target,
14156
+ provider,
14157
+ evaluators,
14158
+ attempt,
14159
+ promptInputs,
14160
+ now: gradeTimestamp,
14161
+ judgeProvider,
14162
+ agentTimeoutMs
14163
+ });
13195
14164
  const completedAt = nowFn();
13196
14165
  const rawRequest = {
13197
14166
  request: promptInputs.request,
@@ -13202,18 +14171,191 @@ async function runEvalCase(options) {
13202
14171
  return {
13203
14172
  eval_id: evalCase.id,
13204
14173
  conversation_id: evalCase.conversation_id,
13205
- score: grade.score,
13206
- hits: grade.hits,
13207
- misses: grade.misses,
13208
- model_answer: providerResponse.text ?? "",
13209
- expected_aspect_count: grade.expectedAspectCount,
14174
+ score: score.score,
14175
+ hits: score.hits,
14176
+ misses: score.misses,
14177
+ model_answer: candidate,
14178
+ expected_aspect_count: score.expectedAspectCount,
13210
14179
  target: target.name,
13211
14180
  timestamp: completedAt.toISOString(),
13212
- reasoning: grade.reasoning,
13213
- raw_aspects: grade.rawAspects,
14181
+ reasoning: score.reasoning,
14182
+ raw_aspects: score.rawAspects,
13214
14183
  raw_request: rawRequest,
13215
- grader_raw_request: grade.graderRawRequest
14184
+ evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
14185
+ evaluator_results: evaluatorResults
14186
+ };
14187
+ }
14188
/**
 * Scores one candidate answer for an eval case.
 *
 * When the eval case lists its own evaluators, they are all run through
 * `runEvaluatorList`. Otherwise a single evaluator is picked from the
 * registry by `evalCase.evaluator` (default and fallback: `llm_judge`).
 *
 * @param {object} options - Evaluation inputs; `evaluators` is the evaluator registry.
 * @returns {Promise<{ score: object, evaluatorResults?: object[] }>}
 * @throws {Error} When neither the requested kind nor `llm_judge` is registered.
 */
async function runEvaluatorsForCase(options) {
  const { evalCase, candidate, target, provider, evaluators, attempt, promptInputs, now, judgeProvider, agentTimeoutMs } = options;
  // Context shared by both the list path and the single-evaluator path.
  const sharedContext = {
    evalCase,
    candidate,
    target,
    provider,
    attempt,
    promptInputs,
    now,
    judgeProvider
  };
  const configured = evalCase.evaluators;
  if (configured && configured.length > 0) {
    return runEvaluatorList({
      ...sharedContext,
      evaluators: configured,
      evaluatorRegistry: evaluators,
      agentTimeoutMs
    });
  }
  const evaluatorKind = evalCase.evaluator ?? "llm_judge";
  const activeEvaluator = evaluators[evaluatorKind] ?? evaluators.llm_judge;
  if (!activeEvaluator) {
    throw new Error(`No evaluator registered for kind '${evaluatorKind}'`);
  }
  const score = await activeEvaluator.evaluate(sharedContext);
  return { score };
}
14222
/**
 * Runs every evaluator configured on an eval case and aggregates the outcome.
 *
 * Supported evaluator types are `llm_judge` and `code`; entries of any other
 * type produce no result. A failing evaluator contributes a zero score whose
 * miss and reasoning carry the error message. The aggregate score is the mean
 * of all recorded scores (0 when nothing was recorded).
 *
 * @param {object} options - Evaluation inputs; `evaluators` is the per-case
 *   evaluator config list and `evaluatorRegistry` the shared registry.
 * @returns {Promise<{ score: object, evaluatorResults: object[] }>}
 */
async function runEvaluatorList(options) {
  const {
    evalCase,
    evaluators,
    candidate,
    target,
    provider,
    evaluatorRegistry,
    attempt,
    promptInputs,
    now,
    judgeProvider,
    agentTimeoutMs
  } = options;
  const scored = [];
  const evaluatorResults = [];
  // Stores a successful outcome in both the aggregation list and the report list.
  const record = (evaluator, outcome) => {
    scored.push({ score: outcome, name: evaluator.name, type: evaluator.type });
    evaluatorResults.push({
      name: evaluator.name,
      type: evaluator.type,
      score: outcome.score,
      hits: outcome.hits,
      misses: outcome.misses,
      reasoning: outcome.reasoning,
      evaluator_raw_request: outcome.evaluatorRawRequest
    });
  };
  for (const evaluator of evaluators ?? []) {
    try {
      switch (evaluator.type) {
        case "llm_judge": {
          const outcome = await runLlmJudgeEvaluator({
            config: evaluator,
            evalCase,
            candidate,
            target,
            provider,
            evaluatorRegistry,
            attempt,
            promptInputs,
            now,
            judgeProvider
          });
          record(evaluator, outcome);
          break;
        }
        case "code": {
          const codeEvaluator = new CodeEvaluator({
            script: evaluator.script,
            cwd: evaluator.resolvedCwd ?? evaluator.cwd,
            agentTimeoutMs
          });
          const outcome = await codeEvaluator.evaluate({
            evalCase,
            candidate,
            target,
            provider,
            attempt,
            promptInputs,
            now
          });
          record(evaluator, outcome);
          break;
        }
        default:
          break;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const fallbackName = evaluator.name ?? "unknown";
      const fallbackType = evaluator.type ?? "unknown";
      const fallbackScore = {
        score: 0,
        hits: [],
        misses: [`Evaluator '${evaluator.name}' failed: ${message}`],
        expectedAspectCount: 1,
        reasoning: message
      };
      scored.push({ score: fallbackScore, name: fallbackName, type: fallbackType });
      evaluatorResults.push({
        name: fallbackName,
        type: fallbackType,
        score: 0,
        hits: [],
        misses: [`Evaluator '${fallbackName}' failed: ${message}`],
        reasoning: message
      });
    }
  }
  let scoreSum = 0;
  for (const entry of scored) {
    scoreSum += entry.score.score;
  }
  const aggregateScore = scored.length > 0 ? scoreSum / scored.length : 0;
  const hits = scored.flatMap(({ score: outcome }) => outcome.hits);
  const misses = scored.flatMap(({ score: outcome }) => outcome.misses);
  let expectedAspectCount = 0;
  for (const entry of scored) {
    expectedAspectCount += entry.score.expectedAspectCount ?? 0;
  }
  const rawAspects = scored.flatMap(({ score: outcome }) => outcome.rawAspects ?? []);
  // One "<name>: <reasoning>" fragment per evaluator that supplied reasoning.
  const reasoningParts = scored.map(({ name, score: outcome }) => {
    if (outcome.reasoning) {
      return `${name}: ${outcome.reasoning}`;
    }
    return void 0;
  }).filter(isNonEmptyString2);
  const reasoning = reasoningParts.length > 0 ? reasoningParts.join(" | ") : void 0;
  const score = {
    score: aggregateScore,
    hits,
    misses,
    expectedAspectCount,
    reasoning,
    rawAspects: rawAspects.length > 0 ? rawAspects : void 0
  };
  return { score, evaluatorResults };
}
14329
/**
 * Runs the registry's `llm_judge` evaluator for one configured judge entry.
 *
 * The entry's custom prompt (see `resolveCustomPrompt`) is passed as the
 * system prompt and `config.model` as the judge model.
 *
 * @param {object} options - Evaluation inputs plus `config` (the evaluator
 *   entry) and `evaluatorRegistry`.
 * @returns {Promise<object>} The score record returned by the judge evaluator.
 */
async function runLlmJudgeEvaluator(options) {
  const { config, evaluatorRegistry } = options;
  const judgeContext = {
    evalCase: options.evalCase,
    candidate: options.candidate,
    target: options.target,
    provider: options.provider,
    attempt: options.attempt,
    promptInputs: options.promptInputs,
    now: options.now,
    judgeProvider: options.judgeProvider
  };
  judgeContext.systemPrompt = await resolveCustomPrompt(config);
  judgeContext.evaluator = config;
  judgeContext.judgeModel = config.model;
  return evaluatorRegistry.llm_judge.evaluate(judgeContext);
}
14346
/**
 * Determines the custom judge prompt for an evaluator entry.
 *
 * A prompt file named by `config.promptPath` takes precedence. When no path
 * is set, or the file cannot be read (a warning is logged), the inline
 * `config.prompt` is returned instead.
 *
 * @param {{ promptPath?: string, prompt?: string }} config - Evaluator entry.
 * @returns {Promise<string | undefined>} Prompt text, or `undefined` when none is configured.
 */
async function resolveCustomPrompt(config) {
  if (!config.promptPath) {
    return config.prompt;
  }
  try {
    return await readFile4(config.promptPath, "utf8");
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
  }
  // Reading failed: fall back to the inline prompt.
  return config.prompt;
}
14357
/**
 * Tells whether `value` is a string with at least one non-whitespace character.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean}
 */
function isNonEmptyString2(value) {
  if (typeof value !== "string") {
    return false;
  }
  return value.trim().length !== 0;
}
13218
14360
  function filterEvalCases(evalCases, evalId) {
13219
14361
  if (!evalId) {
@@ -13221,9 +14363,8 @@ function filterEvalCases(evalCases, evalId) {
13221
14363
  }
13222
14364
  return evalCases.filter((evalCase) => evalCase.id === evalId);
13223
14365
  }
13224
- function buildGraderRegistry(overrides, resolveJudgeProvider) {
13225
- const heuristic = overrides?.heuristic ?? new HeuristicGrader();
13226
- const llmJudge = overrides?.llm_judge ?? new QualityGrader({
14366
+ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
14367
+ const llmJudge = overrides?.llm_judge ?? new LlmJudgeEvaluator({
13227
14368
  resolveJudgeProvider: async (context2) => {
13228
14369
  if (context2.judgeProvider) {
13229
14370
  return context2.judgeProvider;
@@ -13233,15 +14374,14 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
13233
14374
  });
13234
14375
  return {
13235
14376
  ...overrides,
13236
- heuristic,
13237
14377
  llm_judge: llmJudge
13238
14378
  };
13239
14379
  }
13240
14380
  async function dumpPrompt(directory, evalCase, promptInputs) {
13241
14381
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
13242
14382
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
13243
- const filePath = path42.resolve(directory, filename);
13244
- await mkdir3(path42.dirname(filePath), { recursive: true });
14383
+ const filePath = path72.resolve(directory, filename);
14384
+ await mkdir22(path72.dirname(filePath), { recursive: true });
13245
14385
  const payload = {
13246
14386
  eval_id: evalCase.id,
13247
14387
  request: promptInputs.request,
@@ -13258,7 +14398,7 @@ function sanitizeFilename(value) {
13258
14398
  return sanitized.length > 0 ? sanitized : randomUUID2();
13259
14399
  }
13260
14400
  async function invokeProvider(provider, options) {
13261
- const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
14401
+ const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
13262
14402
  const controller = new AbortController();
13263
14403
  const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
13264
14404
  if (signal) {
@@ -13269,7 +14409,7 @@ async function invokeProvider(provider, options) {
13269
14409
  prompt: promptInputs.request,
13270
14410
  guidelines: promptInputs.guidelines,
13271
14411
  guideline_patterns: evalCase.guideline_patterns,
13272
- attachments: evalCase.file_paths,
14412
+ inputFiles: evalCase.file_paths,
13273
14413
  evalCaseId: evalCase.id,
13274
14414
  attempt,
13275
14415
  metadata: {
@@ -13980,7 +15120,7 @@ function formatEvaluationSummary(summary) {
13980
15120
 
13981
15121
  // src/commands/eval/targets.ts
13982
15122
  import { constants as constants5 } from "node:fs";
13983
- import { access as access5, readFile as readFile4 } from "node:fs/promises";
15123
+ import { access as access5, readFile as readFile5 } from "node:fs/promises";
13984
15124
  import path12 from "node:path";
13985
15125
  import { parse as parse4 } from "yaml";
13986
15126
  var TARGET_FILE_CANDIDATES = [
@@ -13999,7 +15139,7 @@ async function fileExists5(filePath) {
13999
15139
  }
14000
15140
  async function readTestSuiteTarget(testFilePath) {
14001
15141
  try {
14002
- const raw = await readFile4(path12.resolve(testFilePath), "utf8");
15142
+ const raw = await readFile5(path12.resolve(testFilePath), "utf8");
14003
15143
  const parsed = parse4(raw);
14004
15144
  if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
14005
15145
  const targetValue = parsed.target;
@@ -14386,58 +15526,105 @@ function registerEvalCommand(program) {
14386
15526
  // src/commands/init/index.ts
14387
15527
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
14388
15528
  import path15 from "node:path";
15529
+ import * as readline from "node:readline/promises";
14389
15530
 
14390
15531
  // src/templates/index.ts
14391
- import { readFileSync } from "node:fs";
15532
+ import { readFileSync, readdirSync, statSync } from "node:fs";
14392
15533
  import path14 from "node:path";
14393
15534
  import { fileURLToPath as fileURLToPath2 } from "node:url";
14394
15535
  var TemplateManager = class {
14395
- static getTemplates() {
15536
+ static getGithubTemplates() {
15537
+ return this.getTemplatesFromDir("github");
15538
+ }
15539
+ static getAgentvTemplates() {
15540
+ return this.getTemplatesFromDir("agentv");
15541
+ }
15542
+ static getTemplatesFromDir(subdir) {
14396
15543
  const currentDir = path14.dirname(fileURLToPath2(import.meta.url));
14397
15544
  let templatesDir;
14398
15545
  if (currentDir.includes(path14.sep + "dist")) {
14399
- templatesDir = path14.join(currentDir, "templates");
15546
+ templatesDir = path14.join(currentDir, "templates", subdir);
14400
15547
  } else {
14401
- templatesDir = currentDir;
15548
+ templatesDir = path14.join(currentDir, subdir);
14402
15549
  }
14403
- const evalBuildPrompt = readFileSync(
14404
- path14.join(templatesDir, "eval-build.prompt.md"),
14405
- "utf-8"
14406
- );
14407
- const evalSchema = readFileSync(
14408
- path14.join(templatesDir, "eval-schema.json"),
14409
- "utf-8"
14410
- );
14411
- const configSchema = readFileSync(
14412
- path14.join(templatesDir, "config-schema.json"),
14413
- "utf-8"
14414
- );
14415
- return [
14416
- {
14417
- path: "prompts/eval-build.prompt.md",
14418
- content: evalBuildPrompt
14419
- },
14420
- {
14421
- path: "contexts/eval-schema.json",
14422
- content: evalSchema
14423
- },
14424
- {
14425
- path: "contexts/config-schema.json",
14426
- content: configSchema
15550
+ return this.readTemplatesRecursively(templatesDir, "");
15551
+ }
15552
+ static readTemplatesRecursively(dir, relativePath) {
15553
+ const templates = [];
15554
+ const entries = readdirSync(dir);
15555
+ for (const entry of entries) {
15556
+ const fullPath = path14.join(dir, entry);
15557
+ const stat4 = statSync(fullPath);
15558
+ const entryRelativePath = relativePath ? path14.join(relativePath, entry) : entry;
15559
+ if (stat4.isDirectory()) {
15560
+ templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
15561
+ } else {
15562
+ const content = readFileSync(fullPath, "utf-8");
15563
+ templates.push({
15564
+ path: entryRelativePath.split(path14.sep).join("/"),
15565
+ // Normalize to forward slashes
15566
+ content
15567
+ });
14427
15568
  }
14428
- ];
15569
+ }
15570
+ return templates;
14429
15571
  }
14430
15572
  };
14431
15573
 
14432
15574
  // src/commands/init/index.ts
15575
+ async function promptYesNo(message) {
15576
+ const rl = readline.createInterface({
15577
+ input: process.stdin,
15578
+ output: process.stdout
15579
+ });
15580
+ try {
15581
+ const answer = await rl.question(`${message} (y/N): `);
15582
+ return answer.toLowerCase() === "y" || answer.toLowerCase() === "yes";
15583
+ } finally {
15584
+ rl.close();
15585
+ }
15586
+ }
14433
15587
  async function initCommand(options = {}) {
14434
15588
  const targetPath = path15.resolve(options.targetPath ?? ".");
14435
15589
  const githubDir = path15.join(targetPath, ".github");
15590
+ const agentvDir = path15.join(targetPath, ".agentv");
15591
+ const githubTemplates = TemplateManager.getGithubTemplates();
15592
+ const agentvTemplates = TemplateManager.getAgentvTemplates();
15593
+ const existingFiles = [];
15594
+ if (existsSync(githubDir)) {
15595
+ for (const template of githubTemplates) {
15596
+ const targetFilePath = path15.join(githubDir, template.path);
15597
+ if (existsSync(targetFilePath)) {
15598
+ existingFiles.push(path15.relative(targetPath, targetFilePath));
15599
+ }
15600
+ }
15601
+ }
15602
+ if (existsSync(agentvDir)) {
15603
+ for (const template of agentvTemplates) {
15604
+ const targetFilePath = path15.join(agentvDir, template.path);
15605
+ if (existsSync(targetFilePath)) {
15606
+ existingFiles.push(path15.relative(targetPath, targetFilePath));
15607
+ }
15608
+ }
15609
+ }
15610
+ if (existingFiles.length > 0) {
15611
+ console.log("We detected an existing setup:");
15612
+ existingFiles.forEach((file) => console.log(` - ${file}`));
15613
+ console.log();
15614
+ const shouldReplace = await promptYesNo("Do you want to replace these files?");
15615
+ if (!shouldReplace) {
15616
+ console.log("\nInit cancelled. No files were changed.");
15617
+ return;
15618
+ }
15619
+ console.log();
15620
+ }
14436
15621
  if (!existsSync(githubDir)) {
14437
15622
  mkdirSync(githubDir, { recursive: true });
14438
15623
  }
14439
- const templates = TemplateManager.getTemplates();
14440
- for (const template of templates) {
15624
+ if (!existsSync(agentvDir)) {
15625
+ mkdirSync(agentvDir, { recursive: true });
15626
+ }
15627
+ for (const template of githubTemplates) {
14441
15628
  const targetFilePath = path15.join(githubDir, template.path);
14442
15629
  const targetDirPath = path15.dirname(targetFilePath);
14443
15630
  if (!existsSync(targetDirPath)) {
@@ -14446,11 +15633,35 @@ async function initCommand(options = {}) {
14446
15633
  writeFileSync(targetFilePath, template.content, "utf-8");
14447
15634
  console.log(`Created ${path15.relative(targetPath, targetFilePath)}`);
14448
15635
  }
15636
+ for (const template of agentvTemplates) {
15637
+ const targetFilePath = path15.join(agentvDir, template.path);
15638
+ const targetDirPath = path15.dirname(targetFilePath);
15639
+ if (!existsSync(targetDirPath)) {
15640
+ mkdirSync(targetDirPath, { recursive: true });
15641
+ }
15642
+ writeFileSync(targetFilePath, template.content, "utf-8");
15643
+ console.log(`Created ${path15.relative(targetPath, targetFilePath)}`);
15644
+ }
14449
15645
  console.log("\nAgentV initialized successfully!");
14450
15646
  console.log(`
14451
15647
  Files installed to ${path15.relative(targetPath, githubDir)}:`);
14452
- templates.forEach((t) => console.log(` - ${t.path}`));
14453
- console.log("\nYou can now create eval files using the schema and prompt templates.");
15648
+ githubTemplates.forEach((t) => console.log(` - ${t.path}`));
15649
+ console.log(`
15650
+ Files installed to ${path15.relative(targetPath, agentvDir)}:`);
15651
+ agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
15652
+ console.log("\nYou can now:");
15653
+ console.log(" 1. Edit .agentv/.env with your API credentials");
15654
+ console.log(" 2. Configure targets in .agentv/targets.yaml");
15655
+ console.log(" 3. Create eval files using the schema and prompt templates");
15656
+ }
15657
+
15658
+ // src/commands/status.ts
15659
+ function registerStatusCommand(program) {
15660
+ program.command("status").description("Show the latest AgentV kernel status").action(() => {
15661
+ const kernel = createAgentKernel();
15662
+ console.log(`Kernel status: ${kernel.status}`);
15663
+ });
15664
+ return program;
14454
15665
  }
14455
15666
 
14456
15667
  // src/commands/validate/format-output.ts
@@ -14525,7 +15736,7 @@ function isTTY() {
14525
15736
  }
14526
15737
 
14527
15738
  // ../../packages/core/dist/evaluation/validation/index.js
14528
- import { readFile as readFile5 } from "node:fs/promises";
15739
+ import { readFile as readFile6 } from "node:fs/promises";
14529
15740
  import { parse as parse5 } from "yaml";
14530
15741
  import { readFile as readFile23 } from "node:fs/promises";
14531
15742
  import path16 from "node:path";
@@ -14543,7 +15754,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
14543
15754
  var SCHEMA_CONFIG_V22 = "agentv-config-v2";
14544
15755
  async function detectFileType(filePath) {
14545
15756
  try {
14546
- const content = await readFile5(filePath, "utf8");
15757
+ const content = await readFile6(filePath, "utf8");
14547
15758
  const parsed = parse5(content);
14548
15759
  if (typeof parsed !== "object" || parsed === null) {
14549
15760
  return "unknown";
@@ -14762,6 +15973,7 @@ function validateMessages(messages, location, filePath, errors) {
14762
15973
  function isObject2(value) {
14763
15974
  return typeof value === "object" && value !== null && !Array.isArray(value);
14764
15975
  }
15976
+ var CLI_PLACEHOLDERS2 = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
14765
15977
  async function validateTargetsFile(filePath) {
14766
15978
  const errors = [];
14767
15979
  const absolutePath = path23.resolve(filePath);
@@ -14782,6 +15994,182 @@ async function validateTargetsFile(filePath) {
14782
15994
  errors
14783
15995
  };
14784
15996
  }
15997
+ function validateCliSettings(settings, absolutePath2, location, errors2) {
15998
+ if (!isObject2(settings)) {
15999
+ errors2.push({
16000
+ severity: "error",
16001
+ filePath: absolutePath2,
16002
+ location,
16003
+ message: "CLI provider requires a 'settings' object"
16004
+ });
16005
+ return;
16006
+ }
16007
+ const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
16008
+ if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16009
+ errors2.push({
16010
+ severity: "error",
16011
+ filePath: absolutePath2,
16012
+ location: `${location}.commandTemplate`,
16013
+ message: "CLI provider requires 'commandTemplate' as a non-empty string"
16014
+ });
16015
+ } else {
16016
+ recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16017
+ }
16018
+ const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
16019
+ if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
16020
+ errors2.push({
16021
+ severity: "error",
16022
+ filePath: absolutePath2,
16023
+ location: `${location}.attachmentsFormat`,
16024
+ message: "'attachmentsFormat' must be a string when provided"
16025
+ });
16026
+ }
16027
+ const filesFormat = settings["files_format"] ?? settings["filesFormat"];
16028
+ if (filesFormat !== void 0 && typeof filesFormat !== "string") {
16029
+ errors2.push({
16030
+ severity: "error",
16031
+ filePath: absolutePath2,
16032
+ location: `${location}.filesFormat`,
16033
+ message: "'filesFormat' must be a string when provided"
16034
+ });
16035
+ }
16036
+ const cwd = settings["cwd"];
16037
+ if (cwd !== void 0 && typeof cwd !== "string") {
16038
+ errors2.push({
16039
+ severity: "error",
16040
+ filePath: absolutePath2,
16041
+ location: `${location}.cwd`,
16042
+ message: "'cwd' must be a string when provided"
16043
+ });
16044
+ }
16045
+ const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
16046
+ if (timeoutSeconds !== void 0) {
16047
+ const numericTimeout = Number(timeoutSeconds);
16048
+ if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16049
+ errors2.push({
16050
+ severity: "error",
16051
+ filePath: absolutePath2,
16052
+ location: `${location}.timeoutSeconds`,
16053
+ message: "'timeoutSeconds' must be a positive number when provided"
16054
+ });
16055
+ }
16056
+ }
16057
+ const envOverrides = settings["env"];
16058
+ if (envOverrides !== void 0) {
16059
+ if (!isObject2(envOverrides)) {
16060
+ errors2.push({
16061
+ severity: "error",
16062
+ filePath: absolutePath2,
16063
+ location: `${location}.env`,
16064
+ message: "'env' must be an object with string values"
16065
+ });
16066
+ } else {
16067
+ for (const [key2, value] of Object.entries(envOverrides)) {
16068
+ if (typeof value !== "string" || value.trim().length === 0) {
16069
+ errors2.push({
16070
+ severity: "error",
16071
+ filePath: absolutePath2,
16072
+ location: `${location}.env.${key2}`,
16073
+ message: `Environment override '${key2}' must be a non-empty string`
16074
+ });
16075
+ }
16076
+ }
16077
+ }
16078
+ }
16079
+ const healthcheck = settings["healthcheck"];
16080
+ if (healthcheck !== void 0) {
16081
+ validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
16082
+ }
16083
+ }
16084
+ function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
16085
+ if (!isObject2(healthcheck)) {
16086
+ errors2.push({
16087
+ severity: "error",
16088
+ filePath: absolutePath2,
16089
+ location,
16090
+ message: "'healthcheck' must be an object when provided"
16091
+ });
16092
+ return;
16093
+ }
16094
+ const type = healthcheck["type"];
16095
+ if (type !== "http" && type !== "command") {
16096
+ errors2.push({
16097
+ severity: "error",
16098
+ filePath: absolutePath2,
16099
+ location: `${location}.type`,
16100
+ message: "healthcheck.type must be either 'http' or 'command'"
16101
+ });
16102
+ return;
16103
+ }
16104
+ const timeoutSeconds = healthcheck["timeout_seconds"] ?? healthcheck["timeoutSeconds"];
16105
+ if (timeoutSeconds !== void 0) {
16106
+ const numericTimeout = Number(timeoutSeconds);
16107
+ if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16108
+ errors2.push({
16109
+ severity: "error",
16110
+ filePath: absolutePath2,
16111
+ location: `${location}.timeoutSeconds`,
16112
+ message: "healthcheck.timeoutSeconds must be a positive number when provided"
16113
+ });
16114
+ }
16115
+ }
16116
+ if (type === "http") {
16117
+ const url = healthcheck["url"];
16118
+ if (typeof url !== "string" || url.trim().length === 0) {
16119
+ errors2.push({
16120
+ severity: "error",
16121
+ filePath: absolutePath2,
16122
+ location: `${location}.url`,
16123
+ message: "healthcheck.url must be a non-empty string for http checks"
16124
+ });
16125
+ }
16126
+ return;
16127
+ }
16128
+ const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
16129
+ if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16130
+ errors2.push({
16131
+ severity: "error",
16132
+ filePath: absolutePath2,
16133
+ location: `${location}.commandTemplate`,
16134
+ message: "healthcheck.commandTemplate must be a non-empty string for command checks"
16135
+ });
16136
+ } else {
16137
+ recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16138
+ }
16139
+ const cwd = healthcheck["cwd"];
16140
+ if (cwd !== void 0 && typeof cwd !== "string") {
16141
+ errors2.push({
16142
+ severity: "error",
16143
+ filePath: absolutePath2,
16144
+ location: `${location}.cwd`,
16145
+ message: "healthcheck.cwd must be a string when provided"
16146
+ });
16147
+ }
16148
+ }
16149
+ function recordUnknownPlaceholders(template, absolutePath2, location, errors2) {
16150
+ const placeholders = extractPlaceholders(template);
16151
+ for (const placeholder of placeholders) {
16152
+ if (!CLI_PLACEHOLDERS2.has(placeholder)) {
16153
+ errors2.push({
16154
+ severity: "error",
16155
+ filePath: absolutePath2,
16156
+ location,
16157
+ message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
16158
+ });
16159
+ }
16160
+ }
16161
+ }
16162
+ function extractPlaceholders(template) {
16163
+ const matches = template.matchAll(/\{([A-Z_]+)\}/g);
16164
+ const result = [];
16165
+ for (const match of matches) {
16166
+ const placeholder = match[1];
16167
+ if (placeholder) {
16168
+ result.push(placeholder);
16169
+ }
16170
+ }
16171
+ return result;
16172
+ }
14785
16173
  if (!isObject2(parsed)) {
14786
16174
  errors.push({
14787
16175
  severity: "error",
@@ -14843,6 +16231,7 @@ async function validateTargetsFile(filePath) {
14843
16231
  });
14844
16232
  }
14845
16233
  const provider = target["provider"];
16234
+ const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
14846
16235
  if (typeof provider !== "string" || provider.trim().length === 0) {
14847
16236
  errors.push({
14848
16237
  severity: "error",
@@ -14859,7 +16248,7 @@ async function validateTargetsFile(filePath) {
14859
16248
  });
14860
16249
  }
14861
16250
  const settings = target["settings"];
14862
- if (settings !== void 0 && !isObject2(settings)) {
16251
+ if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
14863
16252
  errors.push({
14864
16253
  severity: "error",
14865
16254
  filePath: absolutePath,
@@ -14867,6 +16256,9 @@ async function validateTargetsFile(filePath) {
14867
16256
  message: "Invalid 'settings' field (must be an object)"
14868
16257
  });
14869
16258
  }
16259
+ if (providerValue === "cli") {
16260
+ validateCliSettings(settings, absolutePath, `${location}.settings`, errors);
16261
+ }
14870
16262
  const judgeTarget = target["judge_target"];
14871
16263
  if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
14872
16264
  errors.push({
@@ -15198,15 +16590,6 @@ function registerValidateCommand(program) {
15198
16590
  return program;
15199
16591
  }
15200
16592
 
15201
- // src/commands/status.ts
15202
- function registerStatusCommand(program) {
15203
- program.command("status").description("Show the latest AgentV kernel status").action(() => {
15204
- const kernel = createAgentKernel();
15205
- console.log(`Kernel status: ${kernel.status}`);
15206
- });
15207
- return program;
15208
- }
15209
-
15210
16593
  // src/index.ts
15211
16594
  var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
15212
16595
  function createProgram() {
@@ -15235,4 +16618,4 @@ export {
15235
16618
  createProgram,
15236
16619
  runCli
15237
16620
  };
15238
- //# sourceMappingURL=chunk-7MGIZBZG.js.map
16621
+ //# sourceMappingURL=chunk-HPH4YWGU.js.map