@agentv/core 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { AxChatRequest } from '@ax-llm/ax';
1
+ import { AxChatRequest, AxAI } from '@ax-llm/ax';
2
2
 
3
3
  /**
4
4
  * JSON primitive values appearing in AgentV payloads.
@@ -99,6 +99,7 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
99
99
  */
100
100
  interface EvalCase {
101
101
  readonly id: string;
102
+ readonly dataset: string;
102
103
  readonly conversation_id?: string;
103
104
  readonly task: string;
104
105
  readonly user_segments: readonly JsonObject[];
@@ -117,6 +118,7 @@ interface EvalCase {
117
118
  */
118
119
  interface EvaluationResult {
119
120
  readonly eval_id: string;
121
+ readonly dataset: string;
120
122
  readonly conversation_id?: string;
121
123
  readonly score: number;
122
124
  readonly hits: readonly string[];
@@ -171,6 +173,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
171
173
  }>;
172
174
 
173
175
  declare function fileExists(filePath: string): Promise<boolean>;
176
+ /**
177
+ * Read a text file and normalize line endings to LF (\n).
178
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
179
+ */
180
+ declare function readTextFile(filePath: string): Promise<string>;
174
181
  /**
175
182
  * Find git repository root by walking up the directory tree.
176
183
  */
@@ -229,6 +236,11 @@ interface Provider {
229
236
  * the orchestrator may send multiple requests in a single provider session.
230
237
  */
231
238
  invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
239
+ /**
240
+ * Optional access to the underlying AxAI instance.
241
+ * This enables using advanced Ax features like structured output signatures.
242
+ */
243
+ getAxAI?(): AxAI;
232
244
  }
233
245
  type EnvLookup = Readonly<Record<string, string | undefined>>;
234
246
  interface TargetDefinition {
@@ -372,6 +384,16 @@ interface EnsureSubagentsResult {
372
384
  */
373
385
  declare function ensureVSCodeSubagents(options: EnsureSubagentsOptions): Promise<EnsureSubagentsResult>;
374
386
 
387
+ type CodexLogEntry = {
388
+ readonly filePath: string;
389
+ readonly evalCaseId?: string;
390
+ readonly targetName: string;
391
+ readonly attempt?: number;
392
+ };
393
+ type CodexLogListener = (entry: CodexLogEntry) => void;
394
+ declare function consumeCodexLogEntries(): CodexLogEntry[];
395
+ declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => void;
396
+
375
397
  declare function createProvider(target: ResolvedTarget): Provider;
376
398
  declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
377
399
 
@@ -420,6 +442,10 @@ declare class LlmJudgeEvaluator implements Evaluator {
420
442
  private readonly customPrompt?;
421
443
  constructor(options: LlmJudgeEvaluatorOptions);
422
444
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
445
+ private evaluateWithAx;
446
+ private evaluateWithPrompt;
447
+ private buildJudgeForwardOptions;
448
+ private buildJudgeModelConfig;
423
449
  }
424
450
  interface CodeEvaluatorOptions {
425
451
  readonly script: string;
@@ -492,4 +518,4 @@ type AgentKernel = {
492
518
  };
493
519
  declare function createAgentKernel(): AgentKernel;
494
520
 
495
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation };
521
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { AxChatRequest } from '@ax-llm/ax';
1
+ import { AxChatRequest, AxAI } from '@ax-llm/ax';
2
2
 
3
3
  /**
4
4
  * JSON primitive values appearing in AgentV payloads.
@@ -99,6 +99,7 @@ type EvaluatorConfig = CodeEvaluatorConfig | LlmJudgeEvaluatorConfig;
99
99
  */
100
100
  interface EvalCase {
101
101
  readonly id: string;
102
+ readonly dataset: string;
102
103
  readonly conversation_id?: string;
103
104
  readonly task: string;
104
105
  readonly user_segments: readonly JsonObject[];
@@ -117,6 +118,7 @@ interface EvalCase {
117
118
  */
118
119
  interface EvaluationResult {
119
120
  readonly eval_id: string;
121
+ readonly dataset: string;
120
122
  readonly conversation_id?: string;
121
123
  readonly score: number;
122
124
  readonly hits: readonly string[];
@@ -171,6 +173,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
171
173
  }>;
172
174
 
173
175
  declare function fileExists(filePath: string): Promise<boolean>;
176
+ /**
177
+ * Read a text file and normalize line endings to LF (\n).
178
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
179
+ */
180
+ declare function readTextFile(filePath: string): Promise<string>;
174
181
  /**
175
182
  * Find git repository root by walking up the directory tree.
176
183
  */
@@ -229,6 +236,11 @@ interface Provider {
229
236
  * the orchestrator may send multiple requests in a single provider session.
230
237
  */
231
238
  invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
239
+ /**
240
+ * Optional access to the underlying AxAI instance.
241
+ * This enables using advanced Ax features like structured output signatures.
242
+ */
243
+ getAxAI?(): AxAI;
232
244
  }
233
245
  type EnvLookup = Readonly<Record<string, string | undefined>>;
234
246
  interface TargetDefinition {
@@ -372,6 +384,16 @@ interface EnsureSubagentsResult {
372
384
  */
373
385
  declare function ensureVSCodeSubagents(options: EnsureSubagentsOptions): Promise<EnsureSubagentsResult>;
374
386
 
387
+ type CodexLogEntry = {
388
+ readonly filePath: string;
389
+ readonly evalCaseId?: string;
390
+ readonly targetName: string;
391
+ readonly attempt?: number;
392
+ };
393
+ type CodexLogListener = (entry: CodexLogEntry) => void;
394
+ declare function consumeCodexLogEntries(): CodexLogEntry[];
395
+ declare function subscribeToCodexLogEntries(listener: CodexLogListener): () => void;
396
+
375
397
  declare function createProvider(target: ResolvedTarget): Provider;
376
398
  declare function resolveAndCreateProvider(definition: TargetDefinition, env?: EnvLookup): Provider;
377
399
 
@@ -420,6 +442,10 @@ declare class LlmJudgeEvaluator implements Evaluator {
420
442
  private readonly customPrompt?;
421
443
  constructor(options: LlmJudgeEvaluatorOptions);
422
444
  evaluate(context: EvaluationContext): Promise<EvaluationScore>;
445
+ private evaluateWithAx;
446
+ private evaluateWithPrompt;
447
+ private buildJudgeForwardOptions;
448
+ private buildJudgeModelConfig;
423
449
  }
424
450
  interface CodeEvaluatorOptions {
425
451
  readonly script: string;
@@ -492,4 +518,4 @@ type AgentKernel = {
492
518
  };
493
519
  declare function createAgentKernel(): AgentKernel;
494
520
 
495
- export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation };
521
+ export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
package/dist/index.js CHANGED
@@ -4,8 +4,9 @@ import {
4
4
  buildSearchRoots,
5
5
  fileExists,
6
6
  findGitRoot,
7
+ readTextFile,
7
8
  resolveFileReference
8
- } from "./chunk-NL7K4CAK.js";
9
+ } from "./chunk-OW3SHBIJ.js";
9
10
 
10
11
  // src/evaluation/types.ts
11
12
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -149,6 +150,9 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
149
150
  throw new Error(`Invalid test file format: ${evalFilePath}`);
150
151
  }
151
152
  const suite = parsed;
153
+ const datasetNameFromSuite = asString(suite.dataset)?.trim();
154
+ const fallbackDataset = path.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
155
+ const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
152
156
  const schema = suite.$schema;
153
157
  if (schema !== SCHEMA_EVAL_V2) {
154
158
  const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
@@ -296,6 +300,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
296
300
  ];
297
301
  const testCase = {
298
302
  id,
303
+ dataset: datasetName,
299
304
  conversation_id: conversationId,
300
305
  task: userTextPrompt,
301
306
  user_segments: userSegments,
@@ -676,6 +681,9 @@ var AzureProvider = class {
676
681
  );
677
682
  return mapResponse(ensureChatResponse(response));
678
683
  }
684
+ getAxAI() {
685
+ return this.ai;
686
+ }
679
687
  };
680
688
  var AnthropicProvider = class {
681
689
  constructor(targetName, config) {
@@ -710,6 +718,9 @@ var AnthropicProvider = class {
710
718
  );
711
719
  return mapResponse(ensureChatResponse(response));
712
720
  }
721
+ getAxAI() {
722
+ return this.ai;
723
+ }
713
724
  };
714
725
  var GeminiProvider = class {
715
726
  constructor(targetName, config) {
@@ -743,6 +754,9 @@ var GeminiProvider = class {
743
754
  );
744
755
  return mapResponse(ensureChatResponse(response));
745
756
  }
757
+ getAxAI() {
758
+ return this.ai;
759
+ }
746
760
  };
747
761
 
748
762
  // src/evaluation/providers/cli.ts
@@ -1063,6 +1077,59 @@ function pathToFileUri(filePath) {
1063
1077
  return `file://${normalizedPath}`;
1064
1078
  }
1065
1079
 
1080
+ // src/evaluation/providers/codex-log-tracker.ts
1081
+ var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs");
1082
+ var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers");
1083
+ function getCodexLogStore() {
1084
+ const globalObject = globalThis;
1085
+ const existing = globalObject[GLOBAL_LOGS_KEY];
1086
+ if (existing) {
1087
+ return existing;
1088
+ }
1089
+ const created = [];
1090
+ globalObject[GLOBAL_LOGS_KEY] = created;
1091
+ return created;
1092
+ }
1093
+ function getSubscriberStore() {
1094
+ const globalObject = globalThis;
1095
+ const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
1096
+ if (existing) {
1097
+ return existing;
1098
+ }
1099
+ const created = /* @__PURE__ */ new Set();
1100
+ globalObject[GLOBAL_SUBSCRIBERS_KEY] = created;
1101
+ return created;
1102
+ }
1103
+ function notifySubscribers(entry) {
1104
+ const subscribers = Array.from(getSubscriberStore());
1105
+ for (const listener of subscribers) {
1106
+ try {
1107
+ listener(entry);
1108
+ } catch (error) {
1109
+ const message = error instanceof Error ? error.message : String(error);
1110
+ console.warn(`Codex log subscriber failed: ${message}`);
1111
+ }
1112
+ }
1113
+ }
1114
+ function recordCodexLogEntry(entry) {
1115
+ getCodexLogStore().push(entry);
1116
+ notifySubscribers(entry);
1117
+ }
1118
+ function consumeCodexLogEntries() {
1119
+ const store = getCodexLogStore();
1120
+ if (store.length === 0) {
1121
+ return [];
1122
+ }
1123
+ return store.splice(0, store.length);
1124
+ }
1125
+ function subscribeToCodexLogEntries(listener) {
1126
+ const store = getSubscriberStore();
1127
+ store.add(listener);
1128
+ return () => {
1129
+ store.delete(listener);
1130
+ };
1131
+ }
1132
+
1066
1133
  // src/evaluation/providers/codex.ts
1067
1134
  var execAsync2 = promisify2(execCallback);
1068
1135
  var WORKSPACE_PREFIX = "agentv-codex-";
@@ -1259,7 +1326,12 @@ var CodexProvider = class {
1259
1326
  attempt: request.attempt,
1260
1327
  format: this.config.logFormat ?? "summary"
1261
1328
  });
1262
- console.log(`Streaming Codex CLI output to ${filePath}`);
1329
+ recordCodexLogEntry({
1330
+ filePath,
1331
+ targetName: this.targetName,
1332
+ evalCaseId: request.evalCaseId,
1333
+ attempt: request.attempt
1334
+ });
1263
1335
  return logger;
1264
1336
  } catch (error) {
1265
1337
  const message = error instanceof Error ? error.message : String(error);
@@ -2644,7 +2716,30 @@ function resolveAndCreateProvider(definition, env = process.env) {
2644
2716
  }
2645
2717
 
2646
2718
  // src/evaluation/evaluators.ts
2719
+ import { ax, f } from "@ax-llm/ax";
2647
2720
  import { randomUUID as randomUUID2 } from "node:crypto";
2721
+ var LLM_JUDGE_SIGNATURE = f().input(
2722
+ "evaluationContext",
2723
+ f.object(
2724
+ {
2725
+ expectedOutcome: f.string("The expected outcome for the original task"),
2726
+ request: f.string("The original task request"),
2727
+ referenceAnswer: f.string("The gold standard reference answer"),
2728
+ generatedAnswer: f.string("The answer to evaluate"),
2729
+ guidelines: f.string("Additional evaluation guidelines or instructions").optional()
2730
+ },
2731
+ "Complete evaluation context for the judge"
2732
+ )
2733
+ ).output(
2734
+ "evaluation",
2735
+ f.object({
2736
+ score: f.number("Score between 0.0 and 1.0").min(0).max(1),
2737
+ hits: f.string("Brief specific achievement").array(),
2738
+ misses: f.string("Brief specific failure or omission").array(),
2739
+ reasoning: f.string("Concise explanation for the score").max(500)
2740
+ })
2741
+ ).build();
2742
+ var LLM_JUDGE = ax(LLM_JUDGE_SIGNATURE);
2648
2743
  var LlmJudgeEvaluator = class {
2649
2744
  kind = "llm_judge";
2650
2745
  resolveJudgeProvider;
@@ -2662,6 +2757,44 @@ var LlmJudgeEvaluator = class {
2662
2757
  if (!judgeProvider) {
2663
2758
  throw new Error("No judge provider available for LLM grading");
2664
2759
  }
2760
+ if (providerSupportsAx(judgeProvider)) {
2761
+ return this.evaluateWithAx(context, judgeProvider);
2762
+ }
2763
+ return this.evaluateWithPrompt(context, judgeProvider);
2764
+ }
2765
+ async evaluateWithAx(context, judgeProvider) {
2766
+ const ai = judgeProvider.getAxAI();
2767
+ const guidelines = context.promptInputs.guidelines?.trim();
2768
+ const evaluationContext = {
2769
+ expectedOutcome: context.evalCase.outcome.trim(),
2770
+ request: context.evalCase.task.trim(),
2771
+ referenceAnswer: context.evalCase.expected_assistant_raw.trim(),
2772
+ generatedAnswer: context.candidate.trim(),
2773
+ ...guidelines ? { guidelines } : {}
2774
+ };
2775
+ const options = this.buildJudgeForwardOptions(context);
2776
+ const result = await LLM_JUDGE.forward(ai, { evaluationContext }, options);
2777
+ const evaluation = result.evaluation;
2778
+ const expectedAspectCount = Math.max(
2779
+ evaluation.hits.length + evaluation.misses.length,
2780
+ 1
2781
+ );
2782
+ return {
2783
+ score: evaluation.score,
2784
+ hits: evaluation.hits,
2785
+ misses: evaluation.misses,
2786
+ expectedAspectCount,
2787
+ reasoning: evaluation.reasoning,
2788
+ evaluatorRawRequest: {
2789
+ id: randomUUID2(),
2790
+ provider: judgeProvider.id,
2791
+ target: context.target.name,
2792
+ method: "ax-structured-output",
2793
+ signature: LLM_JUDGE_SIGNATURE.toString()
2794
+ }
2795
+ };
2796
+ }
2797
+ async evaluateWithPrompt(context, judgeProvider) {
2665
2798
  const prompt = buildQualityPrompt(context.evalCase, context.candidate);
2666
2799
  const systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
2667
2800
  const metadata = {
@@ -2681,6 +2814,7 @@ var LlmJudgeEvaluator = class {
2681
2814
  const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
2682
2815
  const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
2683
2816
  const reasoning = parsed.reasoning ?? response.reasoning;
2817
+ const expectedAspectCount = Math.max(hits.length + misses.length, 1);
2684
2818
  const evaluatorRawRequest = {
2685
2819
  id: randomUUID2(),
2686
2820
  provider: judgeProvider.id,
@@ -2693,12 +2827,34 @@ var LlmJudgeEvaluator = class {
2693
2827
  score,
2694
2828
  hits,
2695
2829
  misses,
2696
- expectedAspectCount: hits.length + misses.length || 1,
2830
+ expectedAspectCount,
2697
2831
  reasoning,
2698
2832
  evaluatorRawRequest
2699
2833
  };
2700
2834
  }
2835
+ buildJudgeForwardOptions(context) {
2836
+ const modelConfig = this.buildJudgeModelConfig();
2837
+ if (modelConfig === void 0 && context.judgeModel === void 0) {
2838
+ return void 0;
2839
+ }
2840
+ return {
2841
+ ...context.judgeModel ? { model: context.judgeModel } : {},
2842
+ ...modelConfig ? { modelConfig } : {}
2843
+ };
2844
+ }
2845
+ buildJudgeModelConfig() {
2846
+ if (this.maxOutputTokens === void 0 && this.temperature === void 0) {
2847
+ return void 0;
2848
+ }
2849
+ return {
2850
+ ...this.maxOutputTokens !== void 0 ? { maxTokens: this.maxOutputTokens } : {},
2851
+ ...this.temperature !== void 0 ? { temperature: this.temperature } : {}
2852
+ };
2853
+ }
2701
2854
  };
2855
+ function providerSupportsAx(provider) {
2856
+ return typeof provider.getAxAI === "function";
2857
+ }
2702
2858
  var QUALITY_SYSTEM_PROMPT = [
2703
2859
  "You are an expert evaluator. Your goal is to grade the generated_answer based on how well it achieves the expected_outcome for the original task.",
2704
2860
  "",
@@ -2922,7 +3078,7 @@ function parseJsonSafe(payload) {
2922
3078
 
2923
3079
  // src/evaluation/orchestrator.ts
2924
3080
  import { createHash, randomUUID as randomUUID3 } from "node:crypto";
2925
- import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
3081
+ import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
2926
3082
  import path7 from "node:path";
2927
3083
 
2928
3084
  // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3469,6 +3625,7 @@ async function evaluateCandidate(options) {
3469
3625
  };
3470
3626
  return {
3471
3627
  eval_id: evalCase.id,
3628
+ dataset: evalCase.dataset,
3472
3629
  conversation_id: evalCase.conversation_id,
3473
3630
  score: score.score,
3474
3631
  hits: score.hits,
@@ -3645,7 +3802,7 @@ async function runLlmJudgeEvaluator(options) {
3645
3802
  async function resolveCustomPrompt(config) {
3646
3803
  if (config.promptPath) {
3647
3804
  try {
3648
- return await readFile4(config.promptPath, "utf8");
3805
+ return await readTextFile(config.promptPath);
3649
3806
  } catch (error) {
3650
3807
  const message = error instanceof Error ? error.message : String(error);
3651
3808
  console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -3733,6 +3890,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
3733
3890
  };
3734
3891
  return {
3735
3892
  eval_id: evalCase.id,
3893
+ dataset: evalCase.dataset,
3736
3894
  conversation_id: evalCase.conversation_id,
3737
3895
  score: 0,
3738
3896
  hits: [],
@@ -3782,6 +3940,7 @@ export {
3782
3940
  buildDirectoryChain,
3783
3941
  buildPromptInputs,
3784
3942
  buildSearchRoots,
3943
+ consumeCodexLogEntries,
3785
3944
  createAgentKernel,
3786
3945
  createProvider,
3787
3946
  ensureVSCodeSubagents,
@@ -3798,10 +3957,12 @@ export {
3798
3957
  listTargetNames,
3799
3958
  loadEvalCases,
3800
3959
  readTargetDefinitions,
3960
+ readTextFile,
3801
3961
  resolveAndCreateProvider,
3802
3962
  resolveFileReference,
3803
3963
  resolveTargetDefinition,
3804
3964
  runEvalCase,
3805
- runEvaluation
3965
+ runEvaluation,
3966
+ subscribeToCodexLogEntries
3806
3967
  };
3807
3968
  //# sourceMappingURL=index.js.map