@agentv/core 0.10.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  readTextFile,
10
10
  resolveFileReference,
11
11
  resolveTargetDefinition
12
- } from "./chunk-YQBJAT5I.js";
12
+ } from "./chunk-U3GEJ3K7.js";
13
13
 
14
14
  // src/evaluation/types.ts
15
15
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -62,48 +62,197 @@ function getHitCount(result) {
62
62
  }
63
63
 
64
64
  // src/evaluation/yaml-parser.ts
65
+ import { readFile as readFile4 } from "node:fs/promises";
66
+ import path6 from "node:path";
67
+ import { parse as parse2 } from "yaml";
68
+
69
+ // src/evaluation/formatting/segment-formatter.ts
70
+ function extractCodeBlocks(segments) {
71
+ const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
72
+ const codeBlocks = [];
73
+ for (const segment of segments) {
74
+ const typeValue = segment["type"];
75
+ if (typeof typeValue !== "string" || typeValue !== "text") {
76
+ continue;
77
+ }
78
+ const textValue = segment["value"];
79
+ if (typeof textValue !== "string") {
80
+ continue;
81
+ }
82
+ const matches = textValue.match(CODE_BLOCK_PATTERN);
83
+ if (matches) {
84
+ codeBlocks.push(...matches);
85
+ }
86
+ }
87
+ return codeBlocks;
88
+ }
89
+ function formatFileContents(parts) {
90
+ const fileCount = parts.filter((p) => p.isFile).length;
91
+ if (fileCount > 0) {
92
+ return parts.map((part) => {
93
+ if (part.isFile && part.displayPath) {
94
+ return `<file path="${part.displayPath}">
95
+ ${part.content}
96
+ </file>`;
97
+ }
98
+ return part.content;
99
+ }).join("\n\n");
100
+ }
101
+ return parts.map((p) => p.content).join(" ");
102
+ }
103
+ function formatSegment(segment) {
104
+ const type = asString(segment.type);
105
+ if (type === "text") {
106
+ return asString(segment.value);
107
+ }
108
+ if (type === "guideline_ref") {
109
+ const refPath = asString(segment.path);
110
+ return refPath ? `<Attached: ${refPath}>` : void 0;
111
+ }
112
+ if (type === "file") {
113
+ const text = asString(segment.text);
114
+ const filePath = asString(segment.path);
115
+ if (text && filePath) {
116
+ return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
117
+ }
118
+ }
119
+ return void 0;
120
+ }
121
+ function hasVisibleContent(segments) {
122
+ return segments.some((segment) => {
123
+ const type = asString(segment.type);
124
+ if (type === "text") {
125
+ const value = asString(segment.value);
126
+ return value !== void 0 && value.trim().length > 0;
127
+ }
128
+ if (type === "guideline_ref") {
129
+ return false;
130
+ }
131
+ if (type === "file") {
132
+ const text = asString(segment.text);
133
+ return text !== void 0 && text.trim().length > 0;
134
+ }
135
+ return false;
136
+ });
137
+ }
138
+ function asString(value) {
139
+ return typeof value === "string" ? value : void 0;
140
+ }
141
+
142
+ // src/evaluation/loaders/config-loader.ts
65
143
  import micromatch from "micromatch";
144
+ import { readFile } from "node:fs/promises";
145
+ import path2 from "node:path";
146
+ import { parse } from "yaml";
147
+
148
+ // src/evaluation/loaders/file-resolver.ts
66
149
  import { constants } from "node:fs";
67
- import { access, readFile } from "node:fs/promises";
150
+ import { access } from "node:fs/promises";
68
151
  import path from "node:path";
69
- import { fileURLToPath } from "node:url";
70
- import { parse } from "yaml";
71
- var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
72
- var ANSI_YELLOW = "\x1B[33m";
73
- var ANSI_RESET = "\x1B[0m";
74
- var SCHEMA_EVAL_V2 = "agentv-eval-v2";
75
- var SCHEMA_CONFIG_V2 = "agentv-config-v2";
76
- async function readTestSuiteMetadata(testFilePath) {
152
+ async function fileExists2(absolutePath) {
77
153
  try {
78
- const absolutePath = path.resolve(testFilePath);
79
- const content = await readFile(absolutePath, "utf8");
80
- const parsed = parse(content);
81
- if (!isJsonObject(parsed)) {
82
- return {};
83
- }
84
- return { target: extractTargetFromSuite(parsed) };
154
+ await access(absolutePath, constants.F_OK);
155
+ return true;
85
156
  } catch {
86
- return {};
157
+ return false;
87
158
  }
88
159
  }
89
- function extractTargetFromSuite(suite) {
90
- const execution = suite.execution;
91
- if (execution && typeof execution === "object" && !Array.isArray(execution)) {
92
- const executionTarget = execution.target;
93
- if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
94
- return executionTarget.trim();
160
+ function resolveToAbsolutePath(candidate) {
161
+ if (candidate instanceof URL) {
162
+ return new URL(candidate).pathname;
163
+ }
164
+ if (typeof candidate === "string") {
165
+ if (candidate.startsWith("file://")) {
166
+ return new URL(candidate).pathname;
95
167
  }
168
+ return path.resolve(candidate);
96
169
  }
97
- const targetValue = suite.target;
98
- if (typeof targetValue === "string" && targetValue.trim().length > 0) {
99
- return targetValue.trim();
170
+ throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
171
+ }
172
+ function buildDirectoryChain2(filePath, repoRoot) {
173
+ const directories = [];
174
+ const seen = /* @__PURE__ */ new Set();
175
+ const boundary = path.resolve(repoRoot);
176
+ let current = path.resolve(path.dirname(filePath));
177
+ while (current !== void 0) {
178
+ if (!seen.has(current)) {
179
+ directories.push(current);
180
+ seen.add(current);
181
+ }
182
+ if (current === boundary) {
183
+ break;
184
+ }
185
+ const parent = path.dirname(current);
186
+ if (parent === current) {
187
+ break;
188
+ }
189
+ current = parent;
100
190
  }
101
- return void 0;
191
+ if (!seen.has(boundary)) {
192
+ directories.push(boundary);
193
+ }
194
+ return directories;
195
+ }
196
+ function buildSearchRoots2(evalPath, repoRoot) {
197
+ const uniqueRoots = [];
198
+ const addRoot = (root) => {
199
+ const normalized = path.resolve(root);
200
+ if (!uniqueRoots.includes(normalized)) {
201
+ uniqueRoots.push(normalized);
202
+ }
203
+ };
204
+ let currentDir = path.dirname(evalPath);
205
+ let reachedBoundary = false;
206
+ while (!reachedBoundary) {
207
+ addRoot(currentDir);
208
+ const parentDir = path.dirname(currentDir);
209
+ if (currentDir === repoRoot || parentDir === currentDir) {
210
+ reachedBoundary = true;
211
+ } else {
212
+ currentDir = parentDir;
213
+ }
214
+ }
215
+ addRoot(repoRoot);
216
+ addRoot(process.cwd());
217
+ return uniqueRoots;
218
+ }
219
+ function trimLeadingSeparators(value) {
220
+ const trimmed = value.replace(/^[/\\]+/, "");
221
+ return trimmed.length > 0 ? trimmed : value;
102
222
  }
223
+ async function resolveFileReference2(rawValue, searchRoots) {
224
+ const displayPath = trimLeadingSeparators(rawValue);
225
+ const potentialPaths = [];
226
+ if (path.isAbsolute(rawValue)) {
227
+ potentialPaths.push(path.normalize(rawValue));
228
+ }
229
+ for (const base of searchRoots) {
230
+ potentialPaths.push(path.resolve(base, displayPath));
231
+ }
232
+ const attempted = [];
233
+ const seen = /* @__PURE__ */ new Set();
234
+ for (const candidate of potentialPaths) {
235
+ const absoluteCandidate = path.resolve(candidate);
236
+ if (seen.has(absoluteCandidate)) {
237
+ continue;
238
+ }
239
+ seen.add(absoluteCandidate);
240
+ attempted.push(absoluteCandidate);
241
+ if (await fileExists2(absoluteCandidate)) {
242
+ return { displayPath, resolvedPath: absoluteCandidate, attempted };
243
+ }
244
+ }
245
+ return { displayPath, attempted };
246
+ }
247
+
248
+ // src/evaluation/loaders/config-loader.ts
249
+ var SCHEMA_CONFIG_V2 = "agentv-config-v2";
250
+ var ANSI_YELLOW = "\x1B[33m";
251
+ var ANSI_RESET = "\x1B[0m";
103
252
  async function loadConfig(evalFilePath, repoRoot) {
104
- const directories = buildDirectoryChain(evalFilePath, repoRoot);
253
+ const directories = buildDirectoryChain2(evalFilePath, repoRoot);
105
254
  for (const directory of directories) {
106
- const configPath = path.join(directory, ".agentv", "config.yaml");
255
+ const configPath = path2.join(directory, ".agentv", "config.yaml");
107
256
  if (!await fileExists2(configPath)) {
108
257
  continue;
109
258
  }
@@ -146,24 +295,134 @@ function isGuidelineFile(filePath, patterns) {
146
295
  const patternsToUse = patterns ?? [];
147
296
  return micromatch.isMatch(normalized, patternsToUse);
148
297
  }
149
- function extractCodeBlocks(segments) {
150
- const codeBlocks = [];
151
- for (const segment of segments) {
152
- const typeValue = segment["type"];
153
- if (typeof typeValue !== "string" || typeValue !== "text") {
298
+ function extractTargetFromSuite(suite) {
299
+ const execution = suite.execution;
300
+ if (execution && typeof execution === "object" && !Array.isArray(execution)) {
301
+ const executionTarget = execution.target;
302
+ if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
303
+ return executionTarget.trim();
304
+ }
305
+ }
306
+ const targetValue = suite.target;
307
+ if (typeof targetValue === "string" && targetValue.trim().length > 0) {
308
+ return targetValue.trim();
309
+ }
310
+ return void 0;
311
+ }
312
+ function logWarning(message) {
313
+ console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
314
+ }
315
+
316
+ // src/evaluation/loaders/evaluator-parser.ts
317
+ import path3 from "node:path";
318
+ var ANSI_YELLOW2 = "\x1B[33m";
319
+ var ANSI_RESET2 = "\x1B[0m";
320
+ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
321
+ const execution = rawEvalCase.execution;
322
+ const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
323
+ if (candidateEvaluators === void 0) {
324
+ return void 0;
325
+ }
326
+ if (!Array.isArray(candidateEvaluators)) {
327
+ logWarning2(`Skipping evaluators for '${evalId}': expected array`);
328
+ return void 0;
329
+ }
330
+ const evaluators = [];
331
+ for (const rawEvaluator of candidateEvaluators) {
332
+ if (!isJsonObject2(rawEvaluator)) {
333
+ logWarning2(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
154
334
  continue;
155
335
  }
156
- const textValue = segment["value"];
157
- if (typeof textValue !== "string") {
336
+ const name = asString2(rawEvaluator.name);
337
+ const typeValue = rawEvaluator.type;
338
+ if (!name || !isEvaluatorKind(typeValue)) {
339
+ logWarning2(`Skipping evaluator with invalid name/type in '${evalId}'`);
158
340
  continue;
159
341
  }
160
- const matches = textValue.match(CODE_BLOCK_PATTERN);
161
- if (matches) {
162
- codeBlocks.push(...matches);
342
+ if (typeValue === "code") {
343
+ const script = asString2(rawEvaluator.script);
344
+ if (!script) {
345
+ logWarning2(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
346
+ continue;
347
+ }
348
+ const cwd = asString2(rawEvaluator.cwd);
349
+ let resolvedCwd;
350
+ if (cwd) {
351
+ const resolved = await resolveFileReference2(cwd, searchRoots);
352
+ if (resolved.resolvedPath) {
353
+ resolvedCwd = path3.resolve(resolved.resolvedPath);
354
+ } else {
355
+ logWarning2(
356
+ `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
357
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
358
+ );
359
+ }
360
+ } else {
361
+ resolvedCwd = searchRoots[0];
362
+ }
363
+ evaluators.push({
364
+ name,
365
+ type: "code",
366
+ script,
367
+ cwd,
368
+ resolvedCwd
369
+ });
370
+ continue;
163
371
  }
372
+ const prompt = asString2(rawEvaluator.prompt);
373
+ let promptPath;
374
+ if (prompt) {
375
+ const resolved = await resolveFileReference2(prompt, searchRoots);
376
+ if (resolved.resolvedPath) {
377
+ promptPath = path3.resolve(resolved.resolvedPath);
378
+ } else {
379
+ logWarning2(
380
+ `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
381
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
382
+ );
383
+ }
384
+ }
385
+ const _model = asString2(rawEvaluator.model);
386
+ evaluators.push({
387
+ name,
388
+ type: "llm_judge",
389
+ prompt,
390
+ promptPath
391
+ });
164
392
  }
165
- return codeBlocks;
393
+ return evaluators.length > 0 ? evaluators : void 0;
394
+ }
395
+ function coerceEvaluator(candidate, contextId) {
396
+ if (typeof candidate !== "string") {
397
+ return void 0;
398
+ }
399
+ if (isEvaluatorKind(candidate)) {
400
+ return candidate;
401
+ }
402
+ logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
403
+ return void 0;
404
+ }
405
+ function asString2(value) {
406
+ return typeof value === "string" ? value : void 0;
166
407
  }
408
+ function isJsonObject2(value) {
409
+ return typeof value === "object" && value !== null && !Array.isArray(value);
410
+ }
411
+ function logWarning2(message, details) {
412
+ if (details && details.length > 0) {
413
+ const detailBlock = details.join("\n");
414
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}
415
+ ${detailBlock}${ANSI_RESET2}`);
416
+ } else {
417
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
418
+ }
419
+ }
420
+
421
+ // src/evaluation/loaders/message-processor.ts
422
+ import { readFile as readFile2 } from "node:fs/promises";
423
+ import path4 from "node:path";
424
+ var ANSI_YELLOW3 = "\x1B[33m";
425
+ var ANSI_RESET3 = "\x1B[0m";
167
426
  async function processMessages(options) {
168
427
  const {
169
428
  messages,
@@ -189,28 +448,28 @@ async function processMessages(options) {
189
448
  if (!isJsonObject(rawSegment)) {
190
449
  continue;
191
450
  }
192
- const segmentType = asString(rawSegment.type);
451
+ const segmentType = asString3(rawSegment.type);
193
452
  if (segmentType === "file") {
194
- const rawValue = asString(rawSegment.value);
453
+ const rawValue = asString3(rawSegment.value);
195
454
  if (!rawValue) {
196
455
  continue;
197
456
  }
198
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
457
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
199
458
  rawValue,
200
459
  searchRoots
201
460
  );
202
461
  if (!resolvedPath) {
203
462
  const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
204
463
  const context = messageType === "input" ? "" : " in expected_messages";
205
- logWarning(`File not found${context}: ${displayPath}`, attempts);
464
+ logWarning3(`File not found${context}: ${displayPath}`, attempts);
206
465
  continue;
207
466
  }
208
467
  try {
209
- const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
468
+ const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
210
469
  if (messageType === "input" && guidelinePatterns && guidelinePaths) {
211
- const relativeToRepo = path.relative(repoRootPath, resolvedPath);
470
+ const relativeToRepo = path4.relative(repoRootPath, resolvedPath);
212
471
  if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
213
- guidelinePaths.push(path.resolve(resolvedPath));
472
+ guidelinePaths.push(path4.resolve(resolvedPath));
214
473
  if (verbose) {
215
474
  console.log(` [Guideline] Found: ${displayPath}`);
216
475
  console.log(` Resolved to: ${resolvedPath}`);
@@ -222,7 +481,7 @@ async function processMessages(options) {
222
481
  type: "file",
223
482
  path: displayPath,
224
483
  text: fileContent,
225
- resolvedPath: path.resolve(resolvedPath)
484
+ resolvedPath: path4.resolve(resolvedPath)
226
485
  });
227
486
  if (verbose) {
228
487
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -231,7 +490,7 @@ async function processMessages(options) {
231
490
  }
232
491
  } catch (error) {
233
492
  const context = messageType === "input" ? "" : " expected output";
234
- logWarning(`Could not read${context} file ${resolvedPath}: ${error.message}`);
493
+ logWarning3(`Could not read${context} file ${resolvedPath}: ${error.message}`);
235
494
  }
236
495
  continue;
237
496
  }
@@ -245,202 +504,120 @@ async function processMessages(options) {
245
504
  }
246
505
  return segments;
247
506
  }
248
- async function loadEvalCases(evalFilePath, repoRoot, options) {
249
- const verbose = options?.verbose ?? false;
250
- const evalIdFilter = options?.evalId;
251
- const absoluteTestPath = path.resolve(evalFilePath);
252
- if (!await fileExists2(absoluteTestPath)) {
253
- throw new Error(`Test file not found: ${evalFilePath}`);
254
- }
255
- const repoRootPath = resolveToAbsolutePath(repoRoot);
256
- const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
257
- const config = await loadConfig(absoluteTestPath, repoRootPath);
258
- const guidelinePatterns = config?.guideline_patterns;
259
- const rawFile = await readFile(absoluteTestPath, "utf8");
260
- const parsed = parse(rawFile);
261
- if (!isJsonObject(parsed)) {
262
- throw new Error(`Invalid test file format: ${evalFilePath}`);
263
- }
264
- const suite = parsed;
265
- const datasetNameFromSuite = asString(suite.dataset)?.trim();
266
- const fallbackDataset = path.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
267
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
268
- const schema = suite.$schema;
269
- if (schema !== SCHEMA_EVAL_V2) {
270
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
271
- Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
272
- throw new Error(message);
507
+ async function resolveAssistantContent(content, searchRoots, verbose) {
508
+ if (typeof content === "string") {
509
+ return content;
273
510
  }
274
- const rawTestcases = suite.evalcases;
275
- if (!Array.isArray(rawTestcases)) {
276
- throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
511
+ if (!content) {
512
+ return "";
277
513
  }
278
- const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
279
- const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
280
- const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
281
- const results = [];
282
- for (const rawEvalcase of rawTestcases) {
283
- if (!isJsonObject(rawEvalcase)) {
284
- logWarning("Skipping invalid eval case entry (expected object)");
514
+ const parts = [];
515
+ for (const entry of content) {
516
+ if (typeof entry === "string") {
517
+ parts.push({ content: entry, isFile: false });
285
518
  continue;
286
519
  }
287
- const evalcase = rawEvalcase;
288
- const id = asString(evalcase.id);
289
- if (evalIdFilter && id !== evalIdFilter) {
520
+ if (!isJsonObject(entry)) {
290
521
  continue;
291
522
  }
292
- const conversationId = asString(evalcase.conversation_id);
293
- const outcome = asString(evalcase.outcome);
294
- const inputMessagesValue = evalcase.input_messages;
295
- const expectedMessagesValue = evalcase.expected_messages;
296
- if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
297
- logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
523
+ const segmentType = asString3(entry.type);
524
+ if (segmentType === "file") {
525
+ const rawValue = asString3(entry.value);
526
+ if (!rawValue) {
527
+ continue;
528
+ }
529
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
530
+ rawValue,
531
+ searchRoots
532
+ );
533
+ if (!resolvedPath) {
534
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
535
+ logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
536
+ continue;
537
+ }
538
+ try {
539
+ const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
540
+ parts.push({ content: fileContent, isFile: true, displayPath });
541
+ if (verbose) {
542
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
543
+ console.log(` Resolved to: ${resolvedPath}`);
544
+ }
545
+ } catch (error) {
546
+ logWarning3(`Could not read file ${resolvedPath}: ${error.message}`);
547
+ }
298
548
  continue;
299
549
  }
300
- const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
301
- const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
302
- const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
303
- if (hasExpectedMessages && expectedMessages.length === 0) {
304
- logWarning(`No valid expected message found for eval case: ${id}`);
550
+ const textValue = asString3(entry.text);
551
+ if (typeof textValue === "string") {
552
+ parts.push({ content: textValue, isFile: false });
305
553
  continue;
306
554
  }
307
- if (expectedMessages.length > 1) {
308
- logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
309
- }
310
- const guidelinePaths = [];
311
- const inputTextParts = [];
312
- const inputSegments = await processMessages({
313
- messages: inputMessages,
314
- searchRoots,
315
- repoRootPath,
316
- guidelinePatterns,
317
- guidelinePaths,
318
- textParts: inputTextParts,
319
- messageType: "input",
320
- verbose
321
- });
322
- const outputSegments = hasExpectedMessages ? await processMessages({
323
- messages: expectedMessages,
324
- searchRoots,
325
- repoRootPath,
326
- guidelinePatterns,
327
- messageType: "output",
328
- verbose
329
- }) : [];
330
- const codeSnippets = extractCodeBlocks(inputSegments);
331
- const expectedContent = expectedMessages[0]?.content;
332
- const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
333
- const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
334
- const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
335
- const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
336
- const userFilePaths = [];
337
- for (const segment of inputSegments) {
338
- if (segment.type === "file" && typeof segment.resolvedPath === "string") {
339
- userFilePaths.push(segment.resolvedPath);
340
- }
341
- }
342
- const allFilePaths = [
343
- ...guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
344
- ...userFilePaths
345
- ];
346
- const testCase = {
347
- id,
348
- dataset: datasetName,
349
- conversation_id: conversationId,
350
- question,
351
- input_messages: inputMessages,
352
- input_segments: inputSegments,
353
- output_segments: outputSegments,
354
- reference_answer: referenceAnswer,
355
- guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
356
- guideline_patterns: guidelinePatterns,
357
- file_paths: allFilePaths,
358
- code_snippets: codeSnippets,
359
- expected_outcome: outcome,
360
- evaluator: evalCaseEvaluatorKind,
361
- evaluators
362
- };
363
- if (verbose) {
364
- console.log(`
365
- [Eval Case: ${id}]`);
366
- if (testCase.guideline_paths.length > 0) {
367
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
368
- for (const guidelinePath of testCase.guideline_paths) {
369
- console.log(` - ${guidelinePath}`);
370
- }
371
- } else {
372
- console.log(" No guidelines found");
373
- }
555
+ const valueValue = asString3(entry.value);
556
+ if (typeof valueValue === "string") {
557
+ parts.push({ content: valueValue, isFile: false });
558
+ continue;
374
559
  }
375
- results.push(testCase);
560
+ parts.push({ content: JSON.stringify(entry), isFile: false });
376
561
  }
377
- return results;
562
+ return formatFileContents(parts);
378
563
  }
379
- function needsRoleMarkers(messages, processedSegmentsByMessage) {
380
- if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
381
- return true;
382
- }
383
- let messagesWithContent = 0;
384
- for (const segments of processedSegmentsByMessage) {
385
- if (hasVisibleContent(segments)) {
386
- messagesWithContent++;
387
- }
388
- }
389
- return messagesWithContent > 1;
564
+ function asString3(value) {
565
+ return typeof value === "string" ? value : void 0;
390
566
  }
391
- function hasVisibleContent(segments) {
392
- return segments.some((segment) => {
393
- const type = asString(segment.type);
394
- if (type === "text") {
395
- const value = asString(segment.value);
396
- return value !== void 0 && value.trim().length > 0;
397
- }
398
- if (type === "guideline_ref") {
399
- return false;
400
- }
401
- if (type === "file") {
402
- const text = asString(segment.text);
403
- return text !== void 0 && text.trim().length > 0;
404
- }
405
- return false;
406
- });
567
+ function cloneJsonObject(source) {
568
+ const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
569
+ return Object.fromEntries(entries);
407
570
  }
408
- function formatSegment(segment) {
409
- const type = asString(segment.type);
410
- if (type === "text") {
411
- return asString(segment.value);
571
+ function cloneJsonValue(value) {
572
+ if (value === null) {
573
+ return null;
412
574
  }
413
- if (type === "guideline_ref") {
414
- const refPath = asString(segment.path);
415
- return refPath ? `<Attached: ${refPath}>` : void 0;
575
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
576
+ return value;
416
577
  }
417
- if (type === "file") {
418
- const text = asString(segment.text);
419
- const filePath = asString(segment.path);
420
- if (text && filePath) {
421
- return `=== ${filePath} ===
422
- ${text}`;
423
- }
578
+ if (Array.isArray(value)) {
579
+ return value.map((item) => cloneJsonValue(item));
580
+ }
581
+ if (typeof value === "object") {
582
+ return cloneJsonObject(value);
583
+ }
584
+ return value;
585
+ }
586
+ function logWarning3(message, details) {
587
+ if (details && details.length > 0) {
588
+ const detailBlock = details.join("\n");
589
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
590
+ ${detailBlock}${ANSI_RESET3}`);
591
+ } else {
592
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
424
593
  }
425
- return void 0;
426
594
  }
595
+
596
+ // src/evaluation/formatting/prompt-builder.ts
597
+ import { readFile as readFile3 } from "node:fs/promises";
598
+ import path5 from "node:path";
599
+ var ANSI_YELLOW4 = "\x1B[33m";
600
+ var ANSI_RESET4 = "\x1B[0m";
427
601
  async function buildPromptInputs(testCase) {
428
- const guidelineContents = [];
602
+ const guidelineParts = [];
429
603
  for (const rawPath of testCase.guideline_paths) {
430
- const absolutePath = path.resolve(rawPath);
604
+ const absolutePath = path5.resolve(rawPath);
431
605
  if (!await fileExists2(absolutePath)) {
432
- logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
606
+ logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
433
607
  continue;
434
608
  }
435
609
  try {
436
- const content = (await readFile(absolutePath, "utf8")).replace(/\r\n/g, "\n");
437
- guidelineContents.push(`=== ${path.basename(absolutePath)} ===
438
- ${content}`);
610
+ const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
611
+ guidelineParts.push({
612
+ content,
613
+ isFile: true,
614
+ displayPath: path5.basename(absolutePath)
615
+ });
439
616
  } catch (error) {
440
- logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
617
+ logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
441
618
  }
442
619
  }
443
- const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
620
+ const guidelines = formatFileContents(guidelineParts);
444
621
  const segmentsByMessage = [];
445
622
  const fileContentsByPath = /* @__PURE__ */ new Map();
446
623
  for (const segment of testCase.input_segments) {
@@ -461,9 +638,9 @@ ${content}`);
461
638
  messageSegments.push({ type: "text", value: segment });
462
639
  }
463
640
  } else if (isJsonObject(segment)) {
464
- const type = asString(segment.type);
641
+ const type = asString4(segment.type);
465
642
  if (type === "file") {
466
- const value = asString(segment.value);
643
+ const value = asString4(segment.value);
467
644
  if (!value) continue;
468
645
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
469
646
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -474,7 +651,7 @@ ${content}`);
474
651
  messageSegments.push({ type: "file", text: fileText, path: value });
475
652
  }
476
653
  } else if (type === "text") {
477
- const textValue = asString(segment.value);
654
+ const textValue = asString4(segment.value);
478
655
  if (textValue && textValue.trim().length > 0) {
479
656
  messageSegments.push({ type: "text", value: textValue });
480
657
  }
@@ -530,6 +707,18 @@ ${messageContent}`);
530
707
  }) : void 0;
531
708
  return { question, guidelines, chatPrompt };
532
709
  }
710
+ function needsRoleMarkers(messages, processedSegmentsByMessage) {
711
+ if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
712
+ return true;
713
+ }
714
+ let messagesWithContent = 0;
715
+ for (const segments of processedSegmentsByMessage) {
716
+ if (hasVisibleContent(segments)) {
717
+ messagesWithContent++;
718
+ }
719
+ }
720
+ return messagesWithContent > 1;
721
+ }
533
722
  function buildChatPromptFromSegments(options) {
534
723
  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
535
724
  if (messages.length === 0) {
@@ -596,201 +785,175 @@ ${guidelineContent.trim()}`);
596
785
  continue;
597
786
  }
598
787
  chatPrompt.push({
599
- role,
600
- content: contentParts.join("\n"),
601
- ...name ? { name } : {}
602
- });
603
- }
604
- return chatPrompt.length > 0 ? chatPrompt : void 0;
605
- }
606
- async function fileExists2(absolutePath) {
607
- try {
608
- await access(absolutePath, constants.F_OK);
609
- return true;
610
- } catch {
611
- return false;
612
- }
613
- }
614
- function resolveToAbsolutePath(candidate) {
615
- if (candidate instanceof URL) {
616
- return fileURLToPath(candidate);
617
- }
618
- if (typeof candidate === "string") {
619
- if (candidate.startsWith("file://")) {
620
- return fileURLToPath(new URL(candidate));
621
- }
622
- return path.resolve(candidate);
788
+ role,
789
+ content: contentParts.join("\n"),
790
+ ...name ? { name } : {}
791
+ });
623
792
  }
624
- throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
793
+ return chatPrompt.length > 0 ? chatPrompt : void 0;
625
794
  }
626
- function asString(value) {
795
+ function asString4(value) {
627
796
  return typeof value === "string" ? value : void 0;
628
797
  }
629
- function cloneJsonObject(source) {
630
- const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
631
- return Object.fromEntries(entries);
798
+ function logWarning4(message) {
799
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
632
800
  }
633
- function cloneJsonValue(value) {
634
- if (value === null) {
635
- return null;
636
- }
637
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
638
- return value;
639
- }
640
- if (Array.isArray(value)) {
641
- return value.map((item) => cloneJsonValue(item));
801
+
802
+ // src/evaluation/yaml-parser.ts
803
+ var ANSI_YELLOW5 = "\x1B[33m";
804
+ var ANSI_RESET5 = "\x1B[0m";
805
+ var SCHEMA_EVAL_V2 = "agentv-eval-v2";
806
+ async function readTestSuiteMetadata(testFilePath) {
807
+ try {
808
+ const absolutePath = path6.resolve(testFilePath);
809
+ const content = await readFile4(absolutePath, "utf8");
810
+ const parsed = parse2(content);
811
+ if (!isJsonObject(parsed)) {
812
+ return {};
813
+ }
814
+ return { target: extractTargetFromSuite(parsed) };
815
+ } catch {
816
+ return {};
642
817
  }
643
- return cloneJsonObject(value);
644
818
  }
645
- async function resolveAssistantContent(content, searchRoots, verbose) {
646
- if (typeof content === "string") {
647
- return content;
819
+ async function loadEvalCases(evalFilePath, repoRoot, options) {
820
+ const verbose = options?.verbose ?? false;
821
+ const evalIdFilter = options?.evalId;
822
+ const absoluteTestPath = path6.resolve(evalFilePath);
823
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
824
+ const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
825
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
826
+ const guidelinePatterns = config?.guideline_patterns;
827
+ const rawFile = await readFile4(absoluteTestPath, "utf8");
828
+ const parsed = parse2(rawFile);
829
+ if (!isJsonObject(parsed)) {
830
+ throw new Error(`Invalid test file format: ${evalFilePath}`);
648
831
  }
649
- if (!content) {
650
- return "";
832
+ const suite = parsed;
833
+ const datasetNameFromSuite = asString5(suite.dataset)?.trim();
834
+ const fallbackDataset = path6.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
835
+ const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
836
+ const schema = suite.$schema;
837
+ if (schema !== SCHEMA_EVAL_V2) {
838
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
839
+ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
840
+ throw new Error(message);
651
841
  }
652
- const parts = [];
653
- for (const entry of content) {
654
- if (typeof entry === "string") {
655
- parts.push(entry);
656
- continue;
657
- }
658
- if (!isJsonObject(entry)) {
659
- continue;
660
- }
661
- const segmentType = asString(entry.type);
662
- if (segmentType === "file") {
663
- const rawValue = asString(entry.value);
664
- if (!rawValue) {
665
- continue;
666
- }
667
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
668
- rawValue,
669
- searchRoots
670
- );
671
- if (!resolvedPath) {
672
- const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
673
- logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
674
- continue;
675
- }
676
- try {
677
- const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
678
- parts.push(fileContent);
679
- if (verbose) {
680
- console.log(` [Expected Assistant File] Found: ${displayPath}`);
681
- console.log(` Resolved to: ${resolvedPath}`);
682
- }
683
- } catch (error) {
684
- logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
685
- }
842
+ const rawTestcases = suite.evalcases;
843
+ if (!Array.isArray(rawTestcases)) {
844
+ throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
845
+ }
846
+ const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
847
+ const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
848
+ const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
849
+ const results = [];
850
+ for (const rawEvalcase of rawTestcases) {
851
+ if (!isJsonObject(rawEvalcase)) {
852
+ logWarning5("Skipping invalid eval case entry (expected object)");
686
853
  continue;
687
854
  }
688
- const textValue = asString(entry.text);
689
- if (typeof textValue === "string") {
690
- parts.push(textValue);
855
+ const evalcase = rawEvalcase;
856
+ const id = asString5(evalcase.id);
857
+ if (evalIdFilter && id !== evalIdFilter) {
691
858
  continue;
692
859
  }
693
- const valueValue = asString(entry.value);
694
- if (typeof valueValue === "string") {
695
- parts.push(valueValue);
860
+ const conversationId = asString5(evalcase.conversation_id);
861
+ const outcome = asString5(evalcase.outcome);
862
+ const inputMessagesValue = evalcase.input_messages;
863
+ const expectedMessagesValue = evalcase.expected_messages;
864
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
865
+ logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
696
866
  continue;
697
867
  }
698
- parts.push(JSON.stringify(entry));
699
- }
700
- return parts.join(" ");
701
- }
702
- async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
703
- const execution = rawEvalCase.execution;
704
- const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
705
- if (candidateEvaluators === void 0) {
706
- return void 0;
707
- }
708
- if (!Array.isArray(candidateEvaluators)) {
709
- logWarning(`Skipping evaluators for '${evalId}': expected array`);
710
- return void 0;
711
- }
712
- const evaluators = [];
713
- for (const rawEvaluator of candidateEvaluators) {
714
- if (!isJsonObject(rawEvaluator)) {
715
- logWarning(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
868
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
869
+ const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
870
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
871
+ if (hasExpectedMessages && expectedMessages.length === 0) {
872
+ logWarning5(`No valid expected message found for eval case: ${id}`);
716
873
  continue;
717
874
  }
718
- const name = asString(rawEvaluator.name);
719
- const typeValue = rawEvaluator.type;
720
- if (!name || !isEvaluatorKind(typeValue)) {
721
- logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
722
- continue;
875
+ if (expectedMessages.length > 1) {
876
+ logWarning5(`Multiple expected messages found for eval case: ${id}, using first`);
723
877
  }
724
- if (typeValue === "code") {
725
- const script = asString(rawEvaluator.script);
726
- if (!script) {
727
- logWarning(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
728
- continue;
729
- }
730
- const cwd = asString(rawEvaluator.cwd);
731
- let resolvedCwd;
732
- if (cwd) {
733
- const resolved = await resolveFileReference(cwd, searchRoots);
734
- if (resolved.resolvedPath) {
735
- resolvedCwd = path.resolve(resolved.resolvedPath);
736
- } else {
737
- logWarning(
738
- `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
739
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
740
- );
741
- }
742
- } else {
743
- resolvedCwd = searchRoots[0];
878
+ const guidelinePaths = [];
879
+ const inputTextParts = [];
880
+ const inputSegments = await processMessages({
881
+ messages: inputMessages,
882
+ searchRoots,
883
+ repoRootPath,
884
+ guidelinePatterns,
885
+ guidelinePaths,
886
+ textParts: inputTextParts,
887
+ messageType: "input",
888
+ verbose
889
+ });
890
+ const outputSegments = hasExpectedMessages ? await processMessages({
891
+ messages: expectedMessages,
892
+ searchRoots,
893
+ repoRootPath,
894
+ guidelinePatterns,
895
+ messageType: "output",
896
+ verbose
897
+ }) : [];
898
+ const codeSnippets = extractCodeBlocks(inputSegments);
899
+ const expectedContent = expectedMessages[0]?.content;
900
+ const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
901
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
902
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
903
+ const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
904
+ const userFilePaths = [];
905
+ for (const segment of inputSegments) {
906
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
907
+ userFilePaths.push(segment.resolvedPath);
744
908
  }
745
- evaluators.push({
746
- name,
747
- type: "code",
748
- script,
749
- cwd,
750
- resolvedCwd
751
- });
752
- continue;
753
909
  }
754
- const prompt = asString(rawEvaluator.prompt);
755
- let promptPath;
756
- if (prompt) {
757
- const resolved = await resolveFileReference(prompt, searchRoots);
758
- if (resolved.resolvedPath) {
759
- promptPath = path.resolve(resolved.resolvedPath);
910
+ const allFilePaths = [
911
+ ...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
912
+ ...userFilePaths
913
+ ];
914
+ const testCase = {
915
+ id,
916
+ dataset: datasetName,
917
+ conversation_id: conversationId,
918
+ question,
919
+ input_messages: inputMessages,
920
+ input_segments: inputSegments,
921
+ output_segments: outputSegments,
922
+ reference_answer: referenceAnswer,
923
+ guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
924
+ guideline_patterns: guidelinePatterns,
925
+ file_paths: allFilePaths,
926
+ code_snippets: codeSnippets,
927
+ expected_outcome: outcome,
928
+ evaluator: evalCaseEvaluatorKind,
929
+ evaluators
930
+ };
931
+ if (verbose) {
932
+ console.log(`
933
+ [Eval Case: ${id}]`);
934
+ if (testCase.guideline_paths.length > 0) {
935
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
936
+ for (const guidelinePath of testCase.guideline_paths) {
937
+ console.log(` - ${guidelinePath}`);
938
+ }
760
939
  } else {
761
- logWarning(
762
- `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
763
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
764
- );
940
+ console.log(" No guidelines found");
765
941
  }
766
942
  }
767
- const model = asString(rawEvaluator.model);
768
- evaluators.push({
769
- name,
770
- type: "llm_judge",
771
- prompt,
772
- promptPath
773
- });
943
+ results.push(testCase);
774
944
  }
775
- return evaluators.length > 0 ? evaluators : void 0;
945
+ return results;
776
946
  }
777
- function coerceEvaluator(candidate, contextId) {
778
- if (typeof candidate !== "string") {
779
- return void 0;
780
- }
781
- if (isEvaluatorKind(candidate)) {
782
- return candidate;
783
- }
784
- logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
785
- return void 0;
947
+ function asString5(value) {
948
+ return typeof value === "string" ? value : void 0;
786
949
  }
787
- function logWarning(message, details) {
950
+ function logWarning5(message, details) {
788
951
  if (details && details.length > 0) {
789
952
  const detailBlock = details.join("\n");
790
- console.warn(`${ANSI_YELLOW}Warning: ${message}
791
- ${detailBlock}${ANSI_RESET}`);
953
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
954
+ ${detailBlock}${ANSI_RESET5}`);
792
955
  } else {
793
- console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
956
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
794
957
  }
795
958
  }
796
959
 
@@ -822,9 +985,8 @@ function buildChatPrompt(request) {
822
985
  }
823
986
  function resolveSystemContent(request) {
824
987
  const systemSegments = [];
825
- const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
826
- if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
827
- systemSegments.push(metadataSystemPrompt.trim());
988
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
989
+ systemSegments.push(request.systemPrompt.trim());
828
990
  } else {
829
991
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
830
992
  }
@@ -1077,7 +1239,7 @@ var GeminiProvider = class {
1077
1239
  import { exec as execWithCallback } from "node:child_process";
1078
1240
  import fs from "node:fs/promises";
1079
1241
  import os from "node:os";
1080
- import path2 from "node:path";
1242
+ import path7 from "node:path";
1081
1243
  import { promisify } from "node:util";
1082
1244
  var execAsync = promisify(execWithCallback);
1083
1245
  var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
@@ -1256,7 +1418,7 @@ function normalizeInputFiles(inputFiles) {
1256
1418
  }
1257
1419
  const unique = /* @__PURE__ */ new Map();
1258
1420
  for (const inputFile of inputFiles) {
1259
- const absolutePath = path2.resolve(inputFile);
1421
+ const absolutePath = path7.resolve(inputFile);
1260
1422
  if (!unique.has(absolutePath)) {
1261
1423
  unique.set(absolutePath, absolutePath);
1262
1424
  }
@@ -1270,7 +1432,7 @@ function formatFileList(files, template) {
1270
1432
  const formatter = template ?? "{path}";
1271
1433
  return files.map((filePath) => {
1272
1434
  const escapedPath = shellEscape(filePath);
1273
- const escapedName = shellEscape(path2.basename(filePath));
1435
+ const escapedName = shellEscape(path7.basename(filePath));
1274
1436
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
1275
1437
  }).join(" ");
1276
1438
  }
@@ -1294,7 +1456,7 @@ function generateOutputFilePath(evalCaseId) {
1294
1456
  const safeEvalId = evalCaseId || "unknown";
1295
1457
  const timestamp = Date.now();
1296
1458
  const random = Math.random().toString(36).substring(2, 9);
1297
- return path2.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
1459
+ return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
1298
1460
  }
1299
1461
  function formatTimeoutSuffix(timeoutMs) {
1300
1462
  if (!timeoutMs || timeoutMs <= 0) {
@@ -1310,7 +1472,7 @@ import { randomUUID } from "node:crypto";
1310
1472
  import { constants as constants2, createWriteStream } from "node:fs";
1311
1473
  import { access as access2, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
1312
1474
  import { tmpdir } from "node:os";
1313
- import path4 from "node:path";
1475
+ import path9 from "node:path";
1314
1476
  import { promisify as promisify2 } from "node:util";
1315
1477
 
1316
1478
  // src/evaluation/providers/codex-log-tracker.ts
@@ -1367,7 +1529,7 @@ function subscribeToCodexLogEntries(listener) {
1367
1529
  }
1368
1530
 
1369
1531
  // src/evaluation/providers/preread.ts
1370
- import path3 from "node:path";
1532
+ import path8 from "node:path";
1371
1533
  function buildPromptDocument(request, inputFiles, options) {
1372
1534
  const parts = [];
1373
1535
  const guidelineFiles = collectGuidelineFiles(
@@ -1392,7 +1554,7 @@ function normalizeInputFiles2(inputFiles) {
1392
1554
  }
1393
1555
  const deduped = /* @__PURE__ */ new Map();
1394
1556
  for (const inputFile of inputFiles) {
1395
- const absolutePath = path3.resolve(inputFile);
1557
+ const absolutePath = path8.resolve(inputFile);
1396
1558
  if (!deduped.has(absolutePath)) {
1397
1559
  deduped.set(absolutePath, absolutePath);
1398
1560
  }
@@ -1405,14 +1567,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
1405
1567
  }
1406
1568
  const unique = /* @__PURE__ */ new Map();
1407
1569
  for (const inputFile of inputFiles) {
1408
- const absolutePath = path3.resolve(inputFile);
1570
+ const absolutePath = path8.resolve(inputFile);
1409
1571
  if (overrides?.has(absolutePath)) {
1410
1572
  if (!unique.has(absolutePath)) {
1411
1573
  unique.set(absolutePath, absolutePath);
1412
1574
  }
1413
1575
  continue;
1414
1576
  }
1415
- const normalized = absolutePath.split(path3.sep).join("/");
1577
+ const normalized = absolutePath.split(path8.sep).join("/");
1416
1578
  if (isGuidelineFile(normalized, guidelinePatterns)) {
1417
1579
  if (!unique.has(absolutePath)) {
1418
1580
  unique.set(absolutePath, absolutePath);
@@ -1427,7 +1589,7 @@ function collectInputFiles(inputFiles) {
1427
1589
  }
1428
1590
  const unique = /* @__PURE__ */ new Map();
1429
1591
  for (const inputFile of inputFiles) {
1430
- const absolutePath = path3.resolve(inputFile);
1592
+ const absolutePath = path8.resolve(inputFile);
1431
1593
  if (!unique.has(absolutePath)) {
1432
1594
  unique.set(absolutePath, absolutePath);
1433
1595
  }
@@ -1439,7 +1601,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
1439
1601
  return "";
1440
1602
  }
1441
1603
  const buildList = (files) => files.map((absolutePath) => {
1442
- const fileName = path3.basename(absolutePath);
1604
+ const fileName = path8.basename(absolutePath);
1443
1605
  const fileUri = pathToFileUri(absolutePath);
1444
1606
  return `* [${fileName}](${fileUri})`;
1445
1607
  });
@@ -1459,7 +1621,7 @@ ${buildList(inputFiles).join("\n")}.`);
1459
1621
  return sections.join("\n");
1460
1622
  }
1461
1623
  function pathToFileUri(filePath) {
1462
- const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
1624
+ const absolutePath = path8.isAbsolute(filePath) ? filePath : path8.resolve(filePath);
1463
1625
  const normalizedPath = absolutePath.replace(/\\/g, "/");
1464
1626
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
1465
1627
  return `file:///${normalizedPath}`;
@@ -1497,7 +1659,7 @@ var CodexProvider = class {
1497
1659
  const logger = await this.createStreamLogger(request).catch(() => void 0);
1498
1660
  try {
1499
1661
  const promptContent = buildPromptDocument(request, inputFiles);
1500
- const promptFile = path4.join(workspaceRoot, PROMPT_FILENAME);
1662
+ const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
1501
1663
  await writeFile(promptFile, promptContent, "utf8");
1502
1664
  const args = this.buildCodexArgs();
1503
1665
  const cwd = this.resolveCwd(workspaceRoot);
@@ -1547,7 +1709,7 @@ var CodexProvider = class {
1547
1709
  if (!this.config.cwd) {
1548
1710
  return workspaceRoot;
1549
1711
  }
1550
- return path4.resolve(this.config.cwd);
1712
+ return path9.resolve(this.config.cwd);
1551
1713
  }
1552
1714
  buildCodexArgs() {
1553
1715
  const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
@@ -1581,7 +1743,7 @@ var CodexProvider = class {
1581
1743
  }
1582
1744
  }
1583
1745
  async createWorkspace() {
1584
- return await mkdtemp(path4.join(tmpdir(), WORKSPACE_PREFIX));
1746
+ return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
1585
1747
  }
1586
1748
  async cleanupWorkspace(workspaceRoot) {
1587
1749
  try {
@@ -1595,9 +1757,9 @@ var CodexProvider = class {
1595
1757
  return void 0;
1596
1758
  }
1597
1759
  if (this.config.logDir) {
1598
- return path4.resolve(this.config.logDir);
1760
+ return path9.resolve(this.config.logDir);
1599
1761
  }
1600
- return path4.join(process.cwd(), ".agentv", "logs", "codex");
1762
+ return path9.join(process.cwd(), ".agentv", "logs", "codex");
1601
1763
  }
1602
1764
  async createStreamLogger(request) {
1603
1765
  const logDir = this.resolveLogDirectory();
@@ -1611,7 +1773,7 @@ var CodexProvider = class {
1611
1773
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
1612
1774
  return void 0;
1613
1775
  }
1614
- const filePath = path4.join(logDir, buildLogFilename(request, this.targetName));
1776
+ const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
1615
1777
  try {
1616
1778
  const logger = await CodexStreamLogger.create({
1617
1779
  filePath,
@@ -1826,7 +1988,7 @@ function tryParseJsonValue(rawLine) {
1826
1988
  async function locateExecutable(candidate) {
1827
1989
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
1828
1990
  if (includesPathSeparator) {
1829
- const resolved = path4.isAbsolute(candidate) ? candidate : path4.resolve(candidate);
1991
+ const resolved = path9.isAbsolute(candidate) ? candidate : path9.resolve(candidate);
1830
1992
  const executablePath = await ensureWindowsExecutableVariant(resolved);
1831
1993
  await access2(executablePath, constants2.F_OK);
1832
1994
  return executablePath;
@@ -2173,7 +2335,7 @@ var MockProvider = class {
2173
2335
  };
2174
2336
 
2175
2337
  // src/evaluation/providers/vscode.ts
2176
- import path5 from "node:path";
2338
+ import path10 from "node:path";
2177
2339
  import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
2178
2340
  var VSCodeProvider = class {
2179
2341
  id;
@@ -2286,6 +2448,9 @@ var VSCodeProvider = class {
2286
2448
  };
2287
2449
  function buildPromptDocument2(request, attachments, guidelinePatterns) {
2288
2450
  const parts = [];
2451
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
2452
+ parts.push(request.systemPrompt.trim());
2453
+ }
2289
2454
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
2290
2455
  const attachmentFiles = collectAttachmentFiles(attachments);
2291
2456
  const nonGuidelineAttachments = attachmentFiles.filter(
@@ -2303,7 +2468,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
2303
2468
  return "";
2304
2469
  }
2305
2470
  const buildList = (files) => files.map((absolutePath) => {
2306
- const fileName = path5.basename(absolutePath);
2471
+ const fileName = path10.basename(absolutePath);
2307
2472
  const fileUri = pathToFileUri2(absolutePath);
2308
2473
  return `* [${fileName}](${fileUri})`;
2309
2474
  });
@@ -2328,8 +2493,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
2328
2493
  }
2329
2494
  const unique = /* @__PURE__ */ new Map();
2330
2495
  for (const attachment of attachments) {
2331
- const absolutePath = path5.resolve(attachment);
2332
- const normalized = absolutePath.split(path5.sep).join("/");
2496
+ const absolutePath = path10.resolve(attachment);
2497
+ const normalized = absolutePath.split(path10.sep).join("/");
2333
2498
  if (isGuidelineFile(normalized, guidelinePatterns)) {
2334
2499
  if (!unique.has(absolutePath)) {
2335
2500
  unique.set(absolutePath, absolutePath);
@@ -2344,7 +2509,7 @@ function collectAttachmentFiles(attachments) {
2344
2509
  }
2345
2510
  const unique = /* @__PURE__ */ new Map();
2346
2511
  for (const attachment of attachments) {
2347
- const absolutePath = path5.resolve(attachment);
2512
+ const absolutePath = path10.resolve(attachment);
2348
2513
  if (!unique.has(absolutePath)) {
2349
2514
  unique.set(absolutePath, absolutePath);
2350
2515
  }
@@ -2352,7 +2517,7 @@ function collectAttachmentFiles(attachments) {
2352
2517
  return Array.from(unique.values());
2353
2518
  }
2354
2519
  function pathToFileUri2(filePath) {
2355
- const absolutePath = path5.isAbsolute(filePath) ? filePath : path5.resolve(filePath);
2520
+ const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
2356
2521
  const normalizedPath = absolutePath.replace(/\\/g, "/");
2357
2522
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2358
2523
  return `file:///${normalizedPath}`;
@@ -2365,7 +2530,7 @@ function normalizeAttachments(attachments) {
2365
2530
  }
2366
2531
  const deduped = /* @__PURE__ */ new Set();
2367
2532
  for (const attachment of attachments) {
2368
- deduped.add(path5.resolve(attachment));
2533
+ deduped.add(path10.resolve(attachment));
2369
2534
  }
2370
2535
  return Array.from(deduped);
2371
2536
  }
@@ -2374,7 +2539,7 @@ function mergeAttachments(all) {
2374
2539
  for (const list of all) {
2375
2540
  if (!list) continue;
2376
2541
  for (const inputFile of list) {
2377
- deduped.add(path5.resolve(inputFile));
2542
+ deduped.add(path10.resolve(inputFile));
2378
2543
  }
2379
2544
  }
2380
2545
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -2420,9 +2585,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
2420
2585
 
2421
2586
  // src/evaluation/providers/targets-file.ts
2422
2587
  import { constants as constants3 } from "node:fs";
2423
- import { access as access3, readFile as readFile2 } from "node:fs/promises";
2424
- import path6 from "node:path";
2425
- import { parse as parse2 } from "yaml";
2588
+ import { access as access3, readFile as readFile5 } from "node:fs/promises";
2589
+ import path11 from "node:path";
2590
+ import { parse as parse3 } from "yaml";
2426
2591
  function isRecord(value) {
2427
2592
  return typeof value === "object" && value !== null && !Array.isArray(value);
2428
2593
  }
@@ -2477,12 +2642,12 @@ async function fileExists3(filePath) {
2477
2642
  }
2478
2643
  }
2479
2644
  async function readTargetDefinitions(filePath) {
2480
- const absolutePath = path6.resolve(filePath);
2645
+ const absolutePath = path11.resolve(filePath);
2481
2646
  if (!await fileExists3(absolutePath)) {
2482
2647
  throw new Error(`targets.yaml not found at ${absolutePath}`);
2483
2648
  }
2484
- const raw = await readFile2(absolutePath, "utf8");
2485
- const parsed = parse2(raw);
2649
+ const raw = await readFile5(absolutePath, "utf8");
2650
+ const parsed = parse3(raw);
2486
2651
  if (!isRecord(parsed)) {
2487
2652
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
2488
2653
  }
@@ -2525,18 +2690,34 @@ function resolveAndCreateProvider(definition, env = process.env) {
2525
2690
  }
2526
2691
 
2527
2692
  // src/evaluation/evaluators.ts
2528
- import { randomUUID as randomUUID2 } from "node:crypto";
2693
+ var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
2694
+
2695
+ Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
2696
+
2697
+ Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
2698
+
2699
+ [[ ## expected_outcome ## ]]
2700
+ {{expected_outcome}}
2701
+
2702
+ [[ ## question ## ]]
2703
+ {{question}}
2704
+
2705
+ [[ ## reference_answer ## ]]
2706
+ {{reference_answer}}
2707
+
2708
+ [[ ## candidate_answer ## ]]
2709
+ {{candidate_answer}}`;
2529
2710
  var LlmJudgeEvaluator = class {
2530
2711
  kind = "llm_judge";
2531
2712
  resolveJudgeProvider;
2532
2713
  maxOutputTokens;
2533
2714
  temperature;
2534
- customPrompt;
2715
+ evaluatorTemplate;
2535
2716
  constructor(options) {
2536
2717
  this.resolveJudgeProvider = options.resolveJudgeProvider;
2537
2718
  this.maxOutputTokens = options.maxOutputTokens;
2538
2719
  this.temperature = options.temperature;
2539
- this.customPrompt = options.customPrompt;
2720
+ this.evaluatorTemplate = options.evaluatorTemplate;
2540
2721
  }
2541
2722
  async evaluate(context) {
2542
2723
  const judgeProvider = await this.resolveJudgeProvider(context);
@@ -2546,26 +2727,21 @@ var LlmJudgeEvaluator = class {
2546
2727
  return this.evaluateWithPrompt(context, judgeProvider);
2547
2728
  }
2548
2729
  async evaluateWithPrompt(context, judgeProvider) {
2549
- const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context.evalCase);
2550
2730
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
2551
- let prompt = buildQualityPrompt(context.evalCase, context.candidate, formattedQuestion);
2552
- let systemPrompt = context.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
2553
- if (systemPrompt && hasTemplateVariables(systemPrompt)) {
2554
- const variables = {
2555
- input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
2556
- output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
2557
- candidate_answer: context.candidate,
2558
- reference_answer: context.evalCase.reference_answer ?? "",
2559
- expected_outcome: context.evalCase.expected_outcome,
2560
- question: formattedQuestion
2561
- };
2562
- prompt = substituteVariables(systemPrompt, variables);
2563
- systemPrompt = buildSystemPrompt(hasReferenceAnswer);
2564
- }
2565
- const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
2731
+ const variables = {
2732
+ input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
2733
+ output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
2734
+ candidate_answer: context.candidate.trim(),
2735
+ reference_answer: (context.evalCase.reference_answer ?? "").trim(),
2736
+ expected_outcome: context.evalCase.expected_outcome.trim(),
2737
+ question: formattedQuestion.trim()
2738
+ };
2739
+ const systemPrompt = buildOutputSchema();
2740
+ const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
2741
+ const userPrompt = substituteVariables(evaluatorTemplate, variables);
2566
2742
  const response = await judgeProvider.invoke({
2567
- question: prompt,
2568
- metadata,
2743
+ question: userPrompt,
2744
+ systemPrompt,
2569
2745
  evalCaseId: context.evalCase.id,
2570
2746
  attempt: context.attempt,
2571
2747
  maxOutputTokens: this.maxOutputTokens,
@@ -2578,11 +2754,9 @@ var LlmJudgeEvaluator = class {
2578
2754
  const reasoning = parsed.reasoning ?? response.reasoning;
2579
2755
  const expectedAspectCount = Math.max(hits.length + misses.length, 1);
2580
2756
  const evaluatorRawRequest = {
2581
- id: randomUUID2(),
2582
- provider: judgeProvider.id,
2583
- prompt,
2584
- target: context.target.name,
2585
- ...systemPrompt !== void 0 && { systemPrompt }
2757
+ userPrompt,
2758
+ systemPrompt,
2759
+ target: judgeProvider.targetName
2586
2760
  };
2587
2761
  return {
2588
2762
  score,
@@ -2594,20 +2768,8 @@ var LlmJudgeEvaluator = class {
2594
2768
  };
2595
2769
  }
2596
2770
  };
2597
- function buildSystemPrompt(hasReferenceAnswer) {
2598
- const basePrompt = [
2599
- "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
2600
- ""
2601
- ];
2602
- if (hasReferenceAnswer) {
2603
- basePrompt.push(
2604
- "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
2605
- ""
2606
- );
2607
- }
2608
- basePrompt.push(
2609
- "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
2610
- "",
2771
+ function buildOutputSchema() {
2772
+ return [
2611
2773
  "You must respond with a single JSON object matching this schema:",
2612
2774
  "",
2613
2775
  "{",
@@ -2616,30 +2778,7 @@ function buildSystemPrompt(hasReferenceAnswer) {
2616
2778
  ' "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
2617
2779
  ' "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
2618
2780
  "}"
2619
- );
2620
- return basePrompt.join("\n");
2621
- }
2622
- function buildQualityPrompt(evalCase, candidate, question) {
2623
- const parts = [
2624
- "[[ ## expected_outcome ## ]]",
2625
- evalCase.expected_outcome.trim(),
2626
- "",
2627
- "[[ ## question ## ]]",
2628
- question.trim(),
2629
- ""
2630
- ];
2631
- if (hasNonEmptyReferenceAnswer(evalCase)) {
2632
- parts.push(
2633
- "[[ ## reference_answer ## ]]",
2634
- evalCase.reference_answer.trim(),
2635
- ""
2636
- );
2637
- }
2638
- parts.push(
2639
- "[[ ## candidate_answer ## ]]",
2640
- candidate.trim()
2641
- );
2642
- return parts.join("\n");
2781
+ ].join("\n");
2643
2782
  }
2644
2783
  function clampScore(value) {
2645
2784
  if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -2721,9 +2860,6 @@ function extractJsonBlob(text) {
2721
2860
  function isNonEmptyString(value) {
2722
2861
  return typeof value === "string" && value.trim().length > 0;
2723
2862
  }
2724
- function hasNonEmptyReferenceAnswer(evalCase) {
2725
- return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
2726
- }
2727
2863
  var CodeEvaluator = class {
2728
2864
  kind = "code";
2729
2865
  script;
@@ -2829,19 +2965,16 @@ function parseJsonSafe(payload) {
2829
2965
  return void 0;
2830
2966
  }
2831
2967
  }
2832
- function hasTemplateVariables(text) {
2833
- return /\$\{[a-zA-Z0-9_]+\}/.test(text);
2834
- }
2835
2968
  function substituteVariables(template, variables) {
2836
- return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
2969
+ return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
2837
2970
  return variables[varName] ?? match;
2838
2971
  });
2839
2972
  }
2840
2973
 
2841
2974
  // src/evaluation/orchestrator.ts
2842
- import { createHash, randomUUID as randomUUID3 } from "node:crypto";
2975
+ import { createHash, randomUUID as randomUUID2 } from "node:crypto";
2843
2976
  import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
2844
- import path7 from "node:path";
2977
+ import path12 from "node:path";
2845
2978
 
2846
2979
  // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
2847
2980
  var Node = class {
@@ -3404,6 +3537,7 @@ async function evaluateCandidate(options) {
3404
3537
  }
3405
3538
  }
3406
3539
  return {
3540
+ timestamp: completedAt.toISOString(),
3407
3541
  eval_id: evalCase.id,
3408
3542
  dataset: evalCase.dataset,
3409
3543
  conversation_id: evalCase.conversation_id,
@@ -3411,14 +3545,12 @@ async function evaluateCandidate(options) {
3411
3545
  hits: score.hits,
3412
3546
  misses: score.misses,
3413
3547
  candidate_answer: candidate,
3414
- expected_aspect_count: score.expectedAspectCount,
3415
3548
  target: target.name,
3416
- timestamp: completedAt.toISOString(),
3417
3549
  reasoning: score.reasoning,
3418
3550
  raw_aspects: score.rawAspects,
3419
3551
  agent_provider_request: agentProviderRequest,
3420
3552
  lm_provider_request: lmProviderRequest,
3421
- evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
3553
+ evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
3422
3554
  evaluator_results: evaluatorResults
3423
3555
  };
3424
3556
  }
@@ -3495,7 +3627,7 @@ async function runEvaluatorList(options) {
3495
3627
  hits: score2.hits,
3496
3628
  misses: score2.misses,
3497
3629
  reasoning: score2.reasoning,
3498
- evaluator_raw_request: score2.evaluatorRawRequest
3630
+ evaluator_provider_request: score2.evaluatorRawRequest
3499
3631
  });
3500
3632
  continue;
3501
3633
  }
@@ -3522,7 +3654,7 @@ async function runEvaluatorList(options) {
3522
3654
  hits: score2.hits,
3523
3655
  misses: score2.misses,
3524
3656
  reasoning: score2.reasoning,
3525
- evaluator_raw_request: score2.evaluatorRawRequest
3657
+ evaluator_provider_request: score2.evaluatorRawRequest
3526
3658
  });
3527
3659
  continue;
3528
3660
  }
@@ -3575,7 +3707,7 @@ async function runLlmJudgeEvaluator(options) {
3575
3707
  promptInputs,
3576
3708
  now,
3577
3709
  judgeProvider,
3578
- systemPrompt: customPrompt,
3710
+ evaluatorTemplateOverride: customPrompt,
3579
3711
  evaluator: config
3580
3712
  });
3581
3713
  }
@@ -3616,8 +3748,8 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
3616
3748
  async function dumpPrompt(directory, evalCase, promptInputs) {
3617
3749
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
3618
3750
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
3619
- const filePath = path7.resolve(directory, filename);
3620
- await mkdir2(path7.dirname(filePath), { recursive: true });
3751
+ const filePath = path12.resolve(directory, filename);
3752
+ await mkdir2(path12.dirname(filePath), { recursive: true });
3621
3753
  const payload = {
3622
3754
  eval_id: evalCase.id,
3623
3755
  question: promptInputs.question,
@@ -3631,7 +3763,7 @@ function sanitizeFilename(value) {
3631
3763
  return "prompt";
3632
3764
  }
3633
3765
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
3634
- return sanitized.length > 0 ? sanitized : randomUUID3();
3766
+ return sanitized.length > 0 ? sanitized : randomUUID2();
3635
3767
  }
3636
3768
  async function invokeProvider(provider, options) {
3637
3769
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -3687,6 +3819,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
3687
3819
  }
3688
3820
  }
3689
3821
  return {
3822
+ timestamp: timestamp.toISOString(),
3690
3823
  eval_id: evalCase.id,
3691
3824
  dataset: evalCase.dataset,
3692
3825
  conversation_id: evalCase.conversation_id,
@@ -3694,9 +3827,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
3694
3827
  hits: [],
3695
3828
  misses: [`Error: ${message}`],
3696
3829
  candidate_answer: `Error occurred: ${message}`,
3697
- expected_aspect_count: 0,
3698
3830
  target: targetName,
3699
- timestamp: timestamp.toISOString(),
3700
3831
  raw_aspects: [],
3701
3832
  agent_provider_request: agentProviderRequest,
3702
3833
  lm_provider_request: lmProviderRequest,