agentv 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,7 +24,7 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-4Z326WWF.js";
27
+ } from "./chunk-HAZJO7OY.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  createBuiltinRegistry,
@@ -43,7 +43,7 @@ import {
43
43
  toSnakeCaseDeep as toSnakeCaseDeep2,
44
44
  transpileEvalYamlFile,
45
45
  trimBaselineResult
46
- } from "./chunk-XEAW7OQT.js";
46
+ } from "./chunk-XLM3RNN7.js";
47
47
  import {
48
48
  __commonJS,
49
49
  __esm,
@@ -4217,7 +4217,7 @@ var evalRunCommand = command({
4217
4217
  },
4218
4218
  handler: async (args) => {
4219
4219
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4220
- const { launchInteractiveWizard } = await import("./interactive-7ZYS6IOC.js");
4220
+ const { launchInteractiveWizard } = await import("./interactive-NVNOLL2H.js");
4221
4221
  await launchInteractiveWizard();
4222
4222
  return;
4223
4223
  }
@@ -4628,12 +4628,115 @@ function computeStats(values) {
4628
4628
  // src/commands/pipeline/grade.ts
4629
4629
  import { mkdir as mkdir2, readFile as readFile2, readdir as readdir2, writeFile as writeFile3 } from "node:fs/promises";
4630
4630
  import { join as join2 } from "node:path";
4631
// Fallback parallelism for code graders: the `grade` command uses this value when no --concurrency/-j option is supplied.
+ var DEFAULT_CONCURRENCY = 10;
4631
4632
  function extractInputText(input) {
4632
4633
  if (!input || input.length === 0) return "";
4633
4634
  if (input.length === 1) return input[0].content;
4634
4635
  return input.map((m) => `@[${m.role}]:
4635
4636
  ${m.content}`).join("\n\n");
4636
4637
  }
4638
/**
 * Runs every queued code-grader task with bounded parallelism and writes one
 * `<graderName>.json` result file per task into that task's `resultsDir`.
 *
 * Each task is `{ testId, testDir, resultsDir, graderFile, responseText, inputData }`.
 * The grader config is read from `<testDir>/code_graders/<graderFile>`, its
 * `command` is run through `executeScript` with a JSON payload on stdin, and
 * the script's stdout is parsed as JSON (`score`, `assertions`, `details`).
 * A failure while running the script, parsing its output or writing the
 * result is recorded as a score-0 result and does not stop the run.
 * Progress is reported on stderr.
 *
 * @param {Array<object>} tasks - Grader tasks; they are started in array order.
 * @param {number} concurrency - Maximum number of graders in flight. Fractions
 *   are rounded down, values below 1 are treated as 1, and a non-numeric value
 *   falls back to 1 so the run can never become unbounded by accident.
 * @returns {Promise<{ totalGraders: number, totalPassed: number }>} Number of
 *   graders that produced a result file, and how many of them scored >= 0.5.
 * @throws Rethrows the first error raised outside the per-grader guard (for
 *   example an unreadable grader config or a failed error-result write), but
 *   only after the graders already in flight have settled.
 */
async function runCodeGraders(tasks, concurrency) {
  const total = tasks.length;
  if (total === 0) return { totalGraders: 0, totalPassed: 0 };

  // Normalise the limit: NaN would make a ">= limit" check always false and
  // launch everything at once; never start more workers than there are tasks.
  const requested = Number(concurrency);
  const workerCount = Number.isNaN(requested)
    ? 1
    : Math.min(total, Math.max(1, Math.floor(requested)));

  let totalGraders = 0;
  let totalPassed = 0;
  let completed = 0;

  const writeProgress = () => {
    // "\r" rewrites the same terminal line on every update.
    process.stderr.write(`\rGrading: ${completed}/${total} done`);
  };

  const writeResult = (resultsDir, result) =>
    writeFile3(
      join2(resultsDir, `${result.name}.json`),
      `${JSON.stringify(result, null, 2)}\n`,
      "utf8"
    );

  const executeGrader = async (task) => {
    const { testId, testDir, resultsDir, graderFile, responseText, inputData } = task;
    // Deliberately outside the try below: without a config there is no grader
    // name to file a result under, so this failure aborts the run.
    const graderConfig = JSON.parse(
      await readFile2(join2(testDir, "code_graders", graderFile), "utf8")
    );
    const graderName = graderConfig.name;
    const inputText = extractInputText(inputData.input);
    const payload = JSON.stringify({
      output: [{ role: "assistant", content: responseText }],
      input: inputData.input,
      criteria: "",
      expected_output: [],
      input_files: inputData.input_files ?? [],
      trace: null,
      token_usage: null,
      cost_usd: null,
      duration_ms: null,
      start_time: null,
      end_time: null,
      file_changes: null,
      workspace_path: null,
      config: graderConfig.config ?? null,
      metadata: inputData.metadata ?? {},
      input_text: inputText,
      output_text: responseText,
      expected_output_text: ""
    });
    try {
      const stdout = await executeScript(
        graderConfig.command,
        payload,
        void 0,
        graderConfig.cwd
      );
      const parsed = JSON.parse(stdout);
      const score = typeof parsed.score === "number" ? parsed.score : 0;
      const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
      await writeResult(resultsDir, {
        name: graderName,
        type: "code-grader",
        score,
        weight: graderConfig.weight ?? 1,
        assertions,
        details: parsed.details ?? {}
      });
      totalGraders++;
      if (score >= 0.5) totalPassed++;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // Leading newline moves off the in-place progress line first.
      process.stderr.write(`\n${testId}/${graderName}: ERROR \u2014 ${message}\n`);
      await writeResult(resultsDir, {
        name: graderName,
        type: "code-grader",
        score: 0,
        weight: graderConfig.weight ?? 1,
        assertions: [{ text: `Error: ${message}`, passed: false }],
        details: { error: message }
      });
      totalGraders++;
    } finally {
      completed++;
      writeProgress();
    }
  };

  // Fixed pool of workers pulling from a shared cursor. After the first
  // unexpected failure no new task is started, but every worker finishes the
  // task it already holds, so no rejection is left unobserved.
  let nextIndex = 0;
  let failed = false;
  let firstError;
  const runWorker = async () => {
    while (!failed && nextIndex < total) {
      const task = tasks[nextIndex];
      nextIndex += 1;
      try {
        await executeGrader(task);
      } catch (error) {
        if (!failed) {
          failed = true;
          firstError = error;
        }
      }
    }
  };

  writeProgress();
  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  // Terminate the progress line even when the run is about to fail.
  process.stderr.write("\n");
  if (failed) throw firstError;
  return { totalGraders, totalPassed };
}
4637
4740
  var evalGradeCommand = command({
4638
4741
  name: "grade",
4639
4742
  description: "Run code-grader assertions on responses in an export directory",
@@ -4642,16 +4745,22 @@ var evalGradeCommand = command({
4642
4745
  type: string,
4643
4746
  displayName: "export-dir",
4644
4747
  description: "Export directory from pipeline input"
4748
+ }),
4749
+ concurrency: option({
4750
+ type: optional(number),
4751
+ long: "concurrency",
4752
+ short: "j",
4753
+ description: `Number of graders to run in parallel (default: ${DEFAULT_CONCURRENCY})`
4645
4754
  })
4646
4755
  },
4647
- handler: async ({ exportDir }) => {
4756
+ handler: async ({ exportDir, concurrency }) => {
4757
+ const maxWorkers = concurrency ?? DEFAULT_CONCURRENCY;
4648
4758
  const manifestPath = join2(exportDir, "manifest.json");
4649
4759
  const manifest = JSON.parse(await readFile2(manifestPath, "utf8"));
4650
4760
  const testIds = manifest.test_ids;
4651
4761
  const evalSet = manifest.dataset ?? "";
4652
4762
  const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, "_") : "";
4653
- let totalGraders = 0;
4654
- let totalPassed = 0;
4763
+ const tasks = [];
4655
4764
  for (const testId of testIds) {
4656
4765
  const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
4657
4766
  const testDir = join2(exportDir, ...subpath);
@@ -4668,76 +4777,10 @@ var evalGradeCommand = command({
4668
4777
  const responseText = await readFile2(join2(testDir, "response.md"), "utf8");
4669
4778
  const inputData = JSON.parse(await readFile2(join2(testDir, "input.json"), "utf8"));
4670
4779
  for (const graderFile of graderFiles) {
4671
- const graderConfig = JSON.parse(await readFile2(join2(codeGradersDir, graderFile), "utf8"));
4672
- const graderName = graderConfig.name;
4673
- const inputText = extractInputText(inputData.input);
4674
- const payload = JSON.stringify({
4675
- output: [{ role: "assistant", content: responseText }],
4676
- input: inputData.input,
4677
- criteria: "",
4678
- expected_output: [],
4679
- input_files: inputData.input_files ?? [],
4680
- trace: null,
4681
- token_usage: null,
4682
- cost_usd: null,
4683
- duration_ms: null,
4684
- start_time: null,
4685
- end_time: null,
4686
- file_changes: null,
4687
- workspace_path: null,
4688
- config: graderConfig.config ?? null,
4689
- metadata: inputData.metadata ?? {},
4690
- input_text: inputText,
4691
- output_text: responseText,
4692
- expected_output_text: ""
4693
- });
4694
- try {
4695
- const stdout = await executeScript(
4696
- graderConfig.command,
4697
- payload,
4698
- void 0,
4699
- graderConfig.cwd
4700
- );
4701
- const parsed = JSON.parse(stdout);
4702
- const score = typeof parsed.score === "number" ? parsed.score : 0;
4703
- const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
4704
- const result = {
4705
- name: graderName,
4706
- type: "code-grader",
4707
- score,
4708
- weight: graderConfig.weight ?? 1,
4709
- assertions,
4710
- details: parsed.details ?? {}
4711
- };
4712
- await writeFile3(
4713
- join2(resultsDir, `${graderName}.json`),
4714
- `${JSON.stringify(result, null, 2)}
4715
- `,
4716
- "utf8"
4717
- );
4718
- totalGraders++;
4719
- if (score >= 0.5) totalPassed++;
4720
- } catch (error) {
4721
- const message = error instanceof Error ? error.message : String(error);
4722
- console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
4723
- const errorResult = {
4724
- name: graderName,
4725
- type: "code-grader",
4726
- score: 0,
4727
- weight: graderConfig.weight ?? 1,
4728
- assertions: [{ text: `Error: ${message}`, passed: false }],
4729
- details: { error: message }
4730
- };
4731
- await writeFile3(
4732
- join2(resultsDir, `${graderName}.json`),
4733
- `${JSON.stringify(errorResult, null, 2)}
4734
- `,
4735
- "utf8"
4736
- );
4737
- totalGraders++;
4738
- }
4780
+ tasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
4739
4781
  }
4740
4782
  }
4783
+ const { totalGraders, totalPassed } = await runCodeGraders(tasks, maxWorkers);
4741
4784
  console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
4742
4785
  }
4743
4786
  });
@@ -5151,11 +5194,12 @@ var evalRunCommand2 = command({
5151
5194
  if (graderType !== "code") {
5152
5195
  console.log(`
5153
5196
  Done. Results in ${outDir}`);
5154
- console.log("To run code graders: agentv pipeline grade <run-dir> (or re-run with --grader-type code)");
5197
+ console.log(
5198
+ "To run code graders: agentv pipeline grade <run-dir> (or re-run with --grader-type code)"
5199
+ );
5155
5200
  return;
5156
5201
  }
5157
- let totalGraders = 0;
5158
- let totalPassed = 0;
5202
+ const graderTasks = [];
5159
5203
  for (const testId of testIds) {
5160
5204
  const subpath = safeEvalSet ? [safeEvalSet, testId] : [testId];
5161
5205
  const testDir = join4(outDir, ...subpath);
@@ -5172,82 +5216,11 @@ Done. Results in ${outDir}`);
5172
5216
  const responseText = await readFile4(join4(testDir, "response.md"), "utf8");
5173
5217
  const inputData = JSON.parse(await readFile4(join4(testDir, "input.json"), "utf8"));
5174
5218
  for (const graderFile of graderFiles) {
5175
- const graderConfig = JSON.parse(await readFile4(join4(codeGradersDir, graderFile), "utf8"));
5176
- const graderName = graderConfig.name;
5177
- const inputText = extractInputText2(inputData.input);
5178
- const payload = JSON.stringify({
5179
- output: [{ role: "assistant", content: responseText }],
5180
- input: inputData.input,
5181
- criteria: "",
5182
- expected_output: [],
5183
- input_files: inputData.input_files ?? [],
5184
- trace: null,
5185
- token_usage: null,
5186
- cost_usd: null,
5187
- duration_ms: null,
5188
- start_time: null,
5189
- end_time: null,
5190
- file_changes: null,
5191
- workspace_path: null,
5192
- config: graderConfig.config ?? null,
5193
- metadata: inputData.metadata ?? {},
5194
- input_text: inputText,
5195
- output_text: responseText,
5196
- expected_output_text: ""
5197
- });
5198
- try {
5199
- const stdout = await executeScript(
5200
- graderConfig.command,
5201
- payload,
5202
- void 0,
5203
- graderConfig.cwd
5204
- );
5205
- const parsed = JSON.parse(stdout);
5206
- const score = typeof parsed.score === "number" ? parsed.score : 0;
5207
- const assertions = Array.isArray(parsed.assertions) ? parsed.assertions : [];
5208
- await writeFile5(
5209
- join4(resultsDir, `${graderName}.json`),
5210
- `${JSON.stringify(
5211
- {
5212
- name: graderName,
5213
- type: "code-grader",
5214
- score,
5215
- weight: graderConfig.weight ?? 1,
5216
- assertions,
5217
- details: parsed.details ?? {}
5218
- },
5219
- null,
5220
- 2
5221
- )}
5222
- `,
5223
- "utf8"
5224
- );
5225
- totalGraders++;
5226
- if (score >= 0.5) totalPassed++;
5227
- } catch (error) {
5228
- const message = error instanceof Error ? error.message : String(error);
5229
- console.error(` ${testId}/${graderName}: ERROR \u2014 ${message}`);
5230
- await writeFile5(
5231
- join4(resultsDir, `${graderName}.json`),
5232
- `${JSON.stringify(
5233
- {
5234
- name: graderName,
5235
- type: "code-grader",
5236
- score: 0,
5237
- weight: graderConfig.weight ?? 1,
5238
- assertions: [{ text: `Error: ${message}`, passed: false }],
5239
- details: { error: message }
5240
- },
5241
- null,
5242
- 2
5243
- )}
5244
- `,
5245
- "utf8"
5246
- );
5247
- totalGraders++;
5248
- }
5219
+ graderTasks.push({ testId, testDir, resultsDir, graderFile, responseText, inputData });
5249
5220
  }
5250
5221
  }
5222
+ const graderConcurrency = workers ?? 10;
5223
+ const { totalGraders, totalPassed } = await runCodeGraders(graderTasks, graderConcurrency);
5251
5224
  console.log(`Graded ${totalGraders} code-grader(s): ${totalPassed} passed`);
5252
5225
  console.log(`
5253
5226
  Done. Agent can now perform LLM grading on responses in ${outDir}`);
@@ -6741,8 +6714,8 @@ function resolveStudioDistDir() {
6741
6714
  path9.resolve(currentDir, "../../../../studio/dist"),
6742
6715
  // From dist/ → sibling apps/studio/dist (monorepo dev)
6743
6716
  path9.resolve(currentDir, "../../studio/dist"),
6744
- // Bundled inside CLI dist (published package)
6745
- path9.resolve(currentDir, "../studio"),
6717
+ // Bundled inside CLI dist (published package: dist/studio/)
6718
+ path9.resolve(currentDir, "studio"),
6746
6719
  // From dist/ in monorepo root context
6747
6720
  path9.resolve(currentDir, "../../../apps/studio/dist")
6748
6721
  ];
@@ -8357,4 +8330,4 @@ export {
8357
8330
  preprocessArgv,
8358
8331
  runCli
8359
8332
  };
8360
- //# sourceMappingURL=chunk-2W5JKKXC.js.map
8333
+ //# sourceMappingURL=chunk-UXSQQHCI.js.map