agentv 3.13.1 → 3.13.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ import {
22
22
  validateFileReferences,
23
23
  validateTargetsFile,
24
24
  writeArtifactsFromResults
25
- } from "./chunk-LSXO22CF.js";
25
+ } from "./chunk-PACTPWEN.js";
26
26
  import {
27
27
  createBuiltinRegistry,
28
28
  executeScript,
@@ -39,7 +39,7 @@ import {
39
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
40
40
  transpileEvalYamlFile,
41
41
  trimBaselineResult
42
- } from "./chunk-K747KGDP.js";
42
+ } from "./chunk-D3LNJUUB.js";
43
43
  import {
44
44
  __commonJS,
45
45
  __esm,
@@ -4176,11 +4176,16 @@ var evalRunCommand = command({
4176
4176
  type: optional(string),
4177
4177
  long: "output-messages",
4178
4178
  description: 'Number of trailing messages to include in results output (default: 1, or "all")'
4179
+ }),
4180
+ threshold: option({
4181
+ type: optional(number),
4182
+ long: "threshold",
4183
+ description: "Suite-level quality gate: exit 1 if mean score falls below this value (0-1)"
4179
4184
  })
4180
4185
  },
4181
4186
  handler: async (args) => {
4182
4187
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4183
- const { launchInteractiveWizard } = await import("./interactive-76ZJVPI7.js");
4188
+ const { launchInteractiveWizard } = await import("./interactive-OMJAMCQP.js");
4184
4189
  await launchInteractiveWizard();
4185
4190
  return;
4186
4191
  }
@@ -4216,9 +4221,13 @@ var evalRunCommand = command({
4216
4221
  artifacts: args.artifacts,
4217
4222
  graderTarget: args.graderTarget,
4218
4223
  model: args.model,
4219
- outputMessages: args.outputMessages
4224
+ outputMessages: args.outputMessages,
4225
+ threshold: args.threshold
4220
4226
  };
4221
- await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4227
+ const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
4228
+ if (result?.thresholdFailed) {
4229
+ process.exit(1);
4230
+ }
4222
4231
  }
4223
4232
  });
4224
4233
 
@@ -5534,8 +5543,8 @@ var resultsCommand = subcommands({
5534
5543
  import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
5535
5544
  import path8 from "node:path";
5536
5545
  import { Hono } from "hono";
5537
- function feedbackPath(cwd) {
5538
- return path8.join(cwd, "feedback.json");
5546
+ function feedbackPath(resultDir) {
5547
+ return path8.join(resultDir, "feedback.json");
5539
5548
  }
5540
5549
  function readFeedback(cwd) {
5541
5550
  const fp = feedbackPath(cwd);
@@ -5553,13 +5562,13 @@ function writeFeedback(cwd, data) {
5553
5562
  writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5554
5563
  `, "utf8");
5555
5564
  }
5556
- function createApp(results, cwd) {
5565
+ function createApp(results, resultDir) {
5557
5566
  const app2 = new Hono();
5558
5567
  app2.get("/", (c3) => {
5559
5568
  return c3.html(generateServeHtml(results));
5560
5569
  });
5561
5570
  app2.get("/api/feedback", (c3) => {
5562
- const data = readFeedback(cwd);
5571
+ const data = readFeedback(resultDir);
5563
5572
  return c3.json(data);
5564
5573
  });
5565
5574
  app2.post("/api/feedback", async (c3) => {
@@ -5582,7 +5591,7 @@ function createApp(results, cwd) {
5582
5591
  return c3.json({ error: "Each review must have test_id and comment strings" }, 400);
5583
5592
  }
5584
5593
  }
5585
- const existing = readFeedback(cwd);
5594
+ const existing = readFeedback(resultDir);
5586
5595
  const now = (/* @__PURE__ */ new Date()).toISOString();
5587
5596
  for (const review of incoming) {
5588
5597
  const newReview = {
@@ -5597,7 +5606,7 @@ function createApp(results, cwd) {
5597
5606
  existing.reviews.push(newReview);
5598
5607
  }
5599
5608
  }
5600
- writeFeedback(cwd, existing);
5609
+ writeFeedback(resultDir, existing);
5601
5610
  return c3.json(existing);
5602
5611
  });
5603
5612
  return app2;
@@ -6221,11 +6230,12 @@ var resultsServeCommand = command({
6221
6230
  const listenPort = port ?? 3117;
6222
6231
  try {
6223
6232
  const { results, sourceFile } = await loadResults(source, cwd);
6224
- const app2 = createApp(results, cwd);
6233
+ const resultDir = path8.dirname(path8.resolve(sourceFile));
6234
+ const app2 = createApp(results, resultDir);
6225
6235
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6226
6236
  console.log(`Dashboard: http://localhost:${listenPort}`);
6227
6237
  console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
6228
- console.log(`Feedback file: ${feedbackPath(cwd)}`);
6238
+ console.log(`Feedback file: ${feedbackPath(resultDir)}`);
6229
6239
  console.log("Press Ctrl+C to stop");
6230
6240
  const { serve: startServer } = await import("@hono/node-server");
6231
6241
  startServer({
@@ -7756,4 +7766,4 @@ export {
7756
7766
  preprocessArgv,
7757
7767
  runCli
7758
7768
  };
7759
- //# sourceMappingURL=chunk-UK7UMQOX.js.map
7769
+ //# sourceMappingURL=chunk-TGCWIHBH.js.map