@ls-stack/agent-eval 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-Drw0IpOd.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-Cx2CvM6a.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-BVnLr79e.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-lOZdhO2D.mjs";
2
+ import { t as runCli } from "./cli-CMiCEQ-3.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
6
6
  const inspectFlagPrefix = "--inspect";
7
7
  const inspectBrkFlagPrefix = "--inspect-brk";
8
+ const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
8
9
  function needsModuleMocksFlag() {
9
10
  return !process.execArgv.includes(moduleMocksFlag);
10
11
  }
@@ -32,10 +33,13 @@ function isInspectArg(arg) {
32
33
  }
33
34
  function buildExecArgv(inspectArg) {
34
35
  const nextExecArgv = [moduleMocksFlag, ...process.execArgv.filter((arg) => arg !== moduleMocksFlag && !isInspectArg(arg))];
35
- if (inspectArg !== void 0) nextExecArgv.push(inspectArg);
36
- else nextExecArgv.push(...process.execArgv.filter(isInspectArg));
36
+ if (inspectArg === void 0) nextExecArgv.push(...process.execArgv.filter(isInspectArg));
37
37
  return nextExecArgv;
38
38
  }
39
+ function setRunChildInspectArg(inspectArg) {
40
+ if (inspectArg === void 0) return;
41
+ process.env[runChildInspectArgEnv] = inspectArg;
42
+ }
39
43
  function execArgvMatches(nextExecArgv) {
40
44
  return process.execArgv.length === nextExecArgv.length && process.execArgv.every((arg, index) => arg === nextExecArgv[index]);
41
45
  }
@@ -67,6 +71,7 @@ async function reexecWithNodeArgs(argv, execArgv) {
67
71
  });
68
72
  }
69
73
  const { argv, inspectArg } = parseDebugFlags(process.argv.slice(2));
74
+ setRunChildInspectArg(inspectArg);
70
75
  const execArgv = buildExecArgv(inspectArg);
71
76
  if (needsModuleMocksFlag() || !execArgvMatches(execArgv)) await reexecWithNodeArgs(argv, execArgv);
72
77
  else await runCli(argv);
@@ -208,6 +208,9 @@ function isRunChildMessage(value) {
208
208
  }
209
209
  //#endregion
210
210
  //#region ../runner/src/runChildManager.ts
211
+ const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
212
+ const inspectFlagPrefix = "--inspect";
213
+ const inspectBrkFlagPrefix = "--inspect-brk";
211
214
  function startRunChild(params) {
212
215
  const child = spawn(process.execPath, [
213
216
  ...getRunChildExecArgv(),
@@ -256,10 +259,16 @@ function getRunChildExecArgv() {
256
259
  if (arg === "--input-type") skipNext = true;
257
260
  continue;
258
261
  }
262
+ if (isInspectArg(arg)) continue;
259
263
  execArgv.push(arg);
260
264
  }
265
+ const inspectArg = process.env[runChildInspectArgEnv];
266
+ if (inspectArg !== void 0 && isInspectArg(inspectArg)) execArgv.push(inspectArg);
261
267
  return execArgv;
262
268
  }
269
+ function isInspectArg(arg) {
270
+ return arg === inspectFlagPrefix || arg.startsWith(`${inspectFlagPrefix}=`) || arg === inspectBrkFlagPrefix || arg.startsWith(`${inspectBrkFlagPrefix}=`);
271
+ }
263
272
  function killRunChild(runState) {
264
273
  const child = runState.childProcess;
265
274
  runState.childProcess = void 0;
@@ -1050,8 +1059,8 @@ async function commandApp(args) {
1050
1059
  const { serve } = await import("@hono/node-server");
1051
1060
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1052
1061
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1053
- const appModule = await import("./app-Dg3qYVku.mjs");
1054
- const runnerModule = await import("./runner-BK1KX2SA.mjs");
1062
+ const appModule = await import("./app-DKWm1oxc.mjs");
1063
+ const runnerModule = await import("./runner-Dx1sMCbh.mjs");
1055
1064
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1056
1065
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1057
1066
  await runnerModule.initRunner();
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
1
  import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as defineEval, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, fn as getEvalRegistry, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as traceDisplayConfigSchema, ln as startEvalBackgroundJob, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as repoFile, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-H0pSUl3I.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-lOZdhO2D.mjs";
3
- import "./src-Btb9RCYD.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-CMiCEQ-3.mjs";
3
+ import "./src-BgGL7DDp.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-lOZdhO2D.mjs";
2
- import "./src-Btb9RCYD.mjs";
1
+ import { n as createRunner } from "./cli-CMiCEQ-3.mjs";
2
+ import "./src-BgGL7DDp.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-CmbmfBG2.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-DLnj18MO.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,3 +1,3 @@
1
1
  import "./runOrchestration-H0pSUl3I.mjs";
2
- import "./cli-lOZdhO2D.mjs";
2
+ import "./cli-CMiCEQ-3.mjs";
3
3
  export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.13.0",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"