@axlsdk/studio 0.16.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -24,8 +24,8 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/cli.ts
27
- var import_node_path3 = require("path");
28
- var import_node_fs3 = require("fs");
27
+ var import_node_path2 = require("path");
28
+ var import_node_fs2 = require("fs");
29
29
  var import_node_server = require("@hono/node-server");
30
30
  var import_node_ws = require("@hono/node-ws");
31
31
 
@@ -131,8 +131,11 @@ function redactEvalItem(item) {
131
131
  }
132
132
  function redactEvalResult(result, redact) {
133
133
  if (!redact) return result;
134
+ const meta = result.metadata;
135
+ const scrubbedMetadata = meta && typeof meta.batchFailure === "string" ? { ...meta, batchFailure: REDACTED } : result.metadata;
134
136
  return {
135
137
  ...result,
138
+ metadata: scrubbedMetadata,
136
139
  items: result.items.map(redactEvalItem)
137
140
  };
138
141
  }
@@ -927,13 +930,18 @@ function reduceEvalTrends(acc, entry) {
927
930
  const cost = extractCost(entry.data);
928
931
  const model = extractModel(entry.data);
929
932
  const duration = extractDuration(entry.data);
933
+ const metadata = entry.data?.metadata;
934
+ const runGroupId = typeof metadata?.runGroupId === "string" ? metadata.runGroupId : void 0;
935
+ const batchAttempted = typeof metadata?.batchAttempted === "number" && Number.isFinite(metadata.batchAttempted) ? metadata.batchAttempted : void 0;
930
936
  const run = {
931
937
  timestamp: entry.timestamp,
932
938
  id: entry.id,
933
939
  scores,
934
940
  cost,
935
941
  ...model !== void 0 ? { model } : {},
936
- ...duration !== void 0 ? { duration } : {}
942
+ ...duration !== void 0 ? { duration } : {},
943
+ ...runGroupId !== void 0 ? { runGroupId } : {},
944
+ ...batchAttempted !== void 0 ? { batchAttempted } : {}
937
945
  };
938
946
  const byEval = { ...acc.byEval };
939
947
  const prev = byEval[entry.eval];
@@ -1616,33 +1624,79 @@ function createEvalRoutes(connMgr, evalLoader) {
1616
1624
  if (runs > 1) {
1617
1625
  const runGroupId = (0, import_node_crypto.randomUUID)();
1618
1626
  const results = [];
1627
+ let runFailure;
1628
+ let cancelled = false;
1619
1629
  for (let r = 0; r < runs; r++) {
1620
- if (ac.signal.aborted) break;
1621
- const result = await runtime.runRegisteredEval(name, {
1622
- metadata: { runGroupId, runIndex: r },
1623
- signal: ac.signal,
1624
- captureTraces,
1625
- onProgress: (event) => {
1626
- if (event.type === "run_done") return;
1630
+ if (ac.signal.aborted) {
1631
+ cancelled = true;
1632
+ break;
1633
+ }
1634
+ try {
1635
+ const result = await runtime.runRegisteredEval(name, {
1636
+ metadata: { runGroupId, runIndex: r, batchAttempted: runs },
1637
+ signal: ac.signal,
1638
+ captureTraces,
1639
+ onProgress: (event) => {
1640
+ if (event.type === "run_done") return;
1641
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1642
+ ...event,
1643
+ run: r + 1,
1644
+ totalRuns: runs
1645
+ });
1646
+ }
1647
+ });
1648
+ results.push(result);
1649
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1650
+ type: "run_done",
1651
+ run: r + 1,
1652
+ totalRuns: runs
1653
+ });
1654
+ } catch (err) {
1655
+ const isAbort = ac.signal.aborted || err instanceof Error && err.name === "AbortError";
1656
+ if (isAbort) {
1657
+ cancelled = true;
1627
1658
  connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1628
- ...event,
1659
+ type: "run_cancelled",
1629
1660
  run: r + 1,
1630
1661
  totalRuns: runs
1631
1662
  });
1663
+ break;
1632
1664
  }
1633
- });
1634
- results.push(result);
1635
- connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1636
- type: "run_done",
1637
- run: r + 1,
1638
- totalRuns: runs
1639
- });
1665
+ runFailure = err instanceof Error ? err : new Error(String(err));
1666
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1667
+ type: "run_failed",
1668
+ run: r + 1,
1669
+ totalRuns: runs,
1670
+ message: redactErrorMessage(runFailure, redactOn)
1671
+ });
1672
+ break;
1673
+ }
1640
1674
  }
1641
1675
  if (results.length > 0) {
1676
+ const partial = results.length < runs;
1677
+ const failureMsg = runFailure ? redactErrorMessage(runFailure, redactOn) || String(runFailure) || void 0 : void 0;
1642
1678
  connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1643
1679
  type: "done",
1644
1680
  evalResultId: results[0].id,
1645
- runGroupId
1681
+ runGroupId,
1682
+ ...partial && {
1683
+ partial: true,
1684
+ batchCompleted: results.length,
1685
+ batchAttempted: runs,
1686
+ // `cancelled` and `batchFailure` are mutually exclusive:
1687
+ // the catch block sets at most one of {cancelled,
1688
+ // runFailure}. The client uses `cancelled` to render a
1689
+ // neutral "Cancelled — X of N runs completed" caption
1690
+ // instead of the amber "Stopped after: <message>"
1691
+ // failure caption.
1692
+ ...cancelled ? { cancelled: true } : {},
1693
+ ...failureMsg ? { batchFailure: failureMsg } : {}
1694
+ }
1695
+ });
1696
+ } else if (runFailure) {
1697
+ connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
1698
+ type: "error",
1699
+ message: redactErrorMessage(runFailure, redactOn)
1646
1700
  });
1647
1701
  } else {
1648
1702
  connMgr.broadcastWithWildcard(`eval:${evalRunId}`, {
@@ -1680,19 +1734,38 @@ function createEvalRoutes(connMgr, evalLoader) {
1680
1734
  const { aggregateRuns } = await import("@axlsdk/eval");
1681
1735
  const runGroupId = (0, import_node_crypto.randomUUID)();
1682
1736
  const results = [];
1737
+ let runFailure;
1683
1738
  for (let r = 0; r < runs; r++) {
1684
- const result2 = await runtime.runRegisteredEval(name, {
1685
- metadata: { runGroupId, runIndex: r },
1686
- captureTraces
1687
- });
1688
- results.push(result2);
1739
+ try {
1740
+ const result2 = await runtime.runRegisteredEval(name, {
1741
+ metadata: { runGroupId, runIndex: r, batchAttempted: runs },
1742
+ captureTraces
1743
+ });
1744
+ results.push(result2);
1745
+ } catch (err) {
1746
+ runFailure = err instanceof Error ? err : new Error(String(err));
1747
+ break;
1748
+ }
1749
+ }
1750
+ if (results.length === 0) {
1751
+ throw runFailure ?? new Error("No runs completed");
1689
1752
  }
1690
- const typedResults = results;
1691
- const aggregate = aggregateRuns(typedResults);
1692
- const first = typedResults[0];
1753
+ const aggregate = aggregateRuns(results);
1754
+ const first = results[0];
1755
+ const partial = results.length < runs;
1756
+ const failureMsg = runFailure ? redactErrorMessage(runFailure, redactOn) || String(runFailure) || void 0 : void 0;
1693
1757
  const result = {
1694
1758
  ...first,
1695
- _multiRun: { aggregate, allRuns: typedResults }
1759
+ _multiRun: {
1760
+ aggregate,
1761
+ allRuns: results,
1762
+ ...partial && {
1763
+ partial: true,
1764
+ batchCompleted: results.length,
1765
+ batchAttempted: runs,
1766
+ ...failureMsg ? { batchFailure: failureMsg } : {}
1767
+ }
1768
+ }
1696
1769
  };
1697
1770
  return c.json({
1698
1771
  ok: true,
@@ -1864,60 +1937,75 @@ function createEvalRoutes(connMgr, evalLoader) {
1864
1937
  const runtime = c.get("runtime");
1865
1938
  const body = await c.req.json();
1866
1939
  const bad = (message) => c.json({ ok: false, error: { code: "BAD_REQUEST", message } }, 400);
1867
- if (!body.result || typeof body.result !== "object") {
1940
+ if (body.result === void 0 || body.result === null) {
1868
1941
  return bad("result is required");
1869
1942
  }
1870
- const result = body.result;
1871
- if (!Array.isArray(result.items)) {
1872
- return bad("result.items must be an array");
1873
- }
1874
- if (typeof result.summary !== "object" || result.summary == null) {
1875
- return bad("result.summary must be an object");
1876
- }
1877
- if (typeof result.dataset !== "string" || !result.dataset) {
1878
- return bad("result.dataset must be a non-empty string (required for compare)");
1879
- }
1880
- const summary = result.summary;
1881
- if (typeof summary.scorers !== "object" || summary.scorers == null) {
1882
- return bad("result.summary.scorers must be an object");
1883
- }
1884
- const summaryScorerNames = Object.keys(summary.scorers);
1885
- const items = result.items;
1886
- const summaryScorerSet = new Set(summaryScorerNames);
1887
- const uncoveredAcrossItems = /* @__PURE__ */ new Set();
1888
- for (const item of items) {
1889
- const itemScores = item?.scores;
1890
- if (itemScores && typeof itemScores === "object") {
1891
- for (const name of Object.keys(itemScores)) {
1892
- if (!summaryScorerSet.has(name)) uncoveredAcrossItems.add(name);
1943
+ const resultsRaw = Array.isArray(body.result) ? body.result : [body.result];
1944
+ if (resultsRaw.length === 0) {
1945
+ return bad("result must be a non-empty array or object");
1946
+ }
1947
+ const validatedResults = [];
1948
+ for (let i = 0; i < resultsRaw.length; i++) {
1949
+ const entry = resultsRaw[i];
1950
+ const prefix = resultsRaw.length > 1 ? `result[${i}]` : "result";
1951
+ if (!entry || typeof entry !== "object") {
1952
+ return bad(`${prefix} must be an object`);
1953
+ }
1954
+ const r = entry;
1955
+ if (!Array.isArray(r.items)) {
1956
+ return bad(`${prefix}.items must be an array`);
1957
+ }
1958
+ if (typeof r.summary !== "object" || r.summary == null) {
1959
+ return bad(`${prefix}.summary must be an object`);
1960
+ }
1961
+ if (typeof r.dataset !== "string" || !r.dataset) {
1962
+ return bad(`${prefix}.dataset must be a non-empty string (required for compare)`);
1963
+ }
1964
+ const summary = r.summary;
1965
+ if (typeof summary.scorers !== "object" || summary.scorers == null) {
1966
+ return bad(`${prefix}.summary.scorers must be an object`);
1967
+ }
1968
+ const summaryScorerNames = Object.keys(summary.scorers);
1969
+ const items = r.items;
1970
+ const summaryScorerSet = new Set(summaryScorerNames);
1971
+ const uncoveredAcrossItems = /* @__PURE__ */ new Set();
1972
+ for (const item of items) {
1973
+ const itemScores = item?.scores;
1974
+ if (itemScores && typeof itemScores === "object") {
1975
+ for (const name of Object.keys(itemScores)) {
1976
+ if (!summaryScorerSet.has(name)) uncoveredAcrossItems.add(name);
1977
+ }
1893
1978
  }
1894
1979
  }
1895
- }
1896
- if (uncoveredAcrossItems.size > 0) {
1897
- return bad(
1898
- `item scores reference scorer(s) not in summary.scorers: ${[...uncoveredAcrossItems].join(", ")}`
1899
- );
1980
+ if (uncoveredAcrossItems.size > 0) {
1981
+ return bad(
1982
+ `${prefix} item scores reference scorer(s) not in summary.scorers: ${[...uncoveredAcrossItems].join(", ")}`
1983
+ );
1984
+ }
1985
+ validatedResults.push(r);
1900
1986
  }
1901
1987
  const trim = (v) => typeof v === "string" && v.trim() !== "" ? v.trim() : void 0;
1902
- const metadataObj = typeof result.metadata === "object" && result.metadata != null ? result.metadata : {};
1988
+ const firstResult = validatedResults[0];
1989
+ const metadataObj = typeof firstResult.metadata === "object" && firstResult.metadata != null ? firstResult.metadata : {};
1903
1990
  const workflowsFromMeta = Array.isArray(metadataObj.workflows) ? metadataObj.workflows : [];
1904
1991
  const primaryWorkflow = workflowsFromMeta.find((w) => typeof w === "string");
1905
- const evalName = trim(body.eval) ?? trim(primaryWorkflow) ?? // Legacy fallback: pre-0.14 CLI artifacts had workflow at the top level.
1906
- trim(result.workflow) ?? "imported";
1907
- const id = (0, import_node_crypto.randomUUID)();
1992
+ const evalName = trim(body.eval) ?? trim(primaryWorkflow) ?? trim(firstResult.workflow) ?? "imported";
1908
1993
  const timestamp = Date.now();
1909
- const imported = {
1910
- ...result,
1911
- id,
1912
- metadata: typeof result.metadata === "object" && result.metadata != null ? result.metadata : {}
1913
- };
1914
- await runtime.saveEvalResult({
1915
- id,
1916
- eval: evalName,
1917
- timestamp,
1918
- data: imported
1919
- });
1920
- return c.json({ ok: true, data: { id, eval: evalName, timestamp } });
1994
+ const imported = [];
1995
+ for (const r of validatedResults) {
1996
+ const id = (0, import_node_crypto.randomUUID)();
1997
+ const entry = {
1998
+ ...r,
1999
+ id,
2000
+ metadata: typeof r.metadata === "object" && r.metadata != null ? r.metadata : {}
2001
+ };
2002
+ await runtime.saveEvalResult({ id, eval: evalName, timestamp, data: entry });
2003
+ imported.push({ id, eval: evalName, timestamp });
2004
+ }
2005
+ if (imported.length === 1) {
2006
+ return c.json({ ok: true, data: imported[0] });
2007
+ }
2008
+ return c.json({ ok: true, data: { imported } });
1921
2009
  });
1922
2010
  function closeActiveRuns() {
1923
2011
  for (const ac of activeRuns.values()) ac.abort();
@@ -2234,28 +2322,10 @@ function createServer(options) {
2234
2322
  }
2235
2323
 
2236
2324
  // src/resolve-runtime.ts
2237
- function resolveRuntime(mod) {
2238
- const def = mod.default;
2239
- return def?.default ?? def ?? mod.runtime;
2240
- }
2325
+ var import_axl6 = require("@axlsdk/axl");
2241
2326
 
2242
2327
  // src/cli-utils.ts
2243
- var import_node_path2 = require("path");
2244
- var import_node_fs2 = require("fs");
2245
- var import_node_url = require("url");
2246
- var CONFIG_CANDIDATES = [
2247
- "axl.config.mts",
2248
- "axl.config.ts",
2249
- "axl.config.mjs",
2250
- "axl.config.js"
2251
- ];
2252
- function findConfig(cwd) {
2253
- for (const name of CONFIG_CANDIDATES) {
2254
- const p = (0, import_node_path2.resolve)(cwd, name);
2255
- if ((0, import_node_fs2.existsSync)(p)) return p;
2256
- }
2257
- return void 0;
2258
- }
2328
+ var import_axl7 = require("@axlsdk/axl");
2259
2329
  function parseArgs(argv) {
2260
2330
  let port = 4400;
2261
2331
  let config;
@@ -2288,29 +2358,6 @@ function parseArgs(argv) {
2288
2358
  }
2289
2359
  return result;
2290
2360
  }
2291
- function needsTsxLoader(configPath) {
2292
- return /\.[mc]?tsx?$/.test(configPath);
2293
- }
2294
- var tsImportFn;
2295
- async function importModule(filePath, parentURL) {
2296
- if (needsTsxLoader(filePath)) {
2297
- if (tsImportFn === void 0) {
2298
- try {
2299
- const mod = await import("tsx/esm/api");
2300
- tsImportFn = mod.tsImport ?? null;
2301
- } catch {
2302
- tsImportFn = null;
2303
- console.warn(
2304
- "[axl-studio] Warning: tsx is not installed. TypeScript config files require tsx as a dependency.\n Install it with: npm install -D tsx"
2305
- );
2306
- }
2307
- }
2308
- if (tsImportFn) {
2309
- return await tsImportFn((0, import_node_url.pathToFileURL)(filePath).href, parentURL);
2310
- }
2311
- }
2312
- return await import((0, import_node_url.pathToFileURL)(filePath).href);
2313
- }
2314
2361
 
2315
2362
  // src/cli.ts
2316
2363
  var import_meta = {};
@@ -2332,7 +2379,7 @@ Options:
2332
2379
  -h, --help Show this help message
2333
2380
 
2334
2381
  Config auto-detection order:
2335
- ${CONFIG_CANDIDATES.join(" \u2192 ")}
2382
+ ${import_axl7.CONFIG_CANDIDATES.join(" \u2192 ")}
2336
2383
 
2337
2384
  Tip: Use .mts for configs with top-level await or in projects without "type": "module".
2338
2385
  `);
@@ -2344,15 +2391,15 @@ Tip: Use .mts for configs with top-level await or in projects without "type": "m
2344
2391
  }
2345
2392
  let configPath;
2346
2393
  if (args.config) {
2347
- configPath = (0, import_node_path3.resolve)(process.cwd(), args.config);
2348
- if (!(0, import_node_fs3.existsSync)(configPath)) {
2394
+ configPath = (0, import_node_path2.resolve)(process.cwd(), args.config);
2395
+ if (!(0, import_node_fs2.existsSync)(configPath)) {
2349
2396
  console.error(`Config file not found: ${configPath}`);
2350
2397
  process.exit(1);
2351
2398
  }
2352
2399
  } else {
2353
- const found = findConfig(process.cwd());
2400
+ const found = (0, import_axl7.findConfig)(process.cwd());
2354
2401
  if (!found) {
2355
- console.error(`No config file found. Searched for: ${CONFIG_CANDIDATES.join(", ")}`);
2402
+ console.error(`No config file found. Searched for: ${import_axl7.CONFIG_CANDIDATES.join(", ")}`);
2356
2403
  console.error(`Create an axl.config.mts that exports a default AxlRuntime instance.`);
2357
2404
  process.exit(1);
2358
2405
  }
@@ -2377,10 +2424,10 @@ Tip: Use .mts for configs with top-level await or in projects without "type": "m
2377
2424
  }
2378
2425
  console.log(`[axl-studio] Loading config from ${configPath}`);
2379
2426
  let runtime;
2380
- const ext = (0, import_node_path3.extname)(configPath);
2427
+ const ext = (0, import_node_path2.extname)(configPath);
2381
2428
  try {
2382
- const mod = await importModule(configPath, import_meta.url);
2383
- runtime = resolveRuntime(mod);
2429
+ const mod = await (0, import_axl7.importModule)(configPath, import_meta.url);
2430
+ runtime = (0, import_axl6.resolveRuntime)(mod);
2384
2431
  if (!runtime || typeof runtime.execute !== "function") {
2385
2432
  console.error(`Config must export a default AxlRuntime instance.`);
2386
2433
  if (runtime) {
@@ -2412,8 +2459,8 @@ Tip: Use .mts for configs with top-level await or in projects without "type": "m
2412
2459
  console.error(`Failed to load config:`, err);
2413
2460
  process.exit(1);
2414
2461
  }
2415
- const staticRoot = (0, import_node_path3.resolve)(import_meta.dirname ?? __dirname, "client");
2416
- const hasStaticAssets = (0, import_node_fs3.existsSync)((0, import_node_path3.resolve)(staticRoot, "index.html"));
2462
+ const staticRoot = (0, import_node_path2.resolve)(import_meta.dirname ?? __dirname, "client");
2463
+ const hasStaticAssets = (0, import_node_fs2.existsSync)((0, import_node_path2.resolve)(staticRoot, "index.html"));
2417
2464
  const { app: app6, createWsHandlers: createWsHandlers2 } = createServer({
2418
2465
  runtime,
2419
2466
  staticRoot: hasStaticAssets ? staticRoot : void 0,