@runtypelabs/cli 2.23.0 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +303 -185
  2. package/package.json +3 -3
package/dist/index.js CHANGED
@@ -20404,6 +20404,41 @@ function validateUpsertRecordSourceShape(flowSteps, buckets) {
20404
20404
  }
20405
20405
  }
20406
20406
  }
20407
+ function validateStoreVectorSource(flowSteps, buckets, declaredFlowInputs) {
20408
+ const declaredVariables = collectDeclaredFlowInputs(flowSteps, declaredFlowInputs);
20409
+ for (const [stepIndex, step] of flowSteps.entries()) {
20410
+ if (step.enabled === false) continue;
20411
+ if (!isObjectRecord(step.config)) continue;
20412
+ const config3 = step.config;
20413
+ if (step.type === "store-vector") {
20414
+ const rawSource = typeof config3.vectorsSource === "string" ? config3.vectorsSource.trim() : "";
20415
+ if (rawSource) {
20416
+ const templateMatch = rawSource.match(/^\s*\{\{\s*([^}]+?)\s*\}\}\s*$/);
20417
+ const reference = (templateMatch?.[1] ?? rawSource).trim();
20418
+ const classification = classifyVariableReference(reference);
20419
+ if (classification.namespace === "plain" || classification.namespace === "flow") {
20420
+ const baseName = classification.baseName;
20421
+ const rootVariable = baseName.split(".")[0] || "";
20422
+ if (rootVariable && !declaredVariables.has(rootVariable) && !declaredVariables.has(baseName)) {
20423
+ addIssue(
20424
+ "warning",
20425
+ {
20426
+ code: "STORE_VECTOR_SOURCE_UNRESOLVED",
20427
+ message: `Vectors source "${rawSource}" references variable "${rootVariable}", but no earlier step declares an output variable "${rootVariable}" (and it is not a flow input). This will fail at runtime with "Could not resolve vectors". Set a prior step's outputVariable to "${rootVariable}" (typically a generate-embedding step), or declare "${rootVariable}" as a flow input.`,
20428
+ path: `flowSteps[${stepIndex}].config.vectorsSource`,
20429
+ step: { index: stepIndex, name: step.name, type: step.type },
20430
+ details: { vectorsSource: rawSource, rootVariable }
20431
+ },
20432
+ buckets
20433
+ );
20434
+ }
20435
+ }
20436
+ }
20437
+ }
20438
+ const outputVar = getStepOutputVariable(step);
20439
+ if (outputVar) declaredVariables.add(outputVar);
20440
+ }
20441
+ }
20407
20442
  function checkConditionExpression(expr, path19, stepRef, buckets) {
20408
20443
  if (typeof expr !== "string" || !expr.includes("{{")) return;
20409
20444
  const match = UNQUOTED_TEMPLATE_BEFORE_OP.exec(expr) || UNQUOTED_TEMPLATE_AFTER_OP.exec(expr);
@@ -21061,6 +21096,7 @@ function collectFlowStructureIssues(flowData, deps, buckets) {
21061
21096
  deps.declaredFlowInputs
21062
21097
  );
21063
21098
  validateUpsertRecordSourceShape(flowData.flowSteps, buckets);
21099
+ validateStoreVectorSource(flowData.flowSteps, buckets, deps.declaredFlowInputs);
21064
21100
  validateConditionExpressions(flowData.flowSteps, buckets, conditionalStepsExceedingDepth);
21065
21101
  return { pendingChecks };
21066
21102
  }
@@ -37555,39 +37591,87 @@ var BUILT_IN_GRADER_IDS = [
37555
37591
  "rightTone",
37556
37592
  "safeToSend"
37557
37593
  ];
37594
+ var graderSeveritySchema = external_exports.enum(["gate", "soft"]);
37595
+ var severityFields = { severity: graderSeveritySchema.optional() };
37558
37596
  var checkGraderSchema = external_exports.discriminatedUnion("kind", [
37559
37597
  external_exports.object({
37560
37598
  kind: external_exports.literal("contains"),
37561
37599
  value: external_exports.string(),
37562
- caseSensitive: external_exports.boolean().optional()
37600
+ caseSensitive: external_exports.boolean().optional(),
37601
+ ...severityFields
37563
37602
  }),
37564
37603
  external_exports.object({
37565
37604
  kind: external_exports.literal("not_contains"),
37566
37605
  value: external_exports.string(),
37567
- caseSensitive: external_exports.boolean().optional()
37606
+ caseSensitive: external_exports.boolean().optional(),
37607
+ ...severityFields
37568
37608
  }),
37569
37609
  // Exact/normalized match against `case.expected.text`.
37570
- external_exports.object({ kind: external_exports.literal("matches_expected") }),
37610
+ external_exports.object({ kind: external_exports.literal("matches_expected"), ...severityFields }),
37571
37611
  external_exports.object({
37572
37612
  kind: external_exports.literal("regex"),
37573
37613
  pattern: external_exports.string(),
37574
- flags: external_exports.string().optional()
37614
+ flags: external_exports.string().optional(),
37615
+ ...severityFields
37575
37616
  }),
37576
- external_exports.object({ kind: external_exports.literal("valid_json") }),
37617
+ external_exports.object({ kind: external_exports.literal("valid_json"), ...severityFields }),
37577
37618
  external_exports.object({
37578
37619
  kind: external_exports.literal("json_field"),
37579
37620
  path: external_exports.string(),
37580
37621
  equals: external_exports.unknown().optional(),
37581
- exists: external_exports.boolean().optional()
37622
+ exists: external_exports.boolean().optional(),
37623
+ ...severityFields
37582
37624
  }),
37583
37625
  external_exports.object({
37584
37626
  kind: external_exports.literal("length"),
37585
37627
  minChars: external_exports.number().int().nonnegative().optional(),
37586
- maxChars: external_exports.number().int().nonnegative().optional()
37628
+ maxChars: external_exports.number().int().nonnegative().optional(),
37629
+ ...severityFields
37587
37630
  }),
37588
- external_exports.object({ kind: external_exports.literal("latency"), maxMs: external_exports.number().int().positive() }),
37631
+ external_exports.object({ kind: external_exports.literal("latency"), maxMs: external_exports.number().int().positive(), ...severityFields }),
37589
37632
  // Today's implicit "success" made explicit: the case produced output without erroring.
37590
- external_exports.object({ kind: external_exports.literal("no_error") })
37633
+ external_exports.object({ kind: external_exports.literal("no_error"), ...severityFields }),
37634
+ // -------------------------------------------------------------------------
37635
+ // Trace checks — deterministic, free, pure assertions over the run's
37636
+ // EXECUTION TRACE (which tools/steps ran, in what order, whether it
37637
+ // completed, what it cost) rather than its final output text. Scored by the
37638
+ // same pure `runCheck` engine against `GradingTarget.trace`. These are the
37639
+ // assertions a string/JSON check can't express (planning doc §3.1).
37640
+ // -------------------------------------------------------------------------
37641
+ // At least one tool call named `name` happened. Optional filters narrow the
37642
+ // match: `input`/`output` deep-equal a call's resolved input/result,
37643
+ // `isError` matches a call's error flag, and `times` asserts the matching
37644
+ // count EXACTLY (omit `times` for "at least once").
37645
+ external_exports.object({
37646
+ kind: external_exports.literal("called_tool"),
37647
+ name: external_exports.string().min(1),
37648
+ input: external_exports.unknown().optional(),
37649
+ output: external_exports.unknown().optional(),
37650
+ isError: external_exports.boolean().optional(),
37651
+ times: external_exports.number().int().positive().optional(),
37652
+ ...severityFields
37653
+ }),
37654
+ // No tool named `name` was called.
37655
+ external_exports.object({ kind: external_exports.literal("not_called_tool"), name: external_exports.string().min(1), ...severityFields }),
37656
+ // The run made no tool calls at all.
37657
+ external_exports.object({ kind: external_exports.literal("used_no_tools"), ...severityFields }),
37658
+ // The run made at most `max` tool calls.
37659
+ external_exports.object({
37660
+ kind: external_exports.literal("max_tool_calls"),
37661
+ max: external_exports.number().int().nonnegative(),
37662
+ ...severityFields
37663
+ }),
37664
+ // `tools` appears as an ordered SUBSEQUENCE of the tool-call names (other
37665
+ // calls may interleave; relative order of the listed tools must hold).
37666
+ external_exports.object({ kind: external_exports.literal("tool_order"), tools: external_exports.array(external_exports.string()).min(1), ...severityFields }),
37667
+ // A step named (or typed) `name` ran.
37668
+ external_exports.object({ kind: external_exports.literal("ran_step"), name: external_exports.string().min(1), ...severityFields }),
37669
+ // `steps` appears as an ordered SUBSEQUENCE of the steps that ran.
37670
+ external_exports.object({ kind: external_exports.literal("step_order"), steps: external_exports.array(external_exports.string()).min(1), ...severityFields }),
37671
+ // The run completed (finished without erroring and was not left paused).
37672
+ external_exports.object({ kind: external_exports.literal("completed"), ...severityFields }),
37673
+ // Total run cost was within `maxUsd` (US dollars).
37674
+ external_exports.object({ kind: external_exports.literal("cost"), maxUsd: external_exports.number().positive(), ...severityFields })
37591
37675
  ]);
37592
37676
  var aiGraderSchema = external_exports.object({
37593
37677
  kind: external_exports.literal("ai"),
@@ -37599,7 +37683,8 @@ var aiGraderSchema = external_exports.object({
37599
37683
  /** Defaults to a cheap routed model (e.g. claude-haiku-4-5) at execution time. */
37600
37684
  model: external_exports.string().optional(),
37601
37685
  /** Pass cutoff for the 1-5 scale. */
37602
- threshold: external_exports.number().min(1).max(5).optional()
37686
+ threshold: external_exports.number().min(1).max(5).optional(),
37687
+ ...severityFields
37603
37688
  });
37604
37689
  var graderConfigSchema = external_exports.union([checkGraderSchema, aiGraderSchema]);
37605
37690
  var gradersSchema = external_exports.array(graderConfigSchema);
@@ -42043,7 +42128,7 @@ var FLOW_STEP_TYPE_METADATA = {
42043
42128
  configHints: "provider, query, maxResults, outputVariable"
42044
42129
  },
42045
42130
  "generate-embedding": {
42046
- description: "Create a vector embedding from text using an embedding model.",
42131
+ description: "Create a vector embedding from text using an embedding model. Writes { embedding, model, dimensions, textLength, metadata } to outputVariable; feed that variable into a store-vector step via vectorsSource.",
42047
42132
  category: "vector",
42048
42133
  isPrompt: false,
42049
42134
  configHints: "inputSource, text, embeddingModel, outputVariable"
@@ -42055,10 +42140,10 @@ var FLOW_STEP_TYPE_METADATA = {
42055
42140
  configHints: "query, limit, threshold, outputVariable"
42056
42141
  },
42057
42142
  "store-vector": {
42058
- description: "Store vector embeddings in a vector database.",
42143
+ description: "Store vector embeddings in a vector database (pgvector, Weaviate, or Vectorize). vectorsSource accepts a bare variable name, a dot-path, or a {{var}} template, and must resolve to a number[] or an { embedding: number[] } object (the output of a prior generate-embedding step). The vector length must match the target index dimension.",
42059
42144
  category: "vector",
42060
42145
  isPrompt: false,
42061
- configHints: "vectorsSource, destination, outputVariable"
42146
+ configHints: "vectorsSource, destination, idTemplate, outputVariable"
42062
42147
  },
42063
42148
  crawl: {
42064
42149
  description: "Crawl a website and extract content from pages.",
@@ -63918,185 +64003,191 @@ function buildJUnitXml(suites) {
63918
64003
 
63919
64004
  // src/commands/eval.ts
63920
64005
  var evalCommand = new Command20("eval").description("Manage evaluations");
63921
- evalCommand.command("submit").description("Submit an eval batch").requiredOption("-f, --flow <id>", "Flow ID to evaluate").requiredOption("-r, --records <file>", "JSON file with record IDs").option("-n, --name <name>", "Eval batch name").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (options) => {
63922
- const apiKey = await ensureAuth();
63923
- if (!apiKey) return;
63924
- let recordIds;
63925
- try {
63926
- const content = readFileSync16(options.records, "utf-8");
63927
- const parsed = JSON.parse(content);
63928
- recordIds = Array.isArray(parsed) ? parsed : parsed.recordIds || parsed.records || [];
63929
- } catch (error51) {
63930
- const message = error51 instanceof Error ? error51.message : "Unknown error";
63931
- console.error(chalk27.red(`Failed to read records file: ${message}`));
63932
- process.exit(1);
63933
- return;
63934
- }
63935
- const client = createCliClient(apiKey);
63936
- if (!isTTY(options) || options.json) {
64006
+ evalCommand.command("submit").description("Submit an eval batch").requiredOption("-f, --flow <id>", "Flow ID to evaluate").requiredOption("-r, --records <file>", "JSON file with record IDs").option("-n, --name <name>", "Eval batch name").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(
64007
+ async (options) => {
64008
+ const apiKey = await ensureAuth();
64009
+ if (!apiKey) return;
64010
+ let recordIds;
63937
64011
  try {
63938
- const data = await client.post("/eval/submit", {
63939
- flowId: options.flow,
63940
- recordIds,
63941
- name: options.name
63942
- });
63943
- if (options.json) {
63944
- printJson(data);
63945
- } else {
63946
- console.log(chalk27.green("Eval submitted"));
63947
- console.log(` Batch ID: ${chalk27.green(data.id)}`);
63948
- if (data.name) console.log(` Name: ${data.name}`);
63949
- console.log(` Status: ${data.status}`);
63950
- console.log(` Records: ${data.totalRecords}`);
63951
- if (data.groupId) console.log(` Group: ${data.groupId}`);
63952
- }
64012
+ const content = readFileSync16(options.records, "utf-8");
64013
+ const parsed = JSON.parse(content);
64014
+ recordIds = Array.isArray(parsed) ? parsed : parsed.recordIds || parsed.records || [];
63953
64015
  } catch (error51) {
63954
64016
  const message = error51 instanceof Error ? error51.message : "Unknown error";
63955
- console.error(chalk27.red("Failed to submit eval"));
63956
- console.error(chalk27.red(message));
64017
+ console.error(chalk27.red(`Failed to read records file: ${message}`));
63957
64018
  process.exit(1);
64019
+ return;
63958
64020
  }
63959
- return;
63960
- }
63961
- const App = () => {
63962
- const [loading, setLoading] = useState36(true);
63963
- const [success2, setSuccess] = useState36(null);
63964
- const [error51, setError] = useState36(null);
63965
- const [resultNode, setResultNode] = useState36(void 0);
63966
- useEffect30(() => {
63967
- const run2 = async () => {
63968
- try {
63969
- const data = await client.post("/eval/submit", {
63970
- flowId: options.flow,
63971
- recordIds,
63972
- name: options.name
63973
- });
63974
- const fields = [
63975
- { label: "Batch ID", value: data.id, color: "green" }
63976
- ];
63977
- if (data.name) fields.push({ label: "Name", value: data.name });
63978
- fields.push({ label: "Status", value: data.status });
63979
- fields.push({ label: "Records", value: data.totalRecords });
63980
- if (data.groupId) fields.push({ label: "Group", value: data.groupId });
63981
- setResultNode(React19.createElement(EntityCard, { fields }));
63982
- setSuccess(true);
63983
- setLoading(false);
63984
- } catch (err) {
63985
- setError(err instanceof Error ? err : new Error(String(err)));
63986
- setSuccess(false);
63987
- setLoading(false);
63988
- }
63989
- };
63990
- run2();
63991
- }, []);
63992
- return React19.createElement(MutationResult, {
63993
- loading,
63994
- loadingLabel: `Submitting eval with ${recordIds.length} records...`,
63995
- success: success2,
63996
- successMessage: "Eval submitted",
63997
- error: error51,
63998
- result: resultNode
63999
- });
64000
- };
64001
- const { waitUntilExit } = render19(React19.createElement(App));
64002
- await waitUntilExit();
64003
- });
64004
- evalCommand.command("list").description("List eval batches").option("--flow <id>", "Filter by flow ID").option("--limit <n>", "Limit results", "20").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (options) => {
64005
- const apiKey = await ensureAuth();
64006
- if (!apiKey) return;
64007
- const client = createCliClient(apiKey);
64008
- const params = { limit: options.limit };
64009
- if (options.flow) params.flowId = options.flow;
64010
- if (!isTTY(options) || options.json) {
64011
- try {
64012
- const data = await client.get("/eval/batches", params);
64013
- if (options.json) {
64014
- printJson(data);
64015
- } else {
64016
- const batches = data.data ?? [];
64017
- if (batches.length === 0) {
64018
- console.log(chalk27.gray("No eval batches found"));
64019
- return;
64020
- }
64021
- console.log(chalk27.cyan("Eval Batches:"));
64022
- for (const batch of batches) {
64023
- const name = batch.name || batch.id;
64024
- const progress = batch.totalRecords ? `${batch.completedRecords ?? 0}/${batch.totalRecords}` : "";
64025
- const statusColor = batch.status === "completed" ? "green" : "yellow";
64026
- console.log(
64027
- ` ${chalk27.green(batch.id)} ${name} ${chalk27[statusColor](`[${batch.status}]`)} ${chalk27.gray(progress)}`
64028
- );
64029
- }
64030
- const total = getTotalCount(data.pagination);
64031
- if (total !== void 0) {
64032
- console.log(chalk27.dim(`
64033
- Total: ${total} batches`));
64021
+ const client = createCliClient(apiKey);
64022
+ if (!isTTY(options) || options.json) {
64023
+ try {
64024
+ const data = await client.post("/eval/submit", {
64025
+ flowId: options.flow,
64026
+ recordIds,
64027
+ name: options.name
64028
+ });
64029
+ if (options.json) {
64030
+ printJson(data);
64031
+ } else {
64032
+ console.log(chalk27.green("Eval submitted"));
64033
+ console.log(` Batch ID: ${chalk27.green(data.id)}`);
64034
+ if (data.name) console.log(` Name: ${data.name}`);
64035
+ console.log(` Status: ${data.status}`);
64036
+ console.log(` Records: ${data.totalRecords}`);
64037
+ if (data.groupId) console.log(` Group: ${data.groupId}`);
64034
64038
  }
64039
+ } catch (error51) {
64040
+ const message = error51 instanceof Error ? error51.message : "Unknown error";
64041
+ console.error(chalk27.red("Failed to submit eval"));
64042
+ console.error(chalk27.red(message));
64043
+ process.exit(1);
64035
64044
  }
64036
- } catch (error51) {
64037
- const message = error51 instanceof Error ? error51.message : "Unknown error";
64038
- console.error(chalk27.red("Failed to fetch eval batches"));
64039
- console.error(chalk27.red(message));
64040
- process.exit(1);
64045
+ return;
64041
64046
  }
64042
- return;
64047
+ const App = () => {
64048
+ const [loading, setLoading] = useState36(true);
64049
+ const [success2, setSuccess] = useState36(null);
64050
+ const [error51, setError] = useState36(null);
64051
+ const [resultNode, setResultNode] = useState36(void 0);
64052
+ useEffect30(() => {
64053
+ const run2 = async () => {
64054
+ try {
64055
+ const data = await client.post("/eval/submit", {
64056
+ flowId: options.flow,
64057
+ recordIds,
64058
+ name: options.name
64059
+ });
64060
+ const fields = [{ label: "Batch ID", value: data.id, color: "green" }];
64061
+ if (data.name) fields.push({ label: "Name", value: data.name });
64062
+ fields.push({ label: "Status", value: data.status });
64063
+ fields.push({ label: "Records", value: data.totalRecords });
64064
+ if (data.groupId) fields.push({ label: "Group", value: data.groupId });
64065
+ setResultNode(React19.createElement(EntityCard, { fields }));
64066
+ setSuccess(true);
64067
+ setLoading(false);
64068
+ } catch (err) {
64069
+ setError(err instanceof Error ? err : new Error(String(err)));
64070
+ setSuccess(false);
64071
+ setLoading(false);
64072
+ }
64073
+ };
64074
+ run2();
64075
+ }, []);
64076
+ return React19.createElement(MutationResult, {
64077
+ loading,
64078
+ loadingLabel: `Submitting eval with ${recordIds.length} records...`,
64079
+ success: success2,
64080
+ successMessage: "Eval submitted",
64081
+ error: error51,
64082
+ result: resultNode
64083
+ });
64084
+ };
64085
+ const { waitUntilExit } = render19(React19.createElement(App));
64086
+ await waitUntilExit();
64043
64087
  }
64044
- const App = () => {
64045
- const [loading, setLoading] = useState36(true);
64046
- const [items, setItems] = useState36(null);
64047
- const [total, setTotal] = useState36(void 0);
64048
- const [error51, setError] = useState36(null);
64049
- useEffect30(() => {
64050
- const run2 = async () => {
64051
- try {
64052
- const data = await client.get("/eval/batches", params);
64053
- setItems(data.data ?? []);
64054
- setTotal(getTotalCount(data.pagination));
64055
- setLoading(false);
64056
- } catch (err) {
64057
- setError(err instanceof Error ? err : new Error(String(err)));
64058
- setLoading(false);
64088
+ );
64089
+ evalCommand.command("list").description("List eval batches").option("--flow <id>", "Filter by flow ID").option("--limit <n>", "Limit results", "20").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(
64090
+ async (options) => {
64091
+ const apiKey = await ensureAuth();
64092
+ if (!apiKey) return;
64093
+ const client = createCliClient(apiKey);
64094
+ const params = { limit: options.limit };
64095
+ if (options.flow) params.flowId = options.flow;
64096
+ if (!isTTY(options) || options.json) {
64097
+ try {
64098
+ const data = await client.get("/eval/batches", params);
64099
+ if (options.json) {
64100
+ printJson(data);
64101
+ } else {
64102
+ const batches = data.data ?? [];
64103
+ if (batches.length === 0) {
64104
+ console.log(chalk27.gray("No eval batches found"));
64105
+ return;
64106
+ }
64107
+ console.log(chalk27.cyan("Eval Batches:"));
64108
+ for (const batch of batches) {
64109
+ const name = batch.name || batch.id;
64110
+ const progress = batch.totalRecords ? `${batch.completedRecords ?? 0}/${batch.totalRecords}` : "";
64111
+ const statusColor = batch.status === "completed" ? "green" : "yellow";
64112
+ console.log(
64113
+ ` ${chalk27.green(batch.id)} ${name} ${chalk27[statusColor](`[${batch.status}]`)} ${chalk27.gray(progress)}`
64114
+ );
64115
+ }
64116
+ const total = getTotalCount(data.pagination);
64117
+ if (total !== void 0) {
64118
+ console.log(chalk27.dim(`
64119
+ Total: ${total} batches`));
64120
+ }
64059
64121
  }
64060
- };
64061
- run2();
64062
- }, []);
64063
- return React19.createElement(DataList, {
64064
- title: "Eval Batches",
64065
- items,
64066
- error: error51,
64067
- loading,
64068
- total,
64069
- emptyMessage: "No eval batches found",
64070
- renderCard: (item) => {
64071
- const b = item;
64072
- const name = b.name || b.id;
64073
- const progress = b.totalRecords ? `${b.completedRecords ?? 0}/${b.totalRecords}` : "";
64074
- const statusColor = b.status === "completed" ? "green" : "yellow";
64075
- return React19.createElement(
64076
- Text34,
64077
- { color: statusColor },
64078
- ` ${b.id} ${name} [${b.status}] ${progress}`
64079
- );
64122
+ } catch (error51) {
64123
+ const message = error51 instanceof Error ? error51.message : "Unknown error";
64124
+ console.error(chalk27.red("Failed to fetch eval batches"));
64125
+ console.error(chalk27.red(message));
64126
+ process.exit(1);
64080
64127
  }
64081
- });
64082
- };
64083
- const { waitUntilExit } = render19(React19.createElement(App));
64084
- await waitUntilExit();
64085
- });
64128
+ return;
64129
+ }
64130
+ const App = () => {
64131
+ const [loading, setLoading] = useState36(true);
64132
+ const [items, setItems] = useState36(null);
64133
+ const [total, setTotal] = useState36(void 0);
64134
+ const [error51, setError] = useState36(null);
64135
+ useEffect30(() => {
64136
+ const run2 = async () => {
64137
+ try {
64138
+ const data = await client.get("/eval/batches", params);
64139
+ setItems(data.data ?? []);
64140
+ setTotal(getTotalCount(data.pagination));
64141
+ setLoading(false);
64142
+ } catch (err) {
64143
+ setError(err instanceof Error ? err : new Error(String(err)));
64144
+ setLoading(false);
64145
+ }
64146
+ };
64147
+ run2();
64148
+ }, []);
64149
+ return React19.createElement(DataList, {
64150
+ title: "Eval Batches",
64151
+ items,
64152
+ error: error51,
64153
+ loading,
64154
+ total,
64155
+ emptyMessage: "No eval batches found",
64156
+ renderCard: (item) => {
64157
+ const b = item;
64158
+ const name = b.name || b.id;
64159
+ const progress = b.totalRecords ? `${b.completedRecords ?? 0}/${b.totalRecords}` : "";
64160
+ const statusColor = b.status === "completed" ? "green" : "yellow";
64161
+ return React19.createElement(
64162
+ Text34,
64163
+ { color: statusColor },
64164
+ ` ${b.id} ${name} [${b.status}] ${progress}`
64165
+ );
64166
+ }
64167
+ });
64168
+ };
64169
+ const { waitUntilExit } = render19(React19.createElement(App));
64170
+ await waitUntilExit();
64171
+ }
64172
+ );
64086
64173
  evalCommand.command("results <id>").description("Get eval batch results").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (id, options) => {
64087
64174
  const apiKey = await ensureAuth();
64088
64175
  if (!apiKey) return;
64089
64176
  const client = createCliClient(apiKey);
64090
64177
  if (!isTTY(options) || options.json) {
64091
64178
  try {
64092
- const data = await client.get(`/eval/${id}/results`);
64179
+ const data = await client.get(
64180
+ `/eval/${id}/results`
64181
+ );
64093
64182
  if (options.json) {
64094
64183
  printJson(data);
64095
64184
  } else {
64096
64185
  if (data.batch) {
64097
64186
  console.log(chalk27.cyan(`Eval: ${data.batch.name || data.batch.id}`));
64098
64187
  console.log(` Status: ${data.batch.status}`);
64099
- console.log(` Progress: ${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`);
64188
+ console.log(
64189
+ ` Progress: ${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`
64190
+ );
64100
64191
  console.log();
64101
64192
  }
64102
64193
  const results = data.data ?? [];
@@ -64129,19 +64220,28 @@ evalCommand.command("results <id>").description("Get eval batch results").option
64129
64220
  useEffect30(() => {
64130
64221
  const run2 = async () => {
64131
64222
  try {
64132
- const data = await client.get(`/eval/${id}/results`);
64223
+ const data = await client.get(
64224
+ `/eval/${id}/results`
64225
+ );
64133
64226
  const results = data.data ?? [];
64134
64227
  const fields = [];
64135
64228
  if (data.batch) {
64136
64229
  fields.push({ label: "Eval", value: data.batch.name || data.batch.id });
64137
64230
  fields.push({ label: "Status", value: data.batch.status });
64138
- fields.push({ label: "Progress", value: `${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}` });
64231
+ fields.push({
64232
+ label: "Progress",
64233
+ value: `${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`
64234
+ });
64139
64235
  }
64140
64236
  fields.push({ label: "Results", value: results.length });
64141
64237
  if (results.length > 0) {
64142
64238
  const completed = results.filter((r) => r.status === "completed").length;
64143
64239
  const avgScore = results.filter((r) => r.score !== void 0).reduce((sum, r) => sum + (r.score ?? 0), 0) / (results.filter((r) => r.score !== void 0).length || 1);
64144
- fields.push({ label: "Completed", value: `${completed}/${results.length}`, color: "green" });
64240
+ fields.push({
64241
+ label: "Completed",
64242
+ value: `${completed}/${results.length}`,
64243
+ color: "green"
64244
+ });
64145
64245
  if (results.some((r) => r.score !== void 0)) {
64146
64246
  fields.push({ label: "Avg Score", value: avgScore.toFixed(2) });
64147
64247
  }
@@ -64237,6 +64337,13 @@ function printSuiteResult(rootDir, outcome) {
64237
64337
  if (testCase.errored) {
64238
64338
  console.log(chalk27.red(` \u2717 errored: ${testCase.outputExcerpt.slice(0, 200)}`));
64239
64339
  }
64340
+ } else {
64341
+ for (const outcomeItem of testCase.outcomes.filter(
64342
+ (o) => !o.passed && o.severity === "soft"
64343
+ )) {
64344
+ const reason = outcomeItem.reasoning ? `: ${outcomeItem.reasoning}` : "";
64345
+ console.log(chalk27.yellow(` \u26A0 soft ${outcomeItem.kind}${reason}`));
64346
+ }
64240
64347
  }
64241
64348
  }
64242
64349
  }
@@ -64251,7 +64358,12 @@ function toJUnitSuite(outcome) {
64251
64358
  }))
64252
64359
  };
64253
64360
  }
64254
- evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval suites (**/*.eval.ts) as a CI gate (exit 0 pass / 1 fail / 2 config)").option("--strict", "Fail on soft-threshold misses too (no-op until severity lands)").option("--virtual", "Run inline without persisting a suite/batch to the dashboard").option("--junit <path>", "Write JUnit XML results to <path>").option("--url <api>", "Override the API base URL (e.g. staging)").option("--cwd <dir>", "Directory to discover *.eval.ts under (default: current directory)").action(
64361
+ evalCommand.command("run [idOrDirPrefix]").description(
64362
+ "Run code-colocated eval suites (**/*.eval.ts) as a CI gate (exit 0 pass / 1 fail / 2 config)"
64363
+ ).option(
64364
+ "--strict",
64365
+ "Fail the exit code on soft grader misses too (default: soft misses are reported but do not fail)"
64366
+ ).option("--virtual", "Run inline without persisting a suite/batch to the dashboard").option("--junit <path>", "Write JUnit XML results to <path>").option("--url <api>", "Override the API base URL (e.g. staging)").option("--cwd <dir>", "Directory to discover *.eval.ts under (default: current directory)").action(
64255
64367
  async (idOrDirPrefix, options) => {
64256
64368
  const apiKey = await ensureAuth();
64257
64369
  if (!apiKey) {
@@ -64289,11 +64401,6 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
64289
64401
  } else {
64290
64402
  loaded = await loadAll(allFiles);
64291
64403
  }
64292
- if (options.strict) {
64293
- console.log(
64294
- chalk27.gray("Note: --strict has no effect yet (grader severity lands in a later increment).")
64295
- );
64296
- }
64297
64404
  const client = createCliClient(apiKey, options.url);
64298
64405
  const outcomes = [];
64299
64406
  for (const { file: file2, def } of loaded) {
@@ -64301,16 +64408,25 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
64301
64408
  try {
64302
64409
  let result;
64303
64410
  if (runVirtual) {
64304
- result = await client.post("/eval/run", { definition: def });
64305
- } else {
64306
- const ensured = await client.post("/eval/ensure", {
64307
- name: def.name,
64308
- definition: def
64411
+ result = await client.post("/eval/run", {
64412
+ definition: def,
64413
+ strict: options.strict ?? false
64309
64414
  });
64415
+ } else {
64416
+ const ensured = await client.post(
64417
+ "/eval/ensure",
64418
+ {
64419
+ name: def.name,
64420
+ definition: def
64421
+ }
64422
+ );
64310
64423
  if (!ensured.suiteId) {
64311
64424
  throw new Error(`ensure did not return a suiteId (result: ${ensured.result})`);
64312
64425
  }
64313
- result = await client.post("/eval/run", { suiteId: ensured.suiteId });
64426
+ result = await client.post("/eval/run", {
64427
+ suiteId: ensured.suiteId,
64428
+ strict: options.strict ?? false
64429
+ });
64314
64430
  }
64315
64431
  outcomes.push({ file: file2, definition: def, result });
64316
64432
  printSuiteResult(rootDir, { file: file2, definition: def, result });
@@ -64328,7 +64444,9 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
64328
64444
  writeFileSync6(outPath, xml, "utf-8");
64329
64445
  console.log(chalk27.gray(`JUnit results written to ${options.junit}`));
64330
64446
  } catch (error51) {
64331
- failConfig(`Failed to write JUnit report: ${error51 instanceof Error ? error51.message : String(error51)}`);
64447
+ failConfig(
64448
+ `Failed to write JUnit report: ${error51 instanceof Error ? error51.message : String(error51)}`
64449
+ );
64332
64450
  }
64333
64451
  }
64334
64452
  const failedSuites = outcomes.filter((o) => !o.result.passed);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@runtypelabs/cli",
3
- "version": "2.23.0",
3
+ "version": "2.24.0",
4
4
  "description": "Command-line interface for Runtype AI platform",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -24,7 +24,7 @@
24
24
  "rosie-skills": "0.8.1",
25
25
  "yaml": "^2.9.0",
26
26
  "@runtypelabs/ink-components": "0.3.4",
27
- "@runtypelabs/sdk": "5.4.0",
27
+ "@runtypelabs/sdk": "5.6.0",
28
28
  "@runtypelabs/terminal-animations": "0.2.1"
29
29
  },
30
30
  "devDependencies": {
@@ -39,7 +39,7 @@
39
39
  "tsx": "^4.7.1",
40
40
  "typescript": "^6.0.3",
41
41
  "vitest": "^4.1.0",
42
- "@runtypelabs/shared": "1.42.4"
42
+ "@runtypelabs/shared": "1.42.6"
43
43
  },
44
44
  "engines": {
45
45
  "node": ">=22.0.0"