npm - @runtypelabs/cli - Versions diffs - 2.23.0 → 2.24.0 - Mend

@runtypelabs/cli 2.23.0 → 2.24.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (2)

package/dist/index.js +303 -185
package/package.json +3 -3

package/dist/index.js CHANGED Viewed

@@ -20404,6 +20404,41 @@ function validateUpsertRecordSourceShape(flowSteps, buckets) {
     }
   }
 }
+function validateStoreVectorSource(flowSteps, buckets, declaredFlowInputs) {
+  const declaredVariables = collectDeclaredFlowInputs(flowSteps, declaredFlowInputs);
+  for (const [stepIndex, step] of flowSteps.entries()) {
+    if (step.enabled === false) continue;
+    if (!isObjectRecord(step.config)) continue;
+    const config3 = step.config;
+    if (step.type === "store-vector") {
+      const rawSource = typeof config3.vectorsSource === "string" ? config3.vectorsSource.trim() : "";
+      if (rawSource) {
+        const templateMatch = rawSource.match(/^\s*\{\{\s*([^}]+?)\s*\}\}\s*$/);
+        const reference = (templateMatch?.[1] ?? rawSource).trim();
+        const classification = classifyVariableReference(reference);
+        if (classification.namespace === "plain" || classification.namespace === "flow") {
+          const baseName = classification.baseName;
+          const rootVariable = baseName.split(".")[0] || "";
+          if (rootVariable && !declaredVariables.has(rootVariable) && !declaredVariables.has(baseName)) {
+            addIssue(
+              "warning",
+              {
+                code: "STORE_VECTOR_SOURCE_UNRESOLVED",
+                message: `Vectors source "${rawSource}" references variable "${rootVariable}", but no earlier step declares an output variable "${rootVariable}" (and it is not a flow input). This will fail at runtime with "Could not resolve vectors". Set a prior step's outputVariable to "${rootVariable}" (typically a generate-embedding step), or declare "${rootVariable}" as a flow input.`,
+                path: `flowSteps[${stepIndex}].config.vectorsSource`,
+                step: { index: stepIndex, name: step.name, type: step.type },
+                details: { vectorsSource: rawSource, rootVariable }
+              },
+              buckets
+            );
+          }
+        }
+      }
+    }
+    const outputVar = getStepOutputVariable(step);
+    if (outputVar) declaredVariables.add(outputVar);
+  }
+}
 function checkConditionExpression(expr, path19, stepRef, buckets) {
   if (typeof expr !== "string" || !expr.includes("{{")) return;
   const match = UNQUOTED_TEMPLATE_BEFORE_OP.exec(expr) || UNQUOTED_TEMPLATE_AFTER_OP.exec(expr);
@@ -21061,6 +21096,7 @@ function collectFlowStructureIssues(flowData, deps, buckets) {
     deps.declaredFlowInputs
   );
   validateUpsertRecordSourceShape(flowData.flowSteps, buckets);
+  validateStoreVectorSource(flowData.flowSteps, buckets, deps.declaredFlowInputs);
   validateConditionExpressions(flowData.flowSteps, buckets, conditionalStepsExceedingDepth);
   return { pendingChecks };
 }
@@ -37555,39 +37591,87 @@ var BUILT_IN_GRADER_IDS = [
   "rightTone",
   "safeToSend"
 ];
+var graderSeveritySchema = external_exports.enum(["gate", "soft"]);
+var severityFields = { severity: graderSeveritySchema.optional() };
 var checkGraderSchema = external_exports.discriminatedUnion("kind", [
   external_exports.object({
     kind: external_exports.literal("contains"),
     value: external_exports.string(),
-    caseSensitive: external_exports.boolean().optional()
+    caseSensitive: external_exports.boolean().optional(),
+    ...severityFields
   }),
   external_exports.object({
     kind: external_exports.literal("not_contains"),
     value: external_exports.string(),
-    caseSensitive: external_exports.boolean().optional()
+    caseSensitive: external_exports.boolean().optional(),
+    ...severityFields
   }),
   // Exact/normalized match against `case.expected.text`.
-  external_exports.object({ kind: external_exports.literal("matches_expected") }),
+  external_exports.object({ kind: external_exports.literal("matches_expected"), ...severityFields }),
   external_exports.object({
     kind: external_exports.literal("regex"),
     pattern: external_exports.string(),
-    flags: external_exports.string().optional()
+    flags: external_exports.string().optional(),
+    ...severityFields
   }),
-  external_exports.object({ kind: external_exports.literal("valid_json") }),
+  external_exports.object({ kind: external_exports.literal("valid_json"), ...severityFields }),
   external_exports.object({
     kind: external_exports.literal("json_field"),
     path: external_exports.string(),
     equals: external_exports.unknown().optional(),
-    exists: external_exports.boolean().optional()
+    exists: external_exports.boolean().optional(),
+    ...severityFields
   }),
   external_exports.object({
     kind: external_exports.literal("length"),
     minChars: external_exports.number().int().nonnegative().optional(),
-    maxChars: external_exports.number().int().nonnegative().optional()
+    maxChars: external_exports.number().int().nonnegative().optional(),
+    ...severityFields
   }),
-  external_exports.object({ kind: external_exports.literal("latency"), maxMs: external_exports.number().int().positive() }),
+  external_exports.object({ kind: external_exports.literal("latency"), maxMs: external_exports.number().int().positive(), ...severityFields }),
   // Today's implicit "success" made explicit: the case produced output without erroring.
-  external_exports.object({ kind: external_exports.literal("no_error") })
+  external_exports.object({ kind: external_exports.literal("no_error"), ...severityFields }),
+  // -------------------------------------------------------------------------
+  // Trace checks — deterministic, free, pure assertions over the run's
+  // EXECUTION TRACE (which tools/steps ran, in what order, whether it
+  // completed, what it cost) rather than its final output text. Scored by the
+  // same pure `runCheck` engine against `GradingTarget.trace`. These are the
+  // assertions a string/JSON check can't express (planning doc §3.1).
+  // -------------------------------------------------------------------------
+  // At least one tool call named `name` happened. Optional filters narrow the
+  // match: `input`/`output` deep-equal a call's resolved input/result,
+  // `isError` matches a call's error flag, and `times` asserts the matching
+  // count EXACTLY (omit `times` for "at least once").
+  external_exports.object({
+    kind: external_exports.literal("called_tool"),
+    name: external_exports.string().min(1),
+    input: external_exports.unknown().optional(),
+    output: external_exports.unknown().optional(),
+    isError: external_exports.boolean().optional(),
+    times: external_exports.number().int().positive().optional(),
+    ...severityFields
+  }),
+  // No tool named `name` was called.
+  external_exports.object({ kind: external_exports.literal("not_called_tool"), name: external_exports.string().min(1), ...severityFields }),
+  // The run made no tool calls at all.
+  external_exports.object({ kind: external_exports.literal("used_no_tools"), ...severityFields }),
+  // The run made at most `max` tool calls.
+  external_exports.object({
+    kind: external_exports.literal("max_tool_calls"),
+    max: external_exports.number().int().nonnegative(),
+    ...severityFields
+  }),
+  // `tools` appears as an ordered SUBSEQUENCE of the tool-call names (other
+  // calls may interleave; relative order of the listed tools must hold).
+  external_exports.object({ kind: external_exports.literal("tool_order"), tools: external_exports.array(external_exports.string()).min(1), ...severityFields }),
+  // A step named (or typed) `name` ran.
+  external_exports.object({ kind: external_exports.literal("ran_step"), name: external_exports.string().min(1), ...severityFields }),
+  // `steps` appears as an ordered SUBSEQUENCE of the steps that ran.
+  external_exports.object({ kind: external_exports.literal("step_order"), steps: external_exports.array(external_exports.string()).min(1), ...severityFields }),
+  // The run completed (finished without erroring and was not left paused).
+  external_exports.object({ kind: external_exports.literal("completed"), ...severityFields }),
+  // Total run cost was within `maxUsd` (US dollars).
+  external_exports.object({ kind: external_exports.literal("cost"), maxUsd: external_exports.number().positive(), ...severityFields })
 ]);
 var aiGraderSchema = external_exports.object({
   kind: external_exports.literal("ai"),
@@ -37599,7 +37683,8 @@ var aiGraderSchema = external_exports.object({
   /** Defaults to a cheap routed model (e.g. claude-haiku-4-5) at execution time. */
   model: external_exports.string().optional(),
   /** Pass cutoff for the 1-5 scale. */
-  threshold: external_exports.number().min(1).max(5).optional()
+  threshold: external_exports.number().min(1).max(5).optional(),
+  ...severityFields
 });
 var graderConfigSchema = external_exports.union([checkGraderSchema, aiGraderSchema]);
 var gradersSchema = external_exports.array(graderConfigSchema);
@@ -42043,7 +42128,7 @@ var FLOW_STEP_TYPE_METADATA = {
     configHints: "provider, query, maxResults, outputVariable"
   },
   "generate-embedding": {
-    description: "Create a vector embedding from text using an embedding model.",
+    description: "Create a vector embedding from text using an embedding model. Writes { embedding, model, dimensions, textLength, metadata } to outputVariable; feed that variable into a store-vector step via vectorsSource.",
     category: "vector",
     isPrompt: false,
     configHints: "inputSource, text, embeddingModel, outputVariable"
@@ -42055,10 +42140,10 @@ var FLOW_STEP_TYPE_METADATA = {
     configHints: "query, limit, threshold, outputVariable"
   },
   "store-vector": {
-    description: "Store vector embeddings in a vector database.",
+    description: "Store vector embeddings in a vector database (pgvector, Weaviate, or Vectorize). vectorsSource accepts a bare variable name, a dot-path, or a {{var}} template, and must resolve to a number[] or an { embedding: number[] } object (the output of a prior generate-embedding step). The vector length must match the target index dimension.",
     category: "vector",
     isPrompt: false,
-    configHints: "vectorsSource, destination, outputVariable"
+    configHints: "vectorsSource, destination, idTemplate, outputVariable"
   },
   crawl: {
     description: "Crawl a website and extract content from pages.",
@@ -63918,185 +64003,191 @@ function buildJUnitXml(suites) {
 // src/commands/eval.ts
 var evalCommand = new Command20("eval").description("Manage evaluations");
-evalCommand.command("submit").description("Submit an eval batch").requiredOption("-f, --flow <id>", "Flow ID to evaluate").requiredOption("-r, --records <file>", "JSON file with record IDs").option("-n, --name <name>", "Eval batch name").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (options) => {
-  const apiKey = await ensureAuth();
-  if (!apiKey) return;
-  let recordIds;
-  try {
-    const content = readFileSync16(options.records, "utf-8");
-    const parsed = JSON.parse(content);
-    recordIds = Array.isArray(parsed) ? parsed : parsed.recordIds || parsed.records || [];
-  } catch (error51) {
-    const message = error51 instanceof Error ? error51.message : "Unknown error";
-    console.error(chalk27.red(`Failed to read records file: ${message}`));
-    process.exit(1);
-    return;
-  }
-  const client = createCliClient(apiKey);
-  if (!isTTY(options) || options.json) {
+evalCommand.command("submit").description("Submit an eval batch").requiredOption("-f, --flow <id>", "Flow ID to evaluate").requiredOption("-r, --records <file>", "JSON file with record IDs").option("-n, --name <name>", "Eval batch name").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(
+  async (options) => {
+    const apiKey = await ensureAuth();
+    if (!apiKey) return;
+    let recordIds;
     try {
-      const data = await client.post("/eval/submit", {
-        flowId: options.flow,
-        recordIds,
-        name: options.name
-      });
-      if (options.json) {
-        printJson(data);
-      } else {
-        console.log(chalk27.green("Eval submitted"));
-        console.log(`  Batch ID: ${chalk27.green(data.id)}`);
-        if (data.name) console.log(`  Name: ${data.name}`);
-        console.log(`  Status: ${data.status}`);
-        console.log(`  Records: ${data.totalRecords}`);
-        if (data.groupId) console.log(`  Group: ${data.groupId}`);
-      }
+      const content = readFileSync16(options.records, "utf-8");
+      const parsed = JSON.parse(content);
+      recordIds = Array.isArray(parsed) ? parsed : parsed.recordIds || parsed.records || [];
     } catch (error51) {
       const message = error51 instanceof Error ? error51.message : "Unknown error";
-      console.error(chalk27.red("Failed to submit eval"));
-      console.error(chalk27.red(message));
+      console.error(chalk27.red(`Failed to read records file: ${message}`));
       process.exit(1);
+      return;
     }
-    return;
-  }
-  const App = () => {
-    const [loading, setLoading] = useState36(true);
-    const [success2, setSuccess] = useState36(null);
-    const [error51, setError] = useState36(null);
-    const [resultNode, setResultNode] = useState36(void 0);
-    useEffect30(() => {
-      const run2 = async () => {
-        try {
-          const data = await client.post("/eval/submit", {
-            flowId: options.flow,
-            recordIds,
-            name: options.name
-          });
-          const fields = [
-            { label: "Batch ID", value: data.id, color: "green" }
-          ];
-          if (data.name) fields.push({ label: "Name", value: data.name });
-          fields.push({ label: "Status", value: data.status });
-          fields.push({ label: "Records", value: data.totalRecords });
-          if (data.groupId) fields.push({ label: "Group", value: data.groupId });
-          setResultNode(React19.createElement(EntityCard, { fields }));
-          setSuccess(true);
-          setLoading(false);
-        } catch (err) {
-          setError(err instanceof Error ? err : new Error(String(err)));
-          setSuccess(false);
-          setLoading(false);
-        }
-      };
-      run2();
-    }, []);
-    return React19.createElement(MutationResult, {
-      loading,
-      loadingLabel: `Submitting eval with ${recordIds.length} records...`,
-      success: success2,
-      successMessage: "Eval submitted",
-      error: error51,
-      result: resultNode
-    });
-  };
-  const { waitUntilExit } = render19(React19.createElement(App));
-  await waitUntilExit();
-});
-evalCommand.command("list").description("List eval batches").option("--flow <id>", "Filter by flow ID").option("--limit <n>", "Limit results", "20").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (options) => {
-  const apiKey = await ensureAuth();
-  if (!apiKey) return;
-  const client = createCliClient(apiKey);
-  const params = { limit: options.limit };
-  if (options.flow) params.flowId = options.flow;
-  if (!isTTY(options) || options.json) {
-    try {
-      const data = await client.get("/eval/batches", params);
-      if (options.json) {
-        printJson(data);
-      } else {
-        const batches = data.data ?? [];
-        if (batches.length === 0) {
-          console.log(chalk27.gray("No eval batches found"));
-          return;
-        }
-        console.log(chalk27.cyan("Eval Batches:"));
-        for (const batch of batches) {
-          const name = batch.name || batch.id;
-          const progress = batch.totalRecords ? `${batch.completedRecords ?? 0}/${batch.totalRecords}` : "";
-          const statusColor = batch.status === "completed" ? "green" : "yellow";
-          console.log(
-            `  ${chalk27.green(batch.id)} ${name} ${chalk27[statusColor](`[${batch.status}]`)} ${chalk27.gray(progress)}`
-          );
-        }
-        const total = getTotalCount(data.pagination);
-        if (total !== void 0) {
-          console.log(chalk27.dim(`
-  Total: ${total} batches`));
+    const client = createCliClient(apiKey);
+    if (!isTTY(options) || options.json) {
+      try {
+        const data = await client.post("/eval/submit", {
+          flowId: options.flow,
+          recordIds,
+          name: options.name
+        });
+        if (options.json) {
+          printJson(data);
+        } else {
+          console.log(chalk27.green("Eval submitted"));
+          console.log(`  Batch ID: ${chalk27.green(data.id)}`);
+          if (data.name) console.log(`  Name: ${data.name}`);
+          console.log(`  Status: ${data.status}`);
+          console.log(`  Records: ${data.totalRecords}`);
+          if (data.groupId) console.log(`  Group: ${data.groupId}`);
         }
+      } catch (error51) {
+        const message = error51 instanceof Error ? error51.message : "Unknown error";
+        console.error(chalk27.red("Failed to submit eval"));
+        console.error(chalk27.red(message));
+        process.exit(1);
       }
-    } catch (error51) {
-      const message = error51 instanceof Error ? error51.message : "Unknown error";
-      console.error(chalk27.red("Failed to fetch eval batches"));
-      console.error(chalk27.red(message));
-      process.exit(1);
+      return;
     }
-    return;
+    const App = () => {
+      const [loading, setLoading] = useState36(true);
+      const [success2, setSuccess] = useState36(null);
+      const [error51, setError] = useState36(null);
+      const [resultNode, setResultNode] = useState36(void 0);
+      useEffect30(() => {
+        const run2 = async () => {
+          try {
+            const data = await client.post("/eval/submit", {
+              flowId: options.flow,
+              recordIds,
+              name: options.name
+            });
+            const fields = [{ label: "Batch ID", value: data.id, color: "green" }];
+            if (data.name) fields.push({ label: "Name", value: data.name });
+            fields.push({ label: "Status", value: data.status });
+            fields.push({ label: "Records", value: data.totalRecords });
+            if (data.groupId) fields.push({ label: "Group", value: data.groupId });
+            setResultNode(React19.createElement(EntityCard, { fields }));
+            setSuccess(true);
+            setLoading(false);
+          } catch (err) {
+            setError(err instanceof Error ? err : new Error(String(err)));
+            setSuccess(false);
+            setLoading(false);
+          }
+        };
+        run2();
+      }, []);
+      return React19.createElement(MutationResult, {
+        loading,
+        loadingLabel: `Submitting eval with ${recordIds.length} records...`,
+        success: success2,
+        successMessage: "Eval submitted",
+        error: error51,
+        result: resultNode
+      });
+    };
+    const { waitUntilExit } = render19(React19.createElement(App));
+    await waitUntilExit();
   }
-  const App = () => {
-    const [loading, setLoading] = useState36(true);
-    const [items, setItems] = useState36(null);
-    const [total, setTotal] = useState36(void 0);
-    const [error51, setError] = useState36(null);
-    useEffect30(() => {
-      const run2 = async () => {
-        try {
-          const data = await client.get("/eval/batches", params);
-          setItems(data.data ?? []);
-          setTotal(getTotalCount(data.pagination));
-          setLoading(false);
-        } catch (err) {
-          setError(err instanceof Error ? err : new Error(String(err)));
-          setLoading(false);
+);
+evalCommand.command("list").description("List eval batches").option("--flow <id>", "Filter by flow ID").option("--limit <n>", "Limit results", "20").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(
+  async (options) => {
+    const apiKey = await ensureAuth();
+    if (!apiKey) return;
+    const client = createCliClient(apiKey);
+    const params = { limit: options.limit };
+    if (options.flow) params.flowId = options.flow;
+    if (!isTTY(options) || options.json) {
+      try {
+        const data = await client.get("/eval/batches", params);
+        if (options.json) {
+          printJson(data);
+        } else {
+          const batches = data.data ?? [];
+          if (batches.length === 0) {
+            console.log(chalk27.gray("No eval batches found"));
+            return;
+          }
+          console.log(chalk27.cyan("Eval Batches:"));
+          for (const batch of batches) {
+            const name = batch.name || batch.id;
+            const progress = batch.totalRecords ? `${batch.completedRecords ?? 0}/${batch.totalRecords}` : "";
+            const statusColor = batch.status === "completed" ? "green" : "yellow";
+            console.log(
+              `  ${chalk27.green(batch.id)} ${name} ${chalk27[statusColor](`[${batch.status}]`)} ${chalk27.gray(progress)}`
+            );
+          }
+          const total = getTotalCount(data.pagination);
+          if (total !== void 0) {
+            console.log(chalk27.dim(`
+  Total: ${total} batches`));
+          }
         }
-      };
-      run2();
-    }, []);
-    return React19.createElement(DataList, {
-      title: "Eval Batches",
-      items,
-      error: error51,
-      loading,
-      total,
-      emptyMessage: "No eval batches found",
-      renderCard: (item) => {
-        const b = item;
-        const name = b.name || b.id;
-        const progress = b.totalRecords ? `${b.completedRecords ?? 0}/${b.totalRecords}` : "";
-        const statusColor = b.status === "completed" ? "green" : "yellow";
-        return React19.createElement(
-          Text34,
-          { color: statusColor },
-          `  ${b.id} ${name} [${b.status}] ${progress}`
-        );
+      } catch (error51) {
+        const message = error51 instanceof Error ? error51.message : "Unknown error";
+        console.error(chalk27.red("Failed to fetch eval batches"));
+        console.error(chalk27.red(message));
+        process.exit(1);
       }
-    });
-  };
-  const { waitUntilExit } = render19(React19.createElement(App));
-  await waitUntilExit();
-});
+      return;
+    }
+    const App = () => {
+      const [loading, setLoading] = useState36(true);
+      const [items, setItems] = useState36(null);
+      const [total, setTotal] = useState36(void 0);
+      const [error51, setError] = useState36(null);
+      useEffect30(() => {
+        const run2 = async () => {
+          try {
+            const data = await client.get("/eval/batches", params);
+            setItems(data.data ?? []);
+            setTotal(getTotalCount(data.pagination));
+            setLoading(false);
+          } catch (err) {
+            setError(err instanceof Error ? err : new Error(String(err)));
+            setLoading(false);
+          }
+        };
+        run2();
+      }, []);
+      return React19.createElement(DataList, {
+        title: "Eval Batches",
+        items,
+        error: error51,
+        loading,
+        total,
+        emptyMessage: "No eval batches found",
+        renderCard: (item) => {
+          const b = item;
+          const name = b.name || b.id;
+          const progress = b.totalRecords ? `${b.completedRecords ?? 0}/${b.totalRecords}` : "";
+          const statusColor = b.status === "completed" ? "green" : "yellow";
+          return React19.createElement(
+            Text34,
+            { color: statusColor },
+            `  ${b.id} ${name} [${b.status}] ${progress}`
+          );
+        }
+      });
+    };
+    const { waitUntilExit } = render19(React19.createElement(App));
+    await waitUntilExit();
+  }
+);
 evalCommand.command("results <id>").description("Get eval batch results").option("--json", "Output as JSON").option("--tty", "Force TTY mode").option("--no-tty", "Force non-TTY mode").action(async (id, options) => {
   const apiKey = await ensureAuth();
   if (!apiKey) return;
   const client = createCliClient(apiKey);
   if (!isTTY(options) || options.json) {
     try {
-      const data = await client.get(`/eval/${id}/results`);
+      const data = await client.get(
+        `/eval/${id}/results`
+      );
       if (options.json) {
         printJson(data);
       } else {
         if (data.batch) {
           console.log(chalk27.cyan(`Eval: ${data.batch.name || data.batch.id}`));
           console.log(`  Status: ${data.batch.status}`);
-          console.log(`  Progress: ${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`);
+          console.log(
+            `  Progress: ${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`
+          );
           console.log();
         }
         const results = data.data ?? [];
@@ -64129,19 +64220,28 @@ evalCommand.command("results <id>").description("Get eval batch results").option
     useEffect30(() => {
       const run2 = async () => {
         try {
-          const data = await client.get(`/eval/${id}/results`);
+          const data = await client.get(
+            `/eval/${id}/results`
+          );
           const results = data.data ?? [];
           const fields = [];
           if (data.batch) {
             fields.push({ label: "Eval", value: data.batch.name || data.batch.id });
             fields.push({ label: "Status", value: data.batch.status });
-            fields.push({ label: "Progress", value: `${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}` });
+            fields.push({
+              label: "Progress",
+              value: `${data.batch.completedRecords ?? 0}/${data.batch.totalRecords ?? 0}`
+            });
           }
           fields.push({ label: "Results", value: results.length });
           if (results.length > 0) {
             const completed = results.filter((r) => r.status === "completed").length;
             const avgScore = results.filter((r) => r.score !== void 0).reduce((sum, r) => sum + (r.score ?? 0), 0) / (results.filter((r) => r.score !== void 0).length || 1);
-            fields.push({ label: "Completed", value: `${completed}/${results.length}`, color: "green" });
+            fields.push({
+              label: "Completed",
+              value: `${completed}/${results.length}`,
+              color: "green"
+            });
             if (results.some((r) => r.score !== void 0)) {
               fields.push({ label: "Avg Score", value: avgScore.toFixed(2) });
             }
@@ -64237,6 +64337,13 @@ function printSuiteResult(rootDir, outcome) {
       if (testCase.errored) {
         console.log(chalk27.red(`      \u2717 errored: ${testCase.outputExcerpt.slice(0, 200)}`));
       }
+    } else {
+      for (const outcomeItem of testCase.outcomes.filter(
+        (o) => !o.passed && o.severity === "soft"
+      )) {
+        const reason = outcomeItem.reasoning ? `: ${outcomeItem.reasoning}` : "";
+        console.log(chalk27.yellow(`      \u26A0 soft ${outcomeItem.kind}${reason}`));
+      }
     }
   }
 }
@@ -64251,7 +64358,12 @@ function toJUnitSuite(outcome) {
     }))
   };
 }
-evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval suites (**/*.eval.ts) as a CI gate (exit 0 pass / 1 fail / 2 config)").option("--strict", "Fail on soft-threshold misses too (no-op until severity lands)").option("--virtual", "Run inline without persisting a suite/batch to the dashboard").option("--junit <path>", "Write JUnit XML results to <path>").option("--url <api>", "Override the API base URL (e.g. staging)").option("--cwd <dir>", "Directory to discover *.eval.ts under (default: current directory)").action(
+evalCommand.command("run [idOrDirPrefix]").description(
+  "Run code-colocated eval suites (**/*.eval.ts) as a CI gate (exit 0 pass / 1 fail / 2 config)"
+).option(
+  "--strict",
+  "Fail the exit code on soft grader misses too (default: soft misses are reported but do not fail)"
+).option("--virtual", "Run inline without persisting a suite/batch to the dashboard").option("--junit <path>", "Write JUnit XML results to <path>").option("--url <api>", "Override the API base URL (e.g. staging)").option("--cwd <dir>", "Directory to discover *.eval.ts under (default: current directory)").action(
   async (idOrDirPrefix, options) => {
     const apiKey = await ensureAuth();
     if (!apiKey) {
@@ -64289,11 +64401,6 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
     } else {
       loaded = await loadAll(allFiles);
     }
-    if (options.strict) {
-      console.log(
-        chalk27.gray("Note: --strict has no effect yet (grader severity lands in a later increment).")
-      );
-    }
     const client = createCliClient(apiKey, options.url);
     const outcomes = [];
     for (const { file: file2, def } of loaded) {
@@ -64301,16 +64408,25 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
       try {
         let result;
         if (runVirtual) {
-          result = await client.post("/eval/run", { definition: def });
-        } else {
-          const ensured = await client.post("/eval/ensure", {
-            name: def.name,
-            definition: def
+          result = await client.post("/eval/run", {
+            definition: def,
+            strict: options.strict ?? false
           });
+        } else {
+          const ensured = await client.post(
+            "/eval/ensure",
+            {
+              name: def.name,
+              definition: def
+            }
+          );
           if (!ensured.suiteId) {
             throw new Error(`ensure did not return a suiteId (result: ${ensured.result})`);
           }
-          result = await client.post("/eval/run", { suiteId: ensured.suiteId });
+          result = await client.post("/eval/run", {
+            suiteId: ensured.suiteId,
+            strict: options.strict ?? false
+          });
         }
         outcomes.push({ file: file2, definition: def, result });
         printSuiteResult(rootDir, { file: file2, definition: def, result });
@@ -64328,7 +64444,9 @@ evalCommand.command("run [idOrDirPrefix]").description("Run code-colocated eval
         writeFileSync6(outPath, xml, "utf-8");
         console.log(chalk27.gray(`JUnit results written to ${options.junit}`));
       } catch (error51) {
-        failConfig(`Failed to write JUnit report: ${error51 instanceof Error ? error51.message : String(error51)}`);
+        failConfig(
+          `Failed to write JUnit report: ${error51 instanceof Error ? error51.message : String(error51)}`
+        );
       }
     }
     const failedSuites = outcomes.filter((o) => !o.result.passed);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@runtypelabs/cli",
-  "version": "2.23.0",
+  "version": "2.24.0",
   "description": "Command-line interface for Runtype AI platform",
   "type": "module",
   "main": "dist/index.js",
@@ -24,7 +24,7 @@
     "rosie-skills": "0.8.1",
     "yaml": "^2.9.0",
     "@runtypelabs/ink-components": "0.3.4",
-    "@runtypelabs/sdk": "5.4.0",
+    "@runtypelabs/sdk": "5.6.0",
     "@runtypelabs/terminal-animations": "0.2.1"
   },
   "devDependencies": {
@@ -39,7 +39,7 @@
     "tsx": "^4.7.1",
     "typescript": "^6.0.3",
     "vitest": "^4.1.0",
-    "@runtypelabs/shared": "1.42.4"
+    "@runtypelabs/shared": "1.42.6"
   },
   "engines": {
     "node": ">=22.0.0"