npm - @wix/eval-assertions - Versions diffs - 0.17.0 → 0.19.0 - Mend

@wix/eval-assertions 0.17.0 → 0.19.0

This diff shows the differences between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +3 -2
package/build/index.js +218 -112
package/build/index.js.map +4 -4
package/build/index.mjs +209 -111
package/build/index.mjs.map +4 -4
package/build/types/evaluators/assertion-evaluator.d.ts +4 -17
package/build/types/evaluators/index.d.ts +3 -2
package/build/types/evaluators/llm-judge-evaluator.d.ts +11 -0
package/build/types/evaluators/tool-called-with-param-evaluator.d.ts +12 -0
package/build/types/index.d.ts +3 -2
package/build/types/tools/index.d.ts +1 -0
package/build/types/tools/read-file-tool.d.ts +10 -0
package/build/types/types/assertions.d.ts +14 -0
package/build/types/types/index.d.ts +1 -1
package/package.json +4 -3

package/README.md CHANGED Viewed

@@ -149,11 +149,12 @@ Optional context for assertions:
 ```typescript
 interface AssertionContext {
   workDir?: string;                           // For build_passed
-  llmConfig?: {                               // For llm_judge
+  llmConfig?: {                               // For llm_judge
     baseUrl: string;
     headers: Record<string, string>;
   };
-  generateTextForLlmJudge?: (options) => Promise<{ text: string }>;  // For testing
+  defaultJudgeModel?: string;                 // Default model for llm_judge
+  model?: LanguageModel;                      // Override model
 }
 ```

package/build/index.js CHANGED Viewed

@@ -1,7 +1,9 @@
 "use strict";
+var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
 var __export = (target, all) => {
   for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
   }
   return to;
 };
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 // src/index.ts
@@ -28,6 +38,7 @@ __export(index_exports, {
   BuildPassedEvaluator: () => BuildPassedEvaluator,
   CostAssertionSchema: () => CostAssertionSchema,
   CostEvaluator: () => CostEvaluator,
+  JudgeResultSchema: () => JudgeResultSchema,
   LLMBreakdownStatsSchema: () => LLMBreakdownStatsSchema,
   LLMStepType: () => LLMStepType,
   LLMTraceSchema: () => LLMTraceSchema,
@@ -40,6 +51,9 @@ __export(index_exports, {
   TimeAssertionSchema: () => TimeAssertionSchema,
   TimeEvaluator: () => TimeEvaluator,
   TokenUsageSchema: () => TokenUsageSchema,
+  ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
+  ToolCalledWithParamEvaluator: () => ToolCalledWithParamEvaluator,
+  createReadFileTool: () => createReadFileTool,
   evaluateAssertions: () => evaluateAssertions,
   formatTraceForJudge: () => formatTraceForJudge,
   getEvaluator: () => getEvaluator,
@@ -57,6 +71,13 @@ var SkillWasCalledAssertionSchema = import_zod.z.object({
   /** Names of the skills that must have been called (matched against trace Skill tool args) */
   skillNames: import_zod.z.array(import_zod.z.string()).min(1)
 });
+var ToolCalledWithParamAssertionSchema = import_zod.z.object({
+  type: import_zod.z.literal("tool_called_with_param"),
+  /** Name of the tool that must have been called */
+  toolName: import_zod.z.string().min(1),
+  /** JSON string of key-value pairs for expected parameters (substring match) */
+  expectedParams: import_zod.z.string().min(1)
+});
 var BuildPassedAssertionSchema = import_zod.z.object({
   type: import_zod.z.literal("build_passed"),
   /** Command to run (default: "yarn build") */
@@ -89,6 +110,7 @@ var TimeAssertionSchema = import_zod.z.object({
 });
 var AssertionSchema = import_zod.z.union([
   SkillWasCalledAssertionSchema,
+  ToolCalledWithParamAssertionSchema,
   BuildPassedAssertionSchema,
   TimeAssertionSchema,
   CostAssertionSchema,
@@ -171,7 +193,7 @@ var AssertionResultSchema = import_zod3.z.object({
 });
 // src/evaluators/index.ts
-var import_crypto6 = require("crypto");
+var import_crypto7 = require("crypto");
 // src/evaluators/skill-was-called-evaluator.ts
 var import_crypto = require("crypto");
@@ -250,15 +272,79 @@ var SkillWasCalledEvaluator = class extends AssertionEvaluator {
   }
 };
-// src/evaluators/build-passed-evaluator.ts
+// src/evaluators/tool-called-with-param-evaluator.ts
 var import_crypto2 = require("crypto");
+var ASSERTION_TYPE = "tool_called_with_param";
+var ASSERTION_NAME = "Tool called with param";
+var containsAll = ({
+  actual,
+  expected
+}) => Object.entries(expected).every(([key, val]) => {
+  const actualVal = actual[key];
+  if (actualVal === null || actualVal === void 0) return false;
+  const actualStr = typeof actualVal === "string" ? actualVal : JSON.stringify(actualVal);
+  return actualStr.includes(String(val));
+});
+var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
+  type = ASSERTION_TYPE;
+  evaluate(assertion, input, _context) {
+    const assertionId = (0, import_crypto2.randomUUID)();
+    const { toolName, expectedParams: expectedParamsStr } = assertion;
+    const buildResult = (status, message, expected2, actual) => ({
+      id: (0, import_crypto2.randomUUID)(),
+      assertionId,
+      assertionType: ASSERTION_TYPE,
+      assertionName: ASSERTION_NAME,
+      status,
+      message,
+      expected: expected2,
+      ...actual !== void 0 ? { actual } : {}
+    });
+    let expected;
+    try {
+      expected = JSON.parse(expectedParamsStr);
+    } catch {
+      return buildResult(
+        "failed" /* FAILED */,
+        `Tool "${toolName}" assertion has invalid expected params JSON`,
+        `${toolName}(invalid expected params)`,
+        "Invalid expected params JSON"
+      );
+    }
+    const expectedLabel = `${toolName}(${Object.entries(expected).map(([k, v]) => `${k}="${v}"`).join(", ")})`;
+    const steps = input.llmTrace?.steps ?? [];
+    const toolCalls = steps.filter((s) => s.toolName === toolName && s.toolArguments !== void 0).map((s) => {
+      try {
+        return JSON.parse(s.toolArguments);
+      } catch {
+        return null;
+      }
+    }).filter((call) => call !== null);
+    if (toolCalls.some((actual) => containsAll({ actual, expected }))) {
+      return buildResult(
+        "passed" /* PASSED */,
+        `Tool "${toolName}" was called with params matching ${expectedParamsStr}`,
+        expectedLabel
+      );
+    }
+    return buildResult(
+      "failed" /* FAILED */,
+      `Tool "${toolName}" was never called with params matching ${expectedParamsStr}`,
+      expectedLabel,
+      toolCalls.length > 0 ? `Found ${toolName} calls but params didn't match` : `No matching tool calls found`
+    );
+  }
+};
+// src/evaluators/build-passed-evaluator.ts
+var import_crypto3 = require("crypto");
 var import_child_process = require("child_process");
 var DEFAULT_COMMAND = "yarn build";
 var DEFAULT_EXIT_CODE = 0;
 var BuildPassedEvaluator = class extends AssertionEvaluator {
   type = "build_passed";
   evaluate(assertion, _input, context) {
-    const assertionId = (0, import_crypto2.randomUUID)();
+    const assertionId = (0, import_crypto3.randomUUID)();
     const workDir = context?.workDir;
     const command = assertion.command ?? DEFAULT_COMMAND;
     const expectedExitCode = assertion.expectedExitCode ?? DEFAULT_EXIT_CODE;
@@ -306,7 +392,7 @@ var BuildPassedEvaluator = class extends AssertionEvaluator {
   }
   createResult(assertionId, fields) {
     return {
-      id: (0, import_crypto2.randomUUID)(),
+      id: (0, import_crypto3.randomUUID)(),
       assertionId,
       assertionType: "build_passed",
       assertionName: "Build passed",
@@ -331,7 +417,7 @@ var BuildPassedEvaluator = class extends AssertionEvaluator {
 };
 // src/evaluators/time-evaluator.ts
-var import_crypto3 = require("crypto");
+var import_crypto4 = require("crypto");
 var TimeEvaluator = class extends AssertionEvaluator {
   type = "time_limit";
   evaluate(assertion, input) {
@@ -353,8 +439,8 @@ var TimeEvaluator = class extends AssertionEvaluator {
   }
   createResult(fields) {
     return {
-      id: (0, import_crypto3.randomUUID)(),
-      assertionId: (0, import_crypto3.randomUUID)(),
+      id: (0, import_crypto4.randomUUID)(),
+      assertionId: (0, import_crypto4.randomUUID)(),
       assertionType: "time_limit",
       assertionName: "Time limit",
       status: "failed" /* FAILED */,
@@ -364,12 +450,12 @@ var TimeEvaluator = class extends AssertionEvaluator {
 };
 // src/evaluators/cost-evaluator.ts
-var import_crypto4 = require("crypto");
+var import_crypto5 = require("crypto");
 var CostEvaluator = class extends AssertionEvaluator {
   type = "cost";
   evaluate(assertion, input) {
-    const assertionId = (0, import_crypto4.randomUUID)();
-    const id = (0, import_crypto4.randomUUID)();
+    const assertionId = (0, import_crypto5.randomUUID)();
+    const id = (0, import_crypto5.randomUUID)();
     const assertionName = "Cost";
     const assertionType = "cost";
     const maxCostUsd = assertion.maxCostUsd;
@@ -401,10 +487,48 @@ var CostEvaluator = class extends AssertionEvaluator {
   }
 };
+// src/tools/read-file-tool.ts
+var import_ai = require("ai");
+var import_zod4 = require("zod");
+var import_promises = require("fs/promises");
+var import_path = __toESM(require("path"));
+function createReadFileTool(workDir) {
+  const resolvedWorkDir = import_path.default.resolve(workDir);
+  return (0, import_ai.tool)({
+    description: "Read the content of any file in the workspace by its relative path. Use this to inspect file contents when evaluating code changes.",
+    inputSchema: import_zod4.z.object({
+      path: import_zod4.z.string().describe("Relative file path in the workspace")
+    }),
+    execute: async ({
+      path: filePath
+    }) => {
+      const resolved = import_path.default.resolve(resolvedWorkDir, filePath);
+      if (!resolved.startsWith(resolvedWorkDir + import_path.default.sep)) {
+        return { error: `Access denied: path escapes workspace directory` };
+      }
+      try {
+        const content = await (0, import_promises.readFile)(resolved, "utf-8");
+        return { path: filePath, content };
+      } catch {
+        return { error: `File not found: ${filePath}` };
+      }
+    }
+  });
+}
 // src/evaluators/llm-judge-evaluator.ts
-var import_crypto5 = require("crypto");
+var import_crypto6 = require("crypto");
 var import_anthropic = require("@ai-sdk/anthropic");
-var import_ai = require("ai");
+var import_ai2 = require("ai");
+var import_zod5 = require("zod");
+var JudgeResultSchema = import_zod5.z.object({
+  text: import_zod5.z.string().describe("A brief textual verdict of the test result"),
+  score: import_zod5.z.number().min(0).max(100).describe(
+    "A number from 0 to 100 reflecting how well the answer meets the acceptance criteria"
+  ),
+  scoreReasoning: import_zod5.z.string().describe("A concise explanation justifying the assigned score")
+});
+var MAX_JUDGE_STEPS = 20;
 function formatTraceForJudge(llmTrace) {
   if (!llmTrace?.steps?.length) {
     return "No trace available.";
@@ -475,40 +599,22 @@ var DEFAULT_JUDGE_CONTEXT = `You are judging a scenario run. The ACTUAL run data
 - {{newFiles}}: list of new files that were created (or "No new files were created")
 - {{trace}}: step-by-step trace (tool calls, completions) so you can check e.g. which tools were called and how many times
-CRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above. If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;
-var JSON_OUTPUT_FORMAT_INSTRUCTIONS = `You must respond only with a valid JSON object that conforms exactly to the following structure:
-{
-  "text": string,
-  "score": number (0-100),
-  "scoreReasoning": string
-}
-- text: A brief textual verdict of the test result.
-- score: A number from 0 to 100 that reflects how well the answer meets the acceptance criteria.
-- scoreReasoning: A concise explanation justifying the assigned score.
+You have access to a read_file tool that lets you read the content of ANY file in the workspace (not just changed files). Use it to inspect file contents whenever you need to verify claims about code, check imports, review implementations, or validate that specific code patterns exist. Always read files before making judgments about their content \u2014 do not guess.
-Your response must:
-- Contain only the JSON object above \u2014 no introductory text, no code formatting (e.g., no triple backticks), and no trailing comments.
-- Be valid and parseable by \`JSON.parse\`.
-- Use only double quotes for all keys and strings, as required by JSON.
-Any response that includes extra content or deviates from the specified format will cause parsing to fail. Follow these instructions exactly.`;
+CRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above and the actual file contents (use the read_file tool). If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;
 var LlmJudgeEvaluator = class extends AssertionEvaluator {
   type = "llm_judge";
   async evaluate(assertion, input, context) {
-    const assertionId = (0, import_crypto5.randomUUID)();
-    const llmConfig = context?.llmConfig;
+    const assertionId = (0, import_crypto6.randomUUID)();
     const workDir = context?.workDir ?? "";
-    const generateTextStub = context?.generateTextForLlmJudge;
     const output = input.outputText ?? "";
     const fileDiffs = input.fileDiffs ?? [];
     const changedPaths = fileDiffs.map((d) => d.path);
     const modifiedPaths = fileDiffs.filter((d) => d.status === "modified").map((d) => d.path);
     const newPaths = fileDiffs.filter((d) => d.status === "new").map((d) => d.path);
-    const changedFiles = changedPaths.length > 0 ? changedPaths.map((path) => `- ${path}`).join("\n") : "No files were changed";
-    const modifiedFiles = modifiedPaths.length > 0 ? modifiedPaths.map((path) => `- ${path}`).join("\n") : "No files were modified";
-    const newFiles = newPaths.length > 0 ? newPaths.map((path) => `- ${path}`).join("\n") : "No new files were created";
+    const changedFiles = changedPaths.length > 0 ? changedPaths.map((p) => `- ${p}`).join("\n") : "No files were changed";
+    const modifiedFiles = modifiedPaths.length > 0 ? modifiedPaths.map((p) => `- ${p}`).join("\n") : "No files were modified";
+    const newFiles = newPaths.length > 0 ? newPaths.map((p) => `- ${p}`).join("\n") : "No new files were created";
     const trace = formatTraceForJudge(input.llmTrace);
     const ctx = {
       output,
@@ -520,101 +626,77 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
     };
     const replace = (s) => replacePlaceholders(s, ctx);
     const finalPrompt = replace(assertion.prompt);
-    const systemPrompt = assertion.systemPrompt != null && assertion.systemPrompt !== "" ? replace(assertion.systemPrompt) + "\n\n" + JSON_OUTPUT_FORMAT_INSTRUCTIONS : replace(DEFAULT_JUDGE_CONTEXT) + "\n\n" + JSON_OUTPUT_FORMAT_INSTRUCTIONS;
     const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;
     const maxOutputTokens = assertion.maxTokens ?? 1024;
     const temperature = assertion.temperature ?? 0;
-    const modelUsed = assertion.model ?? context?.defaultJudgeModel;
-    if (!modelUsed && !generateTextStub) {
+    const modelId = assertion.model ?? context?.defaultJudgeModel;
+    const model = this.resolveModel(context, modelId);
+    if (!model) {
+      const reason = !modelId && !context?.model ? "No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel/model in context)" : "No llmConfig for llm_judge assertion (AI gateway required)";
       return {
-        id: (0, import_crypto5.randomUUID)(),
+        id: (0, import_crypto6.randomUUID)(),
         assertionId,
         assertionType: "llm_judge",
         assertionName: "LLM judge",
         status: "failed" /* FAILED */,
-        message: "No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel in context)",
+        message: reason,
         expected: String(minScore)
       };
     }
-    if (!generateTextStub && !llmConfig) {
-      return {
-        id: (0, import_crypto5.randomUUID)(),
-        assertionId,
-        assertionType: "llm_judge",
-        assertionName: "LLM judge",
-        status: "failed" /* FAILED */,
-        message: "No llmConfig for llm_judge assertion (AI gateway required)",
-        expected: String(minScore)
-      };
-    }
-    const maxParseAttempts = 3;
-    let lastParseError;
-    let lastRawText;
+    const systemPrompt = assertion.systemPrompt != null && assertion.systemPrompt !== "" ? replace(assertion.systemPrompt) : replace(DEFAULT_JUDGE_CONTEXT);
     try {
-      for (let attempt = 1; attempt <= maxParseAttempts; attempt++) {
-        const result = generateTextStub ? await generateTextStub({
-          prompt: finalPrompt,
-          system: systemPrompt,
-          maxOutputTokens,
-          temperature
-        }) : await this.callGenerateText(
-          llmConfig,
-          modelUsed,
-          finalPrompt,
-          systemPrompt,
-          maxOutputTokens,
-          temperature
-        );
-        lastRawText = result.text;
-        try {
-          const cleaned = stripMarkdownCodeBlock(result.text);
-          const parsed = JSON.parse(cleaned);
-          const judgeResult = validateJudgeResult(parsed);
-          const passed = judgeResult.score >= minScore;
-          return {
-            id: (0, import_crypto5.randomUUID)(),
-            assertionId,
-            assertionType: "llm_judge",
-            assertionName: "LLM judge",
-            status: passed ? "passed" /* PASSED */ : "failed" /* FAILED */,
-            message: passed ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}` : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,
-            expected: String(minScore),
-            actual: String(judgeResult.score),
-            details: {
-              score: judgeResult.score,
-              scoreReasoning: judgeResult.scoreReasoning,
-              text: judgeResult.text
-            }
-          };
-        } catch (parseErr) {
-          lastParseError = parseErr instanceof Error ? parseErr : new Error(String(parseErr));
-        }
-      }
+      const judgeResult = await this.callGenerateText(
+        model,
+        finalPrompt,
+        systemPrompt,
+        maxOutputTokens,
+        temperature,
+        workDir || void 0
+      );
+      const passed = judgeResult.score >= minScore;
       return {
-        id: (0, import_crypto5.randomUUID)(),
+        id: (0, import_crypto6.randomUUID)(),
         assertionId,
         assertionType: "llm_judge",
         assertionName: "LLM judge",
-        status: "failed" /* FAILED */,
-        message: `Failed to parse judge response after ${maxParseAttempts} attempts: ${lastParseError?.message ?? "unknown"}`,
+        status: passed ? "passed" /* PASSED */ : "failed" /* FAILED */,
+        message: passed ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}` : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,
         expected: String(minScore),
-        actual: void 0,
-        details: { rawText: lastRawText?.slice(0, 500) }
+        actual: String(judgeResult.score),
+        details: {
+          score: judgeResult.score,
+          scoreReasoning: judgeResult.scoreReasoning,
+          text: judgeResult.text
+        }
       };
     } catch (err) {
+      if (import_ai2.NoObjectGeneratedError.isInstance(err)) {
+        return {
+          id: (0, import_crypto6.randomUUID)(),
+          assertionId,
+          assertionType: "llm_judge",
+          assertionName: "LLM judge",
+          status: "failed" /* FAILED */,
+          message: "LLM judge failed to produce valid structured output",
+          expected: String(minScore),
+          details: {
+            rawText: typeof err.text === "string" ? err.text.slice(0, 500) : void 0
+          }
+        };
+      }
       const message = err instanceof Error ? err.message : String(err);
       const details = {
         error: message,
-        model: modelUsed
+        model: modelId
       };
-      if (import_ai.APICallError.isInstance(err)) {
+      if (import_ai2.APICallError.isInstance(err)) {
         details.statusCode = err.statusCode;
         details.url = err.url;
         details.isRetryable = err.isRetryable;
         details.responseBody = typeof err.responseBody === "string" ? err.responseBody.slice(0, 2e3) : err.responseBody;
       }
       return {
-        id: (0, import_crypto5.randomUUID)(),
+        id: (0, import_crypto6.randomUUID)(),
         assertionId,
         assertionType: "llm_judge",
         assertionName: "LLM judge",
@@ -625,20 +707,39 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
       };
     }
   }
-  async callGenerateText(llmConfig, modelId, prompt, system, maxOutputTokens, temperature) {
+  /**
+   * Resolve the LanguageModel to use: context.model (injected mock/override)
+   * takes precedence, otherwise create from llmConfig + modelId.
+   */
+  resolveModel(context, modelId) {
+    if (context?.model) {
+      return context.model;
+    }
+    if (!modelId || !context?.llmConfig) {
+      return null;
+    }
     const anthropic = (0, import_anthropic.createAnthropic)({
-      baseURL: llmConfig.baseUrl,
+      baseURL: context.llmConfig.baseUrl,
       apiKey: "dummy",
-      headers: llmConfig.headers
+      headers: context.llmConfig.headers
     });
-    const result = await (0, import_ai.generateText)({
-      model: anthropic(modelId),
+    return anthropic(modelId);
+  }
+  async callGenerateText(model, prompt, system, maxOutputTokens, temperature, workDir) {
+    const baseOptions = {
+      model,
       prompt,
       system,
       maxOutputTokens,
-      temperature
-    });
-    return { text: result.text };
+      temperature,
+      output: import_ai2.Output.object({ schema: JudgeResultSchema }),
+      stopWhen: (0, import_ai2.stepCountIs)(MAX_JUDGE_STEPS)
+    };
+    const { output } = workDir ? await (0, import_ai2.generateText)({
+      ...baseOptions,
+      tools: { read_file: createReadFileTool(workDir) }
+    }) : await (0, import_ai2.generateText)(baseOptions);
+    return output;
   }
 };
@@ -646,6 +747,7 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
 var llmJudgeEvaluator = new LlmJudgeEvaluator();
 var evaluators = {
   skill_was_called: new SkillWasCalledEvaluator(),
+  tool_called_with_param: new ToolCalledWithParamEvaluator(),
   build_passed: new BuildPassedEvaluator(),
   time_limit: new TimeEvaluator(),
   cost: new CostEvaluator(),
@@ -668,8 +770,8 @@ async function evaluateAssertions(input, assertions, context) {
       const evaluator = evaluators[assertion.type];
       if (!evaluator) {
         return {
-          id: (0, import_crypto6.randomUUID)(),
-          assertionId: (0, import_crypto6.randomUUID)(),
+          id: (0, import_crypto7.randomUUID)(),
+          assertionId: (0, import_crypto7.randomUUID)(),
           assertionType: assertion.type,
           assertionName: "Unknown assertion",
           status: "error" /* ERROR */,
@@ -694,6 +796,7 @@ async function evaluateAssertions(input, assertions, context) {
   BuildPassedEvaluator,
   CostAssertionSchema,
   CostEvaluator,
+  JudgeResultSchema,
   LLMBreakdownStatsSchema,
   LLMStepType,
   LLMTraceSchema,
@@ -706,6 +809,9 @@ async function evaluateAssertions(input, assertions, context) {
   TimeAssertionSchema,
   TimeEvaluator,
   TokenUsageSchema,
+  ToolCalledWithParamAssertionSchema,
+  ToolCalledWithParamEvaluator,
+  createReadFileTool,
   evaluateAssertions,
   formatTraceForJudge,
   getEvaluator,