@wix/eval-assertions 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -110,7 +110,6 @@ Uses an LLM to evaluate the output with a customizable prompt. The default syste
110
110
  {
111
111
  type: 'llm_judge',
112
112
  prompt: 'Evaluate the quality of this code:\n\n{{output}}',
113
- systemPrompt: 'You are a code reviewer...', // Optional custom system prompt
114
113
  minScore: 70, // Minimum passing score (0-100, default: 70)
115
114
  model: 'claude-3-5-haiku-20241022', // Model to use
116
115
  maxTokens: 1024, // Max output tokens
package/build/index.js CHANGED
@@ -94,8 +94,6 @@ var LlmJudgeAssertionSchema = import_zod.z.object({
94
94
  type: import_zod.z.literal("llm_judge"),
95
95
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */
96
96
  prompt: import_zod.z.string(),
97
- /** Optional system prompt for the judge (default asks for JSON with score) */
98
- systemPrompt: import_zod.z.string().optional(),
99
97
  /** Minimum score to pass (0-100, default 70) */
100
98
  minScore: import_zod.z.number().int().min(0).max(100).optional(),
101
99
  /** Model for the judge (e.g. claude-3-5-haiku) */
@@ -290,7 +288,7 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
290
288
  evaluate(assertion, input, _context) {
291
289
  const assertionId = (0, import_crypto2.randomUUID)();
292
290
  const { toolName, expectedParams: expectedParamsStr } = assertion;
293
- const buildResult = (status, message, expected2, actual) => ({
291
+ const buildResult = (status, message, expected2, actual2) => ({
294
292
  id: (0, import_crypto2.randomUUID)(),
295
293
  assertionId,
296
294
  assertionType: ASSERTION_TYPE,
@@ -298,7 +296,7 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
298
296
  status,
299
297
  message,
300
298
  expected: expected2,
301
- ...actual !== void 0 ? { actual } : {}
299
+ ...actual2 !== void 0 ? { actual: actual2 } : {}
302
300
  });
303
301
  let expected;
304
302
  try {
@@ -320,18 +318,21 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
320
318
  return null;
321
319
  }
322
320
  }).filter((call) => call !== null);
323
- if (toolCalls.some((actual) => containsAll({ actual, expected }))) {
321
+ if (toolCalls.some((actual2) => containsAll({ actual: actual2, expected }))) {
324
322
  return buildResult(
325
323
  "passed" /* PASSED */,
326
324
  `Tool "${toolName}" was called with params matching ${expectedParamsStr}`,
327
325
  expectedLabel
328
326
  );
329
327
  }
328
+ const MAX_SHOWN = 5;
329
+ const formatCall = (call) => `${toolName}(${Object.entries(call).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(", ")})`;
330
+ const actual = toolCalls.length === 0 ? "No matching tool calls found" : toolCalls.slice(0, MAX_SHOWN).map(formatCall).join(" | ") + (toolCalls.length > MAX_SHOWN ? ` ... and ${toolCalls.length - MAX_SHOWN} more` : "");
330
331
  return buildResult(
331
332
  "failed" /* FAILED */,
332
333
  `Tool "${toolName}" was never called with params matching ${expectedParamsStr}`,
333
334
  expectedLabel,
334
- toolCalls.length > 0 ? `Found ${toolName} calls but params didn't match` : `No matching tool calls found`
335
+ actual
335
336
  );
336
337
  }
337
338
  };
@@ -626,6 +627,7 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
626
627
  };
627
628
  const replace = (s) => replacePlaceholders(s, ctx);
628
629
  const finalPrompt = replace(assertion.prompt);
630
+ const systemPrompt = replace(DEFAULT_JUDGE_CONTEXT);
629
631
  const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;
630
632
  const maxOutputTokens = assertion.maxTokens ?? 1024;
631
633
  const temperature = assertion.temperature ?? 0;
@@ -643,7 +645,6 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
643
645
  expected: String(minScore)
644
646
  };
645
647
  }
646
- const systemPrompt = assertion.systemPrompt != null && assertion.systemPrompt !== "" ? replace(assertion.systemPrompt) : replace(DEFAULT_JUDGE_CONTEXT);
647
648
  try {
648
649
  const judgeResult = await this.callGenerateText(
649
650
  model,
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../src/types/assertions.ts", "../src/types/trace.ts", "../src/types/result.ts", "../src/evaluators/index.ts", "../src/evaluators/skill-was-called-evaluator.ts", "../src/evaluators/assertion-evaluator.ts", "../src/evaluators/tool-called-with-param-evaluator.ts", "../src/evaluators/build-passed-evaluator.ts", "../src/evaluators/time-evaluator.ts", "../src/evaluators/cost-evaluator.ts", "../src/tools/read-file-tool.ts", "../src/evaluators/llm-judge-evaluator.ts"],
4
- "sourcesContent": ["/**\n * @wix/eval-assertions\n *\n * Assertion framework for AI agent evaluations.\n * Supports skill invocation checks, build validation, and LLM-based judging.\n */\n\n// Types\nexport {\n // Assertion schemas and types\n AssertionSchema,\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n type Assertion,\n type SkillWasCalledAssertion,\n type ToolCalledWithParamAssertion,\n type BuildPassedAssertion,\n type TimeAssertion,\n type CostAssertion,\n type LlmJudgeAssertion,\n // Trace types\n LLMTraceSchema,\n LLMTraceStepSchema,\n LLMTraceSummarySchema,\n LLMBreakdownStatsSchema,\n TokenUsageSchema,\n LLMStepType,\n type LLMTrace,\n type LLMTraceStep,\n type LLMTraceSummary,\n type LLMBreakdownStats,\n type TokenUsage,\n // Result types\n AssertionResultSchema,\n AssertionResultStatus,\n type AssertionResult,\n // Input types\n type EvaluationInput,\n type FileDiff,\n} from \"./types/index.js\";\n\n// Evaluators\nexport {\n evaluateAssertions,\n registerEvaluator,\n getEvaluator,\n AssertionEvaluator,\n SkillWasCalledEvaluator,\n ToolCalledWithParamEvaluator,\n BuildPassedEvaluator,\n TimeEvaluator,\n CostEvaluator,\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type AssertionContext,\n type LlmConfig,\n type JudgeResult,\n} from \"./evaluators/index.js\";\n\n// Tools\nexport { createReadFileTool } from \"./tools/index.js\";\n", "import { z } from \"zod\";\n\n/**\n * Assertion: the agent must have invoked one or more skills during the run.\n * Checked by inspecting the LLM trace for \"Skill\" tool uses with the given skills.\n * When multiple skills are in one assertion, they are treated as a group (1 assertion).\n * Each skill in the group must have been called for the assertion to pass.\n */\nexport const SkillWasCalledAssertionSchema = z.object({\n type: z.literal(\"skill_was_called\"),\n /** Names of the skills that must have been called (matched against trace Skill tool args) */\n skillNames: z.array(z.string()).min(1),\n});\n\nexport type SkillWasCalledAssertion = z.infer<\n typeof SkillWasCalledAssertionSchema\n>;\n\n/**\n * Assertion: a specific tool must have been called with expected parameters.\n * Checked by inspecting the LLM trace for tool calls with matching name and arguments.\n */\nexport const ToolCalledWithParamAssertionSchema = z.object({\n type: z.literal(\"tool_called_with_param\"),\n /** Name of the tool that must have been called */\n toolName: z.string().min(1),\n /** JSON string of key-value pairs for expected parameters (substring match) */\n expectedParams: z.string().min(1),\n});\n\nexport type ToolCalledWithParamAssertion = z.infer<\n typeof ToolCalledWithParamAssertionSchema\n>;\n\n/**\n * Assertion: a build command must exit with the expected code (default 0).\n * Runs the command in the scenario working directory.\n */\nexport const BuildPassedAssertionSchema = z.object({\n type: z.literal(\"build_passed\"),\n /** Command to run (default: \"yarn build\") */\n command: z.string().optional(),\n /** Expected exit code (default: 0) */\n expectedExitCode: z.number().int().optional(),\n});\n\nexport type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;\n\n/**\n * Assertion: the scenario LLM execution cost must stay within a USD threshold.\n * Checked by reading llmTrace.summary.totalCostUsd.\n */\nexport const CostAssertionSchema = z.object({\n type: z.literal(\"cost\"),\n /** Maximum allowed cost in USD */\n maxCostUsd: z.number().positive(),\n});\n\nexport type CostAssertion = z.infer<typeof CostAssertionSchema>;\n\n/**\n * Assertion: an LLM judges the scenario output (score 0-100).\n * Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}}.\n * Passes if judge score >= minScore.\n */\nexport const LlmJudgeAssertionSchema = z.object({\n type: z.literal(\"llm_judge\"),\n /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */\n prompt: z.string(),\n /** Optional system prompt for the judge (default asks for JSON with score) */\n systemPrompt: z.string().optional(),\n /** Minimum score to pass (0-100, default 70) */\n minScore: z.number().int().min(0).max(100).optional(),\n /** Model for the judge (e.g. claude-3-5-haiku) */\n model: z.string().optional(),\n maxTokens: z.number().int().optional(),\n temperature: z.number().min(0).max(1).optional(),\n});\n\nexport type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;\n\n/**\n * Assertion: scenario must complete within a maximum duration.\n * Deterministic check against the scenario execution time.\n */\nexport const TimeAssertionSchema = z.object({\n type: z.literal(\"time_limit\"),\n /** Maximum allowed duration in milliseconds */\n maxDurationMs: z.number().int().positive(),\n});\n\nexport type TimeAssertion = z.infer<typeof TimeAssertionSchema>;\n\n/**\n * Union of all assertion types.\n * Each assertion has a type and type-specific data.\n * Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.\n */\nexport const AssertionSchema = z.union([\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n]);\n\nexport type Assertion = z.infer<typeof AssertionSchema>;\n", "import { z } from \"zod\";\n\n/**\n * Token usage schema.\n */\nexport const TokenUsageSchema = z.object({\n prompt: z.number(),\n completion: z.number(),\n total: z.number(),\n});\n\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * LLM step type enum.\n */\nexport enum LLMStepType {\n COMPLETION = \"completion\",\n TOOL_USE = \"tool_use\",\n TOOL_RESULT = \"tool_result\",\n THINKING = \"thinking\",\n}\n\n/**\n * LLM trace step schema.\n */\nexport const LLMTraceStepSchema = z.object({\n id: z.string(),\n stepNumber: z.number(),\n type: z.enum(LLMStepType),\n model: z.string(),\n provider: z.string(),\n startedAt: z.string(),\n durationMs: z.number(),\n tokenUsage: TokenUsageSchema,\n costUsd: z.number(),\n toolName: z.string().optional(),\n toolArguments: z.string().optional(),\n inputPreview: z.string().optional(),\n outputPreview: z.string().optional(),\n success: z.boolean(),\n error: z.string().optional(),\n});\n\nexport type LLMTraceStep = z.infer<typeof LLMTraceStepSchema>;\n\n/**\n * LLM breakdown stats schema.\n */\nexport const LLMBreakdownStatsSchema = z.object({\n count: z.number(),\n durationMs: z.number(),\n tokens: z.number(),\n costUsd: z.number(),\n});\n\nexport type LLMBreakdownStats = z.infer<typeof LLMBreakdownStatsSchema>;\n\n/**\n * LLM trace summary schema.\n */\nexport const LLMTraceSummarySchema = z.object({\n totalSteps: z.number(),\n totalDurationMs: z.number(),\n totalTokens: TokenUsageSchema,\n totalCostUsd: z.number(),\n stepTypeBreakdown: z.record(z.string(), LLMBreakdownStatsSchema).optional(),\n modelBreakdown: z.record(z.string(), LLMBreakdownStatsSchema),\n modelsUsed: z.array(z.string()),\n});\n\nexport type LLMTraceSummary = z.infer<typeof LLMTraceSummarySchema>;\n\n/**\n * LLM trace schema.\n */\nexport const LLMTraceSchema = z.object({\n id: z.string(),\n steps: z.array(LLMTraceStepSchema),\n summary: LLMTraceSummarySchema,\n});\n\nexport type LLMTrace = z.infer<typeof LLMTraceSchema>;\n", "import { z } from \"zod\";\nimport { LLMTraceStepSchema } from \"./trace.js\";\n\n/**\n * Assertion result status enum.\n */\nexport enum AssertionResultStatus {\n PASSED = \"passed\",\n FAILED = \"failed\",\n SKIPPED = \"skipped\",\n ERROR = \"error\",\n}\n\n/**\n * Assertion result schema.\n */\nexport const AssertionResultSchema = z.object({\n id: z.string(),\n assertionId: z.string(),\n assertionType: z.string(),\n assertionName: z.string(),\n status: z.enum(AssertionResultStatus),\n message: z.string().optional(),\n expected: z.string().optional(),\n actual: z.string().optional(),\n duration: z.number().optional(),\n details: z.record(z.string(), z.unknown()).optional(),\n llmTraceSteps: z.array(LLMTraceStepSchema).optional(),\n});\n\nexport type AssertionResult = z.infer<typeof AssertionResultSchema>;\n", "import type { Assertion, AssertionResult } from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nimport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nimport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nimport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nimport { TimeEvaluator } from \"./time-evaluator.js\";\nimport { CostEvaluator } from \"./cost-evaluator.js\";\nimport { LlmJudgeEvaluator } from \"./llm-judge-evaluator.js\";\nimport type { EvaluationInput } from \"../types/index.js\";\n\nconst llmJudgeEvaluator = new LlmJudgeEvaluator();\n\nconst evaluators: Record<string, AssertionEvaluator> = {\n skill_was_called: new SkillWasCalledEvaluator(),\n tool_called_with_param: new ToolCalledWithParamEvaluator(),\n build_passed: new BuildPassedEvaluator(),\n time_limit: new TimeEvaluator(),\n cost: new CostEvaluator(),\n llm_judge: llmJudgeEvaluator,\n // Custom assertions use the same LLM-based evaluation as llm_judge\n custom: llmJudgeEvaluator,\n};\n\n/**\n * Register a custom assertion evaluator.\n *\n * @param type - The assertion type identifier\n * @param evaluator - The evaluator instance\n */\nexport function registerEvaluator(\n type: string,\n evaluator: AssertionEvaluator,\n): void {\n evaluators[type] = evaluator;\n}\n\n/**\n * Get a registered evaluator by type.\n *\n * @param type - The assertion type identifier\n * @returns The evaluator or undefined if not found\n */\nexport function getEvaluator(type: string): AssertionEvaluator | undefined {\n return evaluators[type];\n}\n\n/**\n * Evaluate all assertions against the input.\n *\n * @param input - Evaluation input (includes outputText, llmTrace, fileDiffs)\n * @param assertions - List of assertions to evaluate\n * @param context - Optional context (e.g. workDir for build_passed, llmConfig for llm_judge)\n * @returns Array of assertion results; empty if no assertions\n */\nexport async function evaluateAssertions(\n input: EvaluationInput,\n assertions: Assertion[],\n context?: AssertionContext,\n): Promise<AssertionResult[]> {\n if (assertions.length === 0) {\n return [];\n }\n return Promise.all(\n assertions.map(async (assertion) => {\n const evaluator = evaluators[assertion.type];\n if (!evaluator) {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: assertion.type,\n assertionName: \"Unknown assertion\",\n status: AssertionResultStatus.ERROR,\n message: `Unsupported assertion type: ${assertion.type}`,\n duration: 0,\n };\n }\n const startMs = Date.now();\n const result = await evaluator.evaluate(assertion, input, context);\n const durationMs = Date.now() - startMs;\n return { ...result, duration: durationMs };\n }),\n );\n}\n\n// Re-export evaluator classes and types\nexport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nexport type { AssertionContext, LlmConfig } from \"./assertion-evaluator.js\";\nexport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nexport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nexport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nexport { TimeEvaluator } from \"./time-evaluator.js\";\nexport { CostEvaluator } from \"./cost-evaluator.js\";\nexport {\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type JudgeResult,\n} from \"./llm-judge-evaluator.js\";\n", "import type {\n SkillWasCalledAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Collect all skill names that were called in the LLM trace.\n */\nfunction collectCalledSkillNames(llmTrace: LLMTrace): Set<string> {\n const calledSkills = new Set<string>();\n for (const step of llmTrace.steps) {\n if (step.toolName !== \"Skill\") {\n continue;\n }\n let args: unknown;\n try {\n args = step.toolArguments\n ? (JSON.parse(step.toolArguments) as unknown)\n : undefined;\n } catch {\n continue;\n }\n if (args !== null && typeof args === \"object\") {\n const obj = args as Record<string, unknown>;\n if (typeof obj.skill === \"string\") {\n calledSkills.add(obj.skill);\n }\n }\n }\n return calledSkills;\n}\n\n/**\n * Evaluator for \"skill_was_called\" assertion: the LLM trace must contain steps\n * where the \"Skill\" tool was used with ALL expected skills (by name).\n *\n * Multiple skills in one assertion are treated as a group \u2014 all must be called\n * for the assertion to pass. To check skills independently, add separate assertions.\n */\nexport class SkillWasCalledEvaluator extends AssertionEvaluator<SkillWasCalledAssertion> {\n readonly type = \"skill_was_called\" as const;\n\n evaluate(\n assertion: SkillWasCalledAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- context not used for skill_was_called\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const expectedSkills = assertion.skillNames;\n const expectedLabel = expectedSkills.join(\", \");\n\n const llmTrace: LLMTrace | undefined = input.llmTrace;\n if (!llmTrace?.steps?.length) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message: \"No LLM trace steps to check for skill invocation\",\n expected: expectedLabel,\n };\n }\n\n const calledSkills = collectCalledSkillNames(llmTrace);\n const missingSkills = expectedSkills.filter(\n (name) => !calledSkills.has(name),\n );\n\n if (missingSkills.length === 0) {\n const message =\n expectedSkills.length === 1\n ? `Skill \"${expectedSkills[0]}\" was called`\n : `All skills were called: ${expectedLabel}`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.PASSED,\n message,\n expected: expectedLabel,\n };\n }\n\n const missingLabel = missingSkills.join(\", \");\n const message =\n expectedSkills.length === 1\n ? `Skill \"${missingSkills[0]}\" was not called`\n : `Missing skills: ${missingLabel} (expected all of: ${expectedLabel})`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message,\n expected: expectedLabel,\n };\n }\n}\n", "import type {\n Assertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport type { LanguageModel } from \"ai\";\n\n/**\n * Configuration for LLM calls (used by llm_judge assertion).\n */\nexport interface LlmConfig {\n /** Base URL for the AI API (e.g., 'https://api.anthropic.com') */\n baseUrl: string;\n /** Headers to include in API requests (e.g., API key) */\n headers: Record<string, string>;\n}\n\n/**\n * Optional context passed when evaluating assertions.\n */\nexport interface AssertionContext {\n /** Working directory for the scenario (used by build_passed) */\n workDir?: string;\n /** LLM configuration (used by llm_judge) */\n llmConfig?: LlmConfig;\n /** Default model for llm_judge when assertion.model is not set */\n defaultJudgeModel?: string;\n /** Optional model override \u2014 when provided, used instead of creating from llmConfig + modelId */\n model?: LanguageModel;\n}\n\n/**\n * Abstract base for assertion evaluators.\n * Each assertion type has a concrete class that implements evaluate().\n * evaluate() may return a Promise for async assertions (e.g. llm_judge).\n */\nexport abstract class AssertionEvaluator<T extends Assertion = Assertion> {\n abstract readonly type: T[\"type\"];\n\n abstract evaluate(\n assertion: T,\n input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult | Promise<AssertionResult>;\n}\n", "import type {\n ToolCalledWithParamAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst ASSERTION_TYPE = \"tool_called_with_param\" as const;\nconst ASSERTION_NAME = \"Tool called with param\";\n\nconst containsAll = ({\n actual,\n expected,\n}: {\n actual: Record<string, unknown>;\n expected: Record<string, unknown>;\n}): boolean =>\n Object.entries(expected).every(([key, val]) => {\n const actualVal = actual[key];\n // actual comes from LLM trace \u2014 can be null/undefined\n if (actualVal === null || actualVal === undefined) return false;\n const actualStr =\n typeof actualVal === \"string\" ? actualVal : JSON.stringify(actualVal);\n return actualStr.includes(String(val));\n });\n\n/**\n * Evaluator for \"tool_called_with_param\" assertion: the LLM trace must contain\n * a step where a specific tool was called with arguments where each expected\n * param value is a substring of the actual value.\n */\nexport class ToolCalledWithParamEvaluator extends AssertionEvaluator<ToolCalledWithParamAssertion> {\n readonly type = ASSERTION_TYPE;\n\n evaluate(\n assertion: ToolCalledWithParamAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- required by base class\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const { toolName, expectedParams: expectedParamsStr } = assertion;\n\n const buildResult = (\n status: AssertionResultStatus,\n message: string,\n expected: string,\n actual?: string,\n ): AssertionResult => ({\n id: randomUUID(),\n assertionId,\n assertionType: ASSERTION_TYPE,\n assertionName: ASSERTION_NAME,\n status,\n message,\n expected,\n ...(actual !== undefined ? { actual } : {}),\n });\n\n // expectedParams is validated upstream, but guard here to avoid runtime throws.\n let expected: Record<string, unknown>;\n try {\n expected = JSON.parse(expectedParamsStr) as Record<string, unknown>;\n } catch {\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" assertion has invalid expected params JSON`,\n `${toolName}(invalid expected params)`,\n \"Invalid expected params JSON\",\n );\n }\n\n const expectedLabel = `${toolName}(${Object.entries(expected)\n .map(([k, v]) => `${k}=\"${v}\"`)\n .join(\", \")})`;\n\n const steps = input.llmTrace?.steps ?? [];\n // toolArguments is always JSON.stringify(args) from the trace builder, or undefined\n const toolCalls = steps\n .filter((s) => s.toolName === toolName && s.toolArguments !== undefined)\n .map((s) => {\n try {\n return JSON.parse(s.toolArguments!) as Record<string, unknown>;\n } catch {\n return null;\n }\n })\n .filter((call): call is Record<string, unknown> => call !== null);\n\n if (toolCalls.some((actual) => containsAll({ actual, expected }))) {\n return buildResult(\n AssertionResultStatus.PASSED,\n `Tool \"${toolName}\" was called with params matching ${expectedParamsStr}`,\n expectedLabel,\n );\n }\n\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" was never called with params matching ${expectedParamsStr}`,\n expectedLabel,\n toolCalls.length > 0\n ? `Found ${toolName} calls but params didn't match`\n : `No matching tool calls found`,\n );\n }\n}\n", "import type {\n BuildPassedAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { execSync } from \"child_process\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst DEFAULT_COMMAND = \"yarn build\";\nconst DEFAULT_EXIT_CODE = 0;\n\n/**\n * Evaluator for \"build_passed\" assertion: runs a build command in the scenario\n * working directory and passes if the command exits with the expected code (default 0).\n */\nexport class BuildPassedEvaluator extends AssertionEvaluator<BuildPassedAssertion> {\n readonly type = \"build_passed\" as const;\n\n evaluate(\n assertion: BuildPassedAssertion,\n _input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const workDir = context?.workDir;\n const command = assertion.command ?? DEFAULT_COMMAND;\n const expectedExitCode = assertion.expectedExitCode ?? DEFAULT_EXIT_CODE;\n\n if (!workDir) {\n return this.createResult(assertionId, {\n status: AssertionResultStatus.FAILED,\n message: \"No working directory provided for build_passed assertion\",\n expected: String(expectedExitCode),\n });\n }\n\n let exitCode: number | null = null;\n let errorMessage: string | null = null;\n let stdout: string | undefined;\n let stderr: string | undefined;\n\n console.log(`[build_passed] Running \"${command}\" in: ${workDir}`);\n\n try {\n execSync(command, {\n cwd: workDir,\n encoding: \"utf-8\",\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n exitCode = 0;\n } catch (err) {\n const error = err as Error & {\n status?: number;\n code?: number;\n stdout?: string | Buffer;\n stderr?: string | Buffer;\n };\n exitCode =\n typeof error.status === \"number\"\n ? error.status\n : typeof error.code === \"number\"\n ? error.code\n : null;\n errorMessage = error.message;\n stdout = this.bufferToString(error.stdout);\n stderr = this.bufferToString(error.stderr);\n }\n\n const passed = exitCode !== null && exitCode === expectedExitCode;\n\n const details: Record<string, unknown> = { workDir, command };\n if (stdout !== undefined && stdout !== \"\") {\n details.stdout = stdout;\n }\n if (stderr !== undefined && stderr !== \"\") {\n details.stderr = stderr;\n }\n\n return this.createResult(assertionId, {\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: this.formatMessage(exitCode, expectedExitCode, errorMessage),\n expected: String(expectedExitCode),\n actual: exitCode !== null ? String(exitCode) : undefined,\n details,\n });\n }\n\n private createResult(\n assertionId: string,\n fields: Partial<AssertionResult>,\n ): AssertionResult {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"build_passed\",\n assertionName: \"Build passed\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n\n private bufferToString(\n value: string | Buffer | undefined,\n ): string | undefined {\n if (value === undefined || value === null) return undefined;\n if (typeof value === \"string\") return value;\n return value.toString(\"utf-8\");\n }\n\n private formatMessage(\n exitCode: number | null,\n expectedExitCode: number,\n errorMessage: string | null,\n ): string {\n if (exitCode === null) {\n return `Build failed: ${errorMessage}`;\n }\n if (exitCode === expectedExitCode) {\n return `Build passed (exit code ${exitCode})`;\n }\n return `Build exited with ${exitCode}, expected ${expectedExitCode}`;\n }\n}\n", "import type {\n TimeAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"time_limit\" assertion: passes if the scenario completed\n * within the configured maximum duration (maxDurationMs).\n */\nexport class TimeEvaluator extends AssertionEvaluator<TimeAssertion> {\n readonly type = \"time_limit\" as const;\n\n evaluate(assertion: TimeAssertion, input: EvaluationInput): AssertionResult {\n const maxDurationMs = assertion.maxDurationMs;\n\n if (input.durationMs == null) {\n return this.createResult({\n status: AssertionResultStatus.FAILED,\n message: \"No duration data available for time assertion\",\n expected: `<= ${maxDurationMs}ms`,\n });\n }\n\n const passed = input.durationMs <= maxDurationMs;\n\n return this.createResult({\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Completed in ${input.durationMs}ms (limit: ${maxDurationMs}ms)`\n : `Exceeded time limit: ${input.durationMs}ms > ${maxDurationMs}ms`,\n expected: `<= ${maxDurationMs}ms`,\n actual: `${input.durationMs}ms`,\n });\n }\n\n private createResult(fields: Partial<AssertionResult>): AssertionResult {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: \"time_limit\",\n assertionName: \"Time limit\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n}\n", "import type {\n CostAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"cost\" assertion: checks that the scenario's LLM execution cost\n * stays within a configured USD threshold by reading llmTrace.summary.totalCostUsd.\n */\nexport class CostEvaluator extends AssertionEvaluator<CostAssertion> {\n readonly type = \"cost\" as const;\n\n evaluate(assertion: CostAssertion, input: EvaluationInput): AssertionResult {\n const assertionId = randomUUID();\n const id = randomUUID();\n const assertionName = \"Cost\";\n const assertionType = \"cost\";\n const maxCostUsd = assertion.maxCostUsd;\n\n if (!input.llmTrace) {\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: AssertionResultStatus.SKIPPED,\n message: \"No LLM trace available to check cost\",\n };\n }\n\n const actualCostUsd = input.llmTrace.summary.totalCostUsd;\n const formattedActual = actualCostUsd.toFixed(6);\n const formattedMax = maxCostUsd.toFixed(6);\n const passed = Number(formattedActual) <= Number(formattedMax);\n\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Cost $${formattedActual} is within limit of $${formattedMax}`\n : `Cost $${formattedActual} exceeds limit of $${formattedMax}`,\n expected: `<= $${formattedMax}`,\n actual: `$${formattedActual}`,\n details: { actualCostUsd, maxCostUsd },\n };\n }\n}\n", "import { tool, type Tool } from \"ai\";\nimport { z } from \"zod\";\nimport { readFile } from \"fs/promises\";\nimport path from \"path\";\n\nexport type ReadFileResult =\n | { path: string; content: string }\n | { error: string };\n\nexport function createReadFileTool(\n workDir: string,\n): Tool<{ path: string }, ReadFileResult> {\n const resolvedWorkDir = path.resolve(workDir);\n return tool({\n description:\n \"Read the content of any file in the workspace by its relative path. Use this to inspect file contents when evaluating code changes.\",\n inputSchema: z.object({\n path: z.string().describe(\"Relative file path in the workspace\"),\n }),\n execute: async ({\n path: filePath,\n }: {\n path: string;\n }): Promise<{ path: string; content: string } | { error: string }> => {\n const resolved = path.resolve(resolvedWorkDir, filePath);\n if (!resolved.startsWith(resolvedWorkDir + path.sep)) {\n return { error: `Access denied: path escapes workspace directory` };\n }\n try {\n const content = await readFile(resolved, \"utf-8\");\n return { path: filePath, content };\n } catch {\n return { error: `File not found: ${filePath}` };\n }\n },\n });\n}\n", "import type {\n LlmJudgeAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { createReadFileTool } from \"../tools/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { createAnthropic } from \"@ai-sdk/anthropic\";\nimport {\n generateText,\n Output,\n APICallError,\n NoObjectGeneratedError,\n stepCountIs,\n type LanguageModel,\n} from \"ai\";\nimport { z } from \"zod\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nexport interface JudgeResult {\n text: string;\n score: number;\n scoreReasoning: string;\n}\n\nexport const JudgeResultSchema = z.object({\n text: z.string().describe(\"A brief textual verdict of the test result\"),\n score: z\n .number()\n .min(0)\n .max(100)\n .describe(\n \"A number from 0 to 100 reflecting how well the answer meets the acceptance criteria\",\n ),\n scoreReasoning: z\n .string()\n .describe(\"A concise explanation justifying the assigned score\"),\n});\n\nconst MAX_JUDGE_STEPS = 20;\n\n/**\n * Format LLM trace as readable text for the judge (step number, type, tool name/args, output preview).\n */\nexport function formatTraceForJudge(llmTrace: LLMTrace | undefined): string {\n if (!llmTrace?.steps?.length) {\n return \"No trace available.\";\n }\n const lines: string[] = [];\n for (const step of llmTrace.steps) {\n const parts: string[] = [\n `Step ${step.stepNumber}`,\n `type: ${step.type}`,\n `duration: ${step.durationMs}ms`,\n ];\n if (step.toolName) {\n parts.push(`tool: ${step.toolName}`);\n if (step.toolArguments) {\n parts.push(`args: ${step.toolArguments}`);\n }\n }\n if (step.outputPreview) {\n parts.push(`output: ${step.outputPreview}`);\n }\n if (step.error) {\n parts.push(`error: ${step.error}`);\n }\n lines.push(parts.join(\", \"));\n }\n return lines.join(\"\\n\");\n}\n\n/**\n * Context object for placeholder replacement.\n */\nexport interface PlaceholderContext {\n output: string;\n cwd: string;\n changedFiles: string;\n modifiedFiles: string;\n newFiles: string;\n trace: string;\n}\n\nexport function replacePlaceholders(\n str: string,\n ctx: PlaceholderContext,\n): string {\n return str\n .replace(/\\{\\{output\\}\\}/g, ctx.output)\n .replace(/\\{\\{cwd\\}\\}/g, ctx.cwd)\n .replace(/\\{\\{changedFiles\\}\\}/g, ctx.changedFiles)\n .replace(/\\{\\{modifiedFiles\\}\\}/g, ctx.modifiedFiles)\n .replace(/\\{\\{newFiles\\}\\}/g, ctx.newFiles)\n .replace(/\\{\\{trace\\}\\}/g, ctx.trace);\n}\n\n/**\n * Strip markdown code fences (e.g. ```json ... ```) from LLM output,\n * returning only the inner content for JSON parsing.\n */\nexport function stripMarkdownCodeBlock(text: string): string {\n const trimmed = text.trim();\n const match = trimmed.match(/^```(?:\\w+)?\\s*\\n?([\\s\\S]*?)\\n?\\s*```$/);\n return match ? match[1].trim() : trimmed;\n}\n\nexport function validateJudgeResult(parsed: unknown): JudgeResult {\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(\"Judge result is not an object\");\n }\n const obj = parsed as Record<string, unknown>;\n if (typeof obj.text !== \"string\") {\n throw new Error(\"Judge result does not contain a valid text field\");\n }\n if (typeof obj.score !== \"number\") {\n throw new Error(\"Judge result does not contain a valid score field\");\n }\n if (obj.score < 0 || obj.score > 100) {\n throw new Error(\"Judge result score is not between 0 and 100\");\n }\n if (typeof obj.scoreReasoning !== \"string\") {\n throw new Error(\n \"Judge result does not contain a valid scoreReasoning field\",\n );\n }\n return {\n text: obj.text,\n score: obj.score,\n scoreReasoning: obj.scoreReasoning,\n };\n}\n\nconst DEFAULT_MIN_SCORE = 70;\n\n/** Default judge context (run data + placeholders); used when assertion.systemPrompt is empty. */\nconst DEFAULT_JUDGE_CONTEXT = `You are judging a scenario run. The ACTUAL run data is provided below \u2014 use it to verify facts:\n\n- {{output}}: the agent's final output\n- {{cwd}}: working directory\n- {{changedFiles}}: list of all files changed (or \"No files were changed\")\n- {{modifiedFiles}}: list of existing files that were modified (or \"No files were modified\")\n- {{newFiles}}: list of new files that were created (or \"No new files were created\")\n- {{trace}}: step-by-step trace (tool calls, completions) so you can check e.g. which tools were called and how many times\n\nYou have access to a read_file tool that lets you read the content of ANY file in the workspace (not just changed files). Use it to inspect file contents whenever you need to verify claims about code, check imports, review implementations, or validate that specific code patterns exist. Always read files before making judgments about their content \u2014 do not guess.\n\nCRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above and the actual file contents (use the read_file tool). If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;\n\n/**\n * Evaluator for \"llm_judge\" assertion: an LLM judges the scenario output\n * (prompt with {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}) and returns a score 0-100.\n * Passes if score >= minScore.\n */\nexport class LlmJudgeEvaluator extends AssertionEvaluator<LlmJudgeAssertion> {\n readonly type = \"llm_judge\" as const;\n\n async evaluate(\n assertion: LlmJudgeAssertion,\n input: EvaluationInput,\n context?: AssertionContext,\n ): Promise<AssertionResult> {\n const assertionId = randomUUID();\n const workDir = context?.workDir ?? \"\";\n\n const output = input.outputText ?? \"\";\n const fileDiffs = input.fileDiffs ?? [];\n\n const changedPaths = fileDiffs.map((d) => d.path);\n const modifiedPaths = fileDiffs\n .filter((d) => d.status === \"modified\")\n .map((d) => d.path);\n const newPaths = fileDiffs\n .filter((d) => d.status === \"new\")\n .map((d) => d.path);\n\n const changedFiles =\n changedPaths.length > 0\n ? changedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were changed\";\n const modifiedFiles =\n modifiedPaths.length > 0\n ? modifiedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were modified\";\n const newFiles =\n newPaths.length > 0\n ? newPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No new files were created\";\n\n const trace = formatTraceForJudge(input.llmTrace);\n const ctx: PlaceholderContext = {\n output,\n cwd: workDir,\n changedFiles,\n modifiedFiles,\n newFiles,\n trace,\n };\n const replace = (s: string) => replacePlaceholders(s, ctx);\n\n const finalPrompt = replace(assertion.prompt);\n\n const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;\n const maxOutputTokens = assertion.maxTokens ?? 1024;\n const temperature = assertion.temperature ?? 0;\n const modelId = assertion.model ?? context?.defaultJudgeModel;\n\n const model = this.resolveModel(context, modelId);\n if (!model) {\n const reason =\n !modelId && !context?.model\n ? \"No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel/model in context)\"\n : \"No llmConfig for llm_judge assertion (AI gateway required)\";\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: reason,\n expected: String(minScore),\n };\n }\n\n const systemPrompt =\n assertion.systemPrompt != null && assertion.systemPrompt !== \"\"\n ? replace(assertion.systemPrompt)\n : replace(DEFAULT_JUDGE_CONTEXT);\n\n try {\n const judgeResult = await this.callGenerateText(\n model,\n finalPrompt,\n systemPrompt,\n maxOutputTokens,\n temperature,\n workDir || undefined,\n );\n\n const passed = judgeResult.score >= minScore;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}`\n : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,\n expected: String(minScore),\n actual: String(judgeResult.score),\n details: {\n score: judgeResult.score,\n scoreReasoning: judgeResult.scoreReasoning,\n text: judgeResult.text,\n },\n };\n } catch (err) {\n if (NoObjectGeneratedError.isInstance(err)) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: \"LLM judge failed to produce valid structured output\",\n expected: String(minScore),\n details: {\n rawText:\n typeof err.text === \"string\" ? err.text.slice(0, 500) : undefined,\n },\n };\n }\n\n const message = err instanceof Error ? err.message : String(err);\n const details: Record<string, unknown> = {\n error: message,\n model: modelId,\n };\n\n if (APICallError.isInstance(err)) {\n details.statusCode = err.statusCode;\n details.url = err.url;\n details.isRetryable = err.isRetryable;\n details.responseBody =\n typeof err.responseBody === \"string\"\n ? err.responseBody.slice(0, 2000)\n : err.responseBody;\n }\n\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: `LLM judge call failed: ${message}`,\n expected: String(minScore),\n details,\n };\n }\n }\n\n /**\n * Resolve the LanguageModel to use: context.model (injected mock/override)\n * takes precedence, otherwise create from llmConfig + modelId.\n */\n private resolveModel(\n context: AssertionContext | undefined,\n modelId: string | undefined,\n ): LanguageModel | null {\n if (context?.model) {\n return context.model;\n }\n if (!modelId || !context?.llmConfig) {\n return null;\n }\n const anthropic = createAnthropic({\n baseURL: context.llmConfig.baseUrl,\n apiKey: \"dummy\",\n headers: context.llmConfig.headers,\n });\n return anthropic(modelId);\n }\n\n private async callGenerateText(\n model: LanguageModel,\n prompt: string,\n system: string,\n maxOutputTokens: number,\n temperature: number,\n workDir?: string,\n ): Promise<JudgeResult> {\n const baseOptions = {\n model,\n prompt,\n system,\n maxOutputTokens,\n temperature,\n output: Output.object({ schema: JudgeResultSchema }),\n stopWhen: stepCountIs(MAX_JUDGE_STEPS),\n } as const;\n\n const { output } = workDir\n ? await generateText({\n ...baseOptions,\n tools: { read_file: createReadFileTool(workDir) },\n })\n : await generateText(baseOptions);\n\n return output;\n }\n}\n"],
5
- "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,iBAAkB;AAQX,IAAM,gCAAgC,aAAE,OAAO;AAAA,EACpD,MAAM,aAAE,QAAQ,kBAAkB;AAAA;AAAA,EAElC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,IAAI,CAAC;AACvC,CAAC;AAUM,IAAM,qCAAqC,aAAE,OAAO;AAAA,EACzD,MAAM,aAAE,QAAQ,wBAAwB;AAAA;AAAA,EAExC,UAAU,aAAE,OAAO,EAAE,IAAI,CAAC;AAAA;AAAA,EAE1B,gBAAgB,aAAE,OAAO,EAAE,IAAI,CAAC;AAClC,CAAC;AAUM,IAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,cAAc;AAAA;AAAA,EAE9B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE7B,kBAAkB,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC9C,CAAC;AAQM,IAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,MAAM;AAAA;AAAA,EAEtB,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AASM,IAAM,0BAA0B,aAAE,OAAO;AAAA,EAC9C,MAAM,aAAE,QAAQ,WAAW;AAAA;AAAA,EAE3B,QAAQ,aAAE,OAAO;AAAA;AAAA,EAEjB,cAAc,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAElC,UAAU,aAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,EAAE,SAAS;AAAA;AAAA,EAEpD,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,WAAW,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACrC,aAAa,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AACjD,CAAC;AAQM,IAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,YAAY;AAAA;AAAA,EAE5B,eAAe,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC3C,CAAC;AASM,IAAM,kBAAkB,aAAE,MAAM;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;;;ACzGD,IAAAA,cAAkB;AAKX,IAAM,mBAAmB,cAAE,OAAO;AAAA,EACvC,QAAQ,cAAE,OAAO;AAAA,EACjB,YAAY,cAAE,OAAO;AAAA,EACrB,OAAO,cAAE,OAAO;AAClB,CAAC;AAOM,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,gBAAa;AACb,EAAAA,aAAA,cAAW;AACX,EAAAA,aAAA,iBAAc;AACd,EAAAA,aAAA,cAAW;AAJD,SAAAA;AAAA,GAAA;AAUL,IAAM,qBAAqB,cAAE,OAAO;AAAA,EACzC,IAAI,cAAE,OAAO;AAAA,EACb,YAAY,cAAE,OAAO;AAAA,EACrB,MAAM,cAAE,KAAK,WAAW;AAAA,EACxB,OAAO,cAAE,OAAO;AAAA,EAChB,UAAU,cAAE,OAAO;AAAA,EACnB,WAAW,cAAE,OAAO;AAAA,EACpB,YAAY,cAAE,OAAO;AAAA,EACrB,YAAY;AAAA,EACZ,SAAS,cAAE,OAAO;AAAA,EAClB,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,eAAe,cAAE,OAAO,EAAE,SAAS;AAAA,EACnC,cAAc,cAAE,OAAO,EAAE,SAAS;AAAA,EAClC,eAAe,cAAE,OAAO,EAAE,SAAS;AAAA,EACnC,SAAS,cAAE,QAAQ;AAAA,EACnB,OAAO,cAAE,OAAO,EAAE,SAAS;AAC7B,CAAC;AAOM,IAAM,0BAA0B,cAAE,OAAO;AAAA,EAC9C,OAAO,cAAE,OAAO;AAAA,EAChB,YAAY,cAAE,OAAO;AAAA,EACrB,QAAQ,cAAE,OAAO;AAAA,EACjB,SAAS,cAAE,OAAO;AACpB,CAAC;AAOM,IAAM,wBAAwB,cAAE,OAAO;AAAA,EAC5C,YAAY,cAAE,OAAO;AAAA,EACrB,iBAAiB,cAAE,OAAO;AAAA,EAC1B,aAAa;AAAA,EACb,cAAc,cAAE,OAAO;AAAA,EACvB,mBAAmB,cAAE,OAAO,cAAE,OAAO,GAAG,uBAAuB,EAAE,SAAS;AAAA,EAC1E,gBAAgB,cAAE,OAAO,cAAE,OAAO,GAAG,uBAAuB;AAAA,EAC5D,YAAY,cAAE,MAAM,cAAE,OAAO,CAAC;AAChC,CAAC;AAOM,IAAM,iBAAiB,cAAE,OAAO;AAAA,EACrC,IAAI,cAAE,OAAO;AAAA,EACb,OAAO,cAAE,MAAM,kBAAkB;AAAA,EACjC,SAAS;AACX,CAAC;;;AChFD,IAAAC,cAAkB;AAMX,IAAK,wBAAL,kBAAKC,2BAAL;AACL,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,aAAU;AACV,EAAAA,uBAAA,WAAQ;AAJE,SAAAA;AAAA,GAAA;AAUL,IAAM,wBAAwB,cAAE,OAAO;AAAA,EAC5C,IAAI,cAAE,OAAO;AAAA,EACb,aAAa,cAAE,OAAO;AAAA,EACtB,eAAe,cAAE,OAAO;AAAA,EACxB,eAAe,cAAE,OAAO;AAAA,EACxB,QAAQ,cAAE,KAAK,qBAAqB;AAAA,EACpC,SAAS,cAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,QAAQ,cAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,cAAE,OAAO,cAAE,OAAO,GAAG,cAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACpD,eAAe,cAAE,MAAM,kBAAkB,EAAE,SAAS;AACtD,CAAC;;;AC1BD,IAAAC,iBAA2B;;;ACK3B,oBAA2B;;;AC6BpB,IAAe,qBAAf,MAAmE;AAQ1E;;;AD9BA,SAAS,wBAAwB,UAAiC;AAChE,QAAM,eAAe,oBAAI,IAAY;AACrC,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AACA,QAAI;AACJ,QAAI;AACF,aAAO,KAAK,gBACP,KAAK,MAAM,KAAK,aAAa,IAC9B;AAAA,IACN,QAAQ;AACN;AAAA,IACF;AACA,QAAI,SAAS,QAAQ,OAAO,SAAS,UAAU;AAC7C,YAAM,MAAM;AACZ,UAAI,OAAO,IAAI,UAAU,UAAU;AACjC,qBAAa,IAAI,IAAI,KAAK;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AASO,IAAM,0BAAN,cAAsC,mBAA4C;AAAA,EAC9E,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,kBAAc,0BAAW;AAC/B,UAAM,iBAAiB,UAAU;AACjC,UAAM,gBAAgB,eAAe,KAAK,IAAI;AAE9C,UAAM,WAAiC,MAAM;AAC7C,QAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,aAAO;AAAA,QACL,QAAI,0BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,wBAAwB,QAAQ;AACrD,UAAM,gBAAgB,eAAe;AAAA,MACnC,CAAC,SAAS,CAAC,aAAa,IAAI,IAAI;AAAA,IAClC;AAEA,QAAI,cAAc,WAAW,GAAG;AAC9B,YAAMC,WACJ,eAAe,WAAW,IACtB,UAAU,eAAe,CAAC,CAAC,iBAC3B,2BAA2B,aAAa;AAC9C,aAAO;AAAA,QACL,QAAI,0BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAAA;AAAA,QACA,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,cAAc,KAAK,IAAI;AAC5C,UAAM,UACJ,eAAe,WAAW,IACtB,UAAU,cAAc,CAAC,CAAC,qBAC1B,mBAAmB,YAAY,sBAAsB,aAAa;AACxE,WAAO;AAAA,MACL,QAAI,0BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ;AAAA,EACF;AACF;;;AErGA,IAAAC,iBAA2B;AAI3B,IAAM,iBAAiB;AACvB,IAAM,iBAAiB;AAEvB,IAAM,cAAc,CAAC;AAAA,EACnB;AAAA,EACA;AACF,MAIE,OAAO,QAAQ,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,GAAG,MAAM;AAC7C,QAAM,YAAY,OAAO,GAAG;AAE5B,MAAI,cAAc,QAAQ,cAAc,OAAW,QAAO;AAC1D,QAAM,YACJ,OAAO,cAAc,WAAW,YAAY,KAAK,UAAU,SAAS;AACtE,SAAO,UAAU,SAAS,OAAO,GAAG,CAAC;AACvC,CAAC;AAOI,IAAM,+BAAN,cAA2C,mBAAiD;AAAA,EACxF,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,kBAAc,2BAAW;AAC/B,UAAM,EAAE,UAAU,gBAAgB,kBAAkB,IAAI;AAExD,UAAM,cAAc,CAClB,QACA,SACAC,WACA,YACqB;AAAA,MACrB,QAAI,2BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAAA;AAAA,MACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IAC3C;AAGA,QAAI;AACJ,QAAI;AACF,iBAAW,KAAK,MAAM,iBAAiB;AAAA,IACzC,QAAQ;AACN,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ;AAAA,QACjB,GAAG,QAAQ;AAAA,QACX;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,GAAG,QAAQ,IAAI,OAAO,QAAQ,QAAQ,EACzD,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,GAAG,EAC7B,KAAK,IAAI,CAAC;AAEb,UAAM,QAAQ,MAAM,UAAU,SAAS,CAAC;AAExC,UAAM,YAAY,MACf,OAAO,CAAC,MAAM,EAAE,aAAa,YAAY,EAAE,kBAAkB,MAAS,EACtE,IAAI,CAAC,MAAM;AACV,UAAI;AACF,eAAO,KAAK,MAAM,EAAE,aAAc;AAAA,MACpC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF,CAAC,EACA,OAAO,CAAC,SAA0C,SAAS,IAAI;AAElE,QAAI,UAAU,KAAK,CAAC,WAAW,YAAY,EAAE,QAAQ,SAAS,CAAC,CAAC,GAAG;AACjE,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ,qCAAqC,iBAAiB;AAAA,QACvE;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA;AAAA,MAEL,SAAS,QAAQ,2CAA2C,iBAAiB;AAAA,MAC7E;AAAA,MACA,UAAU,SAAS,IACf,SAAS,QAAQ,mCACjB;AAAA,IACN;AAAA,EACF;AACF;;;ACvGA,IAAAC,iBAA2B;AAC3B,2BAAyB;AAIzB,IAAM,kBAAkB;AACxB,IAAM,oBAAoB;AAMnB,IAAM,uBAAN,cAAmC,mBAAyC;AAAA,EACxE,OAAO;AAAA,EAEhB,SACE,WACA,QACA,SACiB;AACjB,UAAM,kBAAc,2BAAW;AAC/B,UAAM,UAAU,SAAS;AACzB,UAAM,UAAU,UAAU,WAAW;AACrC,UAAM,mBAAmB,UAAU,oBAAoB;AAEvD,QAAI,CAAC,SAAS;AACZ,aAAO,KAAK,aAAa,aAAa;AAAA,QACpC;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,gBAAgB;AAAA,MACnC,CAAC;AAAA,IACH;AAEA,QAAI,WAA0B;AAC9B,QAAI,eAA8B;AAClC,QAAI;AACJ,QAAI;AAEJ,YAAQ,IAAI,2BAA2B,OAAO,SAAS,OAAO,EAAE;AAEhE,QAAI;AACF,yCAAS,SAAS;AAAA,QAChB,KAAK;AAAA,QACL,UAAU;AAAA,QACV,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,MAClC,CAAC;AACD,iBAAW;AAAA,IACb,SAAS,KAAK;AACZ,YAAM,QAAQ;AAMd,iBACE,OAAO,MAAM,WAAW,WACpB,MAAM,SACN,OAAO,MAAM,SAAS,WACpB,MAAM,OACN;AACR,qBAAe,MAAM;AACrB,eAAS,KAAK,eAAe,MAAM,MAAM;AACzC,eAAS,KAAK,eAAe,MAAM,MAAM;AAAA,IAC3C;AAEA,UAAM,SAAS,aAAa,QAAQ,aAAa;AAEjD,UAAM,UAAmC,EAAE,SAAS,QAAQ;AAC5D,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AACA,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AAEA,WAAO,KAAK,aAAa,aAAa;AAAA,MACpC,QAAQ;AAAA,MAGR,SAAS,KAAK,cAAc,UAAU,kBAAkB,YAAY;AAAA,MACpE,UAAU,OAAO,gBAAgB;AAAA,MACjC,QAAQ,aAAa,OAAO,OAAO,QAAQ,IAAI;AAAA,MAC/C;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,aACN,aACA,QACiB;AACjB,WAAO;AAAA,MACL,QAAI,2BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEQ,eACN,OACoB;AACpB,QAAI,UAAU,UAAa,UAAU,KAAM,QAAO;AAClD,QAAI,OAAO,UAAU,SAAU,QAAO;AACtC,WAAO,MAAM,SAAS,OAAO;AAAA,EAC/B;AAAA,EAEQ,cACN,UACA,kBACA,cACQ;AACR,QAAI,aAAa,MAAM;AACrB,aAAO,iBAAiB,YAAY;AAAA,IACtC;AACA,QAAI,aAAa,kBAAkB;AACjC,aAAO,2BAA2B,QAAQ;AAAA,IAC5C;AACA,WAAO,qBAAqB,QAAQ,cAAc,gBAAgB;AAAA,EACpE;AACF;;;ACzHA,IAAAC,iBAA2B;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,gBAAgB,UAAU;AAEhC,QAAI,MAAM,cAAc,MAAM;AAC5B,aAAO,KAAK,aAAa;AAAA,QACvB;AAAA,QACA,SAAS;AAAA,QACT,UAAU,MAAM,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,MAAM,cAAc;AAEnC,WAAO,KAAK,aAAa;AAAA,MACvB,QAAQ;AAAA,MAGR,SAAS,SACL,gBAAgB,MAAM,UAAU,cAAc,aAAa,QAC3D,wBAAwB,MAAM,UAAU,QAAQ,aAAa;AAAA,MACjE,UAAU,MAAM,aAAa;AAAA,MAC7B,QAAQ,GAAG,MAAM,UAAU;AAAA,IAC7B,CAAC;AAAA,EACH;AAAA,EAEQ,aAAa,QAAmD;AACtE,WAAO;AAAA,MACL,QAAI,2BAAW;AAAA,MACf,iBAAa,2BAAW;AAAA,MACxB,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AACF;;;AC7CA,IAAAC,iBAA2B;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,kBAAc,2BAAW;AAC/B,UAAM,SAAK,2BAAW;AACtB,UAAM,gBAAgB;AACtB,UAAM,gBAAgB;AACtB,UAAM,aAAa,UAAU;AAE7B,QAAI,CAAC,MAAM,UAAU;AACnB,aAAO;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,SAAS,QAAQ;AAC7C,UAAM,kBAAkB,cAAc,QAAQ,CAAC;AAC/C,UAAM,eAAe,WAAW,QAAQ,CAAC;AACzC,UAAM,SAAS,OAAO,eAAe,KAAK,OAAO,YAAY;AAE7D,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,MAGR,SAAS,SACL,SAAS,eAAe,wBAAwB,YAAY,KAC5D,SAAS,eAAe,sBAAsB,YAAY;AAAA,MAC9D,UAAU,OAAO,YAAY;AAAA,MAC7B,QAAQ,IAAI,eAAe;AAAA,MAC3B,SAAS,EAAE,eAAe,WAAW;AAAA,IACvC;AAAA,EACF;AACF;;;ACvDA,gBAAgC;AAChC,IAAAC,cAAkB;AAClB,sBAAyB;AACzB,kBAAiB;AAMV,SAAS,mBACd,SACwC;AACxC,QAAM,kBAAkB,YAAAC,QAAK,QAAQ,OAAO;AAC5C,aAAO,gBAAK;AAAA,IACV,aACE;AAAA,IACF,aAAa,cAAE,OAAO;AAAA,MACpB,MAAM,cAAE,OAAO,EAAE,SAAS,qCAAqC;AAAA,IACjE,CAAC;AAAA,IACD,SAAS,OAAO;AAAA,MACd,MAAM;AAAA,IACR,MAEsE;AACpE,YAAM,WAAW,YAAAA,QAAK,QAAQ,iBAAiB,QAAQ;AACvD,UAAI,CAAC,SAAS,WAAW,kBAAkB,YAAAA,QAAK,GAAG,GAAG;AACpD,eAAO,EAAE,OAAO,kDAAkD;AAAA,MACpE;AACA,UAAI;AACF,cAAM,UAAU,UAAM,0BAAS,UAAU,OAAO;AAChD,eAAO,EAAE,MAAM,UAAU,QAAQ;AAAA,MACnC,QAAQ;AACN,eAAO,EAAE,OAAO,mBAAmB,QAAQ,GAAG;AAAA,MAChD;AAAA,IACF;AAAA,EACF,CAAC;AACH;;;AC5BA,IAAAC,iBAA2B;AAC3B,uBAAgC;AAChC,IAAAC,aAOO;AACP,IAAAC,cAAkB;AAUX,IAAM,oBAAoB,cAAE,OAAO;AAAA,EACxC,MAAM,cAAE,OAAO,EAAE,SAAS,4CAA4C;AAAA,EACtE,OAAO,cACJ,OAAO,EACP,IAAI,CAAC,EACL,IAAI,GAAG,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,gBAAgB,cACb,OAAO,EACP,SAAS,qDAAqD;AACnE,CAAC;AAED,IAAM,kBAAkB;AAKjB,SAAS,oBAAoB,UAAwC;AAC1E,MAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,WAAO;AAAA,EACT;AACA,QAAM,QAAkB,CAAC;AACzB,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,QAAkB;AAAA,MACtB,QAAQ,KAAK,UAAU;AAAA,MACvB,SAAS,KAAK,IAAI;AAAA,MAClB,aAAa,KAAK,UAAU;AAAA,IAC9B;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,KAAK,QAAQ,EAAE;AACnC,UAAI,KAAK,eAAe;AACtB,cAAM,KAAK,SAAS,KAAK,aAAa,EAAE;AAAA,MAC1C;AAAA,IACF;AACA,QAAI,KAAK,eAAe;AACtB,YAAM,KAAK,WAAW,KAAK,aAAa,EAAE;AAAA,IAC5C;AACA,QAAI,KAAK,OAAO;AACd,YAAM,KAAK,UAAU,KAAK,KAAK,EAAE;AAAA,IACnC;AACA,UAAM,KAAK,MAAM,KAAK,IAAI,CAAC;AAAA,EAC7B;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcO,SAAS,oBACd,KACA,KACQ;AACR,SAAO,IACJ,QAAQ,mBAAmB,IAAI,MAAM,EACrC,QAAQ,gBAAgB,IAAI,GAAG,EAC/B,QAAQ,yBAAyB,IAAI,YAAY,EACjD,QAAQ,0BAA0B,IAAI,aAAa,EACnD,QAAQ,qBAAqB,IAAI,QAAQ,EACzC,QAAQ,kBAAkB,IAAI,KAAK;AACxC;AAMO,SAAS,uBAAuB,MAAsB;AAC3D,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,QAAQ,QAAQ,MAAM,wCAAwC;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;AAEO,SAAS,oBAAoB,QAA8B;AAChE,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,SAAS,UAAU;AAChC,UAAM,IAAI,MAAM,kDAAkD;AAAA,EACpE;AACA,MAAI,OAAO,IAAI,UAAU,UAAU;AACjC,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACrE;AACA,MAAI,IAAI,QAAQ,KAAK,IAAI,QAAQ,KAAK;AACpC,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,MAAI,OAAO,IAAI,mBAAmB,UAAU;AAC1C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AAAA,IACL,MAAM,IAAI;AAAA,IACV,OAAO,IAAI;AAAA,IACX,gBAAgB,IAAI;AAAA,EACtB;AACF;AAEA,IAAM,oBAAoB;AAG1B,IAAM,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBvB,IAAM,oBAAN,cAAgC,mBAAsC;AAAA,EAClE,OAAO;AAAA,EAEhB,MAAM,SACJ,WACA,OACA,SAC0B;AAC1B,UAAM,kBAAc,2BAAW;AAC/B,UAAM,UAAU,SAAS,WAAW;AAEpC,UAAM,SAAS,MAAM,cAAc;AACnC,UAAM,YAAY,MAAM,aAAa,CAAC;AAEtC,UAAM,eAAe,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI;AAChD,UAAM,gBAAgB,UACnB,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EACrC,IAAI,CAAC,MAAM,EAAE,IAAI;AACpB,UAAM,WAAW,UACd,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,IAAI;AAEpB,UAAM,eACJ,aAAa,SAAS,IAClB,aAAa,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACnD;AACN,UAAM,gBACJ,cAAc,SAAS,IACnB,cAAc,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACpD;AACN,UAAM,WACJ,SAAS,SAAS,IACd,SAAS,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC/C;AAEN,UAAM,QAAQ,oBAAoB,MAAM,QAAQ;AAChD,UAAM,MAA0B;AAAA,MAC9B;AAAA,MACA,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,UAAM,UAAU,CAAC,MAAc,oBAAoB,GAAG,GAAG;AAEzD,UAAM,cAAc,QAAQ,UAAU,MAAM;AAE5C,UAAM,WAAW,UAAU,YAAY;AACvC,UAAM,kBAAkB,UAAU,aAAa;AAC/C,UAAM,cAAc,UAAU,eAAe;AAC7C,UAAM,UAAU,UAAU,SAAS,SAAS;AAE5C,UAAM,QAAQ,KAAK,aAAa,SAAS,OAAO;AAChD,QAAI,CAAC,OAAO;AACV,YAAM,SACJ,CAAC,WAAW,CAAC,SAAS,QAClB,uHACA;AACN,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,QAAQ;AAAA,MAC3B;AAAA,IACF;AAEA,UAAM,eACJ,UAAU,gBAAgB,QAAQ,UAAU,iBAAiB,KACzD,QAAQ,UAAU,YAAY,IAC9B,QAAQ,qBAAqB;AAEnC,QAAI;AACF,YAAM,cAAc,MAAM,KAAK;AAAA,QAC7B;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,MACb;AAEA,YAAM,SAAS,YAAY,SAAS;AACpC,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf,QAAQ;AAAA,QAGR,SAAS,SACL,eAAe,YAAY,KAAK,OAAO,QAAQ,KAAK,YAAY,IAAI,KACpE,eAAe,YAAY,KAAK,MAAM,QAAQ,KAAK,YAAY,IAAI;AAAA,QACvE,UAAU,OAAO,QAAQ;AAAA,QACzB,QAAQ,OAAO,YAAY,KAAK;AAAA,QAChC,SAAS;AAAA,UACP,OAAO,YAAY;AAAA,UACnB,gBAAgB,YAAY;AAAA,UAC5B,MAAM,YAAY;AAAA,QACpB;AAAA,MACF;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,kCAAuB,WAAW,GAAG,GAAG;AAC1C,eAAO;AAAA,UACL,QAAI,2BAAW;AAAA,UACf;AAAA,UACA,eAAe;AAAA,UACf,eAAe;AAAA,UACf;AAAA,UACA,SAAS;AAAA,UACT,UAAU,OAAO,QAAQ;AAAA,UACzB,SAAS;AAAA,YACP,SACE,OAAO,IAAI,SAAS,WAAW,IAAI,KAAK,MAAM,GAAG,GAAG,IAAI;AAAA,UAC5D;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAM,UAAmC;AAAA,QACvC,OAAO;AAAA,QACP,OAAO;AAAA,MACT;AAEA,UAAI,wBAAa,WAAW,GAAG,GAAG;AAChC,gBAAQ,aAAa,IAAI;AACzB,gBAAQ,MAAM,IAAI;AAClB,gBAAQ,cAAc,IAAI;AAC1B,gBAAQ,eACN,OAAO,IAAI,iBAAiB,WACxB,IAAI,aAAa,MAAM,GAAG,GAAI,IAC9B,IAAI;AAAA,MACZ;AAEA,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS,0BAA0B,OAAO;AAAA,QAC1C,UAAU,OAAO,QAAQ;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,aACN,SACA,SACsB;AACtB,QAAI,SAAS,OAAO;AAClB,aAAO,QAAQ;AAAA,IACjB;AACA,QAAI,CAAC,WAAW,CAAC,SAAS,WAAW;AACnC,aAAO;AAAA,IACT;AACA,UAAM,gBAAY,kCAAgB;AAAA,MAChC,SAAS,QAAQ,UAAU;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS,QAAQ,UAAU;AAAA,IAC7B,CAAC;AACD,WAAO,UAAU,OAAO;AAAA,EAC1B;AAAA,EAEA,MAAc,iBACZ,OACA,QACA,QACA,iBACA,aACA,SACsB;AACtB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ,kBAAO,OAAO,EAAE,QAAQ,kBAAkB,CAAC;AAAA,MACnD,cAAU,wBAAY,eAAe;AAAA,IACvC;AAEA,UAAM,EAAE,OAAO,IAAI,UACf,UAAM,yBAAa;AAAA,MACjB,GAAG;AAAA,MACH,OAAO,EAAE,WAAW,mBAAmB,OAAO,EAAE;AAAA,IAClD,CAAC,IACD,UAAM,yBAAa,WAAW;AAElC,WAAO;AAAA,EACT;AACF;;;ARxVA,IAAM,oBAAoB,IAAI,kBAAkB;AAEhD,IAAM,aAAiD;AAAA,EACrD,kBAAkB,IAAI,wBAAwB;AAAA,EAC9C,wBAAwB,IAAI,6BAA6B;AAAA,EACzD,cAAc,IAAI,qBAAqB;AAAA,EACvC,YAAY,IAAI,cAAc;AAAA,EAC9B,MAAM,IAAI,cAAc;AAAA,EACxB,WAAW;AAAA;AAAA,EAEX,QAAQ;AACV;AAQO,SAAS,kBACd,MACA,WACM;AACN,aAAW,IAAI,IAAI;AACrB;AAQO,SAAS,aAAa,MAA8C;AACzE,SAAO,WAAW,IAAI;AACxB;AAUA,eAAsB,mBACpB,OACA,YACA,SAC4B;AAC5B,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,CAAC;AAAA,EACV;AACA,SAAO,QAAQ;AAAA,IACb,WAAW,IAAI,OAAO,cAAc;AAClC,YAAM,YAAY,WAAW,UAAU,IAAI;AAC3C,UAAI,CAAC,WAAW;AACd,eAAO;AAAA,UACL,QAAI,2BAAW;AAAA,UACf,iBAAa,2BAAW;AAAA,UACxB,eAAe,UAAU;AAAA,UACzB,eAAe;AAAA,UACf;AAAA,UACA,SAAS,+BAA+B,UAAU,IAAI;AAAA,UACtD,UAAU;AAAA,QACZ;AAAA,MACF;AACA,YAAM,UAAU,KAAK,IAAI;AACzB,YAAM,SAAS,MAAM,UAAU,SAAS,WAAW,OAAO,OAAO;AACjE,YAAM,aAAa,KAAK,IAAI,IAAI;AAChC,aAAO,EAAE,GAAG,QAAQ,UAAU,WAAW;AAAA,IAC3C,CAAC;AAAA,EACH;AACF;",
6
- "names": ["import_zod", "LLMStepType", "import_zod", "AssertionResultStatus", "import_crypto", "message", "import_crypto", "expected", "import_crypto", "import_crypto", "import_crypto", "import_zod", "path", "import_crypto", "import_ai", "import_zod"]
4
+ "sourcesContent": ["/**\n * @wix/eval-assertions\n *\n * Assertion framework for AI agent evaluations.\n * Supports skill invocation checks, build validation, and LLM-based judging.\n */\n\n// Types\nexport {\n // Assertion schemas and types\n AssertionSchema,\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n type Assertion,\n type SkillWasCalledAssertion,\n type ToolCalledWithParamAssertion,\n type BuildPassedAssertion,\n type TimeAssertion,\n type CostAssertion,\n type LlmJudgeAssertion,\n // Trace types\n LLMTraceSchema,\n LLMTraceStepSchema,\n LLMTraceSummarySchema,\n LLMBreakdownStatsSchema,\n TokenUsageSchema,\n LLMStepType,\n type LLMTrace,\n type LLMTraceStep,\n type LLMTraceSummary,\n type LLMBreakdownStats,\n type TokenUsage,\n // Result types\n AssertionResultSchema,\n AssertionResultStatus,\n type AssertionResult,\n // Input types\n type EvaluationInput,\n type FileDiff,\n} from \"./types/index.js\";\n\n// Evaluators\nexport {\n evaluateAssertions,\n registerEvaluator,\n getEvaluator,\n AssertionEvaluator,\n SkillWasCalledEvaluator,\n ToolCalledWithParamEvaluator,\n BuildPassedEvaluator,\n TimeEvaluator,\n CostEvaluator,\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type AssertionContext,\n type LlmConfig,\n type JudgeResult,\n} from \"./evaluators/index.js\";\n\n// Tools\nexport { createReadFileTool } from \"./tools/index.js\";\n", "import { z } from \"zod\";\n\n/**\n * Assertion: the agent must have invoked one or more skills during the run.\n * Checked by inspecting the LLM trace for \"Skill\" tool uses with the given skills.\n * When multiple skills are in one assertion, they are treated as a group (1 assertion).\n * Each skill in the group must have been called for the assertion to pass.\n */\nexport const SkillWasCalledAssertionSchema = z.object({\n type: z.literal(\"skill_was_called\"),\n /** Names of the skills that must have been called (matched against trace Skill tool args) */\n skillNames: z.array(z.string()).min(1),\n});\n\nexport type SkillWasCalledAssertion = z.infer<\n typeof SkillWasCalledAssertionSchema\n>;\n\n/**\n * Assertion: a specific tool must have been called with expected parameters.\n * Checked by inspecting the LLM trace for tool calls with matching name and arguments.\n */\nexport const ToolCalledWithParamAssertionSchema = z.object({\n type: z.literal(\"tool_called_with_param\"),\n /** Name of the tool that must have been called */\n toolName: z.string().min(1),\n /** JSON string of key-value pairs for expected parameters (substring match) */\n expectedParams: z.string().min(1),\n});\n\nexport type ToolCalledWithParamAssertion = z.infer<\n typeof ToolCalledWithParamAssertionSchema\n>;\n\n/**\n * Assertion: a build command must exit with the expected code (default 0).\n * Runs the command in the scenario working directory.\n */\nexport const BuildPassedAssertionSchema = z.object({\n type: z.literal(\"build_passed\"),\n /** Command to run (default: \"yarn build\") */\n command: z.string().optional(),\n /** Expected exit code (default: 0) */\n expectedExitCode: z.number().int().optional(),\n});\n\nexport type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;\n\n/**\n * Assertion: the scenario LLM execution cost must stay within a USD threshold.\n * Checked by reading llmTrace.summary.totalCostUsd.\n */\nexport const CostAssertionSchema = z.object({\n type: z.literal(\"cost\"),\n /** Maximum allowed cost in USD */\n maxCostUsd: z.number().positive(),\n});\n\nexport type CostAssertion = z.infer<typeof CostAssertionSchema>;\n\n/**\n * Assertion: an LLM judges the scenario output (score 0-100).\n * Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}}.\n * Passes if judge score >= minScore.\n */\nexport const LlmJudgeAssertionSchema = z.object({\n type: z.literal(\"llm_judge\"),\n /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */\n prompt: z.string(),\n /** Minimum score to pass (0-100, default 70) */\n minScore: z.number().int().min(0).max(100).optional(),\n /** Model for the judge (e.g. claude-3-5-haiku) */\n model: z.string().optional(),\n maxTokens: z.number().int().optional(),\n temperature: z.number().min(0).max(1).optional(),\n});\n\nexport type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;\n\n/**\n * Assertion: scenario must complete within a maximum duration.\n * Deterministic check against the scenario execution time.\n */\nexport const TimeAssertionSchema = z.object({\n type: z.literal(\"time_limit\"),\n /** Maximum allowed duration in milliseconds */\n maxDurationMs: z.number().int().positive(),\n});\n\nexport type TimeAssertion = z.infer<typeof TimeAssertionSchema>;\n\n/**\n * Union of all assertion types.\n * Each assertion has a type and type-specific data.\n * Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.\n */\nexport const AssertionSchema = z.union([\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n]);\n\nexport type Assertion = z.infer<typeof AssertionSchema>;\n", "import { z } from \"zod\";\n\n/**\n * Token usage schema.\n */\nexport const TokenUsageSchema = z.object({\n prompt: z.number(),\n completion: z.number(),\n total: z.number(),\n});\n\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * LLM step type enum.\n */\nexport enum LLMStepType {\n COMPLETION = \"completion\",\n TOOL_USE = \"tool_use\",\n TOOL_RESULT = \"tool_result\",\n THINKING = \"thinking\",\n}\n\n/**\n * LLM trace step schema.\n */\nexport const LLMTraceStepSchema = z.object({\n id: z.string(),\n stepNumber: z.number(),\n type: z.enum(LLMStepType),\n model: z.string(),\n provider: z.string(),\n startedAt: z.string(),\n durationMs: z.number(),\n tokenUsage: TokenUsageSchema,\n costUsd: z.number(),\n toolName: z.string().optional(),\n toolArguments: z.string().optional(),\n inputPreview: z.string().optional(),\n outputPreview: z.string().optional(),\n success: z.boolean(),\n error: z.string().optional(),\n});\n\nexport type LLMTraceStep = z.infer<typeof LLMTraceStepSchema>;\n\n/**\n * LLM breakdown stats schema.\n */\nexport const LLMBreakdownStatsSchema = z.object({\n count: z.number(),\n durationMs: z.number(),\n tokens: z.number(),\n costUsd: z.number(),\n});\n\nexport type LLMBreakdownStats = z.infer<typeof LLMBreakdownStatsSchema>;\n\n/**\n * LLM trace summary schema.\n */\nexport const LLMTraceSummarySchema = z.object({\n totalSteps: z.number(),\n totalDurationMs: z.number(),\n totalTokens: TokenUsageSchema,\n totalCostUsd: z.number(),\n stepTypeBreakdown: z.record(z.string(), LLMBreakdownStatsSchema).optional(),\n modelBreakdown: z.record(z.string(), LLMBreakdownStatsSchema),\n modelsUsed: z.array(z.string()),\n});\n\nexport type LLMTraceSummary = z.infer<typeof LLMTraceSummarySchema>;\n\n/**\n * LLM trace schema.\n */\nexport const LLMTraceSchema = z.object({\n id: z.string(),\n steps: z.array(LLMTraceStepSchema),\n summary: LLMTraceSummarySchema,\n});\n\nexport type LLMTrace = z.infer<typeof LLMTraceSchema>;\n", "import { z } from \"zod\";\nimport { LLMTraceStepSchema } from \"./trace.js\";\n\n/**\n * Assertion result status enum.\n */\nexport enum AssertionResultStatus {\n PASSED = \"passed\",\n FAILED = \"failed\",\n SKIPPED = \"skipped\",\n ERROR = \"error\",\n}\n\n/**\n * Assertion result schema.\n */\nexport const AssertionResultSchema = z.object({\n id: z.string(),\n assertionId: z.string(),\n assertionType: z.string(),\n assertionName: z.string(),\n status: z.enum(AssertionResultStatus),\n message: z.string().optional(),\n expected: z.string().optional(),\n actual: z.string().optional(),\n duration: z.number().optional(),\n details: z.record(z.string(), z.unknown()).optional(),\n llmTraceSteps: z.array(LLMTraceStepSchema).optional(),\n});\n\nexport type AssertionResult = z.infer<typeof AssertionResultSchema>;\n", "import type { Assertion, AssertionResult } from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nimport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nimport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nimport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nimport { TimeEvaluator } from \"./time-evaluator.js\";\nimport { CostEvaluator } from \"./cost-evaluator.js\";\nimport { LlmJudgeEvaluator } from \"./llm-judge-evaluator.js\";\nimport type { EvaluationInput } from \"../types/index.js\";\n\nconst llmJudgeEvaluator = new LlmJudgeEvaluator();\n\nconst evaluators: Record<string, AssertionEvaluator> = {\n skill_was_called: new SkillWasCalledEvaluator(),\n tool_called_with_param: new ToolCalledWithParamEvaluator(),\n build_passed: new BuildPassedEvaluator(),\n time_limit: new TimeEvaluator(),\n cost: new CostEvaluator(),\n llm_judge: llmJudgeEvaluator,\n // Custom assertions use the same LLM-based evaluation as llm_judge\n custom: llmJudgeEvaluator,\n};\n\n/**\n * Register a custom assertion evaluator.\n *\n * @param type - The assertion type identifier\n * @param evaluator - The evaluator instance\n */\nexport function registerEvaluator(\n type: string,\n evaluator: AssertionEvaluator,\n): void {\n evaluators[type] = evaluator;\n}\n\n/**\n * Get a registered evaluator by type.\n *\n * @param type - The assertion type identifier\n * @returns The evaluator or undefined if not found\n */\nexport function getEvaluator(type: string): AssertionEvaluator | undefined {\n return evaluators[type];\n}\n\n/**\n * Evaluate all assertions against the input.\n *\n * @param input - Evaluation input (includes outputText, llmTrace, fileDiffs)\n * @param assertions - List of assertions to evaluate\n * @param context - Optional context (e.g. workDir for build_passed, llmConfig for llm_judge)\n * @returns Array of assertion results; empty if no assertions\n */\nexport async function evaluateAssertions(\n input: EvaluationInput,\n assertions: Assertion[],\n context?: AssertionContext,\n): Promise<AssertionResult[]> {\n if (assertions.length === 0) {\n return [];\n }\n return Promise.all(\n assertions.map(async (assertion) => {\n const evaluator = evaluators[assertion.type];\n if (!evaluator) {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: assertion.type,\n assertionName: \"Unknown assertion\",\n status: AssertionResultStatus.ERROR,\n message: `Unsupported assertion type: ${assertion.type}`,\n duration: 0,\n };\n }\n const startMs = Date.now();\n const result = await evaluator.evaluate(assertion, input, context);\n const durationMs = Date.now() - startMs;\n return { ...result, duration: durationMs };\n }),\n );\n}\n\n// Re-export evaluator classes and types\nexport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nexport type { AssertionContext, LlmConfig } from \"./assertion-evaluator.js\";\nexport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nexport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nexport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nexport { TimeEvaluator } from \"./time-evaluator.js\";\nexport { CostEvaluator } from \"./cost-evaluator.js\";\nexport {\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type JudgeResult,\n} from \"./llm-judge-evaluator.js\";\n", "import type {\n SkillWasCalledAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Collect all skill names that were called in the LLM trace.\n */\nfunction collectCalledSkillNames(llmTrace: LLMTrace): Set<string> {\n const calledSkills = new Set<string>();\n for (const step of llmTrace.steps) {\n if (step.toolName !== \"Skill\") {\n continue;\n }\n let args: unknown;\n try {\n args = step.toolArguments\n ? (JSON.parse(step.toolArguments) as unknown)\n : undefined;\n } catch {\n continue;\n }\n if (args !== null && typeof args === \"object\") {\n const obj = args as Record<string, unknown>;\n if (typeof obj.skill === \"string\") {\n calledSkills.add(obj.skill);\n }\n }\n }\n return calledSkills;\n}\n\n/**\n * Evaluator for \"skill_was_called\" assertion: the LLM trace must contain steps\n * where the \"Skill\" tool was used with ALL expected skills (by name).\n *\n * Multiple skills in one assertion are treated as a group \u2014 all must be called\n * for the assertion to pass. To check skills independently, add separate assertions.\n */\nexport class SkillWasCalledEvaluator extends AssertionEvaluator<SkillWasCalledAssertion> {\n readonly type = \"skill_was_called\" as const;\n\n evaluate(\n assertion: SkillWasCalledAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- context not used for skill_was_called\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const expectedSkills = assertion.skillNames;\n const expectedLabel = expectedSkills.join(\", \");\n\n const llmTrace: LLMTrace | undefined = input.llmTrace;\n if (!llmTrace?.steps?.length) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message: \"No LLM trace steps to check for skill invocation\",\n expected: expectedLabel,\n };\n }\n\n const calledSkills = collectCalledSkillNames(llmTrace);\n const missingSkills = expectedSkills.filter(\n (name) => !calledSkills.has(name),\n );\n\n if (missingSkills.length === 0) {\n const message =\n expectedSkills.length === 1\n ? `Skill \"${expectedSkills[0]}\" was called`\n : `All skills were called: ${expectedLabel}`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.PASSED,\n message,\n expected: expectedLabel,\n };\n }\n\n const missingLabel = missingSkills.join(\", \");\n const message =\n expectedSkills.length === 1\n ? `Skill \"${missingSkills[0]}\" was not called`\n : `Missing skills: ${missingLabel} (expected all of: ${expectedLabel})`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message,\n expected: expectedLabel,\n };\n }\n}\n", "import type {\n Assertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport type { LanguageModel } from \"ai\";\n\n/**\n * Configuration for LLM calls (used by llm_judge assertion).\n */\nexport interface LlmConfig {\n /** Base URL for the AI API (e.g., 'https://api.anthropic.com') */\n baseUrl: string;\n /** Headers to include in API requests (e.g., API key) */\n headers: Record<string, string>;\n}\n\n/**\n * Optional context passed when evaluating assertions.\n */\nexport interface AssertionContext {\n /** Working directory for the scenario (used by build_passed) */\n workDir?: string;\n /** LLM configuration (used by llm_judge) */\n llmConfig?: LlmConfig;\n /** Default model for llm_judge when assertion.model is not set */\n defaultJudgeModel?: string;\n /** Optional model override \u2014 when provided, used instead of creating from llmConfig + modelId */\n model?: LanguageModel;\n}\n\n/**\n * Abstract base for assertion evaluators.\n * Each assertion type has a concrete class that implements evaluate().\n * evaluate() may return a Promise for async assertions (e.g. llm_judge).\n */\nexport abstract class AssertionEvaluator<T extends Assertion = Assertion> {\n abstract readonly type: T[\"type\"];\n\n abstract evaluate(\n assertion: T,\n input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult | Promise<AssertionResult>;\n}\n", "import type {\n ToolCalledWithParamAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst ASSERTION_TYPE = \"tool_called_with_param\" as const;\nconst ASSERTION_NAME = \"Tool called with param\";\n\nconst containsAll = ({\n actual,\n expected,\n}: {\n actual: Record<string, unknown>;\n expected: Record<string, unknown>;\n}): boolean =>\n Object.entries(expected).every(([key, val]) => {\n const actualVal = actual[key];\n // actual comes from LLM trace \u2014 can be null/undefined\n if (actualVal === null || actualVal === undefined) return false;\n const actualStr =\n typeof actualVal === \"string\" ? actualVal : JSON.stringify(actualVal);\n return actualStr.includes(String(val));\n });\n\n/**\n * Evaluator for \"tool_called_with_param\" assertion: the LLM trace must contain\n * a step where a specific tool was called with arguments where each expected\n * param value is a substring of the actual value.\n */\nexport class ToolCalledWithParamEvaluator extends AssertionEvaluator<ToolCalledWithParamAssertion> {\n readonly type = ASSERTION_TYPE;\n\n evaluate(\n assertion: ToolCalledWithParamAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- required by base class\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const { toolName, expectedParams: expectedParamsStr } = assertion;\n\n const buildResult = (\n status: AssertionResultStatus,\n message: string,\n expected: string,\n actual?: string,\n ): AssertionResult => ({\n id: randomUUID(),\n assertionId,\n assertionType: ASSERTION_TYPE,\n assertionName: ASSERTION_NAME,\n status,\n message,\n expected,\n ...(actual !== undefined ? { actual } : {}),\n });\n\n // expectedParams is validated upstream, but guard here to avoid runtime throws.\n let expected: Record<string, unknown>;\n try {\n expected = JSON.parse(expectedParamsStr) as Record<string, unknown>;\n } catch {\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" assertion has invalid expected params JSON`,\n `${toolName}(invalid expected params)`,\n \"Invalid expected params JSON\",\n );\n }\n\n const expectedLabel = `${toolName}(${Object.entries(expected)\n .map(([k, v]) => `${k}=\"${v}\"`)\n .join(\", \")})`;\n\n const steps = input.llmTrace?.steps ?? [];\n // toolArguments is always JSON.stringify(args) from the trace builder, or undefined\n const toolCalls = steps\n .filter((s) => s.toolName === toolName && s.toolArguments !== undefined)\n .map((s) => {\n try {\n return JSON.parse(s.toolArguments!) as Record<string, unknown>;\n } catch {\n return null;\n }\n })\n .filter((call): call is Record<string, unknown> => call !== null);\n\n if (toolCalls.some((actual) => containsAll({ actual, expected }))) {\n return buildResult(\n AssertionResultStatus.PASSED,\n `Tool \"${toolName}\" was called with params matching ${expectedParamsStr}`,\n expectedLabel,\n );\n }\n\n const MAX_SHOWN = 5;\n const formatCall = (call: Record<string, unknown>) =>\n `${toolName}(${Object.entries(call)\n .map(([k, v]) => `${k}=${JSON.stringify(v)}`)\n .join(\", \")})`;\n\n const actual =\n toolCalls.length === 0\n ? \"No matching tool calls found\"\n : toolCalls.slice(0, MAX_SHOWN).map(formatCall).join(\" | \") +\n (toolCalls.length > MAX_SHOWN\n ? ` ... and ${toolCalls.length - MAX_SHOWN} more`\n : \"\");\n\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" was never called with params matching ${expectedParamsStr}`,\n expectedLabel,\n actual,\n );\n }\n}\n", "import type {\n BuildPassedAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { execSync } from \"child_process\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst DEFAULT_COMMAND = \"yarn build\";\nconst DEFAULT_EXIT_CODE = 0;\n\n/**\n * Evaluator for \"build_passed\" assertion: runs a build command in the scenario\n * working directory and passes if the command exits with the expected code (default 0).\n */\nexport class BuildPassedEvaluator extends AssertionEvaluator<BuildPassedAssertion> {\n readonly type = \"build_passed\" as const;\n\n evaluate(\n assertion: BuildPassedAssertion,\n _input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const workDir = context?.workDir;\n const command = assertion.command ?? DEFAULT_COMMAND;\n const expectedExitCode = assertion.expectedExitCode ?? DEFAULT_EXIT_CODE;\n\n if (!workDir) {\n return this.createResult(assertionId, {\n status: AssertionResultStatus.FAILED,\n message: \"No working directory provided for build_passed assertion\",\n expected: String(expectedExitCode),\n });\n }\n\n let exitCode: number | null = null;\n let errorMessage: string | null = null;\n let stdout: string | undefined;\n let stderr: string | undefined;\n\n console.log(`[build_passed] Running \"${command}\" in: ${workDir}`);\n\n try {\n execSync(command, {\n cwd: workDir,\n encoding: \"utf-8\",\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n exitCode = 0;\n } catch (err) {\n const error = err as Error & {\n status?: number;\n code?: number;\n stdout?: string | Buffer;\n stderr?: string | Buffer;\n };\n exitCode =\n typeof error.status === \"number\"\n ? error.status\n : typeof error.code === \"number\"\n ? error.code\n : null;\n errorMessage = error.message;\n stdout = this.bufferToString(error.stdout);\n stderr = this.bufferToString(error.stderr);\n }\n\n const passed = exitCode !== null && exitCode === expectedExitCode;\n\n const details: Record<string, unknown> = { workDir, command };\n if (stdout !== undefined && stdout !== \"\") {\n details.stdout = stdout;\n }\n if (stderr !== undefined && stderr !== \"\") {\n details.stderr = stderr;\n }\n\n return this.createResult(assertionId, {\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: this.formatMessage(exitCode, expectedExitCode, errorMessage),\n expected: String(expectedExitCode),\n actual: exitCode !== null ? String(exitCode) : undefined,\n details,\n });\n }\n\n private createResult(\n assertionId: string,\n fields: Partial<AssertionResult>,\n ): AssertionResult {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"build_passed\",\n assertionName: \"Build passed\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n\n private bufferToString(\n value: string | Buffer | undefined,\n ): string | undefined {\n if (value === undefined || value === null) return undefined;\n if (typeof value === \"string\") return value;\n return value.toString(\"utf-8\");\n }\n\n private formatMessage(\n exitCode: number | null,\n expectedExitCode: number,\n errorMessage: string | null,\n ): string {\n if (exitCode === null) {\n return `Build failed: ${errorMessage}`;\n }\n if (exitCode === expectedExitCode) {\n return `Build passed (exit code ${exitCode})`;\n }\n return `Build exited with ${exitCode}, expected ${expectedExitCode}`;\n }\n}\n", "import type {\n TimeAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"time_limit\" assertion: passes if the scenario completed\n * within the configured maximum duration (maxDurationMs).\n */\nexport class TimeEvaluator extends AssertionEvaluator<TimeAssertion> {\n readonly type = \"time_limit\" as const;\n\n evaluate(assertion: TimeAssertion, input: EvaluationInput): AssertionResult {\n const maxDurationMs = assertion.maxDurationMs;\n\n if (input.durationMs == null) {\n return this.createResult({\n status: AssertionResultStatus.FAILED,\n message: \"No duration data available for time assertion\",\n expected: `<= ${maxDurationMs}ms`,\n });\n }\n\n const passed = input.durationMs <= maxDurationMs;\n\n return this.createResult({\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Completed in ${input.durationMs}ms (limit: ${maxDurationMs}ms)`\n : `Exceeded time limit: ${input.durationMs}ms > ${maxDurationMs}ms`,\n expected: `<= ${maxDurationMs}ms`,\n actual: `${input.durationMs}ms`,\n });\n }\n\n private createResult(fields: Partial<AssertionResult>): AssertionResult {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: \"time_limit\",\n assertionName: \"Time limit\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n}\n", "import type {\n CostAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"cost\" assertion: checks that the scenario's LLM execution cost\n * stays within a configured USD threshold by reading llmTrace.summary.totalCostUsd.\n */\nexport class CostEvaluator extends AssertionEvaluator<CostAssertion> {\n readonly type = \"cost\" as const;\n\n evaluate(assertion: CostAssertion, input: EvaluationInput): AssertionResult {\n const assertionId = randomUUID();\n const id = randomUUID();\n const assertionName = \"Cost\";\n const assertionType = \"cost\";\n const maxCostUsd = assertion.maxCostUsd;\n\n if (!input.llmTrace) {\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: AssertionResultStatus.SKIPPED,\n message: \"No LLM trace available to check cost\",\n };\n }\n\n const actualCostUsd = input.llmTrace.summary.totalCostUsd;\n const formattedActual = actualCostUsd.toFixed(6);\n const formattedMax = maxCostUsd.toFixed(6);\n const passed = Number(formattedActual) <= Number(formattedMax);\n\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Cost $${formattedActual} is within limit of $${formattedMax}`\n : `Cost $${formattedActual} exceeds limit of $${formattedMax}`,\n expected: `<= $${formattedMax}`,\n actual: `$${formattedActual}`,\n details: { actualCostUsd, maxCostUsd },\n };\n }\n}\n", "import { tool, type Tool } from \"ai\";\nimport { z } from \"zod\";\nimport { readFile } from \"fs/promises\";\nimport path from \"path\";\n\nexport type ReadFileResult =\n | { path: string; content: string }\n | { error: string };\n\nexport function createReadFileTool(\n workDir: string,\n): Tool<{ path: string }, ReadFileResult> {\n const resolvedWorkDir = path.resolve(workDir);\n return tool({\n description:\n \"Read the content of any file in the workspace by its relative path. Use this to inspect file contents when evaluating code changes.\",\n inputSchema: z.object({\n path: z.string().describe(\"Relative file path in the workspace\"),\n }),\n execute: async ({\n path: filePath,\n }: {\n path: string;\n }): Promise<{ path: string; content: string } | { error: string }> => {\n const resolved = path.resolve(resolvedWorkDir, filePath);\n if (!resolved.startsWith(resolvedWorkDir + path.sep)) {\n return { error: `Access denied: path escapes workspace directory` };\n }\n try {\n const content = await readFile(resolved, \"utf-8\");\n return { path: filePath, content };\n } catch {\n return { error: `File not found: ${filePath}` };\n }\n },\n });\n}\n", "import type {\n LlmJudgeAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { createReadFileTool } from \"../tools/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { createAnthropic } from \"@ai-sdk/anthropic\";\nimport {\n generateText,\n Output,\n APICallError,\n NoObjectGeneratedError,\n stepCountIs,\n type LanguageModel,\n} from \"ai\";\nimport { z } from \"zod\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nexport interface JudgeResult {\n text: string;\n score: number;\n scoreReasoning: string;\n}\n\nexport const JudgeResultSchema = z.object({\n text: z.string().describe(\"A brief textual verdict of the test result\"),\n score: z\n .number()\n .min(0)\n .max(100)\n .describe(\n \"A number from 0 to 100 reflecting how well the answer meets the acceptance criteria\",\n ),\n scoreReasoning: z\n .string()\n .describe(\"A concise explanation justifying the assigned score\"),\n});\n\nconst MAX_JUDGE_STEPS = 20;\n\n/**\n * Format LLM trace as readable text for the judge (step number, type, tool name/args, output preview).\n */\nexport function formatTraceForJudge(llmTrace: LLMTrace | undefined): string {\n if (!llmTrace?.steps?.length) {\n return \"No trace available.\";\n }\n const lines: string[] = [];\n for (const step of llmTrace.steps) {\n const parts: string[] = [\n `Step ${step.stepNumber}`,\n `type: ${step.type}`,\n `duration: ${step.durationMs}ms`,\n ];\n if (step.toolName) {\n parts.push(`tool: ${step.toolName}`);\n if (step.toolArguments) {\n parts.push(`args: ${step.toolArguments}`);\n }\n }\n if (step.outputPreview) {\n parts.push(`output: ${step.outputPreview}`);\n }\n if (step.error) {\n parts.push(`error: ${step.error}`);\n }\n lines.push(parts.join(\", \"));\n }\n return lines.join(\"\\n\");\n}\n\n/**\n * Context object for placeholder replacement.\n */\nexport interface PlaceholderContext {\n output: string;\n cwd: string;\n changedFiles: string;\n modifiedFiles: string;\n newFiles: string;\n trace: string;\n}\n\nexport function replacePlaceholders(\n str: string,\n ctx: PlaceholderContext,\n): string {\n return str\n .replace(/\\{\\{output\\}\\}/g, ctx.output)\n .replace(/\\{\\{cwd\\}\\}/g, ctx.cwd)\n .replace(/\\{\\{changedFiles\\}\\}/g, ctx.changedFiles)\n .replace(/\\{\\{modifiedFiles\\}\\}/g, ctx.modifiedFiles)\n .replace(/\\{\\{newFiles\\}\\}/g, ctx.newFiles)\n .replace(/\\{\\{trace\\}\\}/g, ctx.trace);\n}\n\n/**\n * Strip markdown code fences (e.g. ```json ... ```) from LLM output,\n * returning only the inner content for JSON parsing.\n */\nexport function stripMarkdownCodeBlock(text: string): string {\n const trimmed = text.trim();\n const match = trimmed.match(/^```(?:\\w+)?\\s*\\n?([\\s\\S]*?)\\n?\\s*```$/);\n return match ? match[1].trim() : trimmed;\n}\n\nexport function validateJudgeResult(parsed: unknown): JudgeResult {\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(\"Judge result is not an object\");\n }\n const obj = parsed as Record<string, unknown>;\n if (typeof obj.text !== \"string\") {\n throw new Error(\"Judge result does not contain a valid text field\");\n }\n if (typeof obj.score !== \"number\") {\n throw new Error(\"Judge result does not contain a valid score field\");\n }\n if (obj.score < 0 || obj.score > 100) {\n throw new Error(\"Judge result score is not between 0 and 100\");\n }\n if (typeof obj.scoreReasoning !== \"string\") {\n throw new Error(\n \"Judge result does not contain a valid scoreReasoning field\",\n );\n }\n return {\n text: obj.text,\n score: obj.score,\n scoreReasoning: obj.scoreReasoning,\n };\n}\n\nconst DEFAULT_MIN_SCORE = 70;\n\nconst DEFAULT_JUDGE_CONTEXT = `You are judging a scenario run. The ACTUAL run data is provided below \u2014 use it to verify facts:\n\n- {{output}}: the agent's final output\n- {{cwd}}: working directory\n- {{changedFiles}}: list of all files changed (or \"No files were changed\")\n- {{modifiedFiles}}: list of existing files that were modified (or \"No files were modified\")\n- {{newFiles}}: list of new files that were created (or \"No new files were created\")\n- {{trace}}: step-by-step trace (tool calls, completions) so you can check e.g. which tools were called and how many times\n\nYou have access to a read_file tool that lets you read the content of ANY file in the workspace (not just changed files). Use it to inspect file contents whenever you need to verify claims about code, check imports, review implementations, or validate that specific code patterns exist. Always read files before making judgments about their content \u2014 do not guess.\n\nCRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above and the actual file contents (use the read_file tool). If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;\n\n/**\n * Evaluator for \"llm_judge\" assertion: an LLM judges the scenario output\n * (prompt with {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}) and returns a score 0-100.\n * Passes if score >= minScore.\n */\nexport class LlmJudgeEvaluator extends AssertionEvaluator<LlmJudgeAssertion> {\n readonly type = \"llm_judge\" as const;\n\n async evaluate(\n assertion: LlmJudgeAssertion,\n input: EvaluationInput,\n context?: AssertionContext,\n ): Promise<AssertionResult> {\n const assertionId = randomUUID();\n const workDir = context?.workDir ?? \"\";\n\n const output = input.outputText ?? \"\";\n const fileDiffs = input.fileDiffs ?? [];\n\n const changedPaths = fileDiffs.map((d) => d.path);\n const modifiedPaths = fileDiffs\n .filter((d) => d.status === \"modified\")\n .map((d) => d.path);\n const newPaths = fileDiffs\n .filter((d) => d.status === \"new\")\n .map((d) => d.path);\n\n const changedFiles =\n changedPaths.length > 0\n ? changedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were changed\";\n const modifiedFiles =\n modifiedPaths.length > 0\n ? modifiedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were modified\";\n const newFiles =\n newPaths.length > 0\n ? newPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No new files were created\";\n\n const trace = formatTraceForJudge(input.llmTrace);\n const ctx: PlaceholderContext = {\n output,\n cwd: workDir,\n changedFiles,\n modifiedFiles,\n newFiles,\n trace,\n };\n const replace = (s: string) => replacePlaceholders(s, ctx);\n\n const finalPrompt = replace(assertion.prompt);\n const systemPrompt = replace(DEFAULT_JUDGE_CONTEXT);\n\n const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;\n const maxOutputTokens = assertion.maxTokens ?? 1024;\n const temperature = assertion.temperature ?? 0;\n const modelId = assertion.model ?? context?.defaultJudgeModel;\n\n const model = this.resolveModel(context, modelId);\n if (!model) {\n const reason =\n !modelId && !context?.model\n ? \"No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel/model in context)\"\n : \"No llmConfig for llm_judge assertion (AI gateway required)\";\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: reason,\n expected: String(minScore),\n };\n }\n\n try {\n const judgeResult = await this.callGenerateText(\n model,\n finalPrompt,\n systemPrompt,\n maxOutputTokens,\n temperature,\n workDir || undefined,\n );\n\n const passed = judgeResult.score >= minScore;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}`\n : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,\n expected: String(minScore),\n actual: String(judgeResult.score),\n details: {\n score: judgeResult.score,\n scoreReasoning: judgeResult.scoreReasoning,\n text: judgeResult.text,\n },\n };\n } catch (err) {\n if (NoObjectGeneratedError.isInstance(err)) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: \"LLM judge failed to produce valid structured output\",\n expected: String(minScore),\n details: {\n rawText:\n typeof err.text === \"string\" ? err.text.slice(0, 500) : undefined,\n },\n };\n }\n\n const message = err instanceof Error ? err.message : String(err);\n const details: Record<string, unknown> = {\n error: message,\n model: modelId,\n };\n\n if (APICallError.isInstance(err)) {\n details.statusCode = err.statusCode;\n details.url = err.url;\n details.isRetryable = err.isRetryable;\n details.responseBody =\n typeof err.responseBody === \"string\"\n ? err.responseBody.slice(0, 2000)\n : err.responseBody;\n }\n\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: `LLM judge call failed: ${message}`,\n expected: String(minScore),\n details,\n };\n }\n }\n\n /**\n * Resolve the LanguageModel to use: context.model (injected mock/override)\n * takes precedence, otherwise create from llmConfig + modelId.\n */\n private resolveModel(\n context: AssertionContext | undefined,\n modelId: string | undefined,\n ): LanguageModel | null {\n if (context?.model) {\n return context.model;\n }\n if (!modelId || !context?.llmConfig) {\n return null;\n }\n const anthropic = createAnthropic({\n baseURL: context.llmConfig.baseUrl,\n apiKey: \"dummy\",\n headers: context.llmConfig.headers,\n });\n return anthropic(modelId);\n }\n\n private async callGenerateText(\n model: LanguageModel,\n prompt: string,\n system: string,\n maxOutputTokens: number,\n temperature: number,\n workDir?: string,\n ): Promise<JudgeResult> {\n const baseOptions = {\n model,\n prompt,\n system,\n maxOutputTokens,\n temperature,\n output: Output.object({ schema: JudgeResultSchema }),\n stopWhen: stepCountIs(MAX_JUDGE_STEPS),\n } as const;\n\n const { output } = workDir\n ? await generateText({\n ...baseOptions,\n tools: { read_file: createReadFileTool(workDir) },\n })\n : await generateText(baseOptions);\n\n return output;\n }\n}\n"],
5
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,iBAAkB;AAQX,IAAM,gCAAgC,aAAE,OAAO;AAAA,EACpD,MAAM,aAAE,QAAQ,kBAAkB;AAAA;AAAA,EAElC,YAAY,aAAE,MAAM,aAAE,OAAO,CAAC,EAAE,IAAI,CAAC;AACvC,CAAC;AAUM,IAAM,qCAAqC,aAAE,OAAO;AAAA,EACzD,MAAM,aAAE,QAAQ,wBAAwB;AAAA;AAAA,EAExC,UAAU,aAAE,OAAO,EAAE,IAAI,CAAC;AAAA;AAAA,EAE1B,gBAAgB,aAAE,OAAO,EAAE,IAAI,CAAC;AAClC,CAAC;AAUM,IAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,cAAc;AAAA;AAAA,EAE9B,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE7B,kBAAkB,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC9C,CAAC;AAQM,IAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,MAAM;AAAA;AAAA,EAEtB,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AASM,IAAM,0BAA0B,aAAE,OAAO;AAAA,EAC9C,MAAM,aAAE,QAAQ,WAAW;AAAA;AAAA,EAE3B,QAAQ,aAAE,OAAO;AAAA;AAAA,EAEjB,UAAU,aAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,EAAE,SAAS;AAAA;AAAA,EAEpD,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,WAAW,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACrC,aAAa,aAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AACjD,CAAC;AAQM,IAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,YAAY;AAAA;AAAA,EAE5B,eAAe,aAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC3C,CAAC;AASM,IAAM,kBAAkB,aAAE,MAAM;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;;;ACvGD,IAAAA,cAAkB;AAKX,IAAM,mBAAmB,cAAE,OAAO;AAAA,EACvC,QAAQ,cAAE,OAAO;AAAA,EACjB,YAAY,cAAE,OAAO;AAAA,EACrB,OAAO,cAAE,OAAO;AAClB,CAAC;AAOM,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,gBAAa;AACb,EAAAA,aAAA,cAAW;AACX,EAAAA,aAAA,iBAAc;AACd,EAAAA,aAAA,cAAW;AAJD,SAAAA;AAAA,GAAA;AAUL,IAAM,qBAAqB,cAAE,OAAO;AAAA,EACzC,IAAI,cAAE,OAAO;AAAA,EACb,YAAY,cAAE,OAAO;AAAA,EACrB,MAAM,cAAE,KAAK,WAAW;AAAA,EACxB,OAAO,cAAE,OAAO;AAAA,EAChB,UAAU,cAAE,OAAO;AAAA,EACnB,WAAW,cAAE,OAAO;AAAA,EACpB,YAAY,cAAE,OAAO;AAAA,EACrB,YAAY;AAAA,EACZ,SAAS,cAAE,OAAO;AAAA,EAClB,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,eAAe,cAAE,OAAO,EAAE,SAAS;AAAA,EACnC,cAAc,cAAE,OAAO,EAAE,SAAS;AAAA,EAClC,eAAe,cAAE,OAAO,EAAE,SAAS;AAAA,EACnC,SAAS,cAAE,QAAQ;AAAA,EACnB,OAAO,cAAE,OAAO,EAAE,SAAS;AAC7B,CAAC;AAOM,IAAM,0BAA0B,cAAE,OAAO;AAAA,EAC9C,OAAO,cAAE,OAAO;AAAA,EAChB,YAAY,cAAE,OAAO;AAAA,EACrB,QAAQ,cAAE,OAAO;AAAA,EACjB,SAAS,cAAE,OAAO;AACpB,CAAC;AAOM,IAAM,wBAAwB,cAAE,OAAO;AAAA,EAC5C,YAAY,cAAE,OAAO;AAAA,EACrB,iBAAiB,cAAE,OAAO;AAAA,EAC1B,aAAa;AAAA,EACb,cAAc,cAAE,OAAO;AAAA,EACvB,mBAAmB,cAAE,OAAO,cAAE,OAAO,GAAG,uBAAuB,EAAE,SAAS;AAAA,EAC1E,gBAAgB,cAAE,OAAO,cAAE,OAAO,GAAG,uBAAuB;AAAA,EAC5D,YAAY,cAAE,MAAM,cAAE,OAAO,CAAC;AAChC,CAAC;AAOM,IAAM,iBAAiB,cAAE,OAAO;AAAA,EACrC,IAAI,cAAE,OAAO;AAAA,EACb,OAAO,cAAE,MAAM,kBAAkB;AAAA,EACjC,SAAS;AACX,CAAC;;;AChFD,IAAAC,cAAkB;AAMX,IAAK,wBAAL,kBAAKC,2BAAL;AACL,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,aAAU;AACV,EAAAA,uBAAA,WAAQ;AAJE,SAAAA;AAAA,GAAA;AAUL,IAAM,wBAAwB,cAAE,OAAO;AAAA,EAC5C,IAAI,cAAE,OAAO;AAAA,EACb,aAAa,cAAE,OAAO;AAAA,EACtB,eAAe,cAAE,OAAO;AAAA,EACxB,eAAe,cAAE,OAAO;AAAA,EACxB,QAAQ,cAAE,KAAK,qBAAqB;AAAA,EACpC,SAAS,cAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,QAAQ,cAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAU,cAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,cAAE,OAAO,cAAE,OAAO,GAAG,cAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACpD,eAAe,cAAE,MAAM,kBAAkB,EAAE,SAAS;AACtD,CAAC;;;AC1BD,IAAAC,iBAA2B;;;ACK3B,oBAA2B;;;AC6BpB,IAAe,qBAAf,MAAmE;AAQ1E;;;AD9BA,SAAS,wBAAwB,UAAiC;AAChE,QAAM,eAAe,oBAAI,IAAY;AACrC,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AACA,QAAI;AACJ,QAAI;AACF,aAAO,KAAK,gBACP,KAAK,MAAM,KAAK,aAAa,IAC9B;AAAA,IACN,QAAQ;AACN;AAAA,IACF;AACA,QAAI,SAAS,QAAQ,OAAO,SAAS,UAAU;AAC7C,YAAM,MAAM;AACZ,UAAI,OAAO,IAAI,UAAU,UAAU;AACjC,qBAAa,IAAI,IAAI,KAAK;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AASO,IAAM,0BAAN,cAAsC,mBAA4C;AAAA,EAC9E,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,kBAAc,0BAAW;AAC/B,UAAM,iBAAiB,UAAU;AACjC,UAAM,gBAAgB,eAAe,KAAK,IAAI;AAE9C,UAAM,WAAiC,MAAM;AAC7C,QAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,aAAO;AAAA,QACL,QAAI,0BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,wBAAwB,QAAQ;AACrD,UAAM,gBAAgB,eAAe;AAAA,MACnC,CAAC,SAAS,CAAC,aAAa,IAAI,IAAI;AAAA,IAClC;AAEA,QAAI,cAAc,WAAW,GAAG;AAC9B,YAAMC,WACJ,eAAe,WAAW,IACtB,UAAU,eAAe,CAAC,CAAC,iBAC3B,2BAA2B,aAAa;AAC9C,aAAO;AAAA,QACL,QAAI,0BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAAA;AAAA,QACA,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,cAAc,KAAK,IAAI;AAC5C,UAAM,UACJ,eAAe,WAAW,IACtB,UAAU,cAAc,CAAC,CAAC,qBAC1B,mBAAmB,YAAY,sBAAsB,aAAa;AACxE,WAAO;AAAA,MACL,QAAI,0BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ;AAAA,EACF;AACF;;;AErGA,IAAAC,iBAA2B;AAI3B,IAAM,iBAAiB;AACvB,IAAM,iBAAiB;AAEvB,IAAM,cAAc,CAAC;AAAA,EACnB;AAAA,EACA;AACF,MAIE,OAAO,QAAQ,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,GAAG,MAAM;AAC7C,QAAM,YAAY,OAAO,GAAG;AAE5B,MAAI,cAAc,QAAQ,cAAc,OAAW,QAAO;AAC1D,QAAM,YACJ,OAAO,cAAc,WAAW,YAAY,KAAK,UAAU,SAAS;AACtE,SAAO,UAAU,SAAS,OAAO,GAAG,CAAC;AACvC,CAAC;AAOI,IAAM,+BAAN,cAA2C,mBAAiD;AAAA,EACxF,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,kBAAc,2BAAW;AAC/B,UAAM,EAAE,UAAU,gBAAgB,kBAAkB,IAAI;AAExD,UAAM,cAAc,CAClB,QACA,SACAC,WACAC,aACqB;AAAA,MACrB,QAAI,2BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAAD;AAAA,MACA,GAAIC,YAAW,SAAY,EAAE,QAAAA,QAAO,IAAI,CAAC;AAAA,IAC3C;AAGA,QAAI;AACJ,QAAI;AACF,iBAAW,KAAK,MAAM,iBAAiB;AAAA,IACzC,QAAQ;AACN,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ;AAAA,QACjB,GAAG,QAAQ;AAAA,QACX;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,GAAG,QAAQ,IAAI,OAAO,QAAQ,QAAQ,EACzD,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,GAAG,EAC7B,KAAK,IAAI,CAAC;AAEb,UAAM,QAAQ,MAAM,UAAU,SAAS,CAAC;AAExC,UAAM,YAAY,MACf,OAAO,CAAC,MAAM,EAAE,aAAa,YAAY,EAAE,kBAAkB,MAAS,EACtE,IAAI,CAAC,MAAM;AACV,UAAI;AACF,eAAO,KAAK,MAAM,EAAE,aAAc;AAAA,MACpC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF,CAAC,EACA,OAAO,CAAC,SAA0C,SAAS,IAAI;AAElE,QAAI,UAAU,KAAK,CAACA,YAAW,YAAY,EAAE,QAAAA,SAAQ,SAAS,CAAC,CAAC,GAAG;AACjE,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ,qCAAqC,iBAAiB;AAAA,QACvE;AAAA,MACF;AAAA,IACF;AAEA,UAAM,YAAY;AAClB,UAAM,aAAa,CAAC,SAClB,GAAG,QAAQ,IAAI,OAAO,QAAQ,IAAI,EAC/B,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,EAAE,EAC3C,KAAK,IAAI,CAAC;AAEf,UAAM,SACJ,UAAU,WAAW,IACjB,iCACA,UAAU,MAAM,GAAG,SAAS,EAAE,IAAI,UAAU,EAAE,KAAK,KAAK,KACvD,UAAU,SAAS,YAChB,YAAY,UAAU,SAAS,SAAS,UACxC;AAEV,WAAO;AAAA;AAAA,MAEL,SAAS,QAAQ,2CAA2C,iBAAiB;AAAA,MAC7E;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;ACnHA,IAAAC,iBAA2B;AAC3B,2BAAyB;AAIzB,IAAM,kBAAkB;AACxB,IAAM,oBAAoB;AAMnB,IAAM,uBAAN,cAAmC,mBAAyC;AAAA,EACxE,OAAO;AAAA,EAEhB,SACE,WACA,QACA,SACiB;AACjB,UAAM,kBAAc,2BAAW;AAC/B,UAAM,UAAU,SAAS;AACzB,UAAM,UAAU,UAAU,WAAW;AACrC,UAAM,mBAAmB,UAAU,oBAAoB;AAEvD,QAAI,CAAC,SAAS;AACZ,aAAO,KAAK,aAAa,aAAa;AAAA,QACpC;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,gBAAgB;AAAA,MACnC,CAAC;AAAA,IACH;AAEA,QAAI,WAA0B;AAC9B,QAAI,eAA8B;AAClC,QAAI;AACJ,QAAI;AAEJ,YAAQ,IAAI,2BAA2B,OAAO,SAAS,OAAO,EAAE;AAEhE,QAAI;AACF,yCAAS,SAAS;AAAA,QAChB,KAAK;AAAA,QACL,UAAU;AAAA,QACV,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,MAClC,CAAC;AACD,iBAAW;AAAA,IACb,SAAS,KAAK;AACZ,YAAM,QAAQ;AAMd,iBACE,OAAO,MAAM,WAAW,WACpB,MAAM,SACN,OAAO,MAAM,SAAS,WACpB,MAAM,OACN;AACR,qBAAe,MAAM;AACrB,eAAS,KAAK,eAAe,MAAM,MAAM;AACzC,eAAS,KAAK,eAAe,MAAM,MAAM;AAAA,IAC3C;AAEA,UAAM,SAAS,aAAa,QAAQ,aAAa;AAEjD,UAAM,UAAmC,EAAE,SAAS,QAAQ;AAC5D,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AACA,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AAEA,WAAO,KAAK,aAAa,aAAa;AAAA,MACpC,QAAQ;AAAA,MAGR,SAAS,KAAK,cAAc,UAAU,kBAAkB,YAAY;AAAA,MACpE,UAAU,OAAO,gBAAgB;AAAA,MACjC,QAAQ,aAAa,OAAO,OAAO,QAAQ,IAAI;AAAA,MAC/C;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,aACN,aACA,QACiB;AACjB,WAAO;AAAA,MACL,QAAI,2BAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEQ,eACN,OACoB;AACpB,QAAI,UAAU,UAAa,UAAU,KAAM,QAAO;AAClD,QAAI,OAAO,UAAU,SAAU,QAAO;AACtC,WAAO,MAAM,SAAS,OAAO;AAAA,EAC/B;AAAA,EAEQ,cACN,UACA,kBACA,cACQ;AACR,QAAI,aAAa,MAAM;AACrB,aAAO,iBAAiB,YAAY;AAAA,IACtC;AACA,QAAI,aAAa,kBAAkB;AACjC,aAAO,2BAA2B,QAAQ;AAAA,IAC5C;AACA,WAAO,qBAAqB,QAAQ,cAAc,gBAAgB;AAAA,EACpE;AACF;;;ACzHA,IAAAC,iBAA2B;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,gBAAgB,UAAU;AAEhC,QAAI,MAAM,cAAc,MAAM;AAC5B,aAAO,KAAK,aAAa;AAAA,QACvB;AAAA,QACA,SAAS;AAAA,QACT,UAAU,MAAM,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,MAAM,cAAc;AAEnC,WAAO,KAAK,aAAa;AAAA,MACvB,QAAQ;AAAA,MAGR,SAAS,SACL,gBAAgB,MAAM,UAAU,cAAc,aAAa,QAC3D,wBAAwB,MAAM,UAAU,QAAQ,aAAa;AAAA,MACjE,UAAU,MAAM,aAAa;AAAA,MAC7B,QAAQ,GAAG,MAAM,UAAU;AAAA,IAC7B,CAAC;AAAA,EACH;AAAA,EAEQ,aAAa,QAAmD;AACtE,WAAO;AAAA,MACL,QAAI,2BAAW;AAAA,MACf,iBAAa,2BAAW;AAAA,MACxB,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AACF;;;AC7CA,IAAAC,iBAA2B;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,kBAAc,2BAAW;AAC/B,UAAM,SAAK,2BAAW;AACtB,UAAM,gBAAgB;AACtB,UAAM,gBAAgB;AACtB,UAAM,aAAa,UAAU;AAE7B,QAAI,CAAC,MAAM,UAAU;AACnB,aAAO;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,SAAS,QAAQ;AAC7C,UAAM,kBAAkB,cAAc,QAAQ,CAAC;AAC/C,UAAM,eAAe,WAAW,QAAQ,CAAC;AACzC,UAAM,SAAS,OAAO,eAAe,KAAK,OAAO,YAAY;AAE7D,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,MAGR,SAAS,SACL,SAAS,eAAe,wBAAwB,YAAY,KAC5D,SAAS,eAAe,sBAAsB,YAAY;AAAA,MAC9D,UAAU,OAAO,YAAY;AAAA,MAC7B,QAAQ,IAAI,eAAe;AAAA,MAC3B,SAAS,EAAE,eAAe,WAAW;AAAA,IACvC;AAAA,EACF;AACF;;;ACvDA,gBAAgC;AAChC,IAAAC,cAAkB;AAClB,sBAAyB;AACzB,kBAAiB;AAMV,SAAS,mBACd,SACwC;AACxC,QAAM,kBAAkB,YAAAC,QAAK,QAAQ,OAAO;AAC5C,aAAO,gBAAK;AAAA,IACV,aACE;AAAA,IACF,aAAa,cAAE,OAAO;AAAA,MACpB,MAAM,cAAE,OAAO,EAAE,SAAS,qCAAqC;AAAA,IACjE,CAAC;AAAA,IACD,SAAS,OAAO;AAAA,MACd,MAAM;AAAA,IACR,MAEsE;AACpE,YAAM,WAAW,YAAAA,QAAK,QAAQ,iBAAiB,QAAQ;AACvD,UAAI,CAAC,SAAS,WAAW,kBAAkB,YAAAA,QAAK,GAAG,GAAG;AACpD,eAAO,EAAE,OAAO,kDAAkD;AAAA,MACpE;AACA,UAAI;AACF,cAAM,UAAU,UAAM,0BAAS,UAAU,OAAO;AAChD,eAAO,EAAE,MAAM,UAAU,QAAQ;AAAA,MACnC,QAAQ;AACN,eAAO,EAAE,OAAO,mBAAmB,QAAQ,GAAG;AAAA,MAChD;AAAA,IACF;AAAA,EACF,CAAC;AACH;;;AC5BA,IAAAC,iBAA2B;AAC3B,uBAAgC;AAChC,IAAAC,aAOO;AACP,IAAAC,cAAkB;AAUX,IAAM,oBAAoB,cAAE,OAAO;AAAA,EACxC,MAAM,cAAE,OAAO,EAAE,SAAS,4CAA4C;AAAA,EACtE,OAAO,cACJ,OAAO,EACP,IAAI,CAAC,EACL,IAAI,GAAG,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,gBAAgB,cACb,OAAO,EACP,SAAS,qDAAqD;AACnE,CAAC;AAED,IAAM,kBAAkB;AAKjB,SAAS,oBAAoB,UAAwC;AAC1E,MAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,WAAO;AAAA,EACT;AACA,QAAM,QAAkB,CAAC;AACzB,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,QAAkB;AAAA,MACtB,QAAQ,KAAK,UAAU;AAAA,MACvB,SAAS,KAAK,IAAI;AAAA,MAClB,aAAa,KAAK,UAAU;AAAA,IAC9B;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,KAAK,QAAQ,EAAE;AACnC,UAAI,KAAK,eAAe;AACtB,cAAM,KAAK,SAAS,KAAK,aAAa,EAAE;AAAA,MAC1C;AAAA,IACF;AACA,QAAI,KAAK,eAAe;AACtB,YAAM,KAAK,WAAW,KAAK,aAAa,EAAE;AAAA,IAC5C;AACA,QAAI,KAAK,OAAO;AACd,YAAM,KAAK,UAAU,KAAK,KAAK,EAAE;AAAA,IACnC;AACA,UAAM,KAAK,MAAM,KAAK,IAAI,CAAC;AAAA,EAC7B;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcO,SAAS,oBACd,KACA,KACQ;AACR,SAAO,IACJ,QAAQ,mBAAmB,IAAI,MAAM,EACrC,QAAQ,gBAAgB,IAAI,GAAG,EAC/B,QAAQ,yBAAyB,IAAI,YAAY,EACjD,QAAQ,0BAA0B,IAAI,aAAa,EACnD,QAAQ,qBAAqB,IAAI,QAAQ,EACzC,QAAQ,kBAAkB,IAAI,KAAK;AACxC;AAMO,SAAS,uBAAuB,MAAsB;AAC3D,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,QAAQ,QAAQ,MAAM,wCAAwC;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;AAEO,SAAS,oBAAoB,QAA8B;AAChE,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,SAAS,UAAU;AAChC,UAAM,IAAI,MAAM,kDAAkD;AAAA,EACpE;AACA,MAAI,OAAO,IAAI,UAAU,UAAU;AACjC,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACrE;AACA,MAAI,IAAI,QAAQ,KAAK,IAAI,QAAQ,KAAK;AACpC,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,MAAI,OAAO,IAAI,mBAAmB,UAAU;AAC1C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AAAA,IACL,MAAM,IAAI;AAAA,IACV,OAAO,IAAI;AAAA,IACX,gBAAgB,IAAI;AAAA,EACtB;AACF;AAEA,IAAM,oBAAoB;AAE1B,IAAM,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBvB,IAAM,oBAAN,cAAgC,mBAAsC;AAAA,EAClE,OAAO;AAAA,EAEhB,MAAM,SACJ,WACA,OACA,SAC0B;AAC1B,UAAM,kBAAc,2BAAW;AAC/B,UAAM,UAAU,SAAS,WAAW;AAEpC,UAAM,SAAS,MAAM,cAAc;AACnC,UAAM,YAAY,MAAM,aAAa,CAAC;AAEtC,UAAM,eAAe,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI;AAChD,UAAM,gBAAgB,UACnB,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EACrC,IAAI,CAAC,MAAM,EAAE,IAAI;AACpB,UAAM,WAAW,UACd,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,IAAI;AAEpB,UAAM,eACJ,aAAa,SAAS,IAClB,aAAa,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACnD;AACN,UAAM,gBACJ,cAAc,SAAS,IACnB,cAAc,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACpD;AACN,UAAM,WACJ,SAAS,SAAS,IACd,SAAS,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC/C;AAEN,UAAM,QAAQ,oBAAoB,MAAM,QAAQ;AAChD,UAAM,MAA0B;AAAA,MAC9B;AAAA,MACA,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,UAAM,UAAU,CAAC,MAAc,oBAAoB,GAAG,GAAG;AAEzD,UAAM,cAAc,QAAQ,UAAU,MAAM;AAC5C,UAAM,eAAe,QAAQ,qBAAqB;AAElD,UAAM,WAAW,UAAU,YAAY;AACvC,UAAM,kBAAkB,UAAU,aAAa;AAC/C,UAAM,cAAc,UAAU,eAAe;AAC7C,UAAM,UAAU,UAAU,SAAS,SAAS;AAE5C,UAAM,QAAQ,KAAK,aAAa,SAAS,OAAO;AAChD,QAAI,CAAC,OAAO;AACV,YAAM,SACJ,CAAC,WAAW,CAAC,SAAS,QAClB,uHACA;AACN,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,QAAQ;AAAA,MAC3B;AAAA,IACF;AAEA,QAAI;AACF,YAAM,cAAc,MAAM,KAAK;AAAA,QAC7B;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,MACb;AAEA,YAAM,SAAS,YAAY,SAAS;AACpC,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf,QAAQ;AAAA,QAGR,SAAS,SACL,eAAe,YAAY,KAAK,OAAO,QAAQ,KAAK,YAAY,IAAI,KACpE,eAAe,YAAY,KAAK,MAAM,QAAQ,KAAK,YAAY,IAAI;AAAA,QACvE,UAAU,OAAO,QAAQ;AAAA,QACzB,QAAQ,OAAO,YAAY,KAAK;AAAA,QAChC,SAAS;AAAA,UACP,OAAO,YAAY;AAAA,UACnB,gBAAgB,YAAY;AAAA,UAC5B,MAAM,YAAY;AAAA,QACpB;AAAA,MACF;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,kCAAuB,WAAW,GAAG,GAAG;AAC1C,eAAO;AAAA,UACL,QAAI,2BAAW;AAAA,UACf;AAAA,UACA,eAAe;AAAA,UACf,eAAe;AAAA,UACf;AAAA,UACA,SAAS;AAAA,UACT,UAAU,OAAO,QAAQ;AAAA,UACzB,SAAS;AAAA,YACP,SACE,OAAO,IAAI,SAAS,WAAW,IAAI,KAAK,MAAM,GAAG,GAAG,IAAI;AAAA,UAC5D;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAM,UAAmC;AAAA,QACvC,OAAO;AAAA,QACP,OAAO;AAAA,MACT;AAEA,UAAI,wBAAa,WAAW,GAAG,GAAG;AAChC,gBAAQ,aAAa,IAAI;AACzB,gBAAQ,MAAM,IAAI;AAClB,gBAAQ,cAAc,IAAI;AAC1B,gBAAQ,eACN,OAAO,IAAI,iBAAiB,WACxB,IAAI,aAAa,MAAM,GAAG,GAAI,IAC9B,IAAI;AAAA,MACZ;AAEA,aAAO;AAAA,QACL,QAAI,2BAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS,0BAA0B,OAAO;AAAA,QAC1C,UAAU,OAAO,QAAQ;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,aACN,SACA,SACsB;AACtB,QAAI,SAAS,OAAO;AAClB,aAAO,QAAQ;AAAA,IACjB;AACA,QAAI,CAAC,WAAW,CAAC,SAAS,WAAW;AACnC,aAAO;AAAA,IACT;AACA,UAAM,gBAAY,kCAAgB;AAAA,MAChC,SAAS,QAAQ,UAAU;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS,QAAQ,UAAU;AAAA,IAC7B,CAAC;AACD,WAAO,UAAU,OAAO;AAAA,EAC1B;AAAA,EAEA,MAAc,iBACZ,OACA,QACA,QACA,iBACA,aACA,SACsB;AACtB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ,kBAAO,OAAO,EAAE,QAAQ,kBAAkB,CAAC;AAAA,MACnD,cAAU,wBAAY,eAAe;AAAA,IACvC;AAEA,UAAM,EAAE,OAAO,IAAI,UACf,UAAM,yBAAa;AAAA,MACjB,GAAG;AAAA,MACH,OAAO,EAAE,WAAW,mBAAmB,OAAO,EAAE;AAAA,IAClD,CAAC,IACD,UAAM,yBAAa,WAAW;AAElC,WAAO;AAAA,EACT;AACF;;;ARnVA,IAAM,oBAAoB,IAAI,kBAAkB;AAEhD,IAAM,aAAiD;AAAA,EACrD,kBAAkB,IAAI,wBAAwB;AAAA,EAC9C,wBAAwB,IAAI,6BAA6B;AAAA,EACzD,cAAc,IAAI,qBAAqB;AAAA,EACvC,YAAY,IAAI,cAAc;AAAA,EAC9B,MAAM,IAAI,cAAc;AAAA,EACxB,WAAW;AAAA;AAAA,EAEX,QAAQ;AACV;AAQO,SAAS,kBACd,MACA,WACM;AACN,aAAW,IAAI,IAAI;AACrB;AAQO,SAAS,aAAa,MAA8C;AACzE,SAAO,WAAW,IAAI;AACxB;AAUA,eAAsB,mBACpB,OACA,YACA,SAC4B;AAC5B,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,CAAC;AAAA,EACV;AACA,SAAO,QAAQ;AAAA,IACb,WAAW,IAAI,OAAO,cAAc;AAClC,YAAM,YAAY,WAAW,UAAU,IAAI;AAC3C,UAAI,CAAC,WAAW;AACd,eAAO;AAAA,UACL,QAAI,2BAAW;AAAA,UACf,iBAAa,2BAAW;AAAA,UACxB,eAAe,UAAU;AAAA,UACzB,eAAe;AAAA,UACf;AAAA,UACA,SAAS,+BAA+B,UAAU,IAAI;AAAA,UACtD,UAAU;AAAA,QACZ;AAAA,MACF;AACA,YAAM,UAAU,KAAK,IAAI;AACzB,YAAM,SAAS,MAAM,UAAU,SAAS,WAAW,OAAO,OAAO;AACjE,YAAM,aAAa,KAAK,IAAI,IAAI;AAChC,aAAO,EAAE,GAAG,QAAQ,UAAU,WAAW;AAAA,IAC3C,CAAC;AAAA,EACH;AACF;",
6
+ "names": ["import_zod", "LLMStepType", "import_zod", "AssertionResultStatus", "import_crypto", "message", "import_crypto", "expected", "actual", "import_crypto", "import_crypto", "import_crypto", "import_zod", "path", "import_crypto", "import_ai", "import_zod"]
7
7
  }
package/build/index.mjs CHANGED
@@ -28,8 +28,6 @@ var LlmJudgeAssertionSchema = z.object({
28
28
  type: z.literal("llm_judge"),
29
29
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */
30
30
  prompt: z.string(),
31
- /** Optional system prompt for the judge (default asks for JSON with score) */
32
- systemPrompt: z.string().optional(),
33
31
  /** Minimum score to pass (0-100, default 70) */
34
32
  minScore: z.number().int().min(0).max(100).optional(),
35
33
  /** Model for the judge (e.g. claude-3-5-haiku) */
@@ -224,7 +222,7 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
224
222
  evaluate(assertion, input, _context) {
225
223
  const assertionId = randomUUID2();
226
224
  const { toolName, expectedParams: expectedParamsStr } = assertion;
227
- const buildResult = (status, message, expected2, actual) => ({
225
+ const buildResult = (status, message, expected2, actual2) => ({
228
226
  id: randomUUID2(),
229
227
  assertionId,
230
228
  assertionType: ASSERTION_TYPE,
@@ -232,7 +230,7 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
232
230
  status,
233
231
  message,
234
232
  expected: expected2,
235
- ...actual !== void 0 ? { actual } : {}
233
+ ...actual2 !== void 0 ? { actual: actual2 } : {}
236
234
  });
237
235
  let expected;
238
236
  try {
@@ -254,18 +252,21 @@ var ToolCalledWithParamEvaluator = class extends AssertionEvaluator {
254
252
  return null;
255
253
  }
256
254
  }).filter((call) => call !== null);
257
- if (toolCalls.some((actual) => containsAll({ actual, expected }))) {
255
+ if (toolCalls.some((actual2) => containsAll({ actual: actual2, expected }))) {
258
256
  return buildResult(
259
257
  "passed" /* PASSED */,
260
258
  `Tool "${toolName}" was called with params matching ${expectedParamsStr}`,
261
259
  expectedLabel
262
260
  );
263
261
  }
262
+ const MAX_SHOWN = 5;
263
+ const formatCall = (call) => `${toolName}(${Object.entries(call).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(", ")})`;
264
+ const actual = toolCalls.length === 0 ? "No matching tool calls found" : toolCalls.slice(0, MAX_SHOWN).map(formatCall).join(" | ") + (toolCalls.length > MAX_SHOWN ? ` ... and ${toolCalls.length - MAX_SHOWN} more` : "");
264
265
  return buildResult(
265
266
  "failed" /* FAILED */,
266
267
  `Tool "${toolName}" was never called with params matching ${expectedParamsStr}`,
267
268
  expectedLabel,
268
- toolCalls.length > 0 ? `Found ${toolName} calls but params didn't match` : `No matching tool calls found`
269
+ actual
269
270
  );
270
271
  }
271
272
  };
@@ -566,6 +567,7 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
566
567
  };
567
568
  const replace = (s) => replacePlaceholders(s, ctx);
568
569
  const finalPrompt = replace(assertion.prompt);
570
+ const systemPrompt = replace(DEFAULT_JUDGE_CONTEXT);
569
571
  const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;
570
572
  const maxOutputTokens = assertion.maxTokens ?? 1024;
571
573
  const temperature = assertion.temperature ?? 0;
@@ -583,7 +585,6 @@ var LlmJudgeEvaluator = class extends AssertionEvaluator {
583
585
  expected: String(minScore)
584
586
  };
585
587
  }
586
- const systemPrompt = assertion.systemPrompt != null && assertion.systemPrompt !== "" ? replace(assertion.systemPrompt) : replace(DEFAULT_JUDGE_CONTEXT);
587
588
  try {
588
589
  const judgeResult = await this.callGenerateText(
589
590
  model,
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/types/assertions.ts", "../src/types/trace.ts", "../src/types/result.ts", "../src/evaluators/index.ts", "../src/evaluators/skill-was-called-evaluator.ts", "../src/evaluators/assertion-evaluator.ts", "../src/evaluators/tool-called-with-param-evaluator.ts", "../src/evaluators/build-passed-evaluator.ts", "../src/evaluators/time-evaluator.ts", "../src/evaluators/cost-evaluator.ts", "../src/tools/read-file-tool.ts", "../src/evaluators/llm-judge-evaluator.ts"],
4
- "sourcesContent": ["import { z } from \"zod\";\n\n/**\n * Assertion: the agent must have invoked one or more skills during the run.\n * Checked by inspecting the LLM trace for \"Skill\" tool uses with the given skills.\n * When multiple skills are in one assertion, they are treated as a group (1 assertion).\n * Each skill in the group must have been called for the assertion to pass.\n */\nexport const SkillWasCalledAssertionSchema = z.object({\n type: z.literal(\"skill_was_called\"),\n /** Names of the skills that must have been called (matched against trace Skill tool args) */\n skillNames: z.array(z.string()).min(1),\n});\n\nexport type SkillWasCalledAssertion = z.infer<\n typeof SkillWasCalledAssertionSchema\n>;\n\n/**\n * Assertion: a specific tool must have been called with expected parameters.\n * Checked by inspecting the LLM trace for tool calls with matching name and arguments.\n */\nexport const ToolCalledWithParamAssertionSchema = z.object({\n type: z.literal(\"tool_called_with_param\"),\n /** Name of the tool that must have been called */\n toolName: z.string().min(1),\n /** JSON string of key-value pairs for expected parameters (substring match) */\n expectedParams: z.string().min(1),\n});\n\nexport type ToolCalledWithParamAssertion = z.infer<\n typeof ToolCalledWithParamAssertionSchema\n>;\n\n/**\n * Assertion: a build command must exit with the expected code (default 0).\n * Runs the command in the scenario working directory.\n */\nexport const BuildPassedAssertionSchema = z.object({\n type: z.literal(\"build_passed\"),\n /** Command to run (default: \"yarn build\") */\n command: z.string().optional(),\n /** Expected exit code (default: 0) */\n expectedExitCode: z.number().int().optional(),\n});\n\nexport type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;\n\n/**\n * Assertion: the scenario LLM execution cost must stay within a USD threshold.\n * Checked by reading llmTrace.summary.totalCostUsd.\n */\nexport const CostAssertionSchema = z.object({\n type: z.literal(\"cost\"),\n /** Maximum allowed cost in USD */\n maxCostUsd: z.number().positive(),\n});\n\nexport type CostAssertion = z.infer<typeof CostAssertionSchema>;\n\n/**\n * Assertion: an LLM judges the scenario output (score 0-100).\n * Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}}.\n * Passes if judge score >= minScore.\n */\nexport const LlmJudgeAssertionSchema = z.object({\n type: z.literal(\"llm_judge\"),\n /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */\n prompt: z.string(),\n /** Optional system prompt for the judge (default asks for JSON with score) */\n systemPrompt: z.string().optional(),\n /** Minimum score to pass (0-100, default 70) */\n minScore: z.number().int().min(0).max(100).optional(),\n /** Model for the judge (e.g. claude-3-5-haiku) */\n model: z.string().optional(),\n maxTokens: z.number().int().optional(),\n temperature: z.number().min(0).max(1).optional(),\n});\n\nexport type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;\n\n/**\n * Assertion: scenario must complete within a maximum duration.\n * Deterministic check against the scenario execution time.\n */\nexport const TimeAssertionSchema = z.object({\n type: z.literal(\"time_limit\"),\n /** Maximum allowed duration in milliseconds */\n maxDurationMs: z.number().int().positive(),\n});\n\nexport type TimeAssertion = z.infer<typeof TimeAssertionSchema>;\n\n/**\n * Union of all assertion types.\n * Each assertion has a type and type-specific data.\n * Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.\n */\nexport const AssertionSchema = z.union([\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n]);\n\nexport type Assertion = z.infer<typeof AssertionSchema>;\n", "import { z } from \"zod\";\n\n/**\n * Token usage schema.\n */\nexport const TokenUsageSchema = z.object({\n prompt: z.number(),\n completion: z.number(),\n total: z.number(),\n});\n\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * LLM step type enum.\n */\nexport enum LLMStepType {\n COMPLETION = \"completion\",\n TOOL_USE = \"tool_use\",\n TOOL_RESULT = \"tool_result\",\n THINKING = \"thinking\",\n}\n\n/**\n * LLM trace step schema.\n */\nexport const LLMTraceStepSchema = z.object({\n id: z.string(),\n stepNumber: z.number(),\n type: z.enum(LLMStepType),\n model: z.string(),\n provider: z.string(),\n startedAt: z.string(),\n durationMs: z.number(),\n tokenUsage: TokenUsageSchema,\n costUsd: z.number(),\n toolName: z.string().optional(),\n toolArguments: z.string().optional(),\n inputPreview: z.string().optional(),\n outputPreview: z.string().optional(),\n success: z.boolean(),\n error: z.string().optional(),\n});\n\nexport type LLMTraceStep = z.infer<typeof LLMTraceStepSchema>;\n\n/**\n * LLM breakdown stats schema.\n */\nexport const LLMBreakdownStatsSchema = z.object({\n count: z.number(),\n durationMs: z.number(),\n tokens: z.number(),\n costUsd: z.number(),\n});\n\nexport type LLMBreakdownStats = z.infer<typeof LLMBreakdownStatsSchema>;\n\n/**\n * LLM trace summary schema.\n */\nexport const LLMTraceSummarySchema = z.object({\n totalSteps: z.number(),\n totalDurationMs: z.number(),\n totalTokens: TokenUsageSchema,\n totalCostUsd: z.number(),\n stepTypeBreakdown: z.record(z.string(), LLMBreakdownStatsSchema).optional(),\n modelBreakdown: z.record(z.string(), LLMBreakdownStatsSchema),\n modelsUsed: z.array(z.string()),\n});\n\nexport type LLMTraceSummary = z.infer<typeof LLMTraceSummarySchema>;\n\n/**\n * LLM trace schema.\n */\nexport const LLMTraceSchema = z.object({\n id: z.string(),\n steps: z.array(LLMTraceStepSchema),\n summary: LLMTraceSummarySchema,\n});\n\nexport type LLMTrace = z.infer<typeof LLMTraceSchema>;\n", "import { z } from \"zod\";\nimport { LLMTraceStepSchema } from \"./trace.js\";\n\n/**\n * Assertion result status enum.\n */\nexport enum AssertionResultStatus {\n PASSED = \"passed\",\n FAILED = \"failed\",\n SKIPPED = \"skipped\",\n ERROR = \"error\",\n}\n\n/**\n * Assertion result schema.\n */\nexport const AssertionResultSchema = z.object({\n id: z.string(),\n assertionId: z.string(),\n assertionType: z.string(),\n assertionName: z.string(),\n status: z.enum(AssertionResultStatus),\n message: z.string().optional(),\n expected: z.string().optional(),\n actual: z.string().optional(),\n duration: z.number().optional(),\n details: z.record(z.string(), z.unknown()).optional(),\n llmTraceSteps: z.array(LLMTraceStepSchema).optional(),\n});\n\nexport type AssertionResult = z.infer<typeof AssertionResultSchema>;\n", "import type { Assertion, AssertionResult } from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nimport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nimport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nimport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nimport { TimeEvaluator } from \"./time-evaluator.js\";\nimport { CostEvaluator } from \"./cost-evaluator.js\";\nimport { LlmJudgeEvaluator } from \"./llm-judge-evaluator.js\";\nimport type { EvaluationInput } from \"../types/index.js\";\n\nconst llmJudgeEvaluator = new LlmJudgeEvaluator();\n\nconst evaluators: Record<string, AssertionEvaluator> = {\n skill_was_called: new SkillWasCalledEvaluator(),\n tool_called_with_param: new ToolCalledWithParamEvaluator(),\n build_passed: new BuildPassedEvaluator(),\n time_limit: new TimeEvaluator(),\n cost: new CostEvaluator(),\n llm_judge: llmJudgeEvaluator,\n // Custom assertions use the same LLM-based evaluation as llm_judge\n custom: llmJudgeEvaluator,\n};\n\n/**\n * Register a custom assertion evaluator.\n *\n * @param type - The assertion type identifier\n * @param evaluator - The evaluator instance\n */\nexport function registerEvaluator(\n type: string,\n evaluator: AssertionEvaluator,\n): void {\n evaluators[type] = evaluator;\n}\n\n/**\n * Get a registered evaluator by type.\n *\n * @param type - The assertion type identifier\n * @returns The evaluator or undefined if not found\n */\nexport function getEvaluator(type: string): AssertionEvaluator | undefined {\n return evaluators[type];\n}\n\n/**\n * Evaluate all assertions against the input.\n *\n * @param input - Evaluation input (includes outputText, llmTrace, fileDiffs)\n * @param assertions - List of assertions to evaluate\n * @param context - Optional context (e.g. workDir for build_passed, llmConfig for llm_judge)\n * @returns Array of assertion results; empty if no assertions\n */\nexport async function evaluateAssertions(\n input: EvaluationInput,\n assertions: Assertion[],\n context?: AssertionContext,\n): Promise<AssertionResult[]> {\n if (assertions.length === 0) {\n return [];\n }\n return Promise.all(\n assertions.map(async (assertion) => {\n const evaluator = evaluators[assertion.type];\n if (!evaluator) {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: assertion.type,\n assertionName: \"Unknown assertion\",\n status: AssertionResultStatus.ERROR,\n message: `Unsupported assertion type: ${assertion.type}`,\n duration: 0,\n };\n }\n const startMs = Date.now();\n const result = await evaluator.evaluate(assertion, input, context);\n const durationMs = Date.now() - startMs;\n return { ...result, duration: durationMs };\n }),\n );\n}\n\n// Re-export evaluator classes and types\nexport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nexport type { AssertionContext, LlmConfig } from \"./assertion-evaluator.js\";\nexport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nexport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nexport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nexport { TimeEvaluator } from \"./time-evaluator.js\";\nexport { CostEvaluator } from \"./cost-evaluator.js\";\nexport {\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type JudgeResult,\n} from \"./llm-judge-evaluator.js\";\n", "import type {\n SkillWasCalledAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Collect all skill names that were called in the LLM trace.\n */\nfunction collectCalledSkillNames(llmTrace: LLMTrace): Set<string> {\n const calledSkills = new Set<string>();\n for (const step of llmTrace.steps) {\n if (step.toolName !== \"Skill\") {\n continue;\n }\n let args: unknown;\n try {\n args = step.toolArguments\n ? (JSON.parse(step.toolArguments) as unknown)\n : undefined;\n } catch {\n continue;\n }\n if (args !== null && typeof args === \"object\") {\n const obj = args as Record<string, unknown>;\n if (typeof obj.skill === \"string\") {\n calledSkills.add(obj.skill);\n }\n }\n }\n return calledSkills;\n}\n\n/**\n * Evaluator for \"skill_was_called\" assertion: the LLM trace must contain steps\n * where the \"Skill\" tool was used with ALL expected skills (by name).\n *\n * Multiple skills in one assertion are treated as a group \u2014 all must be called\n * for the assertion to pass. To check skills independently, add separate assertions.\n */\nexport class SkillWasCalledEvaluator extends AssertionEvaluator<SkillWasCalledAssertion> {\n readonly type = \"skill_was_called\" as const;\n\n evaluate(\n assertion: SkillWasCalledAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- context not used for skill_was_called\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const expectedSkills = assertion.skillNames;\n const expectedLabel = expectedSkills.join(\", \");\n\n const llmTrace: LLMTrace | undefined = input.llmTrace;\n if (!llmTrace?.steps?.length) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message: \"No LLM trace steps to check for skill invocation\",\n expected: expectedLabel,\n };\n }\n\n const calledSkills = collectCalledSkillNames(llmTrace);\n const missingSkills = expectedSkills.filter(\n (name) => !calledSkills.has(name),\n );\n\n if (missingSkills.length === 0) {\n const message =\n expectedSkills.length === 1\n ? `Skill \"${expectedSkills[0]}\" was called`\n : `All skills were called: ${expectedLabel}`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.PASSED,\n message,\n expected: expectedLabel,\n };\n }\n\n const missingLabel = missingSkills.join(\", \");\n const message =\n expectedSkills.length === 1\n ? `Skill \"${missingSkills[0]}\" was not called`\n : `Missing skills: ${missingLabel} (expected all of: ${expectedLabel})`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message,\n expected: expectedLabel,\n };\n }\n}\n", "import type {\n Assertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport type { LanguageModel } from \"ai\";\n\n/**\n * Configuration for LLM calls (used by llm_judge assertion).\n */\nexport interface LlmConfig {\n /** Base URL for the AI API (e.g., 'https://api.anthropic.com') */\n baseUrl: string;\n /** Headers to include in API requests (e.g., API key) */\n headers: Record<string, string>;\n}\n\n/**\n * Optional context passed when evaluating assertions.\n */\nexport interface AssertionContext {\n /** Working directory for the scenario (used by build_passed) */\n workDir?: string;\n /** LLM configuration (used by llm_judge) */\n llmConfig?: LlmConfig;\n /** Default model for llm_judge when assertion.model is not set */\n defaultJudgeModel?: string;\n /** Optional model override \u2014 when provided, used instead of creating from llmConfig + modelId */\n model?: LanguageModel;\n}\n\n/**\n * Abstract base for assertion evaluators.\n * Each assertion type has a concrete class that implements evaluate().\n * evaluate() may return a Promise for async assertions (e.g. llm_judge).\n */\nexport abstract class AssertionEvaluator<T extends Assertion = Assertion> {\n abstract readonly type: T[\"type\"];\n\n abstract evaluate(\n assertion: T,\n input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult | Promise<AssertionResult>;\n}\n", "import type {\n ToolCalledWithParamAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst ASSERTION_TYPE = \"tool_called_with_param\" as const;\nconst ASSERTION_NAME = \"Tool called with param\";\n\nconst containsAll = ({\n actual,\n expected,\n}: {\n actual: Record<string, unknown>;\n expected: Record<string, unknown>;\n}): boolean =>\n Object.entries(expected).every(([key, val]) => {\n const actualVal = actual[key];\n // actual comes from LLM trace \u2014 can be null/undefined\n if (actualVal === null || actualVal === undefined) return false;\n const actualStr =\n typeof actualVal === \"string\" ? actualVal : JSON.stringify(actualVal);\n return actualStr.includes(String(val));\n });\n\n/**\n * Evaluator for \"tool_called_with_param\" assertion: the LLM trace must contain\n * a step where a specific tool was called with arguments where each expected\n * param value is a substring of the actual value.\n */\nexport class ToolCalledWithParamEvaluator extends AssertionEvaluator<ToolCalledWithParamAssertion> {\n readonly type = ASSERTION_TYPE;\n\n evaluate(\n assertion: ToolCalledWithParamAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- required by base class\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const { toolName, expectedParams: expectedParamsStr } = assertion;\n\n const buildResult = (\n status: AssertionResultStatus,\n message: string,\n expected: string,\n actual?: string,\n ): AssertionResult => ({\n id: randomUUID(),\n assertionId,\n assertionType: ASSERTION_TYPE,\n assertionName: ASSERTION_NAME,\n status,\n message,\n expected,\n ...(actual !== undefined ? { actual } : {}),\n });\n\n // expectedParams is validated upstream, but guard here to avoid runtime throws.\n let expected: Record<string, unknown>;\n try {\n expected = JSON.parse(expectedParamsStr) as Record<string, unknown>;\n } catch {\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" assertion has invalid expected params JSON`,\n `${toolName}(invalid expected params)`,\n \"Invalid expected params JSON\",\n );\n }\n\n const expectedLabel = `${toolName}(${Object.entries(expected)\n .map(([k, v]) => `${k}=\"${v}\"`)\n .join(\", \")})`;\n\n const steps = input.llmTrace?.steps ?? [];\n // toolArguments is always JSON.stringify(args) from the trace builder, or undefined\n const toolCalls = steps\n .filter((s) => s.toolName === toolName && s.toolArguments !== undefined)\n .map((s) => {\n try {\n return JSON.parse(s.toolArguments!) as Record<string, unknown>;\n } catch {\n return null;\n }\n })\n .filter((call): call is Record<string, unknown> => call !== null);\n\n if (toolCalls.some((actual) => containsAll({ actual, expected }))) {\n return buildResult(\n AssertionResultStatus.PASSED,\n `Tool \"${toolName}\" was called with params matching ${expectedParamsStr}`,\n expectedLabel,\n );\n }\n\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" was never called with params matching ${expectedParamsStr}`,\n expectedLabel,\n toolCalls.length > 0\n ? `Found ${toolName} calls but params didn't match`\n : `No matching tool calls found`,\n );\n }\n}\n", "import type {\n BuildPassedAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { execSync } from \"child_process\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst DEFAULT_COMMAND = \"yarn build\";\nconst DEFAULT_EXIT_CODE = 0;\n\n/**\n * Evaluator for \"build_passed\" assertion: runs a build command in the scenario\n * working directory and passes if the command exits with the expected code (default 0).\n */\nexport class BuildPassedEvaluator extends AssertionEvaluator<BuildPassedAssertion> {\n readonly type = \"build_passed\" as const;\n\n evaluate(\n assertion: BuildPassedAssertion,\n _input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const workDir = context?.workDir;\n const command = assertion.command ?? DEFAULT_COMMAND;\n const expectedExitCode = assertion.expectedExitCode ?? DEFAULT_EXIT_CODE;\n\n if (!workDir) {\n return this.createResult(assertionId, {\n status: AssertionResultStatus.FAILED,\n message: \"No working directory provided for build_passed assertion\",\n expected: String(expectedExitCode),\n });\n }\n\n let exitCode: number | null = null;\n let errorMessage: string | null = null;\n let stdout: string | undefined;\n let stderr: string | undefined;\n\n console.log(`[build_passed] Running \"${command}\" in: ${workDir}`);\n\n try {\n execSync(command, {\n cwd: workDir,\n encoding: \"utf-8\",\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n exitCode = 0;\n } catch (err) {\n const error = err as Error & {\n status?: number;\n code?: number;\n stdout?: string | Buffer;\n stderr?: string | Buffer;\n };\n exitCode =\n typeof error.status === \"number\"\n ? error.status\n : typeof error.code === \"number\"\n ? error.code\n : null;\n errorMessage = error.message;\n stdout = this.bufferToString(error.stdout);\n stderr = this.bufferToString(error.stderr);\n }\n\n const passed = exitCode !== null && exitCode === expectedExitCode;\n\n const details: Record<string, unknown> = { workDir, command };\n if (stdout !== undefined && stdout !== \"\") {\n details.stdout = stdout;\n }\n if (stderr !== undefined && stderr !== \"\") {\n details.stderr = stderr;\n }\n\n return this.createResult(assertionId, {\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: this.formatMessage(exitCode, expectedExitCode, errorMessage),\n expected: String(expectedExitCode),\n actual: exitCode !== null ? String(exitCode) : undefined,\n details,\n });\n }\n\n private createResult(\n assertionId: string,\n fields: Partial<AssertionResult>,\n ): AssertionResult {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"build_passed\",\n assertionName: \"Build passed\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n\n private bufferToString(\n value: string | Buffer | undefined,\n ): string | undefined {\n if (value === undefined || value === null) return undefined;\n if (typeof value === \"string\") return value;\n return value.toString(\"utf-8\");\n }\n\n private formatMessage(\n exitCode: number | null,\n expectedExitCode: number,\n errorMessage: string | null,\n ): string {\n if (exitCode === null) {\n return `Build failed: ${errorMessage}`;\n }\n if (exitCode === expectedExitCode) {\n return `Build passed (exit code ${exitCode})`;\n }\n return `Build exited with ${exitCode}, expected ${expectedExitCode}`;\n }\n}\n", "import type {\n TimeAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"time_limit\" assertion: passes if the scenario completed\n * within the configured maximum duration (maxDurationMs).\n */\nexport class TimeEvaluator extends AssertionEvaluator<TimeAssertion> {\n readonly type = \"time_limit\" as const;\n\n evaluate(assertion: TimeAssertion, input: EvaluationInput): AssertionResult {\n const maxDurationMs = assertion.maxDurationMs;\n\n if (input.durationMs == null) {\n return this.createResult({\n status: AssertionResultStatus.FAILED,\n message: \"No duration data available for time assertion\",\n expected: `<= ${maxDurationMs}ms`,\n });\n }\n\n const passed = input.durationMs <= maxDurationMs;\n\n return this.createResult({\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Completed in ${input.durationMs}ms (limit: ${maxDurationMs}ms)`\n : `Exceeded time limit: ${input.durationMs}ms > ${maxDurationMs}ms`,\n expected: `<= ${maxDurationMs}ms`,\n actual: `${input.durationMs}ms`,\n });\n }\n\n private createResult(fields: Partial<AssertionResult>): AssertionResult {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: \"time_limit\",\n assertionName: \"Time limit\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n}\n", "import type {\n CostAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"cost\" assertion: checks that the scenario's LLM execution cost\n * stays within a configured USD threshold by reading llmTrace.summary.totalCostUsd.\n */\nexport class CostEvaluator extends AssertionEvaluator<CostAssertion> {\n readonly type = \"cost\" as const;\n\n evaluate(assertion: CostAssertion, input: EvaluationInput): AssertionResult {\n const assertionId = randomUUID();\n const id = randomUUID();\n const assertionName = \"Cost\";\n const assertionType = \"cost\";\n const maxCostUsd = assertion.maxCostUsd;\n\n if (!input.llmTrace) {\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: AssertionResultStatus.SKIPPED,\n message: \"No LLM trace available to check cost\",\n };\n }\n\n const actualCostUsd = input.llmTrace.summary.totalCostUsd;\n const formattedActual = actualCostUsd.toFixed(6);\n const formattedMax = maxCostUsd.toFixed(6);\n const passed = Number(formattedActual) <= Number(formattedMax);\n\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Cost $${formattedActual} is within limit of $${formattedMax}`\n : `Cost $${formattedActual} exceeds limit of $${formattedMax}`,\n expected: `<= $${formattedMax}`,\n actual: `$${formattedActual}`,\n details: { actualCostUsd, maxCostUsd },\n };\n }\n}\n", "import { tool, type Tool } from \"ai\";\nimport { z } from \"zod\";\nimport { readFile } from \"fs/promises\";\nimport path from \"path\";\n\nexport type ReadFileResult =\n | { path: string; content: string }\n | { error: string };\n\nexport function createReadFileTool(\n workDir: string,\n): Tool<{ path: string }, ReadFileResult> {\n const resolvedWorkDir = path.resolve(workDir);\n return tool({\n description:\n \"Read the content of any file in the workspace by its relative path. Use this to inspect file contents when evaluating code changes.\",\n inputSchema: z.object({\n path: z.string().describe(\"Relative file path in the workspace\"),\n }),\n execute: async ({\n path: filePath,\n }: {\n path: string;\n }): Promise<{ path: string; content: string } | { error: string }> => {\n const resolved = path.resolve(resolvedWorkDir, filePath);\n if (!resolved.startsWith(resolvedWorkDir + path.sep)) {\n return { error: `Access denied: path escapes workspace directory` };\n }\n try {\n const content = await readFile(resolved, \"utf-8\");\n return { path: filePath, content };\n } catch {\n return { error: `File not found: ${filePath}` };\n }\n },\n });\n}\n", "import type {\n LlmJudgeAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { createReadFileTool } from \"../tools/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { createAnthropic } from \"@ai-sdk/anthropic\";\nimport {\n generateText,\n Output,\n APICallError,\n NoObjectGeneratedError,\n stepCountIs,\n type LanguageModel,\n} from \"ai\";\nimport { z } from \"zod\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nexport interface JudgeResult {\n text: string;\n score: number;\n scoreReasoning: string;\n}\n\nexport const JudgeResultSchema = z.object({\n text: z.string().describe(\"A brief textual verdict of the test result\"),\n score: z\n .number()\n .min(0)\n .max(100)\n .describe(\n \"A number from 0 to 100 reflecting how well the answer meets the acceptance criteria\",\n ),\n scoreReasoning: z\n .string()\n .describe(\"A concise explanation justifying the assigned score\"),\n});\n\nconst MAX_JUDGE_STEPS = 20;\n\n/**\n * Format LLM trace as readable text for the judge (step number, type, tool name/args, output preview).\n */\nexport function formatTraceForJudge(llmTrace: LLMTrace | undefined): string {\n if (!llmTrace?.steps?.length) {\n return \"No trace available.\";\n }\n const lines: string[] = [];\n for (const step of llmTrace.steps) {\n const parts: string[] = [\n `Step ${step.stepNumber}`,\n `type: ${step.type}`,\n `duration: ${step.durationMs}ms`,\n ];\n if (step.toolName) {\n parts.push(`tool: ${step.toolName}`);\n if (step.toolArguments) {\n parts.push(`args: ${step.toolArguments}`);\n }\n }\n if (step.outputPreview) {\n parts.push(`output: ${step.outputPreview}`);\n }\n if (step.error) {\n parts.push(`error: ${step.error}`);\n }\n lines.push(parts.join(\", \"));\n }\n return lines.join(\"\\n\");\n}\n\n/**\n * Context object for placeholder replacement.\n */\nexport interface PlaceholderContext {\n output: string;\n cwd: string;\n changedFiles: string;\n modifiedFiles: string;\n newFiles: string;\n trace: string;\n}\n\nexport function replacePlaceholders(\n str: string,\n ctx: PlaceholderContext,\n): string {\n return str\n .replace(/\\{\\{output\\}\\}/g, ctx.output)\n .replace(/\\{\\{cwd\\}\\}/g, ctx.cwd)\n .replace(/\\{\\{changedFiles\\}\\}/g, ctx.changedFiles)\n .replace(/\\{\\{modifiedFiles\\}\\}/g, ctx.modifiedFiles)\n .replace(/\\{\\{newFiles\\}\\}/g, ctx.newFiles)\n .replace(/\\{\\{trace\\}\\}/g, ctx.trace);\n}\n\n/**\n * Strip markdown code fences (e.g. ```json ... ```) from LLM output,\n * returning only the inner content for JSON parsing.\n */\nexport function stripMarkdownCodeBlock(text: string): string {\n const trimmed = text.trim();\n const match = trimmed.match(/^```(?:\\w+)?\\s*\\n?([\\s\\S]*?)\\n?\\s*```$/);\n return match ? match[1].trim() : trimmed;\n}\n\nexport function validateJudgeResult(parsed: unknown): JudgeResult {\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(\"Judge result is not an object\");\n }\n const obj = parsed as Record<string, unknown>;\n if (typeof obj.text !== \"string\") {\n throw new Error(\"Judge result does not contain a valid text field\");\n }\n if (typeof obj.score !== \"number\") {\n throw new Error(\"Judge result does not contain a valid score field\");\n }\n if (obj.score < 0 || obj.score > 100) {\n throw new Error(\"Judge result score is not between 0 and 100\");\n }\n if (typeof obj.scoreReasoning !== \"string\") {\n throw new Error(\n \"Judge result does not contain a valid scoreReasoning field\",\n );\n }\n return {\n text: obj.text,\n score: obj.score,\n scoreReasoning: obj.scoreReasoning,\n };\n}\n\nconst DEFAULT_MIN_SCORE = 70;\n\n/** Default judge context (run data + placeholders); used when assertion.systemPrompt is empty. */\nconst DEFAULT_JUDGE_CONTEXT = `You are judging a scenario run. The ACTUAL run data is provided below \u2014 use it to verify facts:\n\n- {{output}}: the agent's final output\n- {{cwd}}: working directory\n- {{changedFiles}}: list of all files changed (or \"No files were changed\")\n- {{modifiedFiles}}: list of existing files that were modified (or \"No files were modified\")\n- {{newFiles}}: list of new files that were created (or \"No new files were created\")\n- {{trace}}: step-by-step trace (tool calls, completions) so you can check e.g. which tools were called and how many times\n\nYou have access to a read_file tool that lets you read the content of ANY file in the workspace (not just changed files). Use it to inspect file contents whenever you need to verify claims about code, check imports, review implementations, or validate that specific code patterns exist. Always read files before making judgments about their content \u2014 do not guess.\n\nCRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above and the actual file contents (use the read_file tool). If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;\n\n/**\n * Evaluator for \"llm_judge\" assertion: an LLM judges the scenario output\n * (prompt with {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}) and returns a score 0-100.\n * Passes if score >= minScore.\n */\nexport class LlmJudgeEvaluator extends AssertionEvaluator<LlmJudgeAssertion> {\n readonly type = \"llm_judge\" as const;\n\n async evaluate(\n assertion: LlmJudgeAssertion,\n input: EvaluationInput,\n context?: AssertionContext,\n ): Promise<AssertionResult> {\n const assertionId = randomUUID();\n const workDir = context?.workDir ?? \"\";\n\n const output = input.outputText ?? \"\";\n const fileDiffs = input.fileDiffs ?? [];\n\n const changedPaths = fileDiffs.map((d) => d.path);\n const modifiedPaths = fileDiffs\n .filter((d) => d.status === \"modified\")\n .map((d) => d.path);\n const newPaths = fileDiffs\n .filter((d) => d.status === \"new\")\n .map((d) => d.path);\n\n const changedFiles =\n changedPaths.length > 0\n ? changedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were changed\";\n const modifiedFiles =\n modifiedPaths.length > 0\n ? modifiedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were modified\";\n const newFiles =\n newPaths.length > 0\n ? newPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No new files were created\";\n\n const trace = formatTraceForJudge(input.llmTrace);\n const ctx: PlaceholderContext = {\n output,\n cwd: workDir,\n changedFiles,\n modifiedFiles,\n newFiles,\n trace,\n };\n const replace = (s: string) => replacePlaceholders(s, ctx);\n\n const finalPrompt = replace(assertion.prompt);\n\n const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;\n const maxOutputTokens = assertion.maxTokens ?? 1024;\n const temperature = assertion.temperature ?? 0;\n const modelId = assertion.model ?? context?.defaultJudgeModel;\n\n const model = this.resolveModel(context, modelId);\n if (!model) {\n const reason =\n !modelId && !context?.model\n ? \"No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel/model in context)\"\n : \"No llmConfig for llm_judge assertion (AI gateway required)\";\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: reason,\n expected: String(minScore),\n };\n }\n\n const systemPrompt =\n assertion.systemPrompt != null && assertion.systemPrompt !== \"\"\n ? replace(assertion.systemPrompt)\n : replace(DEFAULT_JUDGE_CONTEXT);\n\n try {\n const judgeResult = await this.callGenerateText(\n model,\n finalPrompt,\n systemPrompt,\n maxOutputTokens,\n temperature,\n workDir || undefined,\n );\n\n const passed = judgeResult.score >= minScore;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}`\n : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,\n expected: String(minScore),\n actual: String(judgeResult.score),\n details: {\n score: judgeResult.score,\n scoreReasoning: judgeResult.scoreReasoning,\n text: judgeResult.text,\n },\n };\n } catch (err) {\n if (NoObjectGeneratedError.isInstance(err)) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: \"LLM judge failed to produce valid structured output\",\n expected: String(minScore),\n details: {\n rawText:\n typeof err.text === \"string\" ? err.text.slice(0, 500) : undefined,\n },\n };\n }\n\n const message = err instanceof Error ? err.message : String(err);\n const details: Record<string, unknown> = {\n error: message,\n model: modelId,\n };\n\n if (APICallError.isInstance(err)) {\n details.statusCode = err.statusCode;\n details.url = err.url;\n details.isRetryable = err.isRetryable;\n details.responseBody =\n typeof err.responseBody === \"string\"\n ? err.responseBody.slice(0, 2000)\n : err.responseBody;\n }\n\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: `LLM judge call failed: ${message}`,\n expected: String(minScore),\n details,\n };\n }\n }\n\n /**\n * Resolve the LanguageModel to use: context.model (injected mock/override)\n * takes precedence, otherwise create from llmConfig + modelId.\n */\n private resolveModel(\n context: AssertionContext | undefined,\n modelId: string | undefined,\n ): LanguageModel | null {\n if (context?.model) {\n return context.model;\n }\n if (!modelId || !context?.llmConfig) {\n return null;\n }\n const anthropic = createAnthropic({\n baseURL: context.llmConfig.baseUrl,\n apiKey: \"dummy\",\n headers: context.llmConfig.headers,\n });\n return anthropic(modelId);\n }\n\n private async callGenerateText(\n model: LanguageModel,\n prompt: string,\n system: string,\n maxOutputTokens: number,\n temperature: number,\n workDir?: string,\n ): Promise<JudgeResult> {\n const baseOptions = {\n model,\n prompt,\n system,\n maxOutputTokens,\n temperature,\n output: Output.object({ schema: JudgeResultSchema }),\n stopWhen: stepCountIs(MAX_JUDGE_STEPS),\n } as const;\n\n const { output } = workDir\n ? await generateText({\n ...baseOptions,\n tools: { read_file: createReadFileTool(workDir) },\n })\n : await generateText(baseOptions);\n\n return output;\n }\n}\n"],
5
- "mappings": ";AAAA,SAAS,SAAS;AAQX,IAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA;AAAA,EAElC,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC;AACvC,CAAC;AAUM,IAAM,qCAAqC,EAAE,OAAO;AAAA,EACzD,MAAM,EAAE,QAAQ,wBAAwB;AAAA;AAAA,EAExC,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA;AAAA,EAE1B,gBAAgB,EAAE,OAAO,EAAE,IAAI,CAAC;AAClC,CAAC;AAUM,IAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,cAAc;AAAA;AAAA,EAE9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE7B,kBAAkB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC9C,CAAC;AAQM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,MAAM;AAAA;AAAA,EAEtB,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AASM,IAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EAAE,QAAQ,WAAW;AAAA;AAAA,EAE3B,QAAQ,EAAE,OAAO;AAAA;AAAA,EAEjB,cAAc,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAElC,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,EAAE,SAAS;AAAA;AAAA,EAEpD,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,WAAW,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACrC,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AACjD,CAAC;AAQM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,YAAY;AAAA;AAAA,EAE5B,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC3C,CAAC;AASM,IAAM,kBAAkB,EAAE,MAAM;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;;;ACzGD,SAAS,KAAAA,UAAS;AAKX,IAAM,mBAAmBA,GAAE,OAAO;AAAA,EACvC,QAAQA,GAAE,OAAO;AAAA,EACjB,YAAYA,GAAE,OAAO;AAAA,EACrB,OAAOA,GAAE,OAAO;AAClB,CAAC;AAOM,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,gBAAa;AACb,EAAAA,aAAA,cAAW;AACX,EAAAA,aAAA,iBAAc;AACd,EAAAA,aAAA,cAAW;AAJD,SAAAA;AAAA,GAAA;AAUL,IAAM,qBAAqBD,GAAE,OAAO;AAAA,EACzC,IAAIA,GAAE,OAAO;AAAA,EACb,YAAYA,GAAE,OAAO;AAAA,EACrB,MAAMA,GAAE,KAAK,WAAW;AAAA,EACxB,OAAOA,GAAE,OAAO;AAAA,EAChB,UAAUA,GAAE,OAAO;AAAA,EACnB,WAAWA,GAAE,OAAO;AAAA,EACpB,YAAYA,GAAE,OAAO;AAAA,EACrB,YAAY;AAAA,EACZ,SAASA,GAAE,OAAO;AAAA,EAClB,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,EACnC,cAAcA,GAAE,OAAO,EAAE,SAAS;AAAA,EAClC,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,EACnC,SAASA,GAAE,QAAQ;AAAA,EACnB,OAAOA,GAAE,OAAO,EAAE,SAAS;AAC7B,CAAC;AAOM,IAAM,0BAA0BA,GAAE,OAAO;AAAA,EAC9C,OAAOA,GAAE,OAAO;AAAA,EAChB,YAAYA,GAAE,OAAO;AAAA,EACrB,QAAQA,GAAE,OAAO;AAAA,EACjB,SAASA,GAAE,OAAO;AACpB,CAAC;AAOM,IAAM,wBAAwBA,GAAE,OAAO;AAAA,EAC5C,YAAYA,GAAE,OAAO;AAAA,EACrB,iBAAiBA,GAAE,OAAO;AAAA,EAC1B,aAAa;AAAA,EACb,cAAcA,GAAE,OAAO;AAAA,EACvB,mBAAmBA,GAAE,OAAOA,GAAE,OAAO,GAAG,uBAAuB,EAAE,SAAS;AAAA,EAC1E,gBAAgBA,GAAE,OAAOA,GAAE,OAAO,GAAG,uBAAuB;AAAA,EAC5D,YAAYA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAChC,CAAC;AAOM,IAAM,iBAAiBA,GAAE,OAAO;AAAA,EACrC,IAAIA,GAAE,OAAO;AAAA,EACb,OAAOA,GAAE,MAAM,kBAAkB;AAAA,EACjC,SAAS;AACX,CAAC;;;AChFD,SAAS,KAAAE,UAAS;AAMX,IAAK,wBAAL,kBAAKC,2BAAL;AACL,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,aAAU;AACV,EAAAA,uBAAA,WAAQ;AAJE,SAAAA;AAAA,GAAA;AAUL,IAAM,wBAAwBC,GAAE,OAAO;AAAA,EAC5C,IAAIA,GAAE,OAAO;AAAA,EACb,aAAaA,GAAE,OAAO;AAAA,EACtB,eAAeA,GAAE,OAAO;AAAA,EACxB,eAAeA,GAAE,OAAO;AAAA,EACxB,QAAQA,GAAE,KAAK,qBAAqB;AAAA,EACpC,SAASA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,QAAQA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAASA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACpD,eAAeA,GAAE,MAAM,kBAAkB,EAAE,SAAS;AACtD,CAAC;;;AC1BD,SAAS,cAAAC,mBAAkB;;;ACK3B,SAAS,kBAAkB;;;AC6BpB,IAAe,qBAAf,MAAmE;AAQ1E;;;AD9BA,SAAS,wBAAwB,UAAiC;AAChE,QAAM,eAAe,oBAAI,IAAY;AACrC,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AACA,QAAI;AACJ,QAAI;AACF,aAAO,KAAK,gBACP,KAAK,MAAM,KAAK,aAAa,IAC9B;AAAA,IACN,QAAQ;AACN;AAAA,IACF;AACA,QAAI,SAAS,QAAQ,OAAO,SAAS,UAAU;AAC7C,YAAM,MAAM;AACZ,UAAI,OAAO,IAAI,UAAU,UAAU;AACjC,qBAAa,IAAI,IAAI,KAAK;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AASO,IAAM,0BAAN,cAAsC,mBAA4C;AAAA,EAC9E,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,cAAc,WAAW;AAC/B,UAAM,iBAAiB,UAAU;AACjC,UAAM,gBAAgB,eAAe,KAAK,IAAI;AAE9C,UAAM,WAAiC,MAAM;AAC7C,QAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,aAAO;AAAA,QACL,IAAI,WAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,wBAAwB,QAAQ;AACrD,UAAM,gBAAgB,eAAe;AAAA,MACnC,CAAC,SAAS,CAAC,aAAa,IAAI,IAAI;AAAA,IAClC;AAEA,QAAI,cAAc,WAAW,GAAG;AAC9B,YAAMC,WACJ,eAAe,WAAW,IACtB,UAAU,eAAe,CAAC,CAAC,iBAC3B,2BAA2B,aAAa;AAC9C,aAAO;AAAA,QACL,IAAI,WAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAAA;AAAA,QACA,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,cAAc,KAAK,IAAI;AAC5C,UAAM,UACJ,eAAe,WAAW,IACtB,UAAU,cAAc,CAAC,CAAC,qBAC1B,mBAAmB,YAAY,sBAAsB,aAAa;AACxE,WAAO;AAAA,MACL,IAAI,WAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ;AAAA,EACF;AACF;;;AErGA,SAAS,cAAAC,mBAAkB;AAI3B,IAAM,iBAAiB;AACvB,IAAM,iBAAiB;AAEvB,IAAM,cAAc,CAAC;AAAA,EACnB;AAAA,EACA;AACF,MAIE,OAAO,QAAQ,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,GAAG,MAAM;AAC7C,QAAM,YAAY,OAAO,GAAG;AAE5B,MAAI,cAAc,QAAQ,cAAc,OAAW,QAAO;AAC1D,QAAM,YACJ,OAAO,cAAc,WAAW,YAAY,KAAK,UAAU,SAAS;AACtE,SAAO,UAAU,SAAS,OAAO,GAAG,CAAC;AACvC,CAAC;AAOI,IAAM,+BAAN,cAA2C,mBAAiD;AAAA,EACxF,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,cAAcC,YAAW;AAC/B,UAAM,EAAE,UAAU,gBAAgB,kBAAkB,IAAI;AAExD,UAAM,cAAc,CAClB,QACA,SACAC,WACA,YACqB;AAAA,MACrB,IAAID,YAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAAC;AAAA,MACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IAC3C;AAGA,QAAI;AACJ,QAAI;AACF,iBAAW,KAAK,MAAM,iBAAiB;AAAA,IACzC,QAAQ;AACN,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ;AAAA,QACjB,GAAG,QAAQ;AAAA,QACX;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,GAAG,QAAQ,IAAI,OAAO,QAAQ,QAAQ,EACzD,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,GAAG,EAC7B,KAAK,IAAI,CAAC;AAEb,UAAM,QAAQ,MAAM,UAAU,SAAS,CAAC;AAExC,UAAM,YAAY,MACf,OAAO,CAAC,MAAM,EAAE,aAAa,YAAY,EAAE,kBAAkB,MAAS,EACtE,IAAI,CAAC,MAAM;AACV,UAAI;AACF,eAAO,KAAK,MAAM,EAAE,aAAc;AAAA,MACpC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF,CAAC,EACA,OAAO,CAAC,SAA0C,SAAS,IAAI;AAElE,QAAI,UAAU,KAAK,CAAC,WAAW,YAAY,EAAE,QAAQ,SAAS,CAAC,CAAC,GAAG;AACjE,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ,qCAAqC,iBAAiB;AAAA,QACvE;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA;AAAA,MAEL,SAAS,QAAQ,2CAA2C,iBAAiB;AAAA,MAC7E;AAAA,MACA,UAAU,SAAS,IACf,SAAS,QAAQ,mCACjB;AAAA,IACN;AAAA,EACF;AACF;;;ACvGA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,gBAAgB;AAIzB,IAAM,kBAAkB;AACxB,IAAM,oBAAoB;AAMnB,IAAM,uBAAN,cAAmC,mBAAyC;AAAA,EACxE,OAAO;AAAA,EAEhB,SACE,WACA,QACA,SACiB;AACjB,UAAM,cAAcC,YAAW;AAC/B,UAAM,UAAU,SAAS;AACzB,UAAM,UAAU,UAAU,WAAW;AACrC,UAAM,mBAAmB,UAAU,oBAAoB;AAEvD,QAAI,CAAC,SAAS;AACZ,aAAO,KAAK,aAAa,aAAa;AAAA,QACpC;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,gBAAgB;AAAA,MACnC,CAAC;AAAA,IACH;AAEA,QAAI,WAA0B;AAC9B,QAAI,eAA8B;AAClC,QAAI;AACJ,QAAI;AAEJ,YAAQ,IAAI,2BAA2B,OAAO,SAAS,OAAO,EAAE;AAEhE,QAAI;AACF,eAAS,SAAS;AAAA,QAChB,KAAK;AAAA,QACL,UAAU;AAAA,QACV,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,MAClC,CAAC;AACD,iBAAW;AAAA,IACb,SAAS,KAAK;AACZ,YAAM,QAAQ;AAMd,iBACE,OAAO,MAAM,WAAW,WACpB,MAAM,SACN,OAAO,MAAM,SAAS,WACpB,MAAM,OACN;AACR,qBAAe,MAAM;AACrB,eAAS,KAAK,eAAe,MAAM,MAAM;AACzC,eAAS,KAAK,eAAe,MAAM,MAAM;AAAA,IAC3C;AAEA,UAAM,SAAS,aAAa,QAAQ,aAAa;AAEjD,UAAM,UAAmC,EAAE,SAAS,QAAQ;AAC5D,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AACA,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AAEA,WAAO,KAAK,aAAa,aAAa;AAAA,MACpC,QAAQ;AAAA,MAGR,SAAS,KAAK,cAAc,UAAU,kBAAkB,YAAY;AAAA,MACpE,UAAU,OAAO,gBAAgB;AAAA,MACjC,QAAQ,aAAa,OAAO,OAAO,QAAQ,IAAI;AAAA,MAC/C;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,aACN,aACA,QACiB;AACjB,WAAO;AAAA,MACL,IAAIA,YAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEQ,eACN,OACoB;AACpB,QAAI,UAAU,UAAa,UAAU,KAAM,QAAO;AAClD,QAAI,OAAO,UAAU,SAAU,QAAO;AACtC,WAAO,MAAM,SAAS,OAAO;AAAA,EAC/B;AAAA,EAEQ,cACN,UACA,kBACA,cACQ;AACR,QAAI,aAAa,MAAM;AACrB,aAAO,iBAAiB,YAAY;AAAA,IACtC;AACA,QAAI,aAAa,kBAAkB;AACjC,aAAO,2BAA2B,QAAQ;AAAA,IAC5C;AACA,WAAO,qBAAqB,QAAQ,cAAc,gBAAgB;AAAA,EACpE;AACF;;;ACzHA,SAAS,cAAAC,mBAAkB;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,gBAAgB,UAAU;AAEhC,QAAI,MAAM,cAAc,MAAM;AAC5B,aAAO,KAAK,aAAa;AAAA,QACvB;AAAA,QACA,SAAS;AAAA,QACT,UAAU,MAAM,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,MAAM,cAAc;AAEnC,WAAO,KAAK,aAAa;AAAA,MACvB,QAAQ;AAAA,MAGR,SAAS,SACL,gBAAgB,MAAM,UAAU,cAAc,aAAa,QAC3D,wBAAwB,MAAM,UAAU,QAAQ,aAAa;AAAA,MACjE,UAAU,MAAM,aAAa;AAAA,MAC7B,QAAQ,GAAG,MAAM,UAAU;AAAA,IAC7B,CAAC;AAAA,EACH;AAAA,EAEQ,aAAa,QAAmD;AACtE,WAAO;AAAA,MACL,IAAIC,YAAW;AAAA,MACf,aAAaA,YAAW;AAAA,MACxB,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AACF;;;AC7CA,SAAS,cAAAC,mBAAkB;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,cAAcC,YAAW;AAC/B,UAAM,KAAKA,YAAW;AACtB,UAAM,gBAAgB;AACtB,UAAM,gBAAgB;AACtB,UAAM,aAAa,UAAU;AAE7B,QAAI,CAAC,MAAM,UAAU;AACnB,aAAO;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,SAAS,QAAQ;AAC7C,UAAM,kBAAkB,cAAc,QAAQ,CAAC;AAC/C,UAAM,eAAe,WAAW,QAAQ,CAAC;AACzC,UAAM,SAAS,OAAO,eAAe,KAAK,OAAO,YAAY;AAE7D,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,MAGR,SAAS,SACL,SAAS,eAAe,wBAAwB,YAAY,KAC5D,SAAS,eAAe,sBAAsB,YAAY;AAAA,MAC9D,UAAU,OAAO,YAAY;AAAA,MAC7B,QAAQ,IAAI,eAAe;AAAA,MAC3B,SAAS,EAAE,eAAe,WAAW;AAAA,IACvC;AAAA,EACF;AACF;;;ACvDA,SAAS,YAAuB;AAChC,SAAS,KAAAC,UAAS;AAClB,SAAS,gBAAgB;AACzB,OAAO,UAAU;AAMV,SAAS,mBACd,SACwC;AACxC,QAAM,kBAAkB,KAAK,QAAQ,OAAO;AAC5C,SAAO,KAAK;AAAA,IACV,aACE;AAAA,IACF,aAAaA,GAAE,OAAO;AAAA,MACpB,MAAMA,GAAE,OAAO,EAAE,SAAS,qCAAqC;AAAA,IACjE,CAAC;AAAA,IACD,SAAS,OAAO;AAAA,MACd,MAAM;AAAA,IACR,MAEsE;AACpE,YAAM,WAAW,KAAK,QAAQ,iBAAiB,QAAQ;AACvD,UAAI,CAAC,SAAS,WAAW,kBAAkB,KAAK,GAAG,GAAG;AACpD,eAAO,EAAE,OAAO,kDAAkD;AAAA,MACpE;AACA,UAAI;AACF,cAAM,UAAU,MAAM,SAAS,UAAU,OAAO;AAChD,eAAO,EAAE,MAAM,UAAU,QAAQ;AAAA,MACnC,QAAQ;AACN,eAAO,EAAE,OAAO,mBAAmB,QAAQ,GAAG;AAAA,MAChD;AAAA,IACF;AAAA,EACF,CAAC;AACH;;;AC5BA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,uBAAuB;AAChC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,KAAAC,UAAS;AAUX,IAAM,oBAAoBC,GAAE,OAAO;AAAA,EACxC,MAAMA,GAAE,OAAO,EAAE,SAAS,4CAA4C;AAAA,EACtE,OAAOA,GACJ,OAAO,EACP,IAAI,CAAC,EACL,IAAI,GAAG,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,gBAAgBA,GACb,OAAO,EACP,SAAS,qDAAqD;AACnE,CAAC;AAED,IAAM,kBAAkB;AAKjB,SAAS,oBAAoB,UAAwC;AAC1E,MAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,WAAO;AAAA,EACT;AACA,QAAM,QAAkB,CAAC;AACzB,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,QAAkB;AAAA,MACtB,QAAQ,KAAK,UAAU;AAAA,MACvB,SAAS,KAAK,IAAI;AAAA,MAClB,aAAa,KAAK,UAAU;AAAA,IAC9B;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,KAAK,QAAQ,EAAE;AACnC,UAAI,KAAK,eAAe;AACtB,cAAM,KAAK,SAAS,KAAK,aAAa,EAAE;AAAA,MAC1C;AAAA,IACF;AACA,QAAI,KAAK,eAAe;AACtB,YAAM,KAAK,WAAW,KAAK,aAAa,EAAE;AAAA,IAC5C;AACA,QAAI,KAAK,OAAO;AACd,YAAM,KAAK,UAAU,KAAK,KAAK,EAAE;AAAA,IACnC;AACA,UAAM,KAAK,MAAM,KAAK,IAAI,CAAC;AAAA,EAC7B;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcO,SAAS,oBACd,KACA,KACQ;AACR,SAAO,IACJ,QAAQ,mBAAmB,IAAI,MAAM,EACrC,QAAQ,gBAAgB,IAAI,GAAG,EAC/B,QAAQ,yBAAyB,IAAI,YAAY,EACjD,QAAQ,0BAA0B,IAAI,aAAa,EACnD,QAAQ,qBAAqB,IAAI,QAAQ,EACzC,QAAQ,kBAAkB,IAAI,KAAK;AACxC;AAMO,SAAS,uBAAuB,MAAsB;AAC3D,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,QAAQ,QAAQ,MAAM,wCAAwC;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;AAEO,SAAS,oBAAoB,QAA8B;AAChE,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,SAAS,UAAU;AAChC,UAAM,IAAI,MAAM,kDAAkD;AAAA,EACpE;AACA,MAAI,OAAO,IAAI,UAAU,UAAU;AACjC,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACrE;AACA,MAAI,IAAI,QAAQ,KAAK,IAAI,QAAQ,KAAK;AACpC,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,MAAI,OAAO,IAAI,mBAAmB,UAAU;AAC1C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AAAA,IACL,MAAM,IAAI;AAAA,IACV,OAAO,IAAI;AAAA,IACX,gBAAgB,IAAI;AAAA,EACtB;AACF;AAEA,IAAM,oBAAoB;AAG1B,IAAM,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBvB,IAAM,oBAAN,cAAgC,mBAAsC;AAAA,EAClE,OAAO;AAAA,EAEhB,MAAM,SACJ,WACA,OACA,SAC0B;AAC1B,UAAM,cAAcC,YAAW;AAC/B,UAAM,UAAU,SAAS,WAAW;AAEpC,UAAM,SAAS,MAAM,cAAc;AACnC,UAAM,YAAY,MAAM,aAAa,CAAC;AAEtC,UAAM,eAAe,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI;AAChD,UAAM,gBAAgB,UACnB,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EACrC,IAAI,CAAC,MAAM,EAAE,IAAI;AACpB,UAAM,WAAW,UACd,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,IAAI;AAEpB,UAAM,eACJ,aAAa,SAAS,IAClB,aAAa,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACnD;AACN,UAAM,gBACJ,cAAc,SAAS,IACnB,cAAc,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACpD;AACN,UAAM,WACJ,SAAS,SAAS,IACd,SAAS,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC/C;AAEN,UAAM,QAAQ,oBAAoB,MAAM,QAAQ;AAChD,UAAM,MAA0B;AAAA,MAC9B;AAAA,MACA,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,UAAM,UAAU,CAAC,MAAc,oBAAoB,GAAG,GAAG;AAEzD,UAAM,cAAc,QAAQ,UAAU,MAAM;AAE5C,UAAM,WAAW,UAAU,YAAY;AACvC,UAAM,kBAAkB,UAAU,aAAa;AAC/C,UAAM,cAAc,UAAU,eAAe;AAC7C,UAAM,UAAU,UAAU,SAAS,SAAS;AAE5C,UAAM,QAAQ,KAAK,aAAa,SAAS,OAAO;AAChD,QAAI,CAAC,OAAO;AACV,YAAM,SACJ,CAAC,WAAW,CAAC,SAAS,QAClB,uHACA;AACN,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,QAAQ;AAAA,MAC3B;AAAA,IACF;AAEA,UAAM,eACJ,UAAU,gBAAgB,QAAQ,UAAU,iBAAiB,KACzD,QAAQ,UAAU,YAAY,IAC9B,QAAQ,qBAAqB;AAEnC,QAAI;AACF,YAAM,cAAc,MAAM,KAAK;AAAA,QAC7B;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,MACb;AAEA,YAAM,SAAS,YAAY,SAAS;AACpC,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf,QAAQ;AAAA,QAGR,SAAS,SACL,eAAe,YAAY,KAAK,OAAO,QAAQ,KAAK,YAAY,IAAI,KACpE,eAAe,YAAY,KAAK,MAAM,QAAQ,KAAK,YAAY,IAAI;AAAA,QACvE,UAAU,OAAO,QAAQ;AAAA,QACzB,QAAQ,OAAO,YAAY,KAAK;AAAA,QAChC,SAAS;AAAA,UACP,OAAO,YAAY;AAAA,UACnB,gBAAgB,YAAY;AAAA,UAC5B,MAAM,YAAY;AAAA,QACpB;AAAA,MACF;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,uBAAuB,WAAW,GAAG,GAAG;AAC1C,eAAO;AAAA,UACL,IAAIA,YAAW;AAAA,UACf;AAAA,UACA,eAAe;AAAA,UACf,eAAe;AAAA,UACf;AAAA,UACA,SAAS;AAAA,UACT,UAAU,OAAO,QAAQ;AAAA,UACzB,SAAS;AAAA,YACP,SACE,OAAO,IAAI,SAAS,WAAW,IAAI,KAAK,MAAM,GAAG,GAAG,IAAI;AAAA,UAC5D;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAM,UAAmC;AAAA,QACvC,OAAO;AAAA,QACP,OAAO;AAAA,MACT;AAEA,UAAI,aAAa,WAAW,GAAG,GAAG;AAChC,gBAAQ,aAAa,IAAI;AACzB,gBAAQ,MAAM,IAAI;AAClB,gBAAQ,cAAc,IAAI;AAC1B,gBAAQ,eACN,OAAO,IAAI,iBAAiB,WACxB,IAAI,aAAa,MAAM,GAAG,GAAI,IAC9B,IAAI;AAAA,MACZ;AAEA,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS,0BAA0B,OAAO;AAAA,QAC1C,UAAU,OAAO,QAAQ;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,aACN,SACA,SACsB;AACtB,QAAI,SAAS,OAAO;AAClB,aAAO,QAAQ;AAAA,IACjB;AACA,QAAI,CAAC,WAAW,CAAC,SAAS,WAAW;AACnC,aAAO;AAAA,IACT;AACA,UAAM,YAAY,gBAAgB;AAAA,MAChC,SAAS,QAAQ,UAAU;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS,QAAQ,UAAU;AAAA,IAC7B,CAAC;AACD,WAAO,UAAU,OAAO;AAAA,EAC1B;AAAA,EAEA,MAAc,iBACZ,OACA,QACA,QACA,iBACA,aACA,SACsB;AACtB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ,OAAO,OAAO,EAAE,QAAQ,kBAAkB,CAAC;AAAA,MACnD,UAAU,YAAY,eAAe;AAAA,IACvC;AAEA,UAAM,EAAE,OAAO,IAAI,UACf,MAAM,aAAa;AAAA,MACjB,GAAG;AAAA,MACH,OAAO,EAAE,WAAW,mBAAmB,OAAO,EAAE;AAAA,IAClD,CAAC,IACD,MAAM,aAAa,WAAW;AAElC,WAAO;AAAA,EACT;AACF;;;ARxVA,IAAM,oBAAoB,IAAI,kBAAkB;AAEhD,IAAM,aAAiD;AAAA,EACrD,kBAAkB,IAAI,wBAAwB;AAAA,EAC9C,wBAAwB,IAAI,6BAA6B;AAAA,EACzD,cAAc,IAAI,qBAAqB;AAAA,EACvC,YAAY,IAAI,cAAc;AAAA,EAC9B,MAAM,IAAI,cAAc;AAAA,EACxB,WAAW;AAAA;AAAA,EAEX,QAAQ;AACV;AAQO,SAAS,kBACd,MACA,WACM;AACN,aAAW,IAAI,IAAI;AACrB;AAQO,SAAS,aAAa,MAA8C;AACzE,SAAO,WAAW,IAAI;AACxB;AAUA,eAAsB,mBACpB,OACA,YACA,SAC4B;AAC5B,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,CAAC;AAAA,EACV;AACA,SAAO,QAAQ;AAAA,IACb,WAAW,IAAI,OAAO,cAAc;AAClC,YAAM,YAAY,WAAW,UAAU,IAAI;AAC3C,UAAI,CAAC,WAAW;AACd,eAAO;AAAA,UACL,IAAIC,YAAW;AAAA,UACf,aAAaA,YAAW;AAAA,UACxB,eAAe,UAAU;AAAA,UACzB,eAAe;AAAA,UACf;AAAA,UACA,SAAS,+BAA+B,UAAU,IAAI;AAAA,UACtD,UAAU;AAAA,QACZ;AAAA,MACF;AACA,YAAM,UAAU,KAAK,IAAI;AACzB,YAAM,SAAS,MAAM,UAAU,SAAS,WAAW,OAAO,OAAO;AACjE,YAAM,aAAa,KAAK,IAAI,IAAI;AAChC,aAAO,EAAE,GAAG,QAAQ,UAAU,WAAW;AAAA,IAC3C,CAAC;AAAA,EACH;AACF;",
6
- "names": ["z", "LLMStepType", "z", "AssertionResultStatus", "z", "randomUUID", "message", "randomUUID", "randomUUID", "expected", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "z", "randomUUID", "z", "z", "randomUUID", "randomUUID"]
4
+ "sourcesContent": ["import { z } from \"zod\";\n\n/**\n * Assertion: the agent must have invoked one or more skills during the run.\n * Checked by inspecting the LLM trace for \"Skill\" tool uses with the given skills.\n * When multiple skills are in one assertion, they are treated as a group (1 assertion).\n * Each skill in the group must have been called for the assertion to pass.\n */\nexport const SkillWasCalledAssertionSchema = z.object({\n type: z.literal(\"skill_was_called\"),\n /** Names of the skills that must have been called (matched against trace Skill tool args) */\n skillNames: z.array(z.string()).min(1),\n});\n\nexport type SkillWasCalledAssertion = z.infer<\n typeof SkillWasCalledAssertionSchema\n>;\n\n/**\n * Assertion: a specific tool must have been called with expected parameters.\n * Checked by inspecting the LLM trace for tool calls with matching name and arguments.\n */\nexport const ToolCalledWithParamAssertionSchema = z.object({\n type: z.literal(\"tool_called_with_param\"),\n /** Name of the tool that must have been called */\n toolName: z.string().min(1),\n /** JSON string of key-value pairs for expected parameters (substring match) */\n expectedParams: z.string().min(1),\n});\n\nexport type ToolCalledWithParamAssertion = z.infer<\n typeof ToolCalledWithParamAssertionSchema\n>;\n\n/**\n * Assertion: a build command must exit with the expected code (default 0).\n * Runs the command in the scenario working directory.\n */\nexport const BuildPassedAssertionSchema = z.object({\n type: z.literal(\"build_passed\"),\n /** Command to run (default: \"yarn build\") */\n command: z.string().optional(),\n /** Expected exit code (default: 0) */\n expectedExitCode: z.number().int().optional(),\n});\n\nexport type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;\n\n/**\n * Assertion: the scenario LLM execution cost must stay within a USD threshold.\n * Checked by reading llmTrace.summary.totalCostUsd.\n */\nexport const CostAssertionSchema = z.object({\n type: z.literal(\"cost\"),\n /** Maximum allowed cost in USD */\n maxCostUsd: z.number().positive(),\n});\n\nexport type CostAssertion = z.infer<typeof CostAssertionSchema>;\n\n/**\n * Assertion: an LLM judges the scenario output (score 0-100).\n * Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}}.\n * Passes if judge score >= minScore.\n */\nexport const LlmJudgeAssertionSchema = z.object({\n type: z.literal(\"llm_judge\"),\n /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{modifiedFiles}}, {{newFiles}}, {{trace}} */\n prompt: z.string(),\n /** Minimum score to pass (0-100, default 70) */\n minScore: z.number().int().min(0).max(100).optional(),\n /** Model for the judge (e.g. claude-3-5-haiku) */\n model: z.string().optional(),\n maxTokens: z.number().int().optional(),\n temperature: z.number().min(0).max(1).optional(),\n});\n\nexport type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;\n\n/**\n * Assertion: scenario must complete within a maximum duration.\n * Deterministic check against the scenario execution time.\n */\nexport const TimeAssertionSchema = z.object({\n type: z.literal(\"time_limit\"),\n /** Maximum allowed duration in milliseconds */\n maxDurationMs: z.number().int().positive(),\n});\n\nexport type TimeAssertion = z.infer<typeof TimeAssertionSchema>;\n\n/**\n * Union of all assertion types.\n * Each assertion has a type and type-specific data.\n * Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.\n */\nexport const AssertionSchema = z.union([\n SkillWasCalledAssertionSchema,\n ToolCalledWithParamAssertionSchema,\n BuildPassedAssertionSchema,\n TimeAssertionSchema,\n CostAssertionSchema,\n LlmJudgeAssertionSchema,\n]);\n\nexport type Assertion = z.infer<typeof AssertionSchema>;\n", "import { z } from \"zod\";\n\n/**\n * Token usage schema.\n */\nexport const TokenUsageSchema = z.object({\n prompt: z.number(),\n completion: z.number(),\n total: z.number(),\n});\n\nexport type TokenUsage = z.infer<typeof TokenUsageSchema>;\n\n/**\n * LLM step type enum.\n */\nexport enum LLMStepType {\n COMPLETION = \"completion\",\n TOOL_USE = \"tool_use\",\n TOOL_RESULT = \"tool_result\",\n THINKING = \"thinking\",\n}\n\n/**\n * LLM trace step schema.\n */\nexport const LLMTraceStepSchema = z.object({\n id: z.string(),\n stepNumber: z.number(),\n type: z.enum(LLMStepType),\n model: z.string(),\n provider: z.string(),\n startedAt: z.string(),\n durationMs: z.number(),\n tokenUsage: TokenUsageSchema,\n costUsd: z.number(),\n toolName: z.string().optional(),\n toolArguments: z.string().optional(),\n inputPreview: z.string().optional(),\n outputPreview: z.string().optional(),\n success: z.boolean(),\n error: z.string().optional(),\n});\n\nexport type LLMTraceStep = z.infer<typeof LLMTraceStepSchema>;\n\n/**\n * LLM breakdown stats schema.\n */\nexport const LLMBreakdownStatsSchema = z.object({\n count: z.number(),\n durationMs: z.number(),\n tokens: z.number(),\n costUsd: z.number(),\n});\n\nexport type LLMBreakdownStats = z.infer<typeof LLMBreakdownStatsSchema>;\n\n/**\n * LLM trace summary schema.\n */\nexport const LLMTraceSummarySchema = z.object({\n totalSteps: z.number(),\n totalDurationMs: z.number(),\n totalTokens: TokenUsageSchema,\n totalCostUsd: z.number(),\n stepTypeBreakdown: z.record(z.string(), LLMBreakdownStatsSchema).optional(),\n modelBreakdown: z.record(z.string(), LLMBreakdownStatsSchema),\n modelsUsed: z.array(z.string()),\n});\n\nexport type LLMTraceSummary = z.infer<typeof LLMTraceSummarySchema>;\n\n/**\n * LLM trace schema.\n */\nexport const LLMTraceSchema = z.object({\n id: z.string(),\n steps: z.array(LLMTraceStepSchema),\n summary: LLMTraceSummarySchema,\n});\n\nexport type LLMTrace = z.infer<typeof LLMTraceSchema>;\n", "import { z } from \"zod\";\nimport { LLMTraceStepSchema } from \"./trace.js\";\n\n/**\n * Assertion result status enum.\n */\nexport enum AssertionResultStatus {\n PASSED = \"passed\",\n FAILED = \"failed\",\n SKIPPED = \"skipped\",\n ERROR = \"error\",\n}\n\n/**\n * Assertion result schema.\n */\nexport const AssertionResultSchema = z.object({\n id: z.string(),\n assertionId: z.string(),\n assertionType: z.string(),\n assertionName: z.string(),\n status: z.enum(AssertionResultStatus),\n message: z.string().optional(),\n expected: z.string().optional(),\n actual: z.string().optional(),\n duration: z.number().optional(),\n details: z.record(z.string(), z.unknown()).optional(),\n llmTraceSteps: z.array(LLMTraceStepSchema).optional(),\n});\n\nexport type AssertionResult = z.infer<typeof AssertionResultSchema>;\n", "import type { Assertion, AssertionResult } from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nimport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nimport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nimport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nimport { TimeEvaluator } from \"./time-evaluator.js\";\nimport { CostEvaluator } from \"./cost-evaluator.js\";\nimport { LlmJudgeEvaluator } from \"./llm-judge-evaluator.js\";\nimport type { EvaluationInput } from \"../types/index.js\";\n\nconst llmJudgeEvaluator = new LlmJudgeEvaluator();\n\nconst evaluators: Record<string, AssertionEvaluator> = {\n skill_was_called: new SkillWasCalledEvaluator(),\n tool_called_with_param: new ToolCalledWithParamEvaluator(),\n build_passed: new BuildPassedEvaluator(),\n time_limit: new TimeEvaluator(),\n cost: new CostEvaluator(),\n llm_judge: llmJudgeEvaluator,\n // Custom assertions use the same LLM-based evaluation as llm_judge\n custom: llmJudgeEvaluator,\n};\n\n/**\n * Register a custom assertion evaluator.\n *\n * @param type - The assertion type identifier\n * @param evaluator - The evaluator instance\n */\nexport function registerEvaluator(\n type: string,\n evaluator: AssertionEvaluator,\n): void {\n evaluators[type] = evaluator;\n}\n\n/**\n * Get a registered evaluator by type.\n *\n * @param type - The assertion type identifier\n * @returns The evaluator or undefined if not found\n */\nexport function getEvaluator(type: string): AssertionEvaluator | undefined {\n return evaluators[type];\n}\n\n/**\n * Evaluate all assertions against the input.\n *\n * @param input - Evaluation input (includes outputText, llmTrace, fileDiffs)\n * @param assertions - List of assertions to evaluate\n * @param context - Optional context (e.g. workDir for build_passed, llmConfig for llm_judge)\n * @returns Array of assertion results; empty if no assertions\n */\nexport async function evaluateAssertions(\n input: EvaluationInput,\n assertions: Assertion[],\n context?: AssertionContext,\n): Promise<AssertionResult[]> {\n if (assertions.length === 0) {\n return [];\n }\n return Promise.all(\n assertions.map(async (assertion) => {\n const evaluator = evaluators[assertion.type];\n if (!evaluator) {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: assertion.type,\n assertionName: \"Unknown assertion\",\n status: AssertionResultStatus.ERROR,\n message: `Unsupported assertion type: ${assertion.type}`,\n duration: 0,\n };\n }\n const startMs = Date.now();\n const result = await evaluator.evaluate(assertion, input, context);\n const durationMs = Date.now() - startMs;\n return { ...result, duration: durationMs };\n }),\n );\n}\n\n// Re-export evaluator classes and types\nexport { AssertionEvaluator } from \"./assertion-evaluator.js\";\nexport type { AssertionContext, LlmConfig } from \"./assertion-evaluator.js\";\nexport { SkillWasCalledEvaluator } from \"./skill-was-called-evaluator.js\";\nexport { ToolCalledWithParamEvaluator } from \"./tool-called-with-param-evaluator.js\";\nexport { BuildPassedEvaluator } from \"./build-passed-evaluator.js\";\nexport { TimeEvaluator } from \"./time-evaluator.js\";\nexport { CostEvaluator } from \"./cost-evaluator.js\";\nexport {\n LlmJudgeEvaluator,\n JudgeResultSchema,\n formatTraceForJudge,\n replacePlaceholders,\n stripMarkdownCodeBlock,\n validateJudgeResult,\n type JudgeResult,\n} from \"./llm-judge-evaluator.js\";\n", "import type {\n SkillWasCalledAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Collect all skill names that were called in the LLM trace.\n */\nfunction collectCalledSkillNames(llmTrace: LLMTrace): Set<string> {\n const calledSkills = new Set<string>();\n for (const step of llmTrace.steps) {\n if (step.toolName !== \"Skill\") {\n continue;\n }\n let args: unknown;\n try {\n args = step.toolArguments\n ? (JSON.parse(step.toolArguments) as unknown)\n : undefined;\n } catch {\n continue;\n }\n if (args !== null && typeof args === \"object\") {\n const obj = args as Record<string, unknown>;\n if (typeof obj.skill === \"string\") {\n calledSkills.add(obj.skill);\n }\n }\n }\n return calledSkills;\n}\n\n/**\n * Evaluator for \"skill_was_called\" assertion: the LLM trace must contain steps\n * where the \"Skill\" tool was used with ALL expected skills (by name).\n *\n * Multiple skills in one assertion are treated as a group \u2014 all must be called\n * for the assertion to pass. To check skills independently, add separate assertions.\n */\nexport class SkillWasCalledEvaluator extends AssertionEvaluator<SkillWasCalledAssertion> {\n readonly type = \"skill_was_called\" as const;\n\n evaluate(\n assertion: SkillWasCalledAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- context not used for skill_was_called\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const expectedSkills = assertion.skillNames;\n const expectedLabel = expectedSkills.join(\", \");\n\n const llmTrace: LLMTrace | undefined = input.llmTrace;\n if (!llmTrace?.steps?.length) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message: \"No LLM trace steps to check for skill invocation\",\n expected: expectedLabel,\n };\n }\n\n const calledSkills = collectCalledSkillNames(llmTrace);\n const missingSkills = expectedSkills.filter(\n (name) => !calledSkills.has(name),\n );\n\n if (missingSkills.length === 0) {\n const message =\n expectedSkills.length === 1\n ? `Skill \"${expectedSkills[0]}\" was called`\n : `All skills were called: ${expectedLabel}`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.PASSED,\n message,\n expected: expectedLabel,\n };\n }\n\n const missingLabel = missingSkills.join(\", \");\n const message =\n expectedSkills.length === 1\n ? `Skill \"${missingSkills[0]}\" was not called`\n : `Missing skills: ${missingLabel} (expected all of: ${expectedLabel})`;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"skill_was_called\",\n assertionName: \"Skill was called\",\n status: AssertionResultStatus.FAILED,\n message,\n expected: expectedLabel,\n };\n }\n}\n", "import type {\n Assertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport type { LanguageModel } from \"ai\";\n\n/**\n * Configuration for LLM calls (used by llm_judge assertion).\n */\nexport interface LlmConfig {\n /** Base URL for the AI API (e.g., 'https://api.anthropic.com') */\n baseUrl: string;\n /** Headers to include in API requests (e.g., API key) */\n headers: Record<string, string>;\n}\n\n/**\n * Optional context passed when evaluating assertions.\n */\nexport interface AssertionContext {\n /** Working directory for the scenario (used by build_passed) */\n workDir?: string;\n /** LLM configuration (used by llm_judge) */\n llmConfig?: LlmConfig;\n /** Default model for llm_judge when assertion.model is not set */\n defaultJudgeModel?: string;\n /** Optional model override \u2014 when provided, used instead of creating from llmConfig + modelId */\n model?: LanguageModel;\n}\n\n/**\n * Abstract base for assertion evaluators.\n * Each assertion type has a concrete class that implements evaluate().\n * evaluate() may return a Promise for async assertions (e.g. llm_judge).\n */\nexport abstract class AssertionEvaluator<T extends Assertion = Assertion> {\n abstract readonly type: T[\"type\"];\n\n abstract evaluate(\n assertion: T,\n input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult | Promise<AssertionResult>;\n}\n", "import type {\n ToolCalledWithParamAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst ASSERTION_TYPE = \"tool_called_with_param\" as const;\nconst ASSERTION_NAME = \"Tool called with param\";\n\nconst containsAll = ({\n actual,\n expected,\n}: {\n actual: Record<string, unknown>;\n expected: Record<string, unknown>;\n}): boolean =>\n Object.entries(expected).every(([key, val]) => {\n const actualVal = actual[key];\n // actual comes from LLM trace \u2014 can be null/undefined\n if (actualVal === null || actualVal === undefined) return false;\n const actualStr =\n typeof actualVal === \"string\" ? actualVal : JSON.stringify(actualVal);\n return actualStr.includes(String(val));\n });\n\n/**\n * Evaluator for \"tool_called_with_param\" assertion: the LLM trace must contain\n * a step where a specific tool was called with arguments where each expected\n * param value is a substring of the actual value.\n */\nexport class ToolCalledWithParamEvaluator extends AssertionEvaluator<ToolCalledWithParamAssertion> {\n readonly type = ASSERTION_TYPE;\n\n evaluate(\n assertion: ToolCalledWithParamAssertion,\n input: EvaluationInput,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars -- required by base class\n _context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const { toolName, expectedParams: expectedParamsStr } = assertion;\n\n const buildResult = (\n status: AssertionResultStatus,\n message: string,\n expected: string,\n actual?: string,\n ): AssertionResult => ({\n id: randomUUID(),\n assertionId,\n assertionType: ASSERTION_TYPE,\n assertionName: ASSERTION_NAME,\n status,\n message,\n expected,\n ...(actual !== undefined ? { actual } : {}),\n });\n\n // expectedParams is validated upstream, but guard here to avoid runtime throws.\n let expected: Record<string, unknown>;\n try {\n expected = JSON.parse(expectedParamsStr) as Record<string, unknown>;\n } catch {\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" assertion has invalid expected params JSON`,\n `${toolName}(invalid expected params)`,\n \"Invalid expected params JSON\",\n );\n }\n\n const expectedLabel = `${toolName}(${Object.entries(expected)\n .map(([k, v]) => `${k}=\"${v}\"`)\n .join(\", \")})`;\n\n const steps = input.llmTrace?.steps ?? [];\n // toolArguments is always JSON.stringify(args) from the trace builder, or undefined\n const toolCalls = steps\n .filter((s) => s.toolName === toolName && s.toolArguments !== undefined)\n .map((s) => {\n try {\n return JSON.parse(s.toolArguments!) as Record<string, unknown>;\n } catch {\n return null;\n }\n })\n .filter((call): call is Record<string, unknown> => call !== null);\n\n if (toolCalls.some((actual) => containsAll({ actual, expected }))) {\n return buildResult(\n AssertionResultStatus.PASSED,\n `Tool \"${toolName}\" was called with params matching ${expectedParamsStr}`,\n expectedLabel,\n );\n }\n\n const MAX_SHOWN = 5;\n const formatCall = (call: Record<string, unknown>) =>\n `${toolName}(${Object.entries(call)\n .map(([k, v]) => `${k}=${JSON.stringify(v)}`)\n .join(\", \")})`;\n\n const actual =\n toolCalls.length === 0\n ? \"No matching tool calls found\"\n : toolCalls.slice(0, MAX_SHOWN).map(formatCall).join(\" | \") +\n (toolCalls.length > MAX_SHOWN\n ? ` ... and ${toolCalls.length - MAX_SHOWN} more`\n : \"\");\n\n return buildResult(\n AssertionResultStatus.FAILED,\n `Tool \"${toolName}\" was never called with params matching ${expectedParamsStr}`,\n expectedLabel,\n actual,\n );\n }\n}\n", "import type {\n BuildPassedAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { execSync } from \"child_process\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nconst DEFAULT_COMMAND = \"yarn build\";\nconst DEFAULT_EXIT_CODE = 0;\n\n/**\n * Evaluator for \"build_passed\" assertion: runs a build command in the scenario\n * working directory and passes if the command exits with the expected code (default 0).\n */\nexport class BuildPassedEvaluator extends AssertionEvaluator<BuildPassedAssertion> {\n readonly type = \"build_passed\" as const;\n\n evaluate(\n assertion: BuildPassedAssertion,\n _input: EvaluationInput,\n context?: AssertionContext,\n ): AssertionResult {\n const assertionId = randomUUID();\n const workDir = context?.workDir;\n const command = assertion.command ?? DEFAULT_COMMAND;\n const expectedExitCode = assertion.expectedExitCode ?? DEFAULT_EXIT_CODE;\n\n if (!workDir) {\n return this.createResult(assertionId, {\n status: AssertionResultStatus.FAILED,\n message: \"No working directory provided for build_passed assertion\",\n expected: String(expectedExitCode),\n });\n }\n\n let exitCode: number | null = null;\n let errorMessage: string | null = null;\n let stdout: string | undefined;\n let stderr: string | undefined;\n\n console.log(`[build_passed] Running \"${command}\" in: ${workDir}`);\n\n try {\n execSync(command, {\n cwd: workDir,\n encoding: \"utf-8\",\n stdio: [\"ignore\", \"pipe\", \"pipe\"],\n });\n exitCode = 0;\n } catch (err) {\n const error = err as Error & {\n status?: number;\n code?: number;\n stdout?: string | Buffer;\n stderr?: string | Buffer;\n };\n exitCode =\n typeof error.status === \"number\"\n ? error.status\n : typeof error.code === \"number\"\n ? error.code\n : null;\n errorMessage = error.message;\n stdout = this.bufferToString(error.stdout);\n stderr = this.bufferToString(error.stderr);\n }\n\n const passed = exitCode !== null && exitCode === expectedExitCode;\n\n const details: Record<string, unknown> = { workDir, command };\n if (stdout !== undefined && stdout !== \"\") {\n details.stdout = stdout;\n }\n if (stderr !== undefined && stderr !== \"\") {\n details.stderr = stderr;\n }\n\n return this.createResult(assertionId, {\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: this.formatMessage(exitCode, expectedExitCode, errorMessage),\n expected: String(expectedExitCode),\n actual: exitCode !== null ? String(exitCode) : undefined,\n details,\n });\n }\n\n private createResult(\n assertionId: string,\n fields: Partial<AssertionResult>,\n ): AssertionResult {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"build_passed\",\n assertionName: \"Build passed\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n\n private bufferToString(\n value: string | Buffer | undefined,\n ): string | undefined {\n if (value === undefined || value === null) return undefined;\n if (typeof value === \"string\") return value;\n return value.toString(\"utf-8\");\n }\n\n private formatMessage(\n exitCode: number | null,\n expectedExitCode: number,\n errorMessage: string | null,\n ): string {\n if (exitCode === null) {\n return `Build failed: ${errorMessage}`;\n }\n if (exitCode === expectedExitCode) {\n return `Build passed (exit code ${exitCode})`;\n }\n return `Build exited with ${exitCode}, expected ${expectedExitCode}`;\n }\n}\n", "import type {\n TimeAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"time_limit\" assertion: passes if the scenario completed\n * within the configured maximum duration (maxDurationMs).\n */\nexport class TimeEvaluator extends AssertionEvaluator<TimeAssertion> {\n readonly type = \"time_limit\" as const;\n\n evaluate(assertion: TimeAssertion, input: EvaluationInput): AssertionResult {\n const maxDurationMs = assertion.maxDurationMs;\n\n if (input.durationMs == null) {\n return this.createResult({\n status: AssertionResultStatus.FAILED,\n message: \"No duration data available for time assertion\",\n expected: `<= ${maxDurationMs}ms`,\n });\n }\n\n const passed = input.durationMs <= maxDurationMs;\n\n return this.createResult({\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Completed in ${input.durationMs}ms (limit: ${maxDurationMs}ms)`\n : `Exceeded time limit: ${input.durationMs}ms > ${maxDurationMs}ms`,\n expected: `<= ${maxDurationMs}ms`,\n actual: `${input.durationMs}ms`,\n });\n }\n\n private createResult(fields: Partial<AssertionResult>): AssertionResult {\n return {\n id: randomUUID(),\n assertionId: randomUUID(),\n assertionType: \"time_limit\",\n assertionName: \"Time limit\",\n status: AssertionResultStatus.FAILED,\n ...fields,\n };\n }\n}\n", "import type {\n CostAssertion,\n AssertionResult,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\n/**\n * Evaluator for \"cost\" assertion: checks that the scenario's LLM execution cost\n * stays within a configured USD threshold by reading llmTrace.summary.totalCostUsd.\n */\nexport class CostEvaluator extends AssertionEvaluator<CostAssertion> {\n readonly type = \"cost\" as const;\n\n evaluate(assertion: CostAssertion, input: EvaluationInput): AssertionResult {\n const assertionId = randomUUID();\n const id = randomUUID();\n const assertionName = \"Cost\";\n const assertionType = \"cost\";\n const maxCostUsd = assertion.maxCostUsd;\n\n if (!input.llmTrace) {\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: AssertionResultStatus.SKIPPED,\n message: \"No LLM trace available to check cost\",\n };\n }\n\n const actualCostUsd = input.llmTrace.summary.totalCostUsd;\n const formattedActual = actualCostUsd.toFixed(6);\n const formattedMax = maxCostUsd.toFixed(6);\n const passed = Number(formattedActual) <= Number(formattedMax);\n\n return {\n id,\n assertionId,\n assertionType,\n assertionName,\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Cost $${formattedActual} is within limit of $${formattedMax}`\n : `Cost $${formattedActual} exceeds limit of $${formattedMax}`,\n expected: `<= $${formattedMax}`,\n actual: `$${formattedActual}`,\n details: { actualCostUsd, maxCostUsd },\n };\n }\n}\n", "import { tool, type Tool } from \"ai\";\nimport { z } from \"zod\";\nimport { readFile } from \"fs/promises\";\nimport path from \"path\";\n\nexport type ReadFileResult =\n | { path: string; content: string }\n | { error: string };\n\nexport function createReadFileTool(\n workDir: string,\n): Tool<{ path: string }, ReadFileResult> {\n const resolvedWorkDir = path.resolve(workDir);\n return tool({\n description:\n \"Read the content of any file in the workspace by its relative path. Use this to inspect file contents when evaluating code changes.\",\n inputSchema: z.object({\n path: z.string().describe(\"Relative file path in the workspace\"),\n }),\n execute: async ({\n path: filePath,\n }: {\n path: string;\n }): Promise<{ path: string; content: string } | { error: string }> => {\n const resolved = path.resolve(resolvedWorkDir, filePath);\n if (!resolved.startsWith(resolvedWorkDir + path.sep)) {\n return { error: `Access denied: path escapes workspace directory` };\n }\n try {\n const content = await readFile(resolved, \"utf-8\");\n return { path: filePath, content };\n } catch {\n return { error: `File not found: ${filePath}` };\n }\n },\n });\n}\n", "import type {\n LlmJudgeAssertion,\n AssertionResult,\n LLMTrace,\n EvaluationInput,\n} from \"../types/index.js\";\nimport { AssertionResultStatus } from \"../types/index.js\";\nimport { createReadFileTool } from \"../tools/index.js\";\nimport { randomUUID } from \"crypto\";\nimport { createAnthropic } from \"@ai-sdk/anthropic\";\nimport {\n generateText,\n Output,\n APICallError,\n NoObjectGeneratedError,\n stepCountIs,\n type LanguageModel,\n} from \"ai\";\nimport { z } from \"zod\";\nimport type { AssertionContext } from \"./assertion-evaluator.js\";\nimport { AssertionEvaluator } from \"./assertion-evaluator.js\";\n\nexport interface JudgeResult {\n text: string;\n score: number;\n scoreReasoning: string;\n}\n\nexport const JudgeResultSchema = z.object({\n text: z.string().describe(\"A brief textual verdict of the test result\"),\n score: z\n .number()\n .min(0)\n .max(100)\n .describe(\n \"A number from 0 to 100 reflecting how well the answer meets the acceptance criteria\",\n ),\n scoreReasoning: z\n .string()\n .describe(\"A concise explanation justifying the assigned score\"),\n});\n\nconst MAX_JUDGE_STEPS = 20;\n\n/**\n * Format LLM trace as readable text for the judge (step number, type, tool name/args, output preview).\n */\nexport function formatTraceForJudge(llmTrace: LLMTrace | undefined): string {\n if (!llmTrace?.steps?.length) {\n return \"No trace available.\";\n }\n const lines: string[] = [];\n for (const step of llmTrace.steps) {\n const parts: string[] = [\n `Step ${step.stepNumber}`,\n `type: ${step.type}`,\n `duration: ${step.durationMs}ms`,\n ];\n if (step.toolName) {\n parts.push(`tool: ${step.toolName}`);\n if (step.toolArguments) {\n parts.push(`args: ${step.toolArguments}`);\n }\n }\n if (step.outputPreview) {\n parts.push(`output: ${step.outputPreview}`);\n }\n if (step.error) {\n parts.push(`error: ${step.error}`);\n }\n lines.push(parts.join(\", \"));\n }\n return lines.join(\"\\n\");\n}\n\n/**\n * Context object for placeholder replacement.\n */\nexport interface PlaceholderContext {\n output: string;\n cwd: string;\n changedFiles: string;\n modifiedFiles: string;\n newFiles: string;\n trace: string;\n}\n\nexport function replacePlaceholders(\n str: string,\n ctx: PlaceholderContext,\n): string {\n return str\n .replace(/\\{\\{output\\}\\}/g, ctx.output)\n .replace(/\\{\\{cwd\\}\\}/g, ctx.cwd)\n .replace(/\\{\\{changedFiles\\}\\}/g, ctx.changedFiles)\n .replace(/\\{\\{modifiedFiles\\}\\}/g, ctx.modifiedFiles)\n .replace(/\\{\\{newFiles\\}\\}/g, ctx.newFiles)\n .replace(/\\{\\{trace\\}\\}/g, ctx.trace);\n}\n\n/**\n * Strip markdown code fences (e.g. ```json ... ```) from LLM output,\n * returning only the inner content for JSON parsing.\n */\nexport function stripMarkdownCodeBlock(text: string): string {\n const trimmed = text.trim();\n const match = trimmed.match(/^```(?:\\w+)?\\s*\\n?([\\s\\S]*?)\\n?\\s*```$/);\n return match ? match[1].trim() : trimmed;\n}\n\nexport function validateJudgeResult(parsed: unknown): JudgeResult {\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(\"Judge result is not an object\");\n }\n const obj = parsed as Record<string, unknown>;\n if (typeof obj.text !== \"string\") {\n throw new Error(\"Judge result does not contain a valid text field\");\n }\n if (typeof obj.score !== \"number\") {\n throw new Error(\"Judge result does not contain a valid score field\");\n }\n if (obj.score < 0 || obj.score > 100) {\n throw new Error(\"Judge result score is not between 0 and 100\");\n }\n if (typeof obj.scoreReasoning !== \"string\") {\n throw new Error(\n \"Judge result does not contain a valid scoreReasoning field\",\n );\n }\n return {\n text: obj.text,\n score: obj.score,\n scoreReasoning: obj.scoreReasoning,\n };\n}\n\nconst DEFAULT_MIN_SCORE = 70;\n\nconst DEFAULT_JUDGE_CONTEXT = `You are judging a scenario run. The ACTUAL run data is provided below \u2014 use it to verify facts:\n\n- {{output}}: the agent's final output\n- {{cwd}}: working directory\n- {{changedFiles}}: list of all files changed (or \"No files were changed\")\n- {{modifiedFiles}}: list of existing files that were modified (or \"No files were modified\")\n- {{newFiles}}: list of new files that were created (or \"No new files were created\")\n- {{trace}}: step-by-step trace (tool calls, completions) so you can check e.g. which tools were called and how many times\n\nYou have access to a read_file tool that lets you read the content of ANY file in the workspace (not just changed files). Use it to inspect file contents whenever you need to verify claims about code, check imports, review implementations, or validate that specific code patterns exist. Always read files before making judgments about their content \u2014 do not guess.\n\nCRITICAL: When the user asks you to verify a specific fact, compare it strictly against the actual data above and the actual file contents (use the read_file tool). If the expected outcome does NOT match the actual outcome, you MUST give a score of 0 or near 0. Do not be lenient \u2014 factual mismatches are failures.`;\n\n/**\n * Evaluator for \"llm_judge\" assertion: an LLM judges the scenario output\n * (prompt with {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}) and returns a score 0-100.\n * Passes if score >= minScore.\n */\nexport class LlmJudgeEvaluator extends AssertionEvaluator<LlmJudgeAssertion> {\n readonly type = \"llm_judge\" as const;\n\n async evaluate(\n assertion: LlmJudgeAssertion,\n input: EvaluationInput,\n context?: AssertionContext,\n ): Promise<AssertionResult> {\n const assertionId = randomUUID();\n const workDir = context?.workDir ?? \"\";\n\n const output = input.outputText ?? \"\";\n const fileDiffs = input.fileDiffs ?? [];\n\n const changedPaths = fileDiffs.map((d) => d.path);\n const modifiedPaths = fileDiffs\n .filter((d) => d.status === \"modified\")\n .map((d) => d.path);\n const newPaths = fileDiffs\n .filter((d) => d.status === \"new\")\n .map((d) => d.path);\n\n const changedFiles =\n changedPaths.length > 0\n ? changedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were changed\";\n const modifiedFiles =\n modifiedPaths.length > 0\n ? modifiedPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No files were modified\";\n const newFiles =\n newPaths.length > 0\n ? newPaths.map((p: string) => `- ${p}`).join(\"\\n\")\n : \"No new files were created\";\n\n const trace = formatTraceForJudge(input.llmTrace);\n const ctx: PlaceholderContext = {\n output,\n cwd: workDir,\n changedFiles,\n modifiedFiles,\n newFiles,\n trace,\n };\n const replace = (s: string) => replacePlaceholders(s, ctx);\n\n const finalPrompt = replace(assertion.prompt);\n const systemPrompt = replace(DEFAULT_JUDGE_CONTEXT);\n\n const minScore = assertion.minScore ?? DEFAULT_MIN_SCORE;\n const maxOutputTokens = assertion.maxTokens ?? 1024;\n const temperature = assertion.temperature ?? 0;\n const modelId = assertion.model ?? context?.defaultJudgeModel;\n\n const model = this.resolveModel(context, modelId);\n if (!model) {\n const reason =\n !modelId && !context?.model\n ? \"No model configured for llm_judge assertion (set model on assertion or provide defaultJudgeModel/model in context)\"\n : \"No llmConfig for llm_judge assertion (AI gateway required)\";\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: reason,\n expected: String(minScore),\n };\n }\n\n try {\n const judgeResult = await this.callGenerateText(\n model,\n finalPrompt,\n systemPrompt,\n maxOutputTokens,\n temperature,\n workDir || undefined,\n );\n\n const passed = judgeResult.score >= minScore;\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: passed\n ? AssertionResultStatus.PASSED\n : AssertionResultStatus.FAILED,\n message: passed\n ? `Judge score ${judgeResult.score} >= ${minScore}: ${judgeResult.text}`\n : `Judge score ${judgeResult.score} < ${minScore}: ${judgeResult.text}`,\n expected: String(minScore),\n actual: String(judgeResult.score),\n details: {\n score: judgeResult.score,\n scoreReasoning: judgeResult.scoreReasoning,\n text: judgeResult.text,\n },\n };\n } catch (err) {\n if (NoObjectGeneratedError.isInstance(err)) {\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: \"LLM judge failed to produce valid structured output\",\n expected: String(minScore),\n details: {\n rawText:\n typeof err.text === \"string\" ? err.text.slice(0, 500) : undefined,\n },\n };\n }\n\n const message = err instanceof Error ? err.message : String(err);\n const details: Record<string, unknown> = {\n error: message,\n model: modelId,\n };\n\n if (APICallError.isInstance(err)) {\n details.statusCode = err.statusCode;\n details.url = err.url;\n details.isRetryable = err.isRetryable;\n details.responseBody =\n typeof err.responseBody === \"string\"\n ? err.responseBody.slice(0, 2000)\n : err.responseBody;\n }\n\n return {\n id: randomUUID(),\n assertionId,\n assertionType: \"llm_judge\",\n assertionName: \"LLM judge\",\n status: AssertionResultStatus.FAILED,\n message: `LLM judge call failed: ${message}`,\n expected: String(minScore),\n details,\n };\n }\n }\n\n /**\n * Resolve the LanguageModel to use: context.model (injected mock/override)\n * takes precedence, otherwise create from llmConfig + modelId.\n */\n private resolveModel(\n context: AssertionContext | undefined,\n modelId: string | undefined,\n ): LanguageModel | null {\n if (context?.model) {\n return context.model;\n }\n if (!modelId || !context?.llmConfig) {\n return null;\n }\n const anthropic = createAnthropic({\n baseURL: context.llmConfig.baseUrl,\n apiKey: \"dummy\",\n headers: context.llmConfig.headers,\n });\n return anthropic(modelId);\n }\n\n private async callGenerateText(\n model: LanguageModel,\n prompt: string,\n system: string,\n maxOutputTokens: number,\n temperature: number,\n workDir?: string,\n ): Promise<JudgeResult> {\n const baseOptions = {\n model,\n prompt,\n system,\n maxOutputTokens,\n temperature,\n output: Output.object({ schema: JudgeResultSchema }),\n stopWhen: stepCountIs(MAX_JUDGE_STEPS),\n } as const;\n\n const { output } = workDir\n ? await generateText({\n ...baseOptions,\n tools: { read_file: createReadFileTool(workDir) },\n })\n : await generateText(baseOptions);\n\n return output;\n }\n}\n"],
5
+ "mappings": ";AAAA,SAAS,SAAS;AAQX,IAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA;AAAA,EAElC,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC;AACvC,CAAC;AAUM,IAAM,qCAAqC,EAAE,OAAO;AAAA,EACzD,MAAM,EAAE,QAAQ,wBAAwB;AAAA;AAAA,EAExC,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC;AAAA;AAAA,EAE1B,gBAAgB,EAAE,OAAO,EAAE,IAAI,CAAC;AAClC,CAAC;AAUM,IAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,cAAc;AAAA;AAAA,EAE9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE7B,kBAAkB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC9C,CAAC;AAQM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,MAAM;AAAA;AAAA,EAEtB,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AASM,IAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EAAE,QAAQ,WAAW;AAAA;AAAA,EAE3B,QAAQ,EAAE,OAAO;AAAA;AAAA,EAEjB,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,EAAE,SAAS;AAAA;AAAA,EAEpD,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,WAAW,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACrC,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AACjD,CAAC;AAQM,IAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,YAAY;AAAA;AAAA,EAE5B,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAC3C,CAAC;AASM,IAAM,kBAAkB,EAAE,MAAM;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;;;ACvGD,SAAS,KAAAA,UAAS;AAKX,IAAM,mBAAmBA,GAAE,OAAO;AAAA,EACvC,QAAQA,GAAE,OAAO;AAAA,EACjB,YAAYA,GAAE,OAAO;AAAA,EACrB,OAAOA,GAAE,OAAO;AAClB,CAAC;AAOM,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,gBAAa;AACb,EAAAA,aAAA,cAAW;AACX,EAAAA,aAAA,iBAAc;AACd,EAAAA,aAAA,cAAW;AAJD,SAAAA;AAAA,GAAA;AAUL,IAAM,qBAAqBD,GAAE,OAAO;AAAA,EACzC,IAAIA,GAAE,OAAO;AAAA,EACb,YAAYA,GAAE,OAAO;AAAA,EACrB,MAAMA,GAAE,KAAK,WAAW;AAAA,EACxB,OAAOA,GAAE,OAAO;AAAA,EAChB,UAAUA,GAAE,OAAO;AAAA,EACnB,WAAWA,GAAE,OAAO;AAAA,EACpB,YAAYA,GAAE,OAAO;AAAA,EACrB,YAAY;AAAA,EACZ,SAASA,GAAE,OAAO;AAAA,EAClB,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,EACnC,cAAcA,GAAE,OAAO,EAAE,SAAS;AAAA,EAClC,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,EACnC,SAASA,GAAE,QAAQ;AAAA,EACnB,OAAOA,GAAE,OAAO,EAAE,SAAS;AAC7B,CAAC;AAOM,IAAM,0BAA0BA,GAAE,OAAO;AAAA,EAC9C,OAAOA,GAAE,OAAO;AAAA,EAChB,YAAYA,GAAE,OAAO;AAAA,EACrB,QAAQA,GAAE,OAAO;AAAA,EACjB,SAASA,GAAE,OAAO;AACpB,CAAC;AAOM,IAAM,wBAAwBA,GAAE,OAAO;AAAA,EAC5C,YAAYA,GAAE,OAAO;AAAA,EACrB,iBAAiBA,GAAE,OAAO;AAAA,EAC1B,aAAa;AAAA,EACb,cAAcA,GAAE,OAAO;AAAA,EACvB,mBAAmBA,GAAE,OAAOA,GAAE,OAAO,GAAG,uBAAuB,EAAE,SAAS;AAAA,EAC1E,gBAAgBA,GAAE,OAAOA,GAAE,OAAO,GAAG,uBAAuB;AAAA,EAC5D,YAAYA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAChC,CAAC;AAOM,IAAM,iBAAiBA,GAAE,OAAO;AAAA,EACrC,IAAIA,GAAE,OAAO;AAAA,EACb,OAAOA,GAAE,MAAM,kBAAkB;AAAA,EACjC,SAAS;AACX,CAAC;;;AChFD,SAAS,KAAAE,UAAS;AAMX,IAAK,wBAAL,kBAAKC,2BAAL;AACL,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,YAAS;AACT,EAAAA,uBAAA,aAAU;AACV,EAAAA,uBAAA,WAAQ;AAJE,SAAAA;AAAA,GAAA;AAUL,IAAM,wBAAwBC,GAAE,OAAO;AAAA,EAC5C,IAAIA,GAAE,OAAO;AAAA,EACb,aAAaA,GAAE,OAAO;AAAA,EACtB,eAAeA,GAAE,OAAO;AAAA,EACxB,eAAeA,GAAE,OAAO;AAAA,EACxB,QAAQA,GAAE,KAAK,qBAAqB;AAAA,EACpC,SAASA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,QAAQA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAASA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACpD,eAAeA,GAAE,MAAM,kBAAkB,EAAE,SAAS;AACtD,CAAC;;;AC1BD,SAAS,cAAAC,mBAAkB;;;ACK3B,SAAS,kBAAkB;;;AC6BpB,IAAe,qBAAf,MAAmE;AAQ1E;;;AD9BA,SAAS,wBAAwB,UAAiC;AAChE,QAAM,eAAe,oBAAI,IAAY;AACrC,aAAW,QAAQ,SAAS,OAAO;AACjC,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AACA,QAAI;AACJ,QAAI;AACF,aAAO,KAAK,gBACP,KAAK,MAAM,KAAK,aAAa,IAC9B;AAAA,IACN,QAAQ;AACN;AAAA,IACF;AACA,QAAI,SAAS,QAAQ,OAAO,SAAS,UAAU;AAC7C,YAAM,MAAM;AACZ,UAAI,OAAO,IAAI,UAAU,UAAU;AACjC,qBAAa,IAAI,IAAI,KAAK;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AASO,IAAM,0BAAN,cAAsC,mBAA4C;AAAA,EAC9E,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,cAAc,WAAW;AAC/B,UAAM,iBAAiB,UAAU;AACjC,UAAM,gBAAgB,eAAe,KAAK,IAAI;AAE9C,UAAM,WAAiC,MAAM;AAC7C,QAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,aAAO;AAAA,QACL,IAAI,WAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,wBAAwB,QAAQ;AACrD,UAAM,gBAAgB,eAAe;AAAA,MACnC,CAAC,SAAS,CAAC,aAAa,IAAI,IAAI;AAAA,IAClC;AAEA,QAAI,cAAc,WAAW,GAAG;AAC9B,YAAMC,WACJ,eAAe,WAAW,IACtB,UAAU,eAAe,CAAC,CAAC,iBAC3B,2BAA2B,aAAa;AAC9C,aAAO;AAAA,QACL,IAAI,WAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAAA;AAAA,QACA,UAAU;AAAA,MACZ;AAAA,IACF;AAEA,UAAM,eAAe,cAAc,KAAK,IAAI;AAC5C,UAAM,UACJ,eAAe,WAAW,IACtB,UAAU,cAAc,CAAC,CAAC,qBAC1B,mBAAmB,YAAY,sBAAsB,aAAa;AACxE,WAAO;AAAA,MACL,IAAI,WAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ;AAAA,EACF;AACF;;;AErGA,SAAS,cAAAC,mBAAkB;AAI3B,IAAM,iBAAiB;AACvB,IAAM,iBAAiB;AAEvB,IAAM,cAAc,CAAC;AAAA,EACnB;AAAA,EACA;AACF,MAIE,OAAO,QAAQ,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,GAAG,MAAM;AAC7C,QAAM,YAAY,OAAO,GAAG;AAE5B,MAAI,cAAc,QAAQ,cAAc,OAAW,QAAO;AAC1D,QAAM,YACJ,OAAO,cAAc,WAAW,YAAY,KAAK,UAAU,SAAS;AACtE,SAAO,UAAU,SAAS,OAAO,GAAG,CAAC;AACvC,CAAC;AAOI,IAAM,+BAAN,cAA2C,mBAAiD;AAAA,EACxF,OAAO;AAAA,EAEhB,SACE,WACA,OAEA,UACiB;AACjB,UAAM,cAAcC,YAAW;AAC/B,UAAM,EAAE,UAAU,gBAAgB,kBAAkB,IAAI;AAExD,UAAM,cAAc,CAClB,QACA,SACAC,WACAC,aACqB;AAAA,MACrB,IAAIF,YAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA;AAAA,MACA,UAAAC;AAAA,MACA,GAAIC,YAAW,SAAY,EAAE,QAAAA,QAAO,IAAI,CAAC;AAAA,IAC3C;AAGA,QAAI;AACJ,QAAI;AACF,iBAAW,KAAK,MAAM,iBAAiB;AAAA,IACzC,QAAQ;AACN,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ;AAAA,QACjB,GAAG,QAAQ;AAAA,QACX;AAAA,MACF;AAAA,IACF;AAEA,UAAM,gBAAgB,GAAG,QAAQ,IAAI,OAAO,QAAQ,QAAQ,EACzD,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC,GAAG,EAC7B,KAAK,IAAI,CAAC;AAEb,UAAM,QAAQ,MAAM,UAAU,SAAS,CAAC;AAExC,UAAM,YAAY,MACf,OAAO,CAAC,MAAM,EAAE,aAAa,YAAY,EAAE,kBAAkB,MAAS,EACtE,IAAI,CAAC,MAAM;AACV,UAAI;AACF,eAAO,KAAK,MAAM,EAAE,aAAc;AAAA,MACpC,QAAQ;AACN,eAAO;AAAA,MACT;AAAA,IACF,CAAC,EACA,OAAO,CAAC,SAA0C,SAAS,IAAI;AAElE,QAAI,UAAU,KAAK,CAACA,YAAW,YAAY,EAAE,QAAAA,SAAQ,SAAS,CAAC,CAAC,GAAG;AACjE,aAAO;AAAA;AAAA,QAEL,SAAS,QAAQ,qCAAqC,iBAAiB;AAAA,QACvE;AAAA,MACF;AAAA,IACF;AAEA,UAAM,YAAY;AAClB,UAAM,aAAa,CAAC,SAClB,GAAG,QAAQ,IAAI,OAAO,QAAQ,IAAI,EAC/B,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,EAAE,EAC3C,KAAK,IAAI,CAAC;AAEf,UAAM,SACJ,UAAU,WAAW,IACjB,iCACA,UAAU,MAAM,GAAG,SAAS,EAAE,IAAI,UAAU,EAAE,KAAK,KAAK,KACvD,UAAU,SAAS,YAChB,YAAY,UAAU,SAAS,SAAS,UACxC;AAEV,WAAO;AAAA;AAAA,MAEL,SAAS,QAAQ,2CAA2C,iBAAiB;AAAA,MAC7E;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;ACnHA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,gBAAgB;AAIzB,IAAM,kBAAkB;AACxB,IAAM,oBAAoB;AAMnB,IAAM,uBAAN,cAAmC,mBAAyC;AAAA,EACxE,OAAO;AAAA,EAEhB,SACE,WACA,QACA,SACiB;AACjB,UAAM,cAAcC,YAAW;AAC/B,UAAM,UAAU,SAAS;AACzB,UAAM,UAAU,UAAU,WAAW;AACrC,UAAM,mBAAmB,UAAU,oBAAoB;AAEvD,QAAI,CAAC,SAAS;AACZ,aAAO,KAAK,aAAa,aAAa;AAAA,QACpC;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,gBAAgB;AAAA,MACnC,CAAC;AAAA,IACH;AAEA,QAAI,WAA0B;AAC9B,QAAI,eAA8B;AAClC,QAAI;AACJ,QAAI;AAEJ,YAAQ,IAAI,2BAA2B,OAAO,SAAS,OAAO,EAAE;AAEhE,QAAI;AACF,eAAS,SAAS;AAAA,QAChB,KAAK;AAAA,QACL,UAAU;AAAA,QACV,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,MAClC,CAAC;AACD,iBAAW;AAAA,IACb,SAAS,KAAK;AACZ,YAAM,QAAQ;AAMd,iBACE,OAAO,MAAM,WAAW,WACpB,MAAM,SACN,OAAO,MAAM,SAAS,WACpB,MAAM,OACN;AACR,qBAAe,MAAM;AACrB,eAAS,KAAK,eAAe,MAAM,MAAM;AACzC,eAAS,KAAK,eAAe,MAAM,MAAM;AAAA,IAC3C;AAEA,UAAM,SAAS,aAAa,QAAQ,aAAa;AAEjD,UAAM,UAAmC,EAAE,SAAS,QAAQ;AAC5D,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AACA,QAAI,WAAW,UAAa,WAAW,IAAI;AACzC,cAAQ,SAAS;AAAA,IACnB;AAEA,WAAO,KAAK,aAAa,aAAa;AAAA,MACpC,QAAQ;AAAA,MAGR,SAAS,KAAK,cAAc,UAAU,kBAAkB,YAAY;AAAA,MACpE,UAAU,OAAO,gBAAgB;AAAA,MACjC,QAAQ,aAAa,OAAO,OAAO,QAAQ,IAAI;AAAA,MAC/C;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,aACN,aACA,QACiB;AACjB,WAAO;AAAA,MACL,IAAIA,YAAW;AAAA,MACf;AAAA,MACA,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEQ,eACN,OACoB;AACpB,QAAI,UAAU,UAAa,UAAU,KAAM,QAAO;AAClD,QAAI,OAAO,UAAU,SAAU,QAAO;AACtC,WAAO,MAAM,SAAS,OAAO;AAAA,EAC/B;AAAA,EAEQ,cACN,UACA,kBACA,cACQ;AACR,QAAI,aAAa,MAAM;AACrB,aAAO,iBAAiB,YAAY;AAAA,IACtC;AACA,QAAI,aAAa,kBAAkB;AACjC,aAAO,2BAA2B,QAAQ;AAAA,IAC5C;AACA,WAAO,qBAAqB,QAAQ,cAAc,gBAAgB;AAAA,EACpE;AACF;;;ACzHA,SAAS,cAAAC,mBAAkB;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,gBAAgB,UAAU;AAEhC,QAAI,MAAM,cAAc,MAAM;AAC5B,aAAO,KAAK,aAAa;AAAA,QACvB;AAAA,QACA,SAAS;AAAA,QACT,UAAU,MAAM,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,MAAM,cAAc;AAEnC,WAAO,KAAK,aAAa;AAAA,MACvB,QAAQ;AAAA,MAGR,SAAS,SACL,gBAAgB,MAAM,UAAU,cAAc,aAAa,QAC3D,wBAAwB,MAAM,UAAU,QAAQ,aAAa;AAAA,MACjE,UAAU,MAAM,aAAa;AAAA,MAC7B,QAAQ,GAAG,MAAM,UAAU;AAAA,IAC7B,CAAC;AAAA,EACH;AAAA,EAEQ,aAAa,QAAmD;AACtE,WAAO;AAAA,MACL,IAAIC,YAAW;AAAA,MACf,aAAaA,YAAW;AAAA,MACxB,eAAe;AAAA,MACf,eAAe;AAAA,MACf;AAAA,MACA,GAAG;AAAA,IACL;AAAA,EACF;AACF;;;AC7CA,SAAS,cAAAC,mBAAkB;AAOpB,IAAM,gBAAN,cAA4B,mBAAkC;AAAA,EAC1D,OAAO;AAAA,EAEhB,SAAS,WAA0B,OAAyC;AAC1E,UAAM,cAAcC,YAAW;AAC/B,UAAM,KAAKA,YAAW;AACtB,UAAM,gBAAgB;AACtB,UAAM,gBAAgB;AACtB,UAAM,aAAa,UAAU;AAE7B,QAAI,CAAC,MAAM,UAAU;AACnB,aAAO;AAAA,QACL;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,SAAS,QAAQ;AAC7C,UAAM,kBAAkB,cAAc,QAAQ,CAAC;AAC/C,UAAM,eAAe,WAAW,QAAQ,CAAC;AACzC,UAAM,SAAS,OAAO,eAAe,KAAK,OAAO,YAAY;AAE7D,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ;AAAA,MAGR,SAAS,SACL,SAAS,eAAe,wBAAwB,YAAY,KAC5D,SAAS,eAAe,sBAAsB,YAAY;AAAA,MAC9D,UAAU,OAAO,YAAY;AAAA,MAC7B,QAAQ,IAAI,eAAe;AAAA,MAC3B,SAAS,EAAE,eAAe,WAAW;AAAA,IACvC;AAAA,EACF;AACF;;;ACvDA,SAAS,YAAuB;AAChC,SAAS,KAAAC,UAAS;AAClB,SAAS,gBAAgB;AACzB,OAAO,UAAU;AAMV,SAAS,mBACd,SACwC;AACxC,QAAM,kBAAkB,KAAK,QAAQ,OAAO;AAC5C,SAAO,KAAK;AAAA,IACV,aACE;AAAA,IACF,aAAaA,GAAE,OAAO;AAAA,MACpB,MAAMA,GAAE,OAAO,EAAE,SAAS,qCAAqC;AAAA,IACjE,CAAC;AAAA,IACD,SAAS,OAAO;AAAA,MACd,MAAM;AAAA,IACR,MAEsE;AACpE,YAAM,WAAW,KAAK,QAAQ,iBAAiB,QAAQ;AACvD,UAAI,CAAC,SAAS,WAAW,kBAAkB,KAAK,GAAG,GAAG;AACpD,eAAO,EAAE,OAAO,kDAAkD;AAAA,MACpE;AACA,UAAI;AACF,cAAM,UAAU,MAAM,SAAS,UAAU,OAAO;AAChD,eAAO,EAAE,MAAM,UAAU,QAAQ;AAAA,MACnC,QAAQ;AACN,eAAO,EAAE,OAAO,mBAAmB,QAAQ,GAAG;AAAA,MAChD;AAAA,IACF;AAAA,EACF,CAAC;AACH;;;AC5BA,SAAS,cAAAC,mBAAkB;AAC3B,SAAS,uBAAuB;AAChC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,KAAAC,UAAS;AAUX,IAAM,oBAAoBC,GAAE,OAAO;AAAA,EACxC,MAAMA,GAAE,OAAO,EAAE,SAAS,4CAA4C;AAAA,EACtE,OAAOA,GACJ,OAAO,EACP,IAAI,CAAC,EACL,IAAI,GAAG,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,gBAAgBA,GACb,OAAO,EACP,SAAS,qDAAqD;AACnE,CAAC;AAED,IAAM,kBAAkB;AAKjB,SAAS,oBAAoB,UAAwC;AAC1E,MAAI,CAAC,UAAU,OAAO,QAAQ;AAC5B,WAAO;AAAA,EACT;AACA,QAAM,QAAkB,CAAC;AACzB,aAAW,QAAQ,SAAS,OAAO;AACjC,UAAM,QAAkB;AAAA,MACtB,QAAQ,KAAK,UAAU;AAAA,MACvB,SAAS,KAAK,IAAI;AAAA,MAClB,aAAa,KAAK,UAAU;AAAA,IAC9B;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,KAAK,QAAQ,EAAE;AACnC,UAAI,KAAK,eAAe;AACtB,cAAM,KAAK,SAAS,KAAK,aAAa,EAAE;AAAA,MAC1C;AAAA,IACF;AACA,QAAI,KAAK,eAAe;AACtB,YAAM,KAAK,WAAW,KAAK,aAAa,EAAE;AAAA,IAC5C;AACA,QAAI,KAAK,OAAO;AACd,YAAM,KAAK,UAAU,KAAK,KAAK,EAAE;AAAA,IACnC;AACA,UAAM,KAAK,MAAM,KAAK,IAAI,CAAC;AAAA,EAC7B;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAcO,SAAS,oBACd,KACA,KACQ;AACR,SAAO,IACJ,QAAQ,mBAAmB,IAAI,MAAM,EACrC,QAAQ,gBAAgB,IAAI,GAAG,EAC/B,QAAQ,yBAAyB,IAAI,YAAY,EACjD,QAAQ,0BAA0B,IAAI,aAAa,EACnD,QAAQ,qBAAqB,IAAI,QAAQ,EACzC,QAAQ,kBAAkB,IAAI,KAAK;AACxC;AAMO,SAAS,uBAAuB,MAAsB;AAC3D,QAAM,UAAU,KAAK,KAAK;AAC1B,QAAM,QAAQ,QAAQ,MAAM,wCAAwC;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;AAEO,SAAS,oBAAoB,QAA8B;AAChE,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,SAAS,UAAU;AAChC,UAAM,IAAI,MAAM,kDAAkD;AAAA,EACpE;AACA,MAAI,OAAO,IAAI,UAAU,UAAU;AACjC,UAAM,IAAI,MAAM,mDAAmD;AAAA,EACrE;AACA,MAAI,IAAI,QAAQ,KAAK,IAAI,QAAQ,KAAK;AACpC,UAAM,IAAI,MAAM,6CAA6C;AAAA,EAC/D;AACA,MAAI,OAAO,IAAI,mBAAmB,UAAU;AAC1C,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO;AAAA,IACL,MAAM,IAAI;AAAA,IACV,OAAO,IAAI;AAAA,IACX,gBAAgB,IAAI;AAAA,EACtB;AACF;AAEA,IAAM,oBAAoB;AAE1B,IAAM,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBvB,IAAM,oBAAN,cAAgC,mBAAsC;AAAA,EAClE,OAAO;AAAA,EAEhB,MAAM,SACJ,WACA,OACA,SAC0B;AAC1B,UAAM,cAAcC,YAAW;AAC/B,UAAM,UAAU,SAAS,WAAW;AAEpC,UAAM,SAAS,MAAM,cAAc;AACnC,UAAM,YAAY,MAAM,aAAa,CAAC;AAEtC,UAAM,eAAe,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI;AAChD,UAAM,gBAAgB,UACnB,OAAO,CAAC,MAAM,EAAE,WAAW,UAAU,EACrC,IAAI,CAAC,MAAM,EAAE,IAAI;AACpB,UAAM,WAAW,UACd,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,EAChC,IAAI,CAAC,MAAM,EAAE,IAAI;AAEpB,UAAM,eACJ,aAAa,SAAS,IAClB,aAAa,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACnD;AACN,UAAM,gBACJ,cAAc,SAAS,IACnB,cAAc,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACpD;AACN,UAAM,WACJ,SAAS,SAAS,IACd,SAAS,IAAI,CAAC,MAAc,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC/C;AAEN,UAAM,QAAQ,oBAAoB,MAAM,QAAQ;AAChD,UAAM,MAA0B;AAAA,MAC9B;AAAA,MACA,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,UAAM,UAAU,CAAC,MAAc,oBAAoB,GAAG,GAAG;AAEzD,UAAM,cAAc,QAAQ,UAAU,MAAM;AAC5C,UAAM,eAAe,QAAQ,qBAAqB;AAElD,UAAM,WAAW,UAAU,YAAY;AACvC,UAAM,kBAAkB,UAAU,aAAa;AAC/C,UAAM,cAAc,UAAU,eAAe;AAC7C,UAAM,UAAU,UAAU,SAAS,SAAS;AAE5C,UAAM,QAAQ,KAAK,aAAa,SAAS,OAAO;AAChD,QAAI,CAAC,OAAO;AACV,YAAM,SACJ,CAAC,WAAW,CAAC,SAAS,QAClB,uHACA;AACN,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS;AAAA,QACT,UAAU,OAAO,QAAQ;AAAA,MAC3B;AAAA,IACF;AAEA,QAAI;AACF,YAAM,cAAc,MAAM,KAAK;AAAA,QAC7B;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,MACb;AAEA,YAAM,SAAS,YAAY,SAAS;AACpC,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf,QAAQ;AAAA,QAGR,SAAS,SACL,eAAe,YAAY,KAAK,OAAO,QAAQ,KAAK,YAAY,IAAI,KACpE,eAAe,YAAY,KAAK,MAAM,QAAQ,KAAK,YAAY,IAAI;AAAA,QACvE,UAAU,OAAO,QAAQ;AAAA,QACzB,QAAQ,OAAO,YAAY,KAAK;AAAA,QAChC,SAAS;AAAA,UACP,OAAO,YAAY;AAAA,UACnB,gBAAgB,YAAY;AAAA,UAC5B,MAAM,YAAY;AAAA,QACpB;AAAA,MACF;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,uBAAuB,WAAW,GAAG,GAAG;AAC1C,eAAO;AAAA,UACL,IAAIA,YAAW;AAAA,UACf;AAAA,UACA,eAAe;AAAA,UACf,eAAe;AAAA,UACf;AAAA,UACA,SAAS;AAAA,UACT,UAAU,OAAO,QAAQ;AAAA,UACzB,SAAS;AAAA,YACP,SACE,OAAO,IAAI,SAAS,WAAW,IAAI,KAAK,MAAM,GAAG,GAAG,IAAI;AAAA,UAC5D;AAAA,QACF;AAAA,MACF;AAEA,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAM,UAAmC;AAAA,QACvC,OAAO;AAAA,QACP,OAAO;AAAA,MACT;AAEA,UAAI,aAAa,WAAW,GAAG,GAAG;AAChC,gBAAQ,aAAa,IAAI;AACzB,gBAAQ,MAAM,IAAI;AAClB,gBAAQ,cAAc,IAAI;AAC1B,gBAAQ,eACN,OAAO,IAAI,iBAAiB,WACxB,IAAI,aAAa,MAAM,GAAG,GAAI,IAC9B,IAAI;AAAA,MACZ;AAEA,aAAO;AAAA,QACL,IAAIA,YAAW;AAAA,QACf;AAAA,QACA,eAAe;AAAA,QACf,eAAe;AAAA,QACf;AAAA,QACA,SAAS,0BAA0B,OAAO;AAAA,QAC1C,UAAU,OAAO,QAAQ;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,aACN,SACA,SACsB;AACtB,QAAI,SAAS,OAAO;AAClB,aAAO,QAAQ;AAAA,IACjB;AACA,QAAI,CAAC,WAAW,CAAC,SAAS,WAAW;AACnC,aAAO;AAAA,IACT;AACA,UAAM,YAAY,gBAAgB;AAAA,MAChC,SAAS,QAAQ,UAAU;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS,QAAQ,UAAU;AAAA,IAC7B,CAAC;AACD,WAAO,UAAU,OAAO;AAAA,EAC1B;AAAA,EAEA,MAAc,iBACZ,OACA,QACA,QACA,iBACA,aACA,SACsB;AACtB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,QAAQ,OAAO,OAAO,EAAE,QAAQ,kBAAkB,CAAC;AAAA,MACnD,UAAU,YAAY,eAAe;AAAA,IACvC;AAEA,UAAM,EAAE,OAAO,IAAI,UACf,MAAM,aAAa;AAAA,MACjB,GAAG;AAAA,MACH,OAAO,EAAE,WAAW,mBAAmB,OAAO,EAAE;AAAA,IAClD,CAAC,IACD,MAAM,aAAa,WAAW;AAElC,WAAO;AAAA,EACT;AACF;;;ARnVA,IAAM,oBAAoB,IAAI,kBAAkB;AAEhD,IAAM,aAAiD;AAAA,EACrD,kBAAkB,IAAI,wBAAwB;AAAA,EAC9C,wBAAwB,IAAI,6BAA6B;AAAA,EACzD,cAAc,IAAI,qBAAqB;AAAA,EACvC,YAAY,IAAI,cAAc;AAAA,EAC9B,MAAM,IAAI,cAAc;AAAA,EACxB,WAAW;AAAA;AAAA,EAEX,QAAQ;AACV;AAQO,SAAS,kBACd,MACA,WACM;AACN,aAAW,IAAI,IAAI;AACrB;AAQO,SAAS,aAAa,MAA8C;AACzE,SAAO,WAAW,IAAI;AACxB;AAUA,eAAsB,mBACpB,OACA,YACA,SAC4B;AAC5B,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,CAAC;AAAA,EACV;AACA,SAAO,QAAQ;AAAA,IACb,WAAW,IAAI,OAAO,cAAc;AAClC,YAAM,YAAY,WAAW,UAAU,IAAI;AAC3C,UAAI,CAAC,WAAW;AACd,eAAO;AAAA,UACL,IAAIC,YAAW;AAAA,UACf,aAAaA,YAAW;AAAA,UACxB,eAAe,UAAU;AAAA,UACzB,eAAe;AAAA,UACf;AAAA,UACA,SAAS,+BAA+B,UAAU,IAAI;AAAA,UACtD,UAAU;AAAA,QACZ;AAAA,MACF;AACA,YAAM,UAAU,KAAK,IAAI;AACzB,YAAM,SAAS,MAAM,UAAU,SAAS,WAAW,OAAO,OAAO;AACjE,YAAM,aAAa,KAAK,IAAI,IAAI;AAChC,aAAO,EAAE,GAAG,QAAQ,UAAU,WAAW;AAAA,IAC3C,CAAC;AAAA,EACH;AACF;",
6
+ "names": ["z", "LLMStepType", "z", "AssertionResultStatus", "z", "randomUUID", "message", "randomUUID", "randomUUID", "expected", "actual", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "randomUUID", "z", "randomUUID", "z", "z", "randomUUID", "randomUUID"]
7
7
  }
@@ -47,7 +47,6 @@ export type CostAssertion = z.infer<typeof CostAssertionSchema>;
47
47
  export declare const LlmJudgeAssertionSchema: z.ZodObject<{
48
48
  type: z.ZodLiteral<"llm_judge">;
49
49
  prompt: z.ZodString;
50
- systemPrompt: z.ZodOptional<z.ZodString>;
51
50
  minScore: z.ZodOptional<z.ZodNumber>;
52
51
  model: z.ZodOptional<z.ZodString>;
53
52
  maxTokens: z.ZodOptional<z.ZodNumber>;
@@ -88,7 +87,6 @@ export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
88
87
  }, z.core.$strip>, z.ZodObject<{
89
88
  type: z.ZodLiteral<"llm_judge">;
90
89
  prompt: z.ZodString;
91
- systemPrompt: z.ZodOptional<z.ZodString>;
92
90
  minScore: z.ZodOptional<z.ZodNumber>;
93
91
  model: z.ZodOptional<z.ZodString>;
94
92
  maxTokens: z.ZodOptional<z.ZodNumber>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/eval-assertions",
3
- "version": "0.19.0",
3
+ "version": "0.21.0",
4
4
  "description": "Assertion framework for AI agent evaluations - supports skill invocation checks, build validation, and LLM-based judging",
5
5
  "files": [
6
6
  "build"
@@ -61,5 +61,5 @@
61
61
  ],
62
62
  "license": "MIT",
63
63
  "author": "Wix",
64
- "falconPackageHash": "67896c78bdbd22e6a1005b2ee93fac758e73fca32a0745f711973efb"
64
+ "falconPackageHash": "aefe4f447672256e6600e46120e435afa41ccbbd5d4b2840976409ff"
65
65
  }