npm - @wix/evalforge-types - Versions diffs - 0.16.0 → 0.18.0 - Mend

@wix/evalforge-types 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/build/index.js +439 -317
package/build/index.js.map +4 -4
package/build/index.mjs +431 -317
package/build/index.mjs.map +4 -4
package/build/types/assertion/assertion.d.ts +134 -37
package/build/types/assertion/index.d.ts +2 -1
package/build/types/assertion/system-assertions.d.ts +42 -0
package/build/types/scenario/test-scenario.d.ts +12 -0
package/package.json +2 -2

package/build/index.js CHANGED

@@ -26,6 +26,8 @@ __export(index_exports, {
   AllowedCommands: () => AllowedCommands,
   ApiCallSchema: () => ApiCallSchema,
   AssertionConfigSchema: () => AssertionConfigSchema,
+  AssertionParameterSchema: () => AssertionParameterSchema,
+  AssertionParameterTypeSchema: () => AssertionParameterTypeSchema,
   AssertionResultSchema: () => AssertionResultSchema,
   AssertionResultStatus: () => AssertionResultStatus,
   AssertionSchema: () => AssertionSchema,
@@ -92,6 +94,9 @@ __export(index_exports, {
   ProjectSchema: () => ProjectSchema,
   PromptResultSchema: () => PromptResultSchema,
   SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
+  SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
+  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
+  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
   SiteConfigTestSchema: () => SiteConfigTestSchema,
   SkillMetadataSchema: () => SkillMetadataSchema,
   SkillSchema: () => SkillSchema,
@@ -130,6 +135,9 @@ __export(index_exports, {
   getBuildPassedConfig: () => getBuildPassedConfig,
   getLlmJudgeConfig: () => getLlmJudgeConfig,
   getSkillWasCalledConfig: () => getSkillWasCalledConfig,
+  getSystemAssertion: () => getSystemAssertion,
+  getSystemAssertions: () => getSystemAssertions,
+  isSystemAssertionId: () => isSystemAssertionId,
   isValidSkillFolderName: () => isValidSkillFolderName,
   parseTraceEventLine: () => parseTraceEventLine,
   validateAssertionConfig: () => validateAssertionConfig
@@ -592,22 +600,145 @@ var EnvironmentSchema = import_zod19.z.object({
 });
 // src/scenario/test-scenario.ts
+var import_zod21 = require("zod");
+// src/assertion/assertion.ts
 var import_zod20 = require("zod");
-var ExpectedFileSchema = import_zod20.z.object({
+var AssertionTypeSchema = import_zod20.z.enum([
+  "skill_was_called",
+  "build_passed",
+  "llm_judge"
+]);
+var AssertionParameterTypeSchema = import_zod20.z.enum([
+  "string",
+  "number",
+  "boolean"
+]);
+var AssertionParameterSchema = import_zod20.z.object({
+  /** Parameter name (used as key in params object) */
+  name: import_zod20.z.string().min(1),
+  /** Display label for the parameter */
+  label: import_zod20.z.string().min(1),
+  /** Parameter type */
+  type: AssertionParameterTypeSchema,
+  /** Whether this parameter is required */
+  required: import_zod20.z.boolean(),
+  /** Default value (optional, used when not provided) */
+  defaultValue: import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean()]).optional()
+});
+var ScenarioAssertionLinkSchema = import_zod20.z.object({
+  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
+  assertionId: import_zod20.z.string(),
+  /** Parameter values for this assertion in this scenario */
+  params: import_zod20.z.record(
+    import_zod20.z.string(),
+    import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean(), import_zod20.z.null()])
+  ).optional()
+});
+var SkillWasCalledConfigSchema = import_zod20.z.object({
+  /** Name of the skill that must have been called */
+  skillName: import_zod20.z.string().min(1)
+});
+var BuildPassedConfigSchema = import_zod20.z.strictObject({
+  /** Command to run (default: "yarn build") */
+  command: import_zod20.z.string().optional(),
+  /** Expected exit code (default: 0) */
+  expectedExitCode: import_zod20.z.number().int().optional()
+});
+var LlmJudgeConfigSchema = import_zod20.z.object({
+  /**
+   * Prompt template with placeholders:
+   * - {{output}}: agent's final output
+   * - {{cwd}}: working directory
+   * - {{changedFiles}}: all files changed (new, modified)
+   * - {{modifiedFiles}}: only existing files that were modified
+   * - {{newFiles}}: only new files that were created
+   * - {{trace}}: step-by-step trace of tool calls
+   * - Custom parameters defined in the parameters array
+   */
+  prompt: import_zod20.z.string().min(1),
+  /** Optional system prompt for the judge */
+  systemPrompt: import_zod20.z.string().optional(),
+  /** Minimum score to pass (0-100, default 70) */
+  minScore: import_zod20.z.number().int().min(0).max(100).optional(),
+  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
+  model: import_zod20.z.string().optional(),
+  /** Max output tokens */
+  maxTokens: import_zod20.z.number().int().optional(),
+  /** Temperature (0-1) */
+  temperature: import_zod20.z.number().min(0).max(1).optional(),
+  /** User-defined parameters for this assertion */
+  parameters: import_zod20.z.array(AssertionParameterSchema).optional()
+});
+var AssertionConfigSchema = import_zod20.z.union([
+  LlmJudgeConfigSchema,
+  // requires prompt - check first
+  SkillWasCalledConfigSchema,
+  // requires skillName
+  BuildPassedConfigSchema,
+  // all optional, uses strictObject to reject unknown keys
+  import_zod20.z.object({})
+  // fallback empty config
+]);
+var CustomAssertionSchema = TenantEntitySchema.extend({
+  /** The assertion type */
+  type: AssertionTypeSchema,
+  /** Type-specific configuration */
+  config: AssertionConfigSchema
+});
+var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
+  id: true,
+  createdAt: true,
+  updatedAt: true,
+  deleted: true
+});
+var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
+function validateAssertionConfig(type, config) {
+  switch (type) {
+    case "skill_was_called":
+      return SkillWasCalledConfigSchema.safeParse(config).success;
+    case "build_passed":
+      return BuildPassedConfigSchema.safeParse(config).success;
+    case "llm_judge":
+      return LlmJudgeConfigSchema.safeParse(config).success;
+    default:
+      return false;
+  }
+}
+function getSkillWasCalledConfig(assertion) {
+  if (assertion.type !== "skill_was_called") return null;
+  const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
+  return result.success ? result.data : null;
+}
+function getBuildPassedConfig(assertion) {
+  if (assertion.type !== "build_passed") return null;
+  const result = BuildPassedConfigSchema.safeParse(assertion.config);
+  return result.success ? result.data : null;
+}
+function getLlmJudgeConfig(assertion) {
+  if (assertion.type !== "llm_judge") return null;
+  const result = LlmJudgeConfigSchema.safeParse(assertion.config);
+  return result.success ? result.data : null;
+}
+// src/scenario/test-scenario.ts
+var ExpectedFileSchema = import_zod21.z.object({
   /** Relative path where the file should be created */
-  path: import_zod20.z.string(),
+  path: import_zod21.z.string(),
   /** Optional expected content */
-  content: import_zod20.z.string().optional()
+  content: import_zod21.z.string().optional()
 });
 var TestScenarioSchema = TenantEntitySchema.extend({
   /** The prompt sent to the agent to trigger the task */
-  triggerPrompt: import_zod20.z.string().min(10),
+  triggerPrompt: import_zod21.z.string().min(10),
   /** ID of the template to use for this scenario (null = no template) */
-  templateId: import_zod20.z.string().nullish(),
+  templateId: import_zod21.z.string().nullish(),
   /** Inline assertions to evaluate for this scenario (legacy) */
-  assertions: import_zod20.z.array(AssertionSchema).optional(),
-  /** IDs of saved assertions to evaluate (from assertions table) */
-  assertionIds: import_zod20.z.array(import_zod20.z.string()).optional()
+  assertions: import_zod21.z.array(AssertionSchema).optional(),
+  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
+  assertionIds: import_zod21.z.array(import_zod21.z.string()).optional(),
+  /** Linked assertions with per-scenario parameter values */
+  assertionLinks: import_zod21.z.array(ScenarioAssertionLinkSchema).optional()
 });
 var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
   id: true,
@@ -618,10 +749,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
 var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
 // src/suite/test-suite.ts
-var import_zod21 = require("zod");
+var import_zod22 = require("zod");
 var TestSuiteSchema = TenantEntitySchema.extend({
   /** IDs of test scenarios in this suite */
-  scenarioIds: import_zod21.z.array(import_zod21.z.string())
+  scenarioIds: import_zod22.z.array(import_zod22.z.string())
 });
 var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
   id: true,
@@ -632,21 +763,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
 var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
 // src/evaluation/metrics.ts
-var import_zod22 = require("zod");
-var TokenUsageSchema = import_zod22.z.object({
-  prompt: import_zod22.z.number(),
-  completion: import_zod22.z.number(),
-  total: import_zod22.z.number()
-});
-var EvalMetricsSchema = import_zod22.z.object({
-  totalAssertions: import_zod22.z.number(),
-  passed: import_zod22.z.number(),
-  failed: import_zod22.z.number(),
-  skipped: import_zod22.z.number(),
-  errors: import_zod22.z.number(),
-  passRate: import_zod22.z.number(),
-  avgDuration: import_zod22.z.number(),
-  totalDuration: import_zod22.z.number()
+var import_zod23 = require("zod");
+var TokenUsageSchema = import_zod23.z.object({
+  prompt: import_zod23.z.number(),
+  completion: import_zod23.z.number(),
+  total: import_zod23.z.number()
+});
+var EvalMetricsSchema = import_zod23.z.object({
+  totalAssertions: import_zod23.z.number(),
+  passed: import_zod23.z.number(),
+  failed: import_zod23.z.number(),
+  skipped: import_zod23.z.number(),
+  errors: import_zod23.z.number(),
+  passRate: import_zod23.z.number(),
+  avgDuration: import_zod23.z.number(),
+  totalDuration: import_zod23.z.number()
 });
 var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
   EvalStatus2["PENDING"] = "pending";
@@ -656,7 +787,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
   EvalStatus2["CANCELLED"] = "cancelled";
   return EvalStatus2;
 })(EvalStatus || {});
-var EvalStatusSchema = import_zod22.z.enum(EvalStatus);
+var EvalStatusSchema = import_zod23.z.enum(EvalStatus);
 var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
   LLMStepType2["COMPLETION"] = "completion";
   LLMStepType2["TOOL_USE"] = "tool_use";
@@ -664,52 +795,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
   LLMStepType2["THINKING"] = "thinking";
   return LLMStepType2;
 })(LLMStepType || {});
-var LLMTraceStepSchema = import_zod22.z.object({
-  id: import_zod22.z.string(),
-  stepNumber: import_zod22.z.number(),
-  type: import_zod22.z.enum(LLMStepType),
-  model: import_zod22.z.string(),
-  provider: import_zod22.z.string(),
-  startedAt: import_zod22.z.string(),
-  durationMs: import_zod22.z.number(),
+var LLMTraceStepSchema = import_zod23.z.object({
+  id: import_zod23.z.string(),
+  stepNumber: import_zod23.z.number(),
+  type: import_zod23.z.enum(LLMStepType),
+  model: import_zod23.z.string(),
+  provider: import_zod23.z.string(),
+  startedAt: import_zod23.z.string(),
+  durationMs: import_zod23.z.number(),
   tokenUsage: TokenUsageSchema,
-  costUsd: import_zod22.z.number(),
-  toolName: import_zod22.z.string().optional(),
-  toolArguments: import_zod22.z.string().optional(),
-  inputPreview: import_zod22.z.string().optional(),
-  outputPreview: import_zod22.z.string().optional(),
-  success: import_zod22.z.boolean(),
-  error: import_zod22.z.string().optional()
-});
-var LLMBreakdownStatsSchema = import_zod22.z.object({
-  count: import_zod22.z.number(),
-  durationMs: import_zod22.z.number(),
-  tokens: import_zod22.z.number(),
-  costUsd: import_zod22.z.number()
-});
-var LLMTraceSummarySchema = import_zod22.z.object({
-  totalSteps: import_zod22.z.number(),
-  totalDurationMs: import_zod22.z.number(),
+  costUsd: import_zod23.z.number(),
+  toolName: import_zod23.z.string().optional(),
+  toolArguments: import_zod23.z.string().optional(),
+  inputPreview: import_zod23.z.string().optional(),
+  outputPreview: import_zod23.z.string().optional(),
+  success: import_zod23.z.boolean(),
+  error: import_zod23.z.string().optional()
+});
+var LLMBreakdownStatsSchema = import_zod23.z.object({
+  count: import_zod23.z.number(),
+  durationMs: import_zod23.z.number(),
+  tokens: import_zod23.z.number(),
+  costUsd: import_zod23.z.number()
+});
+var LLMTraceSummarySchema = import_zod23.z.object({
+  totalSteps: import_zod23.z.number(),
+  totalDurationMs: import_zod23.z.number(),
   totalTokens: TokenUsageSchema,
-  totalCostUsd: import_zod22.z.number(),
-  stepTypeBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema).optional(),
-  modelBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema),
-  modelsUsed: import_zod22.z.array(import_zod22.z.string())
-});
-var LLMTraceSchema = import_zod22.z.object({
-  id: import_zod22.z.string(),
-  steps: import_zod22.z.array(LLMTraceStepSchema),
+  totalCostUsd: import_zod23.z.number(),
+  stepTypeBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema).optional(),
+  modelBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema),
+  modelsUsed: import_zod23.z.array(import_zod23.z.string())
+});
+var LLMTraceSchema = import_zod23.z.object({
+  id: import_zod23.z.string(),
+  steps: import_zod23.z.array(LLMTraceStepSchema),
   summary: LLMTraceSummarySchema
 });
 // src/evaluation/eval-result.ts
-var import_zod25 = require("zod");
+var import_zod26 = require("zod");
 // src/evaluation/eval-run.ts
-var import_zod24 = require("zod");
+var import_zod25 = require("zod");
 // src/evaluation/live-trace.ts
-var import_zod23 = require("zod");
+var import_zod24 = require("zod");
 var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
   LiveTraceEventType2["THINKING"] = "thinking";
   LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -723,37 +854,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
   LiveTraceEventType2["USER"] = "user";
   return LiveTraceEventType2;
 })(LiveTraceEventType || {});
-var LiveTraceEventSchema = import_zod23.z.object({
+var LiveTraceEventSchema = import_zod24.z.object({
   /** The evaluation run ID */
-  evalRunId: import_zod23.z.string(),
+  evalRunId: import_zod24.z.string(),
   /** The scenario ID being executed */
-  scenarioId: import_zod23.z.string(),
+  scenarioId: import_zod24.z.string(),
   /** The scenario name for display */
-  scenarioName: import_zod23.z.string(),
+  scenarioName: import_zod24.z.string(),
   /** The target ID (skill, agent, etc.) */
-  targetId: import_zod23.z.string(),
+  targetId: import_zod24.z.string(),
   /** The target name for display */
-  targetName: import_zod23.z.string(),
+  targetName: import_zod24.z.string(),
   /** Step number in the current scenario execution */
-  stepNumber: import_zod23.z.number(),
+  stepNumber: import_zod24.z.number(),
   /** Type of trace event */
-  type: import_zod23.z.enum(LiveTraceEventType),
+  type: import_zod24.z.enum(LiveTraceEventType),
   /** Tool name if this is a tool_use event */
-  toolName: import_zod23.z.string().optional(),
+  toolName: import_zod24.z.string().optional(),
   /** Tool arguments preview (truncated JSON) */
-  toolArgs: import_zod23.z.string().optional(),
+  toolArgs: import_zod24.z.string().optional(),
   /** Output preview (truncated text) */
-  outputPreview: import_zod23.z.string().optional(),
+  outputPreview: import_zod24.z.string().optional(),
   /** File path for file operations */
-  filePath: import_zod23.z.string().optional(),
+  filePath: import_zod24.z.string().optional(),
   /** Elapsed time in milliseconds for progress events */
-  elapsedMs: import_zod23.z.number().optional(),
+  elapsedMs: import_zod24.z.number().optional(),
   /** Thinking/reasoning text from Claude */
-  thinking: import_zod23.z.string().optional(),
+  thinking: import_zod24.z.string().optional(),
   /** Timestamp when this event occurred */
-  timestamp: import_zod23.z.string(),
+  timestamp: import_zod24.z.string(),
   /** Whether this is the final event for this scenario */
-  isComplete: import_zod23.z.boolean()
+  isComplete: import_zod24.z.boolean()
 });
 var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
 function parseTraceEventLine(line) {
@@ -781,14 +912,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
   TriggerType2["MANUAL"] = "MANUAL";
   return TriggerType2;
 })(TriggerType || {});
-var TriggerMetadataSchema = import_zod24.z.object({
-  version: import_zod24.z.string().optional(),
-  resourceUpdated: import_zod24.z.array(import_zod24.z.string()).optional()
+var TriggerMetadataSchema = import_zod25.z.object({
+  version: import_zod25.z.string().optional(),
+  resourceUpdated: import_zod25.z.array(import_zod25.z.string()).optional()
 });
-var TriggerSchema = import_zod24.z.object({
-  id: import_zod24.z.string(),
+var TriggerSchema = import_zod25.z.object({
+  id: import_zod25.z.string(),
   metadata: TriggerMetadataSchema.optional(),
-  type: import_zod24.z.enum(TriggerType)
+  type: import_zod25.z.enum(TriggerType)
 });
 var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
   FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -806,28 +937,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
   FailureSeverity2["LOW"] = "low";
   return FailureSeverity2;
 })(FailureSeverity || {});
-var DiffLineTypeSchema = import_zod24.z.enum(["added", "removed", "unchanged"]);
-var DiffLineSchema = import_zod24.z.object({
+var DiffLineTypeSchema = import_zod25.z.enum(["added", "removed", "unchanged"]);
+var DiffLineSchema = import_zod25.z.object({
   type: DiffLineTypeSchema,
-  content: import_zod24.z.string(),
-  lineNumber: import_zod24.z.number()
-});
-var DiffContentSchema = import_zod24.z.object({
-  path: import_zod24.z.string(),
-  expected: import_zod24.z.string(),
-  actual: import_zod24.z.string(),
-  diffLines: import_zod24.z.array(DiffLineSchema),
-  renamedFrom: import_zod24.z.string().optional()
-});
-var CommandExecutionSchema = import_zod24.z.object({
-  command: import_zod24.z.string(),
-  exitCode: import_zod24.z.number(),
-  output: import_zod24.z.string().optional(),
-  duration: import_zod24.z.number()
-});
-var FileModificationSchema = import_zod24.z.object({
-  path: import_zod24.z.string(),
-  action: import_zod24.z.enum(["created", "modified", "deleted"])
+  content: import_zod25.z.string(),
+  lineNumber: import_zod25.z.number()
+});
+var DiffContentSchema = import_zod25.z.object({
+  path: import_zod25.z.string(),
+  expected: import_zod25.z.string(),
+  actual: import_zod25.z.string(),
+  diffLines: import_zod25.z.array(DiffLineSchema),
+  renamedFrom: import_zod25.z.string().optional()
+});
+var CommandExecutionSchema = import_zod25.z.object({
+  command: import_zod25.z.string(),
+  exitCode: import_zod25.z.number(),
+  output: import_zod25.z.string().optional(),
+  duration: import_zod25.z.number()
+});
+var FileModificationSchema = import_zod25.z.object({
+  path: import_zod25.z.string(),
+  action: import_zod25.z.enum(["created", "modified", "deleted"])
 });
 var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
   TemplateFileStatus2["NEW"] = "new";
@@ -835,75 +966,75 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
   TemplateFileStatus2["UNCHANGED"] = "unchanged";
   return TemplateFileStatus2;
 })(TemplateFileStatus || {});
-var TemplateFileSchema = import_zod24.z.object({
+var TemplateFileSchema = import_zod25.z.object({
   /** Relative path within the template */
-  path: import_zod24.z.string(),
+  path: import_zod25.z.string(),
   /** Full file content after execution */
-  content: import_zod24.z.string(),
+  content: import_zod25.z.string(),
   /** File status (new, modified, unchanged) */
-  status: import_zod24.z.enum(["new", "modified", "unchanged"])
-});
-var ApiCallSchema = import_zod24.z.object({
-  endpoint: import_zod24.z.string(),
-  tokensUsed: import_zod24.z.number(),
-  duration: import_zod24.z.number()
-});
-var ExecutionTraceSchema = import_zod24.z.object({
-  commands: import_zod24.z.array(CommandExecutionSchema),
-  filesModified: import_zod24.z.array(FileModificationSchema),
-  apiCalls: import_zod24.z.array(ApiCallSchema),
-  totalDuration: import_zod24.z.number()
-});
-var FailureAnalysisSchema = import_zod24.z.object({
-  category: import_zod24.z.enum(FailureCategory),
-  severity: import_zod24.z.enum(FailureSeverity),
-  summary: import_zod24.z.string(),
-  details: import_zod24.z.string(),
-  rootCause: import_zod24.z.string(),
-  suggestedFix: import_zod24.z.string(),
-  relatedAssertions: import_zod24.z.array(import_zod24.z.string()),
-  codeSnippet: import_zod24.z.string().optional(),
-  similarIssues: import_zod24.z.array(import_zod24.z.string()).optional(),
-  patternId: import_zod24.z.string().optional(),
+  status: import_zod25.z.enum(["new", "modified", "unchanged"])
+});
+var ApiCallSchema = import_zod25.z.object({
+  endpoint: import_zod25.z.string(),
+  tokensUsed: import_zod25.z.number(),
+  duration: import_zod25.z.number()
+});
+var ExecutionTraceSchema = import_zod25.z.object({
+  commands: import_zod25.z.array(CommandExecutionSchema),
+  filesModified: import_zod25.z.array(FileModificationSchema),
+  apiCalls: import_zod25.z.array(ApiCallSchema),
+  totalDuration: import_zod25.z.number()
+});
+var FailureAnalysisSchema = import_zod25.z.object({
+  category: import_zod25.z.enum(FailureCategory),
+  severity: import_zod25.z.enum(FailureSeverity),
+  summary: import_zod25.z.string(),
+  details: import_zod25.z.string(),
+  rootCause: import_zod25.z.string(),
+  suggestedFix: import_zod25.z.string(),
+  relatedAssertions: import_zod25.z.array(import_zod25.z.string()),
+  codeSnippet: import_zod25.z.string().optional(),
+  similarIssues: import_zod25.z.array(import_zod25.z.string()).optional(),
+  patternId: import_zod25.z.string().optional(),
   // Extended fields for detailed debugging
   diff: DiffContentSchema.optional(),
   executionTrace: ExecutionTraceSchema.optional()
 });
 var EvalRunSchema = TenantEntitySchema.extend({
   /** Agent ID for this run */
-  agentId: import_zod24.z.string().optional(),
+  agentId: import_zod25.z.string().optional(),
   /** Skills group ID for this run */
-  skillsGroupId: import_zod24.z.string().optional(),
+  skillsGroupId: import_zod25.z.string().optional(),
   /** Scenario IDs to run */
-  scenarioIds: import_zod24.z.array(import_zod24.z.string()),
+  scenarioIds: import_zod25.z.array(import_zod25.z.string()),
   /** Current status */
   status: EvalStatusSchema,
   /** Progress percentage (0-100) */
-  progress: import_zod24.z.number(),
+  progress: import_zod25.z.number(),
   /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
-  results: import_zod24.z.array(import_zod24.z.lazy(() => EvalRunResultSchema)),
+  results: import_zod25.z.array(import_zod25.z.lazy(() => EvalRunResultSchema)),
   /** Aggregated metrics across all results */
   aggregateMetrics: EvalMetricsSchema,
   /** Failure analyses */
-  failureAnalyses: import_zod24.z.array(FailureAnalysisSchema).optional(),
+  failureAnalyses: import_zod25.z.array(FailureAnalysisSchema).optional(),
   /** Aggregated LLM trace summary */
   llmTraceSummary: LLMTraceSummarySchema.optional(),
   /** What triggered this run */
   trigger: TriggerSchema.optional(),
   /** When the run started (set when evaluation is triggered) */
-  startedAt: import_zod24.z.string().optional(),
+  startedAt: import_zod25.z.string().optional(),
   /** When the run completed */
-  completedAt: import_zod24.z.string().optional(),
+  completedAt: import_zod25.z.string().optional(),
   /** Live trace events captured during execution (for playback on results page) */
-  liveTraceEvents: import_zod24.z.array(LiveTraceEventSchema).optional(),
+  liveTraceEvents: import_zod25.z.array(LiveTraceEventSchema).optional(),
   /** Remote job ID for tracking execution in Dev Machines */
-  jobId: import_zod24.z.string().optional(),
+  jobId: import_zod25.z.string().optional(),
   /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
-  jobStatus: import_zod24.z.string().optional(),
+  jobStatus: import_zod25.z.string().optional(),
   /** Remote job error message if the job failed */
-  jobError: import_zod24.z.string().optional(),
+  jobError: import_zod25.z.string().optional(),
   /** Timestamp of the last job status check */
-  jobStatusCheckedAt: import_zod24.z.string().optional()
+  jobStatusCheckedAt: import_zod25.z.string().optional()
 });
 var CreateEvalRunInputSchema = EvalRunSchema.omit({
   id: true,
@@ -916,28 +1047,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
   startedAt: true,
   completedAt: true
 });
-var EvaluationProgressSchema = import_zod24.z.object({
-  runId: import_zod24.z.string(),
-  targetId: import_zod24.z.string(),
-  totalScenarios: import_zod24.z.number(),
-  completedScenarios: import_zod24.z.number(),
-  scenarioProgress: import_zod24.z.array(
-    import_zod24.z.object({
-      scenarioId: import_zod24.z.string(),
-      currentStep: import_zod24.z.string(),
-      error: import_zod24.z.string().optional()
+var EvaluationProgressSchema = import_zod25.z.object({
+  runId: import_zod25.z.string(),
+  targetId: import_zod25.z.string(),
+  totalScenarios: import_zod25.z.number(),
+  completedScenarios: import_zod25.z.number(),
+  scenarioProgress: import_zod25.z.array(
+    import_zod25.z.object({
+      scenarioId: import_zod25.z.string(),
+      currentStep: import_zod25.z.string(),
+      error: import_zod25.z.string().optional()
     })
   ),
-  createdAt: import_zod24.z.number()
+  createdAt: import_zod25.z.number()
 });
-var EvaluationLogSchema = import_zod24.z.object({
-  runId: import_zod24.z.string(),
-  scenarioId: import_zod24.z.string(),
-  log: import_zod24.z.object({
-    level: import_zod24.z.enum(["info", "error", "debug"]),
-    message: import_zod24.z.string().optional(),
-    args: import_zod24.z.array(import_zod24.z.any()).optional(),
-    error: import_zod24.z.string().optional()
+var EvaluationLogSchema = import_zod25.z.object({
+  runId: import_zod25.z.string(),
+  scenarioId: import_zod25.z.string(),
+  log: import_zod25.z.object({
+    level: import_zod25.z.enum(["info", "error", "debug"]),
+    message: import_zod25.z.string().optional(),
+    args: import_zod25.z.array(import_zod25.z.any()).optional(),
+    error: import_zod25.z.string().optional()
   })
 });
 var LLM_TIMEOUT = 12e4;
@@ -950,91 +1081,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
   AssertionResultStatus2["ERROR"] = "error";
   return AssertionResultStatus2;
 })(AssertionResultStatus || {});
-var AssertionResultSchema = import_zod25.z.object({
-  id: import_zod25.z.string(),
-  assertionId: import_zod25.z.string(),
-  assertionType: import_zod25.z.string(),
-  assertionName: import_zod25.z.string(),
-  status: import_zod25.z.enum(AssertionResultStatus),
-  message: import_zod25.z.string().optional(),
-  expected: import_zod25.z.string().optional(),
-  actual: import_zod25.z.string().optional(),
-  duration: import_zod25.z.number().optional(),
-  details: import_zod25.z.record(import_zod25.z.string(), import_zod25.z.unknown()).optional(),
-  llmTraceSteps: import_zod25.z.array(LLMTraceStepSchema).optional()
-});
-var EvalRunResultSchema = import_zod25.z.object({
-  id: import_zod25.z.string(),
-  targetId: import_zod25.z.string(),
-  targetName: import_zod25.z.string().optional(),
-  scenarioId: import_zod25.z.string(),
-  scenarioName: import_zod25.z.string(),
+var AssertionResultSchema = import_zod26.z.object({
+  id: import_zod26.z.string(),
+  assertionId: import_zod26.z.string(),
+  assertionType: import_zod26.z.string(),
+  assertionName: import_zod26.z.string(),
+  status: import_zod26.z.enum(AssertionResultStatus),
+  message: import_zod26.z.string().optional(),
+  expected: import_zod26.z.string().optional(),
+  actual: import_zod26.z.string().optional(),
+  duration: import_zod26.z.number().optional(),
+  details: import_zod26.z.record(import_zod26.z.string(), import_zod26.z.unknown()).optional(),
+  llmTraceSteps: import_zod26.z.array(LLMTraceStepSchema).optional()
+});
+var EvalRunResultSchema = import_zod26.z.object({
+  id: import_zod26.z.string(),
+  targetId: import_zod26.z.string(),
+  targetName: import_zod26.z.string().optional(),
+  scenarioId: import_zod26.z.string(),
+  scenarioName: import_zod26.z.string(),
   modelConfig: ModelConfigSchema.optional(),
-  assertionResults: import_zod25.z.array(AssertionResultSchema),
+  assertionResults: import_zod26.z.array(AssertionResultSchema),
   metrics: EvalMetricsSchema.optional(),
-  passed: import_zod25.z.number(),
-  failed: import_zod25.z.number(),
-  passRate: import_zod25.z.number(),
-  duration: import_zod25.z.number(),
-  outputText: import_zod25.z.string().optional(),
-  files: import_zod25.z.array(ExpectedFileSchema).optional(),
-  fileDiffs: import_zod25.z.array(DiffContentSchema).optional(),
+  passed: import_zod26.z.number(),
+  failed: import_zod26.z.number(),
+  passRate: import_zod26.z.number(),
+  duration: import_zod26.z.number(),
+  outputText: import_zod26.z.string().optional(),
+  files: import_zod26.z.array(ExpectedFileSchema).optional(),
+  fileDiffs: import_zod26.z.array(DiffContentSchema).optional(),
   /** Full template files after execution with status indicators */
-  templateFiles: import_zod25.z.array(TemplateFileSchema).optional(),
-  startedAt: import_zod25.z.string().optional(),
-  completedAt: import_zod25.z.string().optional(),
+  templateFiles: import_zod26.z.array(TemplateFileSchema).optional(),
+  startedAt: import_zod26.z.string().optional(),
+  completedAt: import_zod26.z.string().optional(),
   llmTrace: LLMTraceSchema.optional()
 });
-var PromptResultSchema = import_zod25.z.object({
-  text: import_zod25.z.string(),
-  files: import_zod25.z.array(import_zod25.z.unknown()).optional(),
-  finishReason: import_zod25.z.string().optional(),
-  reasoning: import_zod25.z.string().optional(),
-  reasoningDetails: import_zod25.z.unknown().optional(),
-  toolCalls: import_zod25.z.array(import_zod25.z.unknown()).optional(),
-  toolResults: import_zod25.z.array(import_zod25.z.unknown()).optional(),
-  warnings: import_zod25.z.array(import_zod25.z.unknown()).optional(),
-  sources: import_zod25.z.array(import_zod25.z.unknown()).optional(),
-  steps: import_zod25.z.array(import_zod25.z.unknown()),
-  generationTimeMs: import_zod25.z.number(),
-  prompt: import_zod25.z.string(),
-  systemPrompt: import_zod25.z.string(),
-  usage: import_zod25.z.object({
-    totalTokens: import_zod25.z.number().optional(),
-    totalMicrocentsSpent: import_zod25.z.number().optional()
+var PromptResultSchema = import_zod26.z.object({
+  text: import_zod26.z.string(),
+  files: import_zod26.z.array(import_zod26.z.unknown()).optional(),
+  finishReason: import_zod26.z.string().optional(),
+  reasoning: import_zod26.z.string().optional(),
+  reasoningDetails: import_zod26.z.unknown().optional(),
+  toolCalls: import_zod26.z.array(import_zod26.z.unknown()).optional(),
+  toolResults: import_zod26.z.array(import_zod26.z.unknown()).optional(),
+  warnings: import_zod26.z.array(import_zod26.z.unknown()).optional(),
+  sources: import_zod26.z.array(import_zod26.z.unknown()).optional(),
+  steps: import_zod26.z.array(import_zod26.z.unknown()),
+  generationTimeMs: import_zod26.z.number(),
+  prompt: import_zod26.z.string(),
+  systemPrompt: import_zod26.z.string(),
+  usage: import_zod26.z.object({
+    totalTokens: import_zod26.z.number().optional(),
+    totalMicrocentsSpent: import_zod26.z.number().optional()
   })
 });
-var EvaluationResultSchema = import_zod25.z.object({
-  id: import_zod25.z.string(),
-  runId: import_zod25.z.string(),
-  timestamp: import_zod25.z.number(),
+var EvaluationResultSchema = import_zod26.z.object({
+  id: import_zod26.z.string(),
+  runId: import_zod26.z.string(),
+  timestamp: import_zod26.z.number(),
   promptResult: PromptResultSchema,
-  testResults: import_zod25.z.array(import_zod25.z.unknown()),
-  tags: import_zod25.z.array(import_zod25.z.string()).optional(),
-  feedback: import_zod25.z.string().optional(),
-  score: import_zod25.z.number(),
-  suiteId: import_zod25.z.string().optional()
-});
-var LeanEvaluationResultSchema = import_zod25.z.object({
-  id: import_zod25.z.string(),
-  runId: import_zod25.z.string(),
-  timestamp: import_zod25.z.number(),
-  tags: import_zod25.z.array(import_zod25.z.string()).optional(),
-  scenarioId: import_zod25.z.string(),
-  scenarioVersion: import_zod25.z.number().optional(),
-  targetId: import_zod25.z.string(),
-  targetVersion: import_zod25.z.number().optional(),
-  suiteId: import_zod25.z.string().optional(),
-  score: import_zod25.z.number(),
-  time: import_zod25.z.number().optional(),
-  microcentsSpent: import_zod25.z.number().optional()
+  testResults: import_zod26.z.array(import_zod26.z.unknown()),
+  tags: import_zod26.z.array(import_zod26.z.string()).optional(),
+  feedback: import_zod26.z.string().optional(),
+  score: import_zod26.z.number(),
+  suiteId: import_zod26.z.string().optional()
+});
+var LeanEvaluationResultSchema = import_zod26.z.object({
+  id: import_zod26.z.string(),
+  runId: import_zod26.z.string(),
+  timestamp: import_zod26.z.number(),
+  tags: import_zod26.z.array(import_zod26.z.string()).optional(),
+  scenarioId: import_zod26.z.string(),
+  scenarioVersion: import_zod26.z.number().optional(),
+  targetId: import_zod26.z.string(),
+  targetVersion: import_zod26.z.number().optional(),
+  suiteId: import_zod26.z.string().optional(),
+  score: import_zod26.z.number(),
+  time: import_zod26.z.number().optional(),
+  microcentsSpent: import_zod26.z.number().optional()
 });
 // src/project/project.ts
-var import_zod26 = require("zod");
+var import_zod27 = require("zod");
 var ProjectSchema = BaseEntitySchema.extend({
-  appId: import_zod26.z.string().optional().describe("The ID of the app in Dev Center"),
-  appSecret: import_zod26.z.string().optional().describe("The secret of the app in Dev Center")
+  appId: import_zod27.z.string().optional().describe("The ID of the app in Dev Center"),
+  appSecret: import_zod27.z.string().optional().describe("The secret of the app in Dev Center")
 });
 var CreateProjectInputSchema = ProjectSchema.omit({
   id: true,
@@ -1045,10 +1176,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
 var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
 // src/template/template.ts
-var import_zod27 = require("zod");
+var import_zod28 = require("zod");
 var TemplateSchema = TenantEntitySchema.extend({
   /** URL to download the template from */
-  downloadUrl: import_zod27.z.url()
+  downloadUrl: import_zod28.z.url()
 });
 var CreateTemplateInputSchema = TemplateSchema.omit({
   id: true,
@@ -1058,86 +1189,69 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
 });
 var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
-// src/assertion/assertion.ts
-var import_zod28 = require("zod");
-var AssertionTypeSchema = import_zod28.z.enum([
-  "skill_was_called",
-  "build_passed",
-  "llm_judge",
-  "custom"
-]);
-var SkillWasCalledConfigSchema = import_zod28.z.object({
-  /** Name of the skill that must have been called */
-  skillName: import_zod28.z.string().min(1)
-});
-var BuildPassedConfigSchema = import_zod28.z.object({
-  /** Command to run (default: "yarn build") */
-  command: import_zod28.z.string().optional(),
-  /** Expected exit code (default: 0) */
-  expectedExitCode: import_zod28.z.number().int().optional()
-});
-var LlmJudgeConfigSchema = import_zod28.z.object({
-  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
-  prompt: import_zod28.z.string().min(1),
-  /** Optional system prompt for the judge */
-  systemPrompt: import_zod28.z.string().optional(),
-  /** Minimum score to pass (0-100, default 70) */
-  minScore: import_zod28.z.number().int().min(0).max(100).optional(),
-  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
-  model: import_zod28.z.string().optional(),
-  /** Max output tokens */
-  maxTokens: import_zod28.z.number().int().optional(),
-  /** Temperature (0-1) */
-  temperature: import_zod28.z.number().min(0).max(1).optional()
-});
-var AssertionConfigSchema = import_zod28.z.union([
-  SkillWasCalledConfigSchema,
-  BuildPassedConfigSchema,
-  LlmJudgeConfigSchema,
-  import_zod28.z.object({})
-  // Empty config for cases where defaults are used
-]);
-var CustomAssertionSchema = TenantEntitySchema.extend({
-  /** The assertion type */
-  type: AssertionTypeSchema,
-  /** Type-specific configuration */
-  config: AssertionConfigSchema
-});
-var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
-  id: true,
-  createdAt: true,
-  updatedAt: true,
-  deleted: true
-});
-var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
-function validateAssertionConfig(type, config) {
-  switch (type) {
-    case "skill_was_called":
-      return SkillWasCalledConfigSchema.safeParse(config).success;
-    case "build_passed":
-      return BuildPassedConfigSchema.safeParse(config).success;
-    case "llm_judge":
-    case "custom":
-      return LlmJudgeConfigSchema.safeParse(config).success;
-    default:
-      return false;
-  }
-}
-function getSkillWasCalledConfig(assertion) {
-  if (assertion.type !== "skill_was_called") return null;
-  const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
-  return result.success ? result.data : null;
+// src/assertion/system-assertions.ts
+var SYSTEM_ASSERTION_IDS = {
+  SKILL_WAS_CALLED: "system:skill_was_called",
+  BUILD_PASSED: "system:build_passed"
+};
+function isSystemAssertionId(id) {
+  return id.startsWith("system:");
 }
-function getBuildPassedConfig(assertion) {
-  if (assertion.type !== "build_passed") return null;
-  const result = BuildPassedConfigSchema.safeParse(assertion.config);
-  return result.success ? result.data : null;
+var SYSTEM_ASSERTIONS = {
+  [SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED]: {
+    id: SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED,
+    name: "Skill Was Called",
+    description: "Check if a specific skill was invoked during the agent run",
+    type: "skill_was_called",
+    parameters: [
+      {
+        name: "skillName",
+        label: "Skill Name",
+        type: "string",
+        required: true
+      }
+    ]
+  },
+  [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
+    id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
+    name: "Build Passed",
+    description: "Run a build command and verify it exits with expected code",
+    type: "build_passed",
+    parameters: [
+      {
+        name: "command",
+        label: "Build Command",
+        type: "string",
+        required: false,
+        defaultValue: "yarn build"
+      },
+      {
+        name: "expectedExitCode",
+        label: "Expected Exit Code",
+        type: "number",
+        required: false,
+        defaultValue: 0
+      },
+      {
+        name: "maxBuildTime",
+        label: "Max Build Time (ms)",
+        type: "number",
+        required: false
+      },
+      {
+        name: "maxMemory",
+        label: "Max Memory (MB)",
+        type: "number",
+        required: false
+      }
+    ]
+  }
+};
+function getSystemAssertions() {
+  return Object.values(SYSTEM_ASSERTIONS);
 }
-function getLlmJudgeConfig(assertion) {
-  if (assertion.type !== "llm_judge" && assertion.type !== "custom")
-    return null;
-  const result = LlmJudgeConfigSchema.safeParse(assertion.config);
-  return result.success ? result.data : null;
+function getSystemAssertion(id) {
+  return SYSTEM_ASSERTIONS[id];
 }
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
@@ -1147,6 +1261,8 @@ function getLlmJudgeConfig(assertion) {
   AllowedCommands,
   ApiCallSchema,
   AssertionConfigSchema,
+  AssertionParameterSchema,
+  AssertionParameterTypeSchema,
   AssertionResultSchema,
   AssertionResultStatus,
   AssertionSchema,
@@ -1213,6 +1329,9 @@ function getLlmJudgeConfig(assertion) {
   ProjectSchema,
   PromptResultSchema,
   SKILL_FOLDER_NAME_REGEX,
+  SYSTEM_ASSERTIONS,
+  SYSTEM_ASSERTION_IDS,
+  ScenarioAssertionLinkSchema,
   SiteConfigTestSchema,
   SkillMetadataSchema,
   SkillSchema,
@@ -1251,6 +1370,9 @@ function getLlmJudgeConfig(assertion) {
   getBuildPassedConfig,
   getLlmJudgeConfig,
   getSkillWasCalledConfig,
+  getSystemAssertion,
+  getSystemAssertions,
+  isSystemAssertionId,
   isValidSkillFolderName,
   parseTraceEventLine,
   validateAssertionConfig