@wix/evalforge-types 0.68.0 → 0.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -1030,22 +1030,6 @@ var TriggerSchema = z28.object({
1030
1030
  metadata: TriggerMetadataSchema.optional(),
1031
1031
  type: z28.nativeEnum(TriggerType)
1032
1032
  });
1033
- var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
1034
- FailureCategory2["MISSING_FILE"] = "missing_file";
1035
- FailureCategory2["WRONG_CONTENT"] = "wrong_content";
1036
- FailureCategory2["BUILD_ERROR"] = "build_error";
1037
- FailureCategory2["TEST_FAILURE"] = "test_failure";
1038
- FailureCategory2["RUNTIME_ERROR"] = "runtime_error";
1039
- FailureCategory2["PERFORMANCE"] = "performance";
1040
- return FailureCategory2;
1041
- })(FailureCategory || {});
1042
- var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
1043
- FailureSeverity2["CRITICAL"] = "critical";
1044
- FailureSeverity2["HIGH"] = "high";
1045
- FailureSeverity2["MEDIUM"] = "medium";
1046
- FailureSeverity2["LOW"] = "low";
1047
- return FailureSeverity2;
1048
- })(FailureSeverity || {});
1049
1033
  var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
1050
1034
  var DiffLineSchema = z28.object({
1051
1035
  type: DiffLineTypeSchema,
@@ -1098,20 +1082,23 @@ var ExecutionTraceSchema = z28.object({
1098
1082
  apiCalls: z28.array(ApiCallSchema),
1099
1083
  totalDuration: z28.number()
1100
1084
  });
1101
- var FailureAnalysisSchema = z28.object({
1102
- category: z28.enum(FailureCategory),
1103
- severity: z28.enum(FailureSeverity),
1085
+ var RunAnalysisFindingSchema = z28.object({
1086
+ category: z28.enum([
1087
+ "failure_pattern",
1088
+ "cost_waste",
1089
+ "flakiness",
1090
+ "inefficiency",
1091
+ "positive"
1092
+ ]),
1093
+ severity: z28.enum(["high", "medium", "low"]),
1094
+ description: z28.string(),
1095
+ affectedScenarios: z28.array(z28.string()),
1096
+ recommendation: z28.string().optional()
1097
+ });
1098
+ var RunAnalysisSchema = z28.object({
1099
+ generatedAt: z28.string(),
1104
1100
  summary: z28.string(),
1105
- details: z28.string(),
1106
- rootCause: z28.string(),
1107
- suggestedFix: z28.string(),
1108
- relatedAssertions: z28.array(z28.string()),
1109
- codeSnippet: z28.string().optional(),
1110
- similarIssues: z28.array(z28.string()).optional(),
1111
- patternId: z28.string().optional(),
1112
- // Extended fields for detailed debugging
1113
- diff: DiffContentSchema.optional(),
1114
- executionTrace: ExecutionTraceSchema.optional()
1101
+ findings: z28.array(RunAnalysisFindingSchema)
1115
1102
  });
1116
1103
  var EvalRunSchema = TenantEntitySchema.extend({
1117
1104
  /** Agent ID for this run */
@@ -1132,8 +1119,6 @@ var EvalRunSchema = TenantEntitySchema.extend({
1132
1119
  results: z28.array(z28.lazy(() => EvalRunResultSchema)),
1133
1120
  /** Aggregated metrics across all results */
1134
1121
  aggregateMetrics: EvalMetricsSchema,
1135
- /** Failure analyses */
1136
- failureAnalyses: z28.array(FailureAnalysisSchema).optional(),
1137
1122
  /** Aggregated LLM trace summary */
1138
1123
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1139
1124
  /** What triggered this run */
@@ -1169,7 +1154,13 @@ var EvalRunSchema = TenantEntitySchema.extend({
1169
1154
  runCommand: AgentRunCommandSchema.optional(),
1170
1155
  systemPrompt: z28.string().nullable().optional(),
1171
1156
  modelConfig: ModelConfigSchema.optional()
1172
- }).optional()
1157
+ }).optional(),
1158
+ /** UUID linking all runs in a comparison group */
1159
+ comparisonGroupId: z28.string().optional(),
1160
+ /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1161
+ comparisonLabel: z28.string().optional(),
1162
+ /** LLM-generated analysis of the completed run */
1163
+ runAnalysis: RunAnalysisSchema.optional()
1173
1164
  });
1174
1165
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1175
1166
  id: true,
@@ -1805,9 +1796,6 @@ export {
1805
1796
  EvaluationResultSchema,
1806
1797
  ExecutionTraceSchema,
1807
1798
  ExpectedFileSchema,
1808
- FailureAnalysisSchema,
1809
- FailureCategory,
1810
- FailureSeverity,
1811
1799
  FileContentCheckSchema,
1812
1800
  FileContentTestSchema,
1813
1801
  FileModificationSchema,
@@ -1844,6 +1832,8 @@ export {
1844
1832
  RUN_COMMAND_LABELS,
1845
1833
  RuleSchema,
1846
1834
  RuleTypeSchema,
1835
+ RunAnalysisFindingSchema,
1836
+ RunAnalysisSchema,
1847
1837
  SEMVER_REGEX,
1848
1838
  SKILL_FOLDER_NAME_REGEX,
1849
1839
  SYSTEM_ASSERTIONS,