@wix/evalforge-types 0.68.0 → 0.70.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +27 -38
- package/build/index.js.map +3 -3
- package/build/index.mjs +25 -35
- package/build/index.mjs.map +3 -3
- package/build/types/evaluation/eval-run.d.ts +87 -169
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -1030,22 +1030,6 @@ var TriggerSchema = z28.object({
|
|
|
1030
1030
|
metadata: TriggerMetadataSchema.optional(),
|
|
1031
1031
|
type: z28.nativeEnum(TriggerType)
|
|
1032
1032
|
});
|
|
1033
|
-
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1034
|
-
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
1035
|
-
FailureCategory2["WRONG_CONTENT"] = "wrong_content";
|
|
1036
|
-
FailureCategory2["BUILD_ERROR"] = "build_error";
|
|
1037
|
-
FailureCategory2["TEST_FAILURE"] = "test_failure";
|
|
1038
|
-
FailureCategory2["RUNTIME_ERROR"] = "runtime_error";
|
|
1039
|
-
FailureCategory2["PERFORMANCE"] = "performance";
|
|
1040
|
-
return FailureCategory2;
|
|
1041
|
-
})(FailureCategory || {});
|
|
1042
|
-
var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
1043
|
-
FailureSeverity2["CRITICAL"] = "critical";
|
|
1044
|
-
FailureSeverity2["HIGH"] = "high";
|
|
1045
|
-
FailureSeverity2["MEDIUM"] = "medium";
|
|
1046
|
-
FailureSeverity2["LOW"] = "low";
|
|
1047
|
-
return FailureSeverity2;
|
|
1048
|
-
})(FailureSeverity || {});
|
|
1049
1033
|
var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
|
|
1050
1034
|
var DiffLineSchema = z28.object({
|
|
1051
1035
|
type: DiffLineTypeSchema,
|
|
@@ -1098,20 +1082,23 @@ var ExecutionTraceSchema = z28.object({
|
|
|
1098
1082
|
apiCalls: z28.array(ApiCallSchema),
|
|
1099
1083
|
totalDuration: z28.number()
|
|
1100
1084
|
});
|
|
1101
|
-
var FailureAnalysisSchema = z28.object({
|
|
1102
|
-
category: z28.enum(
|
|
1103
|
-
|
|
1085
|
+
var RunAnalysisFindingSchema = z28.object({
|
|
1086
|
+
category: z28.enum([
|
|
1087
|
+
"failure_pattern",
|
|
1088
|
+
"cost_waste",
|
|
1089
|
+
"flakiness",
|
|
1090
|
+
"inefficiency",
|
|
1091
|
+
"positive"
|
|
1092
|
+
]),
|
|
1093
|
+
severity: z28.enum(["high", "medium", "low"]),
|
|
1094
|
+
description: z28.string(),
|
|
1095
|
+
affectedScenarios: z28.array(z28.string()),
|
|
1096
|
+
recommendation: z28.string().optional()
|
|
1097
|
+
});
|
|
1098
|
+
var RunAnalysisSchema = z28.object({
|
|
1099
|
+
generatedAt: z28.string(),
|
|
1104
1100
|
summary: z28.string(),
|
|
1105
|
-
|
|
1106
|
-
rootCause: z28.string(),
|
|
1107
|
-
suggestedFix: z28.string(),
|
|
1108
|
-
relatedAssertions: z28.array(z28.string()),
|
|
1109
|
-
codeSnippet: z28.string().optional(),
|
|
1110
|
-
similarIssues: z28.array(z28.string()).optional(),
|
|
1111
|
-
patternId: z28.string().optional(),
|
|
1112
|
-
// Extended fields for detailed debugging
|
|
1113
|
-
diff: DiffContentSchema.optional(),
|
|
1114
|
-
executionTrace: ExecutionTraceSchema.optional()
|
|
1101
|
+
findings: z28.array(RunAnalysisFindingSchema)
|
|
1115
1102
|
});
|
|
1116
1103
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1117
1104
|
/** Agent ID for this run */
|
|
@@ -1132,8 +1119,6 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1132
1119
|
results: z28.array(z28.lazy(() => EvalRunResultSchema)),
|
|
1133
1120
|
/** Aggregated metrics across all results */
|
|
1134
1121
|
aggregateMetrics: EvalMetricsSchema,
|
|
1135
|
-
/** Failure analyses */
|
|
1136
|
-
failureAnalyses: z28.array(FailureAnalysisSchema).optional(),
|
|
1137
1122
|
/** Aggregated LLM trace summary */
|
|
1138
1123
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1139
1124
|
/** What triggered this run */
|
|
@@ -1169,7 +1154,13 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1169
1154
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1170
1155
|
systemPrompt: z28.string().nullable().optional(),
|
|
1171
1156
|
modelConfig: ModelConfigSchema.optional()
|
|
1172
|
-
}).optional()
|
|
1157
|
+
}).optional(),
|
|
1158
|
+
/** UUID linking all runs in a comparison group */
|
|
1159
|
+
comparisonGroupId: z28.string().optional(),
|
|
1160
|
+
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1161
|
+
comparisonLabel: z28.string().optional(),
|
|
1162
|
+
/** LLM-generated analysis of the completed run */
|
|
1163
|
+
runAnalysis: RunAnalysisSchema.optional()
|
|
1173
1164
|
});
|
|
1174
1165
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1175
1166
|
id: true,
|
|
@@ -1805,9 +1796,6 @@ export {
|
|
|
1805
1796
|
EvaluationResultSchema,
|
|
1806
1797
|
ExecutionTraceSchema,
|
|
1807
1798
|
ExpectedFileSchema,
|
|
1808
|
-
FailureAnalysisSchema,
|
|
1809
|
-
FailureCategory,
|
|
1810
|
-
FailureSeverity,
|
|
1811
1799
|
FileContentCheckSchema,
|
|
1812
1800
|
FileContentTestSchema,
|
|
1813
1801
|
FileModificationSchema,
|
|
@@ -1844,6 +1832,8 @@ export {
|
|
|
1844
1832
|
RUN_COMMAND_LABELS,
|
|
1845
1833
|
RuleSchema,
|
|
1846
1834
|
RuleTypeSchema,
|
|
1835
|
+
RunAnalysisFindingSchema,
|
|
1836
|
+
RunAnalysisSchema,
|
|
1847
1837
|
SEMVER_REGEX,
|
|
1848
1838
|
SKILL_FOLDER_NAME_REGEX,
|
|
1849
1839
|
SYSTEM_ASSERTIONS,
|