@wix/evalforge-types 0.68.0 → 0.70.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +27 -38
- package/build/index.js.map +3 -3
- package/build/index.mjs +25 -35
- package/build/index.mjs.map +3 -3
- package/build/types/evaluation/eval-run.d.ts +87 -169
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -100,9 +100,6 @@ __export(index_exports, {
|
|
|
100
100
|
EvaluationResultSchema: () => EvaluationResultSchema,
|
|
101
101
|
ExecutionTraceSchema: () => ExecutionTraceSchema,
|
|
102
102
|
ExpectedFileSchema: () => ExpectedFileSchema,
|
|
103
|
-
FailureAnalysisSchema: () => FailureAnalysisSchema,
|
|
104
|
-
FailureCategory: () => FailureCategory,
|
|
105
|
-
FailureSeverity: () => FailureSeverity,
|
|
106
103
|
FileContentCheckSchema: () => FileContentCheckSchema,
|
|
107
104
|
FileContentTestSchema: () => FileContentTestSchema,
|
|
108
105
|
FileModificationSchema: () => FileModificationSchema,
|
|
@@ -139,6 +136,8 @@ __export(index_exports, {
|
|
|
139
136
|
RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
|
|
140
137
|
RuleSchema: () => RuleSchema,
|
|
141
138
|
RuleTypeSchema: () => RuleTypeSchema,
|
|
139
|
+
RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
|
|
140
|
+
RunAnalysisSchema: () => RunAnalysisSchema,
|
|
142
141
|
SEMVER_REGEX: () => SEMVER_REGEX,
|
|
143
142
|
SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
|
|
144
143
|
SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
|
|
@@ -1239,22 +1238,6 @@ var TriggerSchema = import_zod28.z.object({
|
|
|
1239
1238
|
metadata: TriggerMetadataSchema.optional(),
|
|
1240
1239
|
type: import_zod28.z.nativeEnum(TriggerType)
|
|
1241
1240
|
});
|
|
1242
|
-
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1243
|
-
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
1244
|
-
FailureCategory2["WRONG_CONTENT"] = "wrong_content";
|
|
1245
|
-
FailureCategory2["BUILD_ERROR"] = "build_error";
|
|
1246
|
-
FailureCategory2["TEST_FAILURE"] = "test_failure";
|
|
1247
|
-
FailureCategory2["RUNTIME_ERROR"] = "runtime_error";
|
|
1248
|
-
FailureCategory2["PERFORMANCE"] = "performance";
|
|
1249
|
-
return FailureCategory2;
|
|
1250
|
-
})(FailureCategory || {});
|
|
1251
|
-
var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
1252
|
-
FailureSeverity2["CRITICAL"] = "critical";
|
|
1253
|
-
FailureSeverity2["HIGH"] = "high";
|
|
1254
|
-
FailureSeverity2["MEDIUM"] = "medium";
|
|
1255
|
-
FailureSeverity2["LOW"] = "low";
|
|
1256
|
-
return FailureSeverity2;
|
|
1257
|
-
})(FailureSeverity || {});
|
|
1258
1241
|
var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
|
|
1259
1242
|
var DiffLineSchema = import_zod28.z.object({
|
|
1260
1243
|
type: DiffLineTypeSchema,
|
|
@@ -1307,20 +1290,23 @@ var ExecutionTraceSchema = import_zod28.z.object({
|
|
|
1307
1290
|
apiCalls: import_zod28.z.array(ApiCallSchema),
|
|
1308
1291
|
totalDuration: import_zod28.z.number()
|
|
1309
1292
|
});
|
|
1310
|
-
var
|
|
1311
|
-
category: import_zod28.z.enum(
|
|
1312
|
-
|
|
1293
|
+
var RunAnalysisFindingSchema = import_zod28.z.object({
|
|
1294
|
+
category: import_zod28.z.enum([
|
|
1295
|
+
"failure_pattern",
|
|
1296
|
+
"cost_waste",
|
|
1297
|
+
"flakiness",
|
|
1298
|
+
"inefficiency",
|
|
1299
|
+
"positive"
|
|
1300
|
+
]),
|
|
1301
|
+
severity: import_zod28.z.enum(["high", "medium", "low"]),
|
|
1302
|
+
description: import_zod28.z.string(),
|
|
1303
|
+
affectedScenarios: import_zod28.z.array(import_zod28.z.string()),
|
|
1304
|
+
recommendation: import_zod28.z.string().optional()
|
|
1305
|
+
});
|
|
1306
|
+
var RunAnalysisSchema = import_zod28.z.object({
|
|
1307
|
+
generatedAt: import_zod28.z.string(),
|
|
1313
1308
|
summary: import_zod28.z.string(),
|
|
1314
|
-
|
|
1315
|
-
rootCause: import_zod28.z.string(),
|
|
1316
|
-
suggestedFix: import_zod28.z.string(),
|
|
1317
|
-
relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
|
|
1318
|
-
codeSnippet: import_zod28.z.string().optional(),
|
|
1319
|
-
similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1320
|
-
patternId: import_zod28.z.string().optional(),
|
|
1321
|
-
// Extended fields for detailed debugging
|
|
1322
|
-
diff: DiffContentSchema.optional(),
|
|
1323
|
-
executionTrace: ExecutionTraceSchema.optional()
|
|
1309
|
+
findings: import_zod28.z.array(RunAnalysisFindingSchema)
|
|
1324
1310
|
});
|
|
1325
1311
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1326
1312
|
/** Agent ID for this run */
|
|
@@ -1341,8 +1327,6 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1341
1327
|
results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
|
|
1342
1328
|
/** Aggregated metrics across all results */
|
|
1343
1329
|
aggregateMetrics: EvalMetricsSchema,
|
|
1344
|
-
/** Failure analyses */
|
|
1345
|
-
failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
|
|
1346
1330
|
/** Aggregated LLM trace summary */
|
|
1347
1331
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1348
1332
|
/** What triggered this run */
|
|
@@ -1378,7 +1362,13 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1378
1362
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1379
1363
|
systemPrompt: import_zod28.z.string().nullable().optional(),
|
|
1380
1364
|
modelConfig: ModelConfigSchema.optional()
|
|
1381
|
-
}).optional()
|
|
1365
|
+
}).optional(),
|
|
1366
|
+
/** UUID linking all runs in a comparison group */
|
|
1367
|
+
comparisonGroupId: import_zod28.z.string().optional(),
|
|
1368
|
+
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1369
|
+
comparisonLabel: import_zod28.z.string().optional(),
|
|
1370
|
+
/** LLM-generated analysis of the completed run */
|
|
1371
|
+
runAnalysis: RunAnalysisSchema.optional()
|
|
1382
1372
|
});
|
|
1383
1373
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1384
1374
|
id: true,
|
|
@@ -2015,9 +2005,6 @@ function getSystemAssertion(id) {
|
|
|
2015
2005
|
EvaluationResultSchema,
|
|
2016
2006
|
ExecutionTraceSchema,
|
|
2017
2007
|
ExpectedFileSchema,
|
|
2018
|
-
FailureAnalysisSchema,
|
|
2019
|
-
FailureCategory,
|
|
2020
|
-
FailureSeverity,
|
|
2021
2008
|
FileContentCheckSchema,
|
|
2022
2009
|
FileContentTestSchema,
|
|
2023
2010
|
FileModificationSchema,
|
|
@@ -2054,6 +2041,8 @@ function getSystemAssertion(id) {
|
|
|
2054
2041
|
RUN_COMMAND_LABELS,
|
|
2055
2042
|
RuleSchema,
|
|
2056
2043
|
RuleTypeSchema,
|
|
2044
|
+
RunAnalysisFindingSchema,
|
|
2045
|
+
RunAnalysisSchema,
|
|
2057
2046
|
SEMVER_REGEX,
|
|
2058
2047
|
SKILL_FOLDER_NAME_REGEX,
|
|
2059
2048
|
SYSTEM_ASSERTIONS,
|