@wix/evalforge-types 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +145 -80
- package/build/index.js.map +4 -4
- package/build/index.mjs +137 -80
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/evaluation/conversation.d.ts +108 -0
- package/build/types/evaluation/eval-result.d.ts +25 -0
- package/build/types/evaluation/eval-run.d.ts +25 -0
- package/build/types/evaluation/index.d.ts +1 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -50,6 +50,9 @@ __export(index_exports, {
|
|
|
50
50
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
51
51
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
52
52
|
CommandExecutionTestSchema: () => CommandExecutionTestSchema,
|
|
53
|
+
ConversationBlockSchema: () => ConversationBlockSchema,
|
|
54
|
+
ConversationMessageRoles: () => ConversationMessageRoles,
|
|
55
|
+
ConversationMessageSchema: () => ConversationMessageSchema,
|
|
53
56
|
CostAssertionSchema: () => CostAssertionSchema,
|
|
54
57
|
CostConfigSchema: () => CostConfigSchema,
|
|
55
58
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
@@ -122,6 +125,7 @@ __export(index_exports, {
|
|
|
122
125
|
SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
|
|
123
126
|
SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
|
|
124
127
|
ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
|
|
128
|
+
ScenarioConversationSchema: () => ScenarioConversationSchema,
|
|
125
129
|
SiteConfigTestSchema: () => SiteConfigTestSchema,
|
|
126
130
|
SkillFileSchema: () => SkillFileSchema,
|
|
127
131
|
SkillMetadataSchema: () => SkillMetadataSchema,
|
|
@@ -146,12 +150,16 @@ __export(index_exports, {
|
|
|
146
150
|
TestSuiteSchema: () => TestSuiteSchema,
|
|
147
151
|
TestType: () => TestType,
|
|
148
152
|
TestTypeSchema: () => TestTypeSchema,
|
|
153
|
+
TextBlockSchema: () => TextBlockSchema,
|
|
154
|
+
ThinkingBlockSchema: () => ThinkingBlockSchema,
|
|
149
155
|
TimeAssertionSchema: () => TimeAssertionSchema,
|
|
150
156
|
TimeConfigSchema: () => TimeConfigSchema,
|
|
151
157
|
TokenUsageSchema: () => TokenUsageSchema,
|
|
152
158
|
ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
|
|
153
159
|
ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
|
|
160
|
+
ToolResultBlockSchema: () => ToolResultBlockSchema,
|
|
154
161
|
ToolTestSchema: () => ToolTestSchema,
|
|
162
|
+
ToolUseBlockSchema: () => ToolUseBlockSchema,
|
|
155
163
|
TriggerMetadataSchema: () => TriggerMetadataSchema,
|
|
156
164
|
TriggerSchema: () => TriggerSchema,
|
|
157
165
|
TriggerType: () => TriggerType,
|
|
@@ -1879,7 +1887,7 @@ var LLMTraceSchema = import_zod26.z.object({
|
|
|
1879
1887
|
});
|
|
1880
1888
|
|
|
1881
1889
|
// src/evaluation/eval-result.ts
|
|
1882
|
-
var
|
|
1890
|
+
var import_zod30 = require("zod");
|
|
1883
1891
|
|
|
1884
1892
|
// src/evaluation/eval-run.ts
|
|
1885
1893
|
var import_zod28 = require("zod");
|
|
@@ -2126,6 +2134,53 @@ var EvaluationLogSchema = import_zod28.z.object({
|
|
|
2126
2134
|
});
|
|
2127
2135
|
var LLM_TIMEOUT = 12e4;
|
|
2128
2136
|
|
|
2137
|
+
// src/evaluation/conversation.ts
|
|
2138
|
+
var import_zod29 = require("zod");
|
|
2139
|
+
var TextBlockSchema = import_zod29.z.object({
|
|
2140
|
+
type: import_zod29.z.literal("text"),
|
|
2141
|
+
text: import_zod29.z.string()
|
|
2142
|
+
});
|
|
2143
|
+
var ThinkingBlockSchema = import_zod29.z.object({
|
|
2144
|
+
type: import_zod29.z.literal("thinking"),
|
|
2145
|
+
thinking: import_zod29.z.string()
|
|
2146
|
+
});
|
|
2147
|
+
var ToolUseBlockSchema = import_zod29.z.object({
|
|
2148
|
+
type: import_zod29.z.literal("tool_use"),
|
|
2149
|
+
toolName: import_zod29.z.string(),
|
|
2150
|
+
toolId: import_zod29.z.string(),
|
|
2151
|
+
input: import_zod29.z.unknown()
|
|
2152
|
+
});
|
|
2153
|
+
var ToolResultBlockSchema = import_zod29.z.object({
|
|
2154
|
+
type: import_zod29.z.literal("tool_result"),
|
|
2155
|
+
toolUseId: import_zod29.z.string(),
|
|
2156
|
+
content: import_zod29.z.string(),
|
|
2157
|
+
isError: import_zod29.z.boolean().optional()
|
|
2158
|
+
});
|
|
2159
|
+
var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
|
|
2160
|
+
TextBlockSchema,
|
|
2161
|
+
ThinkingBlockSchema,
|
|
2162
|
+
ToolUseBlockSchema,
|
|
2163
|
+
ToolResultBlockSchema
|
|
2164
|
+
]);
|
|
2165
|
+
var ConversationMessageRoles = [
|
|
2166
|
+
"assistant",
|
|
2167
|
+
"user",
|
|
2168
|
+
"system"
|
|
2169
|
+
];
|
|
2170
|
+
var ConversationMessageSchema = import_zod29.z.object({
|
|
2171
|
+
role: import_zod29.z.enum(ConversationMessageRoles),
|
|
2172
|
+
content: import_zod29.z.array(ConversationBlockSchema),
|
|
2173
|
+
timestamp: import_zod29.z.string()
|
|
2174
|
+
});
|
|
2175
|
+
var ScenarioConversationSchema = import_zod29.z.object({
|
|
2176
|
+
id: import_zod29.z.string(),
|
|
2177
|
+
projectId: import_zod29.z.string(),
|
|
2178
|
+
evalRunId: import_zod29.z.string(),
|
|
2179
|
+
resultId: import_zod29.z.string(),
|
|
2180
|
+
messages: import_zod29.z.array(ConversationMessageSchema),
|
|
2181
|
+
createdAt: import_zod29.z.string()
|
|
2182
|
+
});
|
|
2183
|
+
|
|
2129
2184
|
// src/evaluation/eval-result.ts
|
|
2130
2185
|
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
2131
2186
|
AssertionResultStatus2["PASSED"] = "passed";
|
|
@@ -2134,97 +2189,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
2134
2189
|
AssertionResultStatus2["ERROR"] = "error";
|
|
2135
2190
|
return AssertionResultStatus2;
|
|
2136
2191
|
})(AssertionResultStatus || {});
|
|
2137
|
-
var AssertionResultSchema =
|
|
2138
|
-
id:
|
|
2139
|
-
assertionId:
|
|
2140
|
-
assertionType:
|
|
2141
|
-
assertionName:
|
|
2142
|
-
status:
|
|
2143
|
-
message:
|
|
2144
|
-
expected:
|
|
2145
|
-
actual:
|
|
2146
|
-
duration:
|
|
2147
|
-
details:
|
|
2148
|
-
llmTraceSteps:
|
|
2149
|
-
});
|
|
2150
|
-
var EvalRunResultSchema =
|
|
2151
|
-
id:
|
|
2152
|
-
targetId:
|
|
2153
|
-
targetName:
|
|
2192
|
+
var AssertionResultSchema = import_zod30.z.object({
|
|
2193
|
+
id: import_zod30.z.string(),
|
|
2194
|
+
assertionId: import_zod30.z.string(),
|
|
2195
|
+
assertionType: import_zod30.z.string(),
|
|
2196
|
+
assertionName: import_zod30.z.string(),
|
|
2197
|
+
status: import_zod30.z.enum(AssertionResultStatus),
|
|
2198
|
+
message: import_zod30.z.string().optional(),
|
|
2199
|
+
expected: import_zod30.z.string().optional(),
|
|
2200
|
+
actual: import_zod30.z.string().optional(),
|
|
2201
|
+
duration: import_zod30.z.number().optional(),
|
|
2202
|
+
details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
|
|
2203
|
+
llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
|
|
2204
|
+
});
|
|
2205
|
+
var EvalRunResultSchema = import_zod30.z.object({
|
|
2206
|
+
id: import_zod30.z.string(),
|
|
2207
|
+
targetId: import_zod30.z.string(),
|
|
2208
|
+
targetName: import_zod30.z.string().optional(),
|
|
2154
2209
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
2155
|
-
skillVersionId:
|
|
2210
|
+
skillVersionId: import_zod30.z.string().optional(),
|
|
2156
2211
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
2157
|
-
skillVersion:
|
|
2158
|
-
scenarioId:
|
|
2159
|
-
scenarioName:
|
|
2212
|
+
skillVersion: import_zod30.z.string().optional(),
|
|
2213
|
+
scenarioId: import_zod30.z.string(),
|
|
2214
|
+
scenarioName: import_zod30.z.string(),
|
|
2160
2215
|
modelConfig: ModelConfigSchema.optional(),
|
|
2161
|
-
assertionResults:
|
|
2216
|
+
assertionResults: import_zod30.z.array(AssertionResultSchema),
|
|
2162
2217
|
metrics: EvalMetricsSchema.optional(),
|
|
2163
|
-
passed:
|
|
2164
|
-
failed:
|
|
2165
|
-
passRate:
|
|
2166
|
-
duration:
|
|
2167
|
-
outputText:
|
|
2168
|
-
files:
|
|
2169
|
-
fileDiffs:
|
|
2218
|
+
passed: import_zod30.z.number(),
|
|
2219
|
+
failed: import_zod30.z.number(),
|
|
2220
|
+
passRate: import_zod30.z.number(),
|
|
2221
|
+
duration: import_zod30.z.number(),
|
|
2222
|
+
outputText: import_zod30.z.string().optional(),
|
|
2223
|
+
files: import_zod30.z.array(ExpectedFileSchema).optional(),
|
|
2224
|
+
fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
|
|
2170
2225
|
/** Full template files after execution with status indicators */
|
|
2171
|
-
templateFiles:
|
|
2172
|
-
startedAt:
|
|
2173
|
-
completedAt:
|
|
2174
|
-
llmTrace: LLMTraceSchema.optional()
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2226
|
+
templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
|
|
2227
|
+
startedAt: import_zod30.z.string().optional(),
|
|
2228
|
+
completedAt: import_zod30.z.string().optional(),
|
|
2229
|
+
llmTrace: LLMTraceSchema.optional(),
|
|
2230
|
+
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
2231
|
+
conversation: import_zod30.z.array(ConversationMessageSchema).optional()
|
|
2232
|
+
});
|
|
2233
|
+
var PromptResultSchema = import_zod30.z.object({
|
|
2234
|
+
text: import_zod30.z.string(),
|
|
2235
|
+
files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2236
|
+
finishReason: import_zod30.z.string().optional(),
|
|
2237
|
+
reasoning: import_zod30.z.string().optional(),
|
|
2238
|
+
reasoningDetails: import_zod30.z.unknown().optional(),
|
|
2239
|
+
toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2240
|
+
toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2241
|
+
warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2242
|
+
sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2243
|
+
steps: import_zod30.z.array(import_zod30.z.unknown()),
|
|
2244
|
+
generationTimeMs: import_zod30.z.number(),
|
|
2245
|
+
prompt: import_zod30.z.string(),
|
|
2246
|
+
systemPrompt: import_zod30.z.string(),
|
|
2247
|
+
usage: import_zod30.z.object({
|
|
2248
|
+
totalTokens: import_zod30.z.number().optional(),
|
|
2249
|
+
totalMicrocentsSpent: import_zod30.z.number().optional()
|
|
2193
2250
|
})
|
|
2194
2251
|
});
|
|
2195
|
-
var EvaluationResultSchema =
|
|
2196
|
-
id:
|
|
2197
|
-
runId:
|
|
2198
|
-
timestamp:
|
|
2252
|
+
var EvaluationResultSchema = import_zod30.z.object({
|
|
2253
|
+
id: import_zod30.z.string(),
|
|
2254
|
+
runId: import_zod30.z.string(),
|
|
2255
|
+
timestamp: import_zod30.z.number(),
|
|
2199
2256
|
promptResult: PromptResultSchema,
|
|
2200
|
-
testResults:
|
|
2201
|
-
tags:
|
|
2202
|
-
feedback:
|
|
2203
|
-
score:
|
|
2204
|
-
suiteId:
|
|
2205
|
-
});
|
|
2206
|
-
var LeanEvaluationResultSchema =
|
|
2207
|
-
id:
|
|
2208
|
-
runId:
|
|
2209
|
-
timestamp:
|
|
2210
|
-
tags:
|
|
2211
|
-
scenarioId:
|
|
2212
|
-
scenarioVersion:
|
|
2213
|
-
targetId:
|
|
2214
|
-
targetVersion:
|
|
2215
|
-
suiteId:
|
|
2216
|
-
score:
|
|
2217
|
-
time:
|
|
2218
|
-
microcentsSpent:
|
|
2257
|
+
testResults: import_zod30.z.array(import_zod30.z.unknown()),
|
|
2258
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
2259
|
+
feedback: import_zod30.z.string().optional(),
|
|
2260
|
+
score: import_zod30.z.number(),
|
|
2261
|
+
suiteId: import_zod30.z.string().optional()
|
|
2262
|
+
});
|
|
2263
|
+
var LeanEvaluationResultSchema = import_zod30.z.object({
|
|
2264
|
+
id: import_zod30.z.string(),
|
|
2265
|
+
runId: import_zod30.z.string(),
|
|
2266
|
+
timestamp: import_zod30.z.number(),
|
|
2267
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
2268
|
+
scenarioId: import_zod30.z.string(),
|
|
2269
|
+
scenarioVersion: import_zod30.z.number().optional(),
|
|
2270
|
+
targetId: import_zod30.z.string(),
|
|
2271
|
+
targetVersion: import_zod30.z.number().optional(),
|
|
2272
|
+
suiteId: import_zod30.z.string().optional(),
|
|
2273
|
+
score: import_zod30.z.number(),
|
|
2274
|
+
time: import_zod30.z.number().optional(),
|
|
2275
|
+
microcentsSpent: import_zod30.z.number().optional()
|
|
2219
2276
|
});
|
|
2220
2277
|
|
|
2221
2278
|
// src/project/project.ts
|
|
2222
|
-
var
|
|
2279
|
+
var import_zod31 = require("zod");
|
|
2223
2280
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
2224
|
-
appId:
|
|
2225
|
-
appSecret:
|
|
2226
|
-
useWixAuth:
|
|
2227
|
-
useBase44Auth:
|
|
2281
|
+
appId: import_zod31.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
2282
|
+
appSecret: import_zod31.z.string().optional().describe("The secret of the app in Dev Center"),
|
|
2283
|
+
useWixAuth: import_zod31.z.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
|
|
2284
|
+
useBase44Auth: import_zod31.z.boolean().optional().describe("Enable Base44 auth for evaluations")
|
|
2228
2285
|
});
|
|
2229
2286
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
2230
2287
|
id: true,
|
|
@@ -2427,6 +2484,9 @@ function getSystemAssertion(id) {
|
|
|
2427
2484
|
ClaudeModelSchema,
|
|
2428
2485
|
CommandExecutionSchema,
|
|
2429
2486
|
CommandExecutionTestSchema,
|
|
2487
|
+
ConversationBlockSchema,
|
|
2488
|
+
ConversationMessageRoles,
|
|
2489
|
+
ConversationMessageSchema,
|
|
2430
2490
|
CostAssertionSchema,
|
|
2431
2491
|
CostConfigSchema,
|
|
2432
2492
|
CreateAgentInputSchema,
|
|
@@ -2499,6 +2559,7 @@ function getSystemAssertion(id) {
|
|
|
2499
2559
|
SYSTEM_ASSERTIONS,
|
|
2500
2560
|
SYSTEM_ASSERTION_IDS,
|
|
2501
2561
|
ScenarioAssertionLinkSchema,
|
|
2562
|
+
ScenarioConversationSchema,
|
|
2502
2563
|
SiteConfigTestSchema,
|
|
2503
2564
|
SkillFileSchema,
|
|
2504
2565
|
SkillMetadataSchema,
|
|
@@ -2523,12 +2584,16 @@ function getSystemAssertion(id) {
|
|
|
2523
2584
|
TestSuiteSchema,
|
|
2524
2585
|
TestType,
|
|
2525
2586
|
TestTypeSchema,
|
|
2587
|
+
TextBlockSchema,
|
|
2588
|
+
ThinkingBlockSchema,
|
|
2526
2589
|
TimeAssertionSchema,
|
|
2527
2590
|
TimeConfigSchema,
|
|
2528
2591
|
TokenUsageSchema,
|
|
2529
2592
|
ToolCalledWithParamAssertionSchema,
|
|
2530
2593
|
ToolCalledWithParamConfigSchema,
|
|
2594
|
+
ToolResultBlockSchema,
|
|
2531
2595
|
ToolTestSchema,
|
|
2596
|
+
ToolUseBlockSchema,
|
|
2532
2597
|
TriggerMetadataSchema,
|
|
2533
2598
|
TriggerSchema,
|
|
2534
2599
|
TriggerType,
|