@wix/evalforge-types 0.44.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +145 -78
- package/build/index.js.map +4 -4
- package/build/index.mjs +137 -78
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/evaluation/conversation.d.ts +108 -0
- package/build/types/evaluation/eval-result.d.ts +25 -0
- package/build/types/evaluation/eval-run.d.ts +25 -0
- package/build/types/evaluation/index.d.ts +1 -0
- package/build/types/project/project.d.ts +6 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -50,6 +50,9 @@ __export(index_exports, {
|
|
|
50
50
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
51
51
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
52
52
|
CommandExecutionTestSchema: () => CommandExecutionTestSchema,
|
|
53
|
+
ConversationBlockSchema: () => ConversationBlockSchema,
|
|
54
|
+
ConversationMessageRoles: () => ConversationMessageRoles,
|
|
55
|
+
ConversationMessageSchema: () => ConversationMessageSchema,
|
|
53
56
|
CostAssertionSchema: () => CostAssertionSchema,
|
|
54
57
|
CostConfigSchema: () => CostConfigSchema,
|
|
55
58
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
@@ -122,6 +125,7 @@ __export(index_exports, {
|
|
|
122
125
|
SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
|
|
123
126
|
SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
|
|
124
127
|
ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
|
|
128
|
+
ScenarioConversationSchema: () => ScenarioConversationSchema,
|
|
125
129
|
SiteConfigTestSchema: () => SiteConfigTestSchema,
|
|
126
130
|
SkillFileSchema: () => SkillFileSchema,
|
|
127
131
|
SkillMetadataSchema: () => SkillMetadataSchema,
|
|
@@ -146,12 +150,16 @@ __export(index_exports, {
|
|
|
146
150
|
TestSuiteSchema: () => TestSuiteSchema,
|
|
147
151
|
TestType: () => TestType,
|
|
148
152
|
TestTypeSchema: () => TestTypeSchema,
|
|
153
|
+
TextBlockSchema: () => TextBlockSchema,
|
|
154
|
+
ThinkingBlockSchema: () => ThinkingBlockSchema,
|
|
149
155
|
TimeAssertionSchema: () => TimeAssertionSchema,
|
|
150
156
|
TimeConfigSchema: () => TimeConfigSchema,
|
|
151
157
|
TokenUsageSchema: () => TokenUsageSchema,
|
|
152
158
|
ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
|
|
153
159
|
ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
|
|
160
|
+
ToolResultBlockSchema: () => ToolResultBlockSchema,
|
|
154
161
|
ToolTestSchema: () => ToolTestSchema,
|
|
162
|
+
ToolUseBlockSchema: () => ToolUseBlockSchema,
|
|
155
163
|
TriggerMetadataSchema: () => TriggerMetadataSchema,
|
|
156
164
|
TriggerSchema: () => TriggerSchema,
|
|
157
165
|
TriggerType: () => TriggerType,
|
|
@@ -1879,7 +1887,7 @@ var LLMTraceSchema = import_zod26.z.object({
|
|
|
1879
1887
|
});
|
|
1880
1888
|
|
|
1881
1889
|
// src/evaluation/eval-result.ts
|
|
1882
|
-
var
|
|
1890
|
+
var import_zod30 = require("zod");
|
|
1883
1891
|
|
|
1884
1892
|
// src/evaluation/eval-run.ts
|
|
1885
1893
|
var import_zod28 = require("zod");
|
|
@@ -2126,6 +2134,53 @@ var EvaluationLogSchema = import_zod28.z.object({
|
|
|
2126
2134
|
});
|
|
2127
2135
|
var LLM_TIMEOUT = 12e4;
|
|
2128
2136
|
|
|
2137
|
+
// src/evaluation/conversation.ts
|
|
2138
|
+
var import_zod29 = require("zod");
|
|
2139
|
+
var TextBlockSchema = import_zod29.z.object({
|
|
2140
|
+
type: import_zod29.z.literal("text"),
|
|
2141
|
+
text: import_zod29.z.string()
|
|
2142
|
+
});
|
|
2143
|
+
var ThinkingBlockSchema = import_zod29.z.object({
|
|
2144
|
+
type: import_zod29.z.literal("thinking"),
|
|
2145
|
+
thinking: import_zod29.z.string()
|
|
2146
|
+
});
|
|
2147
|
+
var ToolUseBlockSchema = import_zod29.z.object({
|
|
2148
|
+
type: import_zod29.z.literal("tool_use"),
|
|
2149
|
+
toolName: import_zod29.z.string(),
|
|
2150
|
+
toolId: import_zod29.z.string(),
|
|
2151
|
+
input: import_zod29.z.unknown()
|
|
2152
|
+
});
|
|
2153
|
+
var ToolResultBlockSchema = import_zod29.z.object({
|
|
2154
|
+
type: import_zod29.z.literal("tool_result"),
|
|
2155
|
+
toolUseId: import_zod29.z.string(),
|
|
2156
|
+
content: import_zod29.z.string(),
|
|
2157
|
+
isError: import_zod29.z.boolean().optional()
|
|
2158
|
+
});
|
|
2159
|
+
var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
|
|
2160
|
+
TextBlockSchema,
|
|
2161
|
+
ThinkingBlockSchema,
|
|
2162
|
+
ToolUseBlockSchema,
|
|
2163
|
+
ToolResultBlockSchema
|
|
2164
|
+
]);
|
|
2165
|
+
var ConversationMessageRoles = [
|
|
2166
|
+
"assistant",
|
|
2167
|
+
"user",
|
|
2168
|
+
"system"
|
|
2169
|
+
];
|
|
2170
|
+
var ConversationMessageSchema = import_zod29.z.object({
|
|
2171
|
+
role: import_zod29.z.enum(ConversationMessageRoles),
|
|
2172
|
+
content: import_zod29.z.array(ConversationBlockSchema),
|
|
2173
|
+
timestamp: import_zod29.z.string()
|
|
2174
|
+
});
|
|
2175
|
+
var ScenarioConversationSchema = import_zod29.z.object({
|
|
2176
|
+
id: import_zod29.z.string(),
|
|
2177
|
+
projectId: import_zod29.z.string(),
|
|
2178
|
+
evalRunId: import_zod29.z.string(),
|
|
2179
|
+
resultId: import_zod29.z.string(),
|
|
2180
|
+
messages: import_zod29.z.array(ConversationMessageSchema),
|
|
2181
|
+
createdAt: import_zod29.z.string()
|
|
2182
|
+
});
|
|
2183
|
+
|
|
2129
2184
|
// src/evaluation/eval-result.ts
|
|
2130
2185
|
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
2131
2186
|
AssertionResultStatus2["PASSED"] = "passed";
|
|
@@ -2134,95 +2189,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
2134
2189
|
AssertionResultStatus2["ERROR"] = "error";
|
|
2135
2190
|
return AssertionResultStatus2;
|
|
2136
2191
|
})(AssertionResultStatus || {});
|
|
2137
|
-
var AssertionResultSchema =
|
|
2138
|
-
id:
|
|
2139
|
-
assertionId:
|
|
2140
|
-
assertionType:
|
|
2141
|
-
assertionName:
|
|
2142
|
-
status:
|
|
2143
|
-
message:
|
|
2144
|
-
expected:
|
|
2145
|
-
actual:
|
|
2146
|
-
duration:
|
|
2147
|
-
details:
|
|
2148
|
-
llmTraceSteps:
|
|
2149
|
-
});
|
|
2150
|
-
var EvalRunResultSchema =
|
|
2151
|
-
id:
|
|
2152
|
-
targetId:
|
|
2153
|
-
targetName:
|
|
2192
|
+
var AssertionResultSchema = import_zod30.z.object({
|
|
2193
|
+
id: import_zod30.z.string(),
|
|
2194
|
+
assertionId: import_zod30.z.string(),
|
|
2195
|
+
assertionType: import_zod30.z.string(),
|
|
2196
|
+
assertionName: import_zod30.z.string(),
|
|
2197
|
+
status: import_zod30.z.enum(AssertionResultStatus),
|
|
2198
|
+
message: import_zod30.z.string().optional(),
|
|
2199
|
+
expected: import_zod30.z.string().optional(),
|
|
2200
|
+
actual: import_zod30.z.string().optional(),
|
|
2201
|
+
duration: import_zod30.z.number().optional(),
|
|
2202
|
+
details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
|
|
2203
|
+
llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
|
|
2204
|
+
});
|
|
2205
|
+
var EvalRunResultSchema = import_zod30.z.object({
|
|
2206
|
+
id: import_zod30.z.string(),
|
|
2207
|
+
targetId: import_zod30.z.string(),
|
|
2208
|
+
targetName: import_zod30.z.string().optional(),
|
|
2154
2209
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
2155
|
-
skillVersionId:
|
|
2210
|
+
skillVersionId: import_zod30.z.string().optional(),
|
|
2156
2211
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
2157
|
-
skillVersion:
|
|
2158
|
-
scenarioId:
|
|
2159
|
-
scenarioName:
|
|
2212
|
+
skillVersion: import_zod30.z.string().optional(),
|
|
2213
|
+
scenarioId: import_zod30.z.string(),
|
|
2214
|
+
scenarioName: import_zod30.z.string(),
|
|
2160
2215
|
modelConfig: ModelConfigSchema.optional(),
|
|
2161
|
-
assertionResults:
|
|
2216
|
+
assertionResults: import_zod30.z.array(AssertionResultSchema),
|
|
2162
2217
|
metrics: EvalMetricsSchema.optional(),
|
|
2163
|
-
passed:
|
|
2164
|
-
failed:
|
|
2165
|
-
passRate:
|
|
2166
|
-
duration:
|
|
2167
|
-
outputText:
|
|
2168
|
-
files:
|
|
2169
|
-
fileDiffs:
|
|
2218
|
+
passed: import_zod30.z.number(),
|
|
2219
|
+
failed: import_zod30.z.number(),
|
|
2220
|
+
passRate: import_zod30.z.number(),
|
|
2221
|
+
duration: import_zod30.z.number(),
|
|
2222
|
+
outputText: import_zod30.z.string().optional(),
|
|
2223
|
+
files: import_zod30.z.array(ExpectedFileSchema).optional(),
|
|
2224
|
+
fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
|
|
2170
2225
|
/** Full template files after execution with status indicators */
|
|
2171
|
-
templateFiles:
|
|
2172
|
-
startedAt:
|
|
2173
|
-
completedAt:
|
|
2174
|
-
llmTrace: LLMTraceSchema.optional()
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2226
|
+
templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
|
|
2227
|
+
startedAt: import_zod30.z.string().optional(),
|
|
2228
|
+
completedAt: import_zod30.z.string().optional(),
|
|
2229
|
+
llmTrace: LLMTraceSchema.optional(),
|
|
2230
|
+
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
2231
|
+
conversation: import_zod30.z.array(ConversationMessageSchema).optional()
|
|
2232
|
+
});
|
|
2233
|
+
var PromptResultSchema = import_zod30.z.object({
|
|
2234
|
+
text: import_zod30.z.string(),
|
|
2235
|
+
files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2236
|
+
finishReason: import_zod30.z.string().optional(),
|
|
2237
|
+
reasoning: import_zod30.z.string().optional(),
|
|
2238
|
+
reasoningDetails: import_zod30.z.unknown().optional(),
|
|
2239
|
+
toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2240
|
+
toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2241
|
+
warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2242
|
+
sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
|
|
2243
|
+
steps: import_zod30.z.array(import_zod30.z.unknown()),
|
|
2244
|
+
generationTimeMs: import_zod30.z.number(),
|
|
2245
|
+
prompt: import_zod30.z.string(),
|
|
2246
|
+
systemPrompt: import_zod30.z.string(),
|
|
2247
|
+
usage: import_zod30.z.object({
|
|
2248
|
+
totalTokens: import_zod30.z.number().optional(),
|
|
2249
|
+
totalMicrocentsSpent: import_zod30.z.number().optional()
|
|
2193
2250
|
})
|
|
2194
2251
|
});
|
|
2195
|
-
var EvaluationResultSchema =
|
|
2196
|
-
id:
|
|
2197
|
-
runId:
|
|
2198
|
-
timestamp:
|
|
2252
|
+
var EvaluationResultSchema = import_zod30.z.object({
|
|
2253
|
+
id: import_zod30.z.string(),
|
|
2254
|
+
runId: import_zod30.z.string(),
|
|
2255
|
+
timestamp: import_zod30.z.number(),
|
|
2199
2256
|
promptResult: PromptResultSchema,
|
|
2200
|
-
testResults:
|
|
2201
|
-
tags:
|
|
2202
|
-
feedback:
|
|
2203
|
-
score:
|
|
2204
|
-
suiteId:
|
|
2205
|
-
});
|
|
2206
|
-
var LeanEvaluationResultSchema =
|
|
2207
|
-
id:
|
|
2208
|
-
runId:
|
|
2209
|
-
timestamp:
|
|
2210
|
-
tags:
|
|
2211
|
-
scenarioId:
|
|
2212
|
-
scenarioVersion:
|
|
2213
|
-
targetId:
|
|
2214
|
-
targetVersion:
|
|
2215
|
-
suiteId:
|
|
2216
|
-
score:
|
|
2217
|
-
time:
|
|
2218
|
-
microcentsSpent:
|
|
2257
|
+
testResults: import_zod30.z.array(import_zod30.z.unknown()),
|
|
2258
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
2259
|
+
feedback: import_zod30.z.string().optional(),
|
|
2260
|
+
score: import_zod30.z.number(),
|
|
2261
|
+
suiteId: import_zod30.z.string().optional()
|
|
2262
|
+
});
|
|
2263
|
+
var LeanEvaluationResultSchema = import_zod30.z.object({
|
|
2264
|
+
id: import_zod30.z.string(),
|
|
2265
|
+
runId: import_zod30.z.string(),
|
|
2266
|
+
timestamp: import_zod30.z.number(),
|
|
2267
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
2268
|
+
scenarioId: import_zod30.z.string(),
|
|
2269
|
+
scenarioVersion: import_zod30.z.number().optional(),
|
|
2270
|
+
targetId: import_zod30.z.string(),
|
|
2271
|
+
targetVersion: import_zod30.z.number().optional(),
|
|
2272
|
+
suiteId: import_zod30.z.string().optional(),
|
|
2273
|
+
score: import_zod30.z.number(),
|
|
2274
|
+
time: import_zod30.z.number().optional(),
|
|
2275
|
+
microcentsSpent: import_zod30.z.number().optional()
|
|
2219
2276
|
});
|
|
2220
2277
|
|
|
2221
2278
|
// src/project/project.ts
|
|
2222
|
-
var
|
|
2279
|
+
var import_zod31 = require("zod");
|
|
2223
2280
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
2224
|
-
appId:
|
|
2225
|
-
appSecret:
|
|
2281
|
+
appId: import_zod31.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
2282
|
+
appSecret: import_zod31.z.string().optional().describe("The secret of the app in Dev Center"),
|
|
2283
|
+
useWixAuth: import_zod31.z.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
|
|
2284
|
+
useBase44Auth: import_zod31.z.boolean().optional().describe("Enable Base44 auth for evaluations")
|
|
2226
2285
|
});
|
|
2227
2286
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
2228
2287
|
id: true,
|
|
@@ -2425,6 +2484,9 @@ function getSystemAssertion(id) {
|
|
|
2425
2484
|
ClaudeModelSchema,
|
|
2426
2485
|
CommandExecutionSchema,
|
|
2427
2486
|
CommandExecutionTestSchema,
|
|
2487
|
+
ConversationBlockSchema,
|
|
2488
|
+
ConversationMessageRoles,
|
|
2489
|
+
ConversationMessageSchema,
|
|
2428
2490
|
CostAssertionSchema,
|
|
2429
2491
|
CostConfigSchema,
|
|
2430
2492
|
CreateAgentInputSchema,
|
|
@@ -2497,6 +2559,7 @@ function getSystemAssertion(id) {
|
|
|
2497
2559
|
SYSTEM_ASSERTIONS,
|
|
2498
2560
|
SYSTEM_ASSERTION_IDS,
|
|
2499
2561
|
ScenarioAssertionLinkSchema,
|
|
2562
|
+
ScenarioConversationSchema,
|
|
2500
2563
|
SiteConfigTestSchema,
|
|
2501
2564
|
SkillFileSchema,
|
|
2502
2565
|
SkillMetadataSchema,
|
|
@@ -2521,12 +2584,16 @@ function getSystemAssertion(id) {
|
|
|
2521
2584
|
TestSuiteSchema,
|
|
2522
2585
|
TestType,
|
|
2523
2586
|
TestTypeSchema,
|
|
2587
|
+
TextBlockSchema,
|
|
2588
|
+
ThinkingBlockSchema,
|
|
2524
2589
|
TimeAssertionSchema,
|
|
2525
2590
|
TimeConfigSchema,
|
|
2526
2591
|
TokenUsageSchema,
|
|
2527
2592
|
ToolCalledWithParamAssertionSchema,
|
|
2528
2593
|
ToolCalledWithParamConfigSchema,
|
|
2594
|
+
ToolResultBlockSchema,
|
|
2529
2595
|
ToolTestSchema,
|
|
2596
|
+
ToolUseBlockSchema,
|
|
2530
2597
|
TriggerMetadataSchema,
|
|
2531
2598
|
TriggerSchema,
|
|
2532
2599
|
TriggerType,
|