@wix/evalforge-types 0.45.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -50,6 +50,9 @@ __export(index_exports, {
50
50
  ClaudeModelSchema: () => ClaudeModelSchema,
51
51
  CommandExecutionSchema: () => CommandExecutionSchema,
52
52
  CommandExecutionTestSchema: () => CommandExecutionTestSchema,
53
+ ConversationBlockSchema: () => ConversationBlockSchema,
54
+ ConversationMessageRoles: () => ConversationMessageRoles,
55
+ ConversationMessageSchema: () => ConversationMessageSchema,
53
56
  CostAssertionSchema: () => CostAssertionSchema,
54
57
  CostConfigSchema: () => CostConfigSchema,
55
58
  CreateAgentInputSchema: () => CreateAgentInputSchema,
@@ -122,6 +125,7 @@ __export(index_exports, {
122
125
  SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
123
126
  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
124
127
  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
128
+ ScenarioConversationSchema: () => ScenarioConversationSchema,
125
129
  SiteConfigTestSchema: () => SiteConfigTestSchema,
126
130
  SkillFileSchema: () => SkillFileSchema,
127
131
  SkillMetadataSchema: () => SkillMetadataSchema,
@@ -146,12 +150,16 @@ __export(index_exports, {
146
150
  TestSuiteSchema: () => TestSuiteSchema,
147
151
  TestType: () => TestType,
148
152
  TestTypeSchema: () => TestTypeSchema,
153
+ TextBlockSchema: () => TextBlockSchema,
154
+ ThinkingBlockSchema: () => ThinkingBlockSchema,
149
155
  TimeAssertionSchema: () => TimeAssertionSchema,
150
156
  TimeConfigSchema: () => TimeConfigSchema,
151
157
  TokenUsageSchema: () => TokenUsageSchema,
152
158
  ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
153
159
  ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
160
+ ToolResultBlockSchema: () => ToolResultBlockSchema,
154
161
  ToolTestSchema: () => ToolTestSchema,
162
+ ToolUseBlockSchema: () => ToolUseBlockSchema,
155
163
  TriggerMetadataSchema: () => TriggerMetadataSchema,
156
164
  TriggerSchema: () => TriggerSchema,
157
165
  TriggerType: () => TriggerType,
@@ -1879,7 +1887,7 @@ var LLMTraceSchema = import_zod26.z.object({
1879
1887
  });
1880
1888
 
1881
1889
  // src/evaluation/eval-result.ts
1882
- var import_zod29 = require("zod");
1890
+ var import_zod30 = require("zod");
1883
1891
 
1884
1892
  // src/evaluation/eval-run.ts
1885
1893
  var import_zod28 = require("zod");
@@ -2126,6 +2134,53 @@ var EvaluationLogSchema = import_zod28.z.object({
2126
2134
  });
2127
2135
  var LLM_TIMEOUT = 12e4;
2128
2136
 
2137
+ // src/evaluation/conversation.ts
2138
+ var import_zod29 = require("zod");
2139
+ var TextBlockSchema = import_zod29.z.object({
2140
+ type: import_zod29.z.literal("text"),
2141
+ text: import_zod29.z.string()
2142
+ });
2143
+ var ThinkingBlockSchema = import_zod29.z.object({
2144
+ type: import_zod29.z.literal("thinking"),
2145
+ thinking: import_zod29.z.string()
2146
+ });
2147
+ var ToolUseBlockSchema = import_zod29.z.object({
2148
+ type: import_zod29.z.literal("tool_use"),
2149
+ toolName: import_zod29.z.string(),
2150
+ toolId: import_zod29.z.string(),
2151
+ input: import_zod29.z.unknown()
2152
+ });
2153
+ var ToolResultBlockSchema = import_zod29.z.object({
2154
+ type: import_zod29.z.literal("tool_result"),
2155
+ toolUseId: import_zod29.z.string(),
2156
+ content: import_zod29.z.string(),
2157
+ isError: import_zod29.z.boolean().optional()
2158
+ });
2159
+ var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
2160
+ TextBlockSchema,
2161
+ ThinkingBlockSchema,
2162
+ ToolUseBlockSchema,
2163
+ ToolResultBlockSchema
2164
+ ]);
2165
+ var ConversationMessageRoles = [
2166
+ "assistant",
2167
+ "user",
2168
+ "system"
2169
+ ];
2170
+ var ConversationMessageSchema = import_zod29.z.object({
2171
+ role: import_zod29.z.enum(ConversationMessageRoles),
2172
+ content: import_zod29.z.array(ConversationBlockSchema),
2173
+ timestamp: import_zod29.z.string()
2174
+ });
2175
+ var ScenarioConversationSchema = import_zod29.z.object({
2176
+ id: import_zod29.z.string(),
2177
+ projectId: import_zod29.z.string(),
2178
+ evalRunId: import_zod29.z.string(),
2179
+ resultId: import_zod29.z.string(),
2180
+ messages: import_zod29.z.array(ConversationMessageSchema),
2181
+ createdAt: import_zod29.z.string()
2182
+ });
2183
+
2129
2184
  // src/evaluation/eval-result.ts
2130
2185
  var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2131
2186
  AssertionResultStatus2["PASSED"] = "passed";
@@ -2134,97 +2189,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2134
2189
  AssertionResultStatus2["ERROR"] = "error";
2135
2190
  return AssertionResultStatus2;
2136
2191
  })(AssertionResultStatus || {});
2137
- var AssertionResultSchema = import_zod29.z.object({
2138
- id: import_zod29.z.string(),
2139
- assertionId: import_zod29.z.string(),
2140
- assertionType: import_zod29.z.string(),
2141
- assertionName: import_zod29.z.string(),
2142
- status: import_zod29.z.enum(AssertionResultStatus),
2143
- message: import_zod29.z.string().optional(),
2144
- expected: import_zod29.z.string().optional(),
2145
- actual: import_zod29.z.string().optional(),
2146
- duration: import_zod29.z.number().optional(),
2147
- details: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.unknown()).optional(),
2148
- llmTraceSteps: import_zod29.z.array(LLMTraceStepSchema).optional()
2149
- });
2150
- var EvalRunResultSchema = import_zod29.z.object({
2151
- id: import_zod29.z.string(),
2152
- targetId: import_zod29.z.string(),
2153
- targetName: import_zod29.z.string().optional(),
2192
+ var AssertionResultSchema = import_zod30.z.object({
2193
+ id: import_zod30.z.string(),
2194
+ assertionId: import_zod30.z.string(),
2195
+ assertionType: import_zod30.z.string(),
2196
+ assertionName: import_zod30.z.string(),
2197
+ status: import_zod30.z.enum(AssertionResultStatus),
2198
+ message: import_zod30.z.string().optional(),
2199
+ expected: import_zod30.z.string().optional(),
2200
+ actual: import_zod30.z.string().optional(),
2201
+ duration: import_zod30.z.number().optional(),
2202
+ details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
2203
+ llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
2204
+ });
2205
+ var EvalRunResultSchema = import_zod30.z.object({
2206
+ id: import_zod30.z.string(),
2207
+ targetId: import_zod30.z.string(),
2208
+ targetName: import_zod30.z.string().optional(),
2154
2209
  /** SkillVersion ID used for this evaluation (for version tracking) */
2155
- skillVersionId: import_zod29.z.string().optional(),
2210
+ skillVersionId: import_zod30.z.string().optional(),
2156
2211
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
2157
- skillVersion: import_zod29.z.string().optional(),
2158
- scenarioId: import_zod29.z.string(),
2159
- scenarioName: import_zod29.z.string(),
2212
+ skillVersion: import_zod30.z.string().optional(),
2213
+ scenarioId: import_zod30.z.string(),
2214
+ scenarioName: import_zod30.z.string(),
2160
2215
  modelConfig: ModelConfigSchema.optional(),
2161
- assertionResults: import_zod29.z.array(AssertionResultSchema),
2216
+ assertionResults: import_zod30.z.array(AssertionResultSchema),
2162
2217
  metrics: EvalMetricsSchema.optional(),
2163
- passed: import_zod29.z.number(),
2164
- failed: import_zod29.z.number(),
2165
- passRate: import_zod29.z.number(),
2166
- duration: import_zod29.z.number(),
2167
- outputText: import_zod29.z.string().optional(),
2168
- files: import_zod29.z.array(ExpectedFileSchema).optional(),
2169
- fileDiffs: import_zod29.z.array(DiffContentSchema).optional(),
2218
+ passed: import_zod30.z.number(),
2219
+ failed: import_zod30.z.number(),
2220
+ passRate: import_zod30.z.number(),
2221
+ duration: import_zod30.z.number(),
2222
+ outputText: import_zod30.z.string().optional(),
2223
+ files: import_zod30.z.array(ExpectedFileSchema).optional(),
2224
+ fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
2170
2225
  /** Full template files after execution with status indicators */
2171
- templateFiles: import_zod29.z.array(TemplateFileSchema).optional(),
2172
- startedAt: import_zod29.z.string().optional(),
2173
- completedAt: import_zod29.z.string().optional(),
2174
- llmTrace: LLMTraceSchema.optional()
2175
- });
2176
- var PromptResultSchema = import_zod29.z.object({
2177
- text: import_zod29.z.string(),
2178
- files: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2179
- finishReason: import_zod29.z.string().optional(),
2180
- reasoning: import_zod29.z.string().optional(),
2181
- reasoningDetails: import_zod29.z.unknown().optional(),
2182
- toolCalls: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2183
- toolResults: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2184
- warnings: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2185
- sources: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2186
- steps: import_zod29.z.array(import_zod29.z.unknown()),
2187
- generationTimeMs: import_zod29.z.number(),
2188
- prompt: import_zod29.z.string(),
2189
- systemPrompt: import_zod29.z.string(),
2190
- usage: import_zod29.z.object({
2191
- totalTokens: import_zod29.z.number().optional(),
2192
- totalMicrocentsSpent: import_zod29.z.number().optional()
2226
+ templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
2227
+ startedAt: import_zod30.z.string().optional(),
2228
+ completedAt: import_zod30.z.string().optional(),
2229
+ llmTrace: LLMTraceSchema.optional(),
2230
+ /** Full conversation messages (only present in transit; stripped before DB storage) */
2231
+ conversation: import_zod30.z.array(ConversationMessageSchema).optional()
2232
+ });
2233
+ var PromptResultSchema = import_zod30.z.object({
2234
+ text: import_zod30.z.string(),
2235
+ files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2236
+ finishReason: import_zod30.z.string().optional(),
2237
+ reasoning: import_zod30.z.string().optional(),
2238
+ reasoningDetails: import_zod30.z.unknown().optional(),
2239
+ toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2240
+ toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2241
+ warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2242
+ sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2243
+ steps: import_zod30.z.array(import_zod30.z.unknown()),
2244
+ generationTimeMs: import_zod30.z.number(),
2245
+ prompt: import_zod30.z.string(),
2246
+ systemPrompt: import_zod30.z.string(),
2247
+ usage: import_zod30.z.object({
2248
+ totalTokens: import_zod30.z.number().optional(),
2249
+ totalMicrocentsSpent: import_zod30.z.number().optional()
2193
2250
  })
2194
2251
  });
2195
- var EvaluationResultSchema = import_zod29.z.object({
2196
- id: import_zod29.z.string(),
2197
- runId: import_zod29.z.string(),
2198
- timestamp: import_zod29.z.number(),
2252
+ var EvaluationResultSchema = import_zod30.z.object({
2253
+ id: import_zod30.z.string(),
2254
+ runId: import_zod30.z.string(),
2255
+ timestamp: import_zod30.z.number(),
2199
2256
  promptResult: PromptResultSchema,
2200
- testResults: import_zod29.z.array(import_zod29.z.unknown()),
2201
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2202
- feedback: import_zod29.z.string().optional(),
2203
- score: import_zod29.z.number(),
2204
- suiteId: import_zod29.z.string().optional()
2205
- });
2206
- var LeanEvaluationResultSchema = import_zod29.z.object({
2207
- id: import_zod29.z.string(),
2208
- runId: import_zod29.z.string(),
2209
- timestamp: import_zod29.z.number(),
2210
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2211
- scenarioId: import_zod29.z.string(),
2212
- scenarioVersion: import_zod29.z.number().optional(),
2213
- targetId: import_zod29.z.string(),
2214
- targetVersion: import_zod29.z.number().optional(),
2215
- suiteId: import_zod29.z.string().optional(),
2216
- score: import_zod29.z.number(),
2217
- time: import_zod29.z.number().optional(),
2218
- microcentsSpent: import_zod29.z.number().optional()
2257
+ testResults: import_zod30.z.array(import_zod30.z.unknown()),
2258
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
2259
+ feedback: import_zod30.z.string().optional(),
2260
+ score: import_zod30.z.number(),
2261
+ suiteId: import_zod30.z.string().optional()
2262
+ });
2263
+ var LeanEvaluationResultSchema = import_zod30.z.object({
2264
+ id: import_zod30.z.string(),
2265
+ runId: import_zod30.z.string(),
2266
+ timestamp: import_zod30.z.number(),
2267
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
2268
+ scenarioId: import_zod30.z.string(),
2269
+ scenarioVersion: import_zod30.z.number().optional(),
2270
+ targetId: import_zod30.z.string(),
2271
+ targetVersion: import_zod30.z.number().optional(),
2272
+ suiteId: import_zod30.z.string().optional(),
2273
+ score: import_zod30.z.number(),
2274
+ time: import_zod30.z.number().optional(),
2275
+ microcentsSpent: import_zod30.z.number().optional()
2219
2276
  });
2220
2277
 
2221
2278
  // src/project/project.ts
2222
- var import_zod30 = require("zod");
2279
+ var import_zod31 = require("zod");
2223
2280
  var ProjectSchema = BaseEntitySchema.extend({
2224
- appId: import_zod30.z.string().optional().describe("The ID of the app in Dev Center"),
2225
- appSecret: import_zod30.z.string().optional().describe("The secret of the app in Dev Center"),
2226
- useWixAuth: import_zod30.z.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2227
- useBase44Auth: import_zod30.z.boolean().optional().describe("Enable Base44 auth for evaluations")
2281
+ appId: import_zod31.z.string().optional().describe("The ID of the app in Dev Center"),
2282
+ appSecret: import_zod31.z.string().optional().describe("The secret of the app in Dev Center"),
2283
+ useWixAuth: import_zod31.z.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2284
+ useBase44Auth: import_zod31.z.boolean().optional().describe("Enable Base44 auth for evaluations")
2228
2285
  });
2229
2286
  var CreateProjectInputSchema = ProjectSchema.omit({
2230
2287
  id: true,
@@ -2427,6 +2484,9 @@ function getSystemAssertion(id) {
2427
2484
  ClaudeModelSchema,
2428
2485
  CommandExecutionSchema,
2429
2486
  CommandExecutionTestSchema,
2487
+ ConversationBlockSchema,
2488
+ ConversationMessageRoles,
2489
+ ConversationMessageSchema,
2430
2490
  CostAssertionSchema,
2431
2491
  CostConfigSchema,
2432
2492
  CreateAgentInputSchema,
@@ -2499,6 +2559,7 @@ function getSystemAssertion(id) {
2499
2559
  SYSTEM_ASSERTIONS,
2500
2560
  SYSTEM_ASSERTION_IDS,
2501
2561
  ScenarioAssertionLinkSchema,
2562
+ ScenarioConversationSchema,
2502
2563
  SiteConfigTestSchema,
2503
2564
  SkillFileSchema,
2504
2565
  SkillMetadataSchema,
@@ -2523,12 +2584,16 @@ function getSystemAssertion(id) {
2523
2584
  TestSuiteSchema,
2524
2585
  TestType,
2525
2586
  TestTypeSchema,
2587
+ TextBlockSchema,
2588
+ ThinkingBlockSchema,
2526
2589
  TimeAssertionSchema,
2527
2590
  TimeConfigSchema,
2528
2591
  TokenUsageSchema,
2529
2592
  ToolCalledWithParamAssertionSchema,
2530
2593
  ToolCalledWithParamConfigSchema,
2594
+ ToolResultBlockSchema,
2531
2595
  ToolTestSchema,
2596
+ ToolUseBlockSchema,
2532
2597
  TriggerMetadataSchema,
2533
2598
  TriggerSchema,
2534
2599
  TriggerType,