@wix/evalforge-types 0.44.0 → 0.46.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -50,6 +50,9 @@ __export(index_exports, {
50
50
  ClaudeModelSchema: () => ClaudeModelSchema,
51
51
  CommandExecutionSchema: () => CommandExecutionSchema,
52
52
  CommandExecutionTestSchema: () => CommandExecutionTestSchema,
53
+ ConversationBlockSchema: () => ConversationBlockSchema,
54
+ ConversationMessageRoles: () => ConversationMessageRoles,
55
+ ConversationMessageSchema: () => ConversationMessageSchema,
53
56
  CostAssertionSchema: () => CostAssertionSchema,
54
57
  CostConfigSchema: () => CostConfigSchema,
55
58
  CreateAgentInputSchema: () => CreateAgentInputSchema,
@@ -122,6 +125,7 @@ __export(index_exports, {
122
125
  SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
123
126
  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
124
127
  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
128
+ ScenarioConversationSchema: () => ScenarioConversationSchema,
125
129
  SiteConfigTestSchema: () => SiteConfigTestSchema,
126
130
  SkillFileSchema: () => SkillFileSchema,
127
131
  SkillMetadataSchema: () => SkillMetadataSchema,
@@ -146,12 +150,16 @@ __export(index_exports, {
146
150
  TestSuiteSchema: () => TestSuiteSchema,
147
151
  TestType: () => TestType,
148
152
  TestTypeSchema: () => TestTypeSchema,
153
+ TextBlockSchema: () => TextBlockSchema,
154
+ ThinkingBlockSchema: () => ThinkingBlockSchema,
149
155
  TimeAssertionSchema: () => TimeAssertionSchema,
150
156
  TimeConfigSchema: () => TimeConfigSchema,
151
157
  TokenUsageSchema: () => TokenUsageSchema,
152
158
  ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
153
159
  ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
160
+ ToolResultBlockSchema: () => ToolResultBlockSchema,
154
161
  ToolTestSchema: () => ToolTestSchema,
162
+ ToolUseBlockSchema: () => ToolUseBlockSchema,
155
163
  TriggerMetadataSchema: () => TriggerMetadataSchema,
156
164
  TriggerSchema: () => TriggerSchema,
157
165
  TriggerType: () => TriggerType,
@@ -1879,7 +1887,7 @@ var LLMTraceSchema = import_zod26.z.object({
1879
1887
  });
1880
1888
 
1881
1889
  // src/evaluation/eval-result.ts
1882
- var import_zod29 = require("zod");
1890
+ var import_zod30 = require("zod");
1883
1891
 
1884
1892
  // src/evaluation/eval-run.ts
1885
1893
  var import_zod28 = require("zod");
@@ -2126,6 +2134,53 @@ var EvaluationLogSchema = import_zod28.z.object({
2126
2134
  });
2127
2135
  var LLM_TIMEOUT = 12e4;
2128
2136
 
2137
+ // src/evaluation/conversation.ts
2138
+ var import_zod29 = require("zod");
2139
+ var TextBlockSchema = import_zod29.z.object({
2140
+ type: import_zod29.z.literal("text"),
2141
+ text: import_zod29.z.string()
2142
+ });
2143
+ var ThinkingBlockSchema = import_zod29.z.object({
2144
+ type: import_zod29.z.literal("thinking"),
2145
+ thinking: import_zod29.z.string()
2146
+ });
2147
+ var ToolUseBlockSchema = import_zod29.z.object({
2148
+ type: import_zod29.z.literal("tool_use"),
2149
+ toolName: import_zod29.z.string(),
2150
+ toolId: import_zod29.z.string(),
2151
+ input: import_zod29.z.unknown()
2152
+ });
2153
+ var ToolResultBlockSchema = import_zod29.z.object({
2154
+ type: import_zod29.z.literal("tool_result"),
2155
+ toolUseId: import_zod29.z.string(),
2156
+ content: import_zod29.z.string(),
2157
+ isError: import_zod29.z.boolean().optional()
2158
+ });
2159
+ var ConversationBlockSchema = import_zod29.z.discriminatedUnion("type", [
2160
+ TextBlockSchema,
2161
+ ThinkingBlockSchema,
2162
+ ToolUseBlockSchema,
2163
+ ToolResultBlockSchema
2164
+ ]);
2165
+ var ConversationMessageRoles = [
2166
+ "assistant",
2167
+ "user",
2168
+ "system"
2169
+ ];
2170
+ var ConversationMessageSchema = import_zod29.z.object({
2171
+ role: import_zod29.z.enum(ConversationMessageRoles),
2172
+ content: import_zod29.z.array(ConversationBlockSchema),
2173
+ timestamp: import_zod29.z.string()
2174
+ });
2175
+ var ScenarioConversationSchema = import_zod29.z.object({
2176
+ id: import_zod29.z.string(),
2177
+ projectId: import_zod29.z.string(),
2178
+ evalRunId: import_zod29.z.string(),
2179
+ resultId: import_zod29.z.string(),
2180
+ messages: import_zod29.z.array(ConversationMessageSchema),
2181
+ createdAt: import_zod29.z.string()
2182
+ });
2183
+
2129
2184
  // src/evaluation/eval-result.ts
2130
2185
  var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2131
2186
  AssertionResultStatus2["PASSED"] = "passed";
@@ -2134,95 +2189,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
2134
2189
  AssertionResultStatus2["ERROR"] = "error";
2135
2190
  return AssertionResultStatus2;
2136
2191
  })(AssertionResultStatus || {});
2137
- var AssertionResultSchema = import_zod29.z.object({
2138
- id: import_zod29.z.string(),
2139
- assertionId: import_zod29.z.string(),
2140
- assertionType: import_zod29.z.string(),
2141
- assertionName: import_zod29.z.string(),
2142
- status: import_zod29.z.enum(AssertionResultStatus),
2143
- message: import_zod29.z.string().optional(),
2144
- expected: import_zod29.z.string().optional(),
2145
- actual: import_zod29.z.string().optional(),
2146
- duration: import_zod29.z.number().optional(),
2147
- details: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.unknown()).optional(),
2148
- llmTraceSteps: import_zod29.z.array(LLMTraceStepSchema).optional()
2149
- });
2150
- var EvalRunResultSchema = import_zod29.z.object({
2151
- id: import_zod29.z.string(),
2152
- targetId: import_zod29.z.string(),
2153
- targetName: import_zod29.z.string().optional(),
2192
+ var AssertionResultSchema = import_zod30.z.object({
2193
+ id: import_zod30.z.string(),
2194
+ assertionId: import_zod30.z.string(),
2195
+ assertionType: import_zod30.z.string(),
2196
+ assertionName: import_zod30.z.string(),
2197
+ status: import_zod30.z.enum(AssertionResultStatus),
2198
+ message: import_zod30.z.string().optional(),
2199
+ expected: import_zod30.z.string().optional(),
2200
+ actual: import_zod30.z.string().optional(),
2201
+ duration: import_zod30.z.number().optional(),
2202
+ details: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional(),
2203
+ llmTraceSteps: import_zod30.z.array(LLMTraceStepSchema).optional()
2204
+ });
2205
+ var EvalRunResultSchema = import_zod30.z.object({
2206
+ id: import_zod30.z.string(),
2207
+ targetId: import_zod30.z.string(),
2208
+ targetName: import_zod30.z.string().optional(),
2154
2209
  /** SkillVersion ID used for this evaluation (for version tracking) */
2155
- skillVersionId: import_zod29.z.string().optional(),
2210
+ skillVersionId: import_zod30.z.string().optional(),
2156
2211
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
2157
- skillVersion: import_zod29.z.string().optional(),
2158
- scenarioId: import_zod29.z.string(),
2159
- scenarioName: import_zod29.z.string(),
2212
+ skillVersion: import_zod30.z.string().optional(),
2213
+ scenarioId: import_zod30.z.string(),
2214
+ scenarioName: import_zod30.z.string(),
2160
2215
  modelConfig: ModelConfigSchema.optional(),
2161
- assertionResults: import_zod29.z.array(AssertionResultSchema),
2216
+ assertionResults: import_zod30.z.array(AssertionResultSchema),
2162
2217
  metrics: EvalMetricsSchema.optional(),
2163
- passed: import_zod29.z.number(),
2164
- failed: import_zod29.z.number(),
2165
- passRate: import_zod29.z.number(),
2166
- duration: import_zod29.z.number(),
2167
- outputText: import_zod29.z.string().optional(),
2168
- files: import_zod29.z.array(ExpectedFileSchema).optional(),
2169
- fileDiffs: import_zod29.z.array(DiffContentSchema).optional(),
2218
+ passed: import_zod30.z.number(),
2219
+ failed: import_zod30.z.number(),
2220
+ passRate: import_zod30.z.number(),
2221
+ duration: import_zod30.z.number(),
2222
+ outputText: import_zod30.z.string().optional(),
2223
+ files: import_zod30.z.array(ExpectedFileSchema).optional(),
2224
+ fileDiffs: import_zod30.z.array(DiffContentSchema).optional(),
2170
2225
  /** Full template files after execution with status indicators */
2171
- templateFiles: import_zod29.z.array(TemplateFileSchema).optional(),
2172
- startedAt: import_zod29.z.string().optional(),
2173
- completedAt: import_zod29.z.string().optional(),
2174
- llmTrace: LLMTraceSchema.optional()
2175
- });
2176
- var PromptResultSchema = import_zod29.z.object({
2177
- text: import_zod29.z.string(),
2178
- files: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2179
- finishReason: import_zod29.z.string().optional(),
2180
- reasoning: import_zod29.z.string().optional(),
2181
- reasoningDetails: import_zod29.z.unknown().optional(),
2182
- toolCalls: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2183
- toolResults: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2184
- warnings: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2185
- sources: import_zod29.z.array(import_zod29.z.unknown()).optional(),
2186
- steps: import_zod29.z.array(import_zod29.z.unknown()),
2187
- generationTimeMs: import_zod29.z.number(),
2188
- prompt: import_zod29.z.string(),
2189
- systemPrompt: import_zod29.z.string(),
2190
- usage: import_zod29.z.object({
2191
- totalTokens: import_zod29.z.number().optional(),
2192
- totalMicrocentsSpent: import_zod29.z.number().optional()
2226
+ templateFiles: import_zod30.z.array(TemplateFileSchema).optional(),
2227
+ startedAt: import_zod30.z.string().optional(),
2228
+ completedAt: import_zod30.z.string().optional(),
2229
+ llmTrace: LLMTraceSchema.optional(),
2230
+ /** Full conversation messages (only present in transit; stripped before DB storage) */
2231
+ conversation: import_zod30.z.array(ConversationMessageSchema).optional()
2232
+ });
2233
+ var PromptResultSchema = import_zod30.z.object({
2234
+ text: import_zod30.z.string(),
2235
+ files: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2236
+ finishReason: import_zod30.z.string().optional(),
2237
+ reasoning: import_zod30.z.string().optional(),
2238
+ reasoningDetails: import_zod30.z.unknown().optional(),
2239
+ toolCalls: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2240
+ toolResults: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2241
+ warnings: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2242
+ sources: import_zod30.z.array(import_zod30.z.unknown()).optional(),
2243
+ steps: import_zod30.z.array(import_zod30.z.unknown()),
2244
+ generationTimeMs: import_zod30.z.number(),
2245
+ prompt: import_zod30.z.string(),
2246
+ systemPrompt: import_zod30.z.string(),
2247
+ usage: import_zod30.z.object({
2248
+ totalTokens: import_zod30.z.number().optional(),
2249
+ totalMicrocentsSpent: import_zod30.z.number().optional()
2193
2250
  })
2194
2251
  });
2195
- var EvaluationResultSchema = import_zod29.z.object({
2196
- id: import_zod29.z.string(),
2197
- runId: import_zod29.z.string(),
2198
- timestamp: import_zod29.z.number(),
2252
+ var EvaluationResultSchema = import_zod30.z.object({
2253
+ id: import_zod30.z.string(),
2254
+ runId: import_zod30.z.string(),
2255
+ timestamp: import_zod30.z.number(),
2199
2256
  promptResult: PromptResultSchema,
2200
- testResults: import_zod29.z.array(import_zod29.z.unknown()),
2201
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2202
- feedback: import_zod29.z.string().optional(),
2203
- score: import_zod29.z.number(),
2204
- suiteId: import_zod29.z.string().optional()
2205
- });
2206
- var LeanEvaluationResultSchema = import_zod29.z.object({
2207
- id: import_zod29.z.string(),
2208
- runId: import_zod29.z.string(),
2209
- timestamp: import_zod29.z.number(),
2210
- tags: import_zod29.z.array(import_zod29.z.string()).optional(),
2211
- scenarioId: import_zod29.z.string(),
2212
- scenarioVersion: import_zod29.z.number().optional(),
2213
- targetId: import_zod29.z.string(),
2214
- targetVersion: import_zod29.z.number().optional(),
2215
- suiteId: import_zod29.z.string().optional(),
2216
- score: import_zod29.z.number(),
2217
- time: import_zod29.z.number().optional(),
2218
- microcentsSpent: import_zod29.z.number().optional()
2257
+ testResults: import_zod30.z.array(import_zod30.z.unknown()),
2258
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
2259
+ feedback: import_zod30.z.string().optional(),
2260
+ score: import_zod30.z.number(),
2261
+ suiteId: import_zod30.z.string().optional()
2262
+ });
2263
+ var LeanEvaluationResultSchema = import_zod30.z.object({
2264
+ id: import_zod30.z.string(),
2265
+ runId: import_zod30.z.string(),
2266
+ timestamp: import_zod30.z.number(),
2267
+ tags: import_zod30.z.array(import_zod30.z.string()).optional(),
2268
+ scenarioId: import_zod30.z.string(),
2269
+ scenarioVersion: import_zod30.z.number().optional(),
2270
+ targetId: import_zod30.z.string(),
2271
+ targetVersion: import_zod30.z.number().optional(),
2272
+ suiteId: import_zod30.z.string().optional(),
2273
+ score: import_zod30.z.number(),
2274
+ time: import_zod30.z.number().optional(),
2275
+ microcentsSpent: import_zod30.z.number().optional()
2219
2276
  });
2220
2277
 
2221
2278
  // src/project/project.ts
2222
- var import_zod30 = require("zod");
2279
+ var import_zod31 = require("zod");
2223
2280
  var ProjectSchema = BaseEntitySchema.extend({
2224
- appId: import_zod30.z.string().optional().describe("The ID of the app in Dev Center"),
2225
- appSecret: import_zod30.z.string().optional().describe("The secret of the app in Dev Center")
2281
+ appId: import_zod31.z.string().optional().describe("The ID of the app in Dev Center"),
2282
+ appSecret: import_zod31.z.string().optional().describe("The secret of the app in Dev Center"),
2283
+ useWixAuth: import_zod31.z.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2284
+ useBase44Auth: import_zod31.z.boolean().optional().describe("Enable Base44 auth for evaluations")
2226
2285
  });
2227
2286
  var CreateProjectInputSchema = ProjectSchema.omit({
2228
2287
  id: true,
@@ -2425,6 +2484,9 @@ function getSystemAssertion(id) {
2425
2484
  ClaudeModelSchema,
2426
2485
  CommandExecutionSchema,
2427
2486
  CommandExecutionTestSchema,
2487
+ ConversationBlockSchema,
2488
+ ConversationMessageRoles,
2489
+ ConversationMessageSchema,
2428
2490
  CostAssertionSchema,
2429
2491
  CostConfigSchema,
2430
2492
  CreateAgentInputSchema,
@@ -2497,6 +2559,7 @@ function getSystemAssertion(id) {
2497
2559
  SYSTEM_ASSERTIONS,
2498
2560
  SYSTEM_ASSERTION_IDS,
2499
2561
  ScenarioAssertionLinkSchema,
2562
+ ScenarioConversationSchema,
2500
2563
  SiteConfigTestSchema,
2501
2564
  SkillFileSchema,
2502
2565
  SkillMetadataSchema,
@@ -2521,12 +2584,16 @@ function getSystemAssertion(id) {
2521
2584
  TestSuiteSchema,
2522
2585
  TestType,
2523
2586
  TestTypeSchema,
2587
+ TextBlockSchema,
2588
+ ThinkingBlockSchema,
2524
2589
  TimeAssertionSchema,
2525
2590
  TimeConfigSchema,
2526
2591
  TokenUsageSchema,
2527
2592
  ToolCalledWithParamAssertionSchema,
2528
2593
  ToolCalledWithParamConfigSchema,
2594
+ ToolResultBlockSchema,
2529
2595
  ToolTestSchema,
2596
+ ToolUseBlockSchema,
2530
2597
  TriggerMetadataSchema,
2531
2598
  TriggerSchema,
2532
2599
  TriggerType,