@wix/evalforge-types 0.45.0 → 0.47.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -929,17 +929,17 @@ var WebhookIdentityType;
929
929
  })(WebhookIdentityType || (WebhookIdentityType = {}));
930
930
 
931
931
  // src/common/models.ts
932
- var AVAILABLE_MODEL_IDS = Object.values(
932
+ var AVAILABLE_CLAUDE_MODEL_IDS = Object.values(
933
933
  ClaudeModel
934
934
  ).filter(
935
935
  (v) => typeof v === "string" && v !== ClaudeModel.UNKNOWN_CLAUDE_MODEL
936
936
  );
937
937
  var PREFERRED_JUDGE_MODEL = "CLAUDE_4_5_HAIKU_1_0";
938
- var DEFAULT_JUDGE_MODEL = AVAILABLE_MODEL_IDS.includes(
938
+ var DEFAULT_JUDGE_MODEL = AVAILABLE_CLAUDE_MODEL_IDS.includes(
939
939
  PREFERRED_JUDGE_MODEL
940
- ) ? PREFERRED_JUDGE_MODEL : AVAILABLE_MODEL_IDS[0];
940
+ ) ? PREFERRED_JUDGE_MODEL : AVAILABLE_CLAUDE_MODEL_IDS[0];
941
941
  var ClaudeModelSchema = z4.enum(
942
- AVAILABLE_MODEL_IDS
942
+ AVAILABLE_CLAUDE_MODEL_IDS
943
943
  );
944
944
  var AVAILABLE_OPENAI_MODEL_IDS = Object.values(
945
945
  Model
@@ -950,7 +950,7 @@ var OpenAIModelSchema = z4.enum(
950
950
  AVAILABLE_OPENAI_MODEL_IDS
951
951
  );
952
952
  var ALL_AVAILABLE_MODEL_IDS = [
953
- ...AVAILABLE_MODEL_IDS,
953
+ ...AVAILABLE_CLAUDE_MODEL_IDS,
954
954
  ...AVAILABLE_OPENAI_MODEL_IDS
955
955
  ];
956
956
  var AnyModelSchema = z4.enum(
@@ -1697,7 +1697,7 @@ var LLMTraceSchema = z26.object({
1697
1697
  });
1698
1698
 
1699
1699
  // src/evaluation/eval-result.ts
1700
- import { z as z29 } from "zod";
1700
+ import { z as z30 } from "zod";
1701
1701
 
1702
1702
  // src/evaluation/eval-run.ts
1703
1703
  import { z as z28 } from "zod";
@@ -1944,6 +1944,53 @@ var EvaluationLogSchema = z28.object({
1944
1944
  });
1945
1945
  var LLM_TIMEOUT = 12e4;
1946
1946
 
1947
+ // src/evaluation/conversation.ts
1948
+ import { z as z29 } from "zod";
1949
+ var TextBlockSchema = z29.object({
1950
+ type: z29.literal("text"),
1951
+ text: z29.string()
1952
+ });
1953
+ var ThinkingBlockSchema = z29.object({
1954
+ type: z29.literal("thinking"),
1955
+ thinking: z29.string()
1956
+ });
1957
+ var ToolUseBlockSchema = z29.object({
1958
+ type: z29.literal("tool_use"),
1959
+ toolName: z29.string(),
1960
+ toolId: z29.string(),
1961
+ input: z29.unknown()
1962
+ });
1963
+ var ToolResultBlockSchema = z29.object({
1964
+ type: z29.literal("tool_result"),
1965
+ toolUseId: z29.string(),
1966
+ content: z29.string(),
1967
+ isError: z29.boolean().optional()
1968
+ });
1969
+ var ConversationBlockSchema = z29.discriminatedUnion("type", [
1970
+ TextBlockSchema,
1971
+ ThinkingBlockSchema,
1972
+ ToolUseBlockSchema,
1973
+ ToolResultBlockSchema
1974
+ ]);
1975
+ var ConversationMessageRoles = [
1976
+ "assistant",
1977
+ "user",
1978
+ "system"
1979
+ ];
1980
+ var ConversationMessageSchema = z29.object({
1981
+ role: z29.enum(ConversationMessageRoles),
1982
+ content: z29.array(ConversationBlockSchema),
1983
+ timestamp: z29.string()
1984
+ });
1985
+ var ScenarioConversationSchema = z29.object({
1986
+ id: z29.string(),
1987
+ projectId: z29.string(),
1988
+ evalRunId: z29.string(),
1989
+ resultId: z29.string(),
1990
+ messages: z29.array(ConversationMessageSchema),
1991
+ createdAt: z29.string()
1992
+ });
1993
+
1947
1994
  // src/evaluation/eval-result.ts
1948
1995
  var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1949
1996
  AssertionResultStatus2["PASSED"] = "passed";
@@ -1952,97 +1999,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1952
1999
  AssertionResultStatus2["ERROR"] = "error";
1953
2000
  return AssertionResultStatus2;
1954
2001
  })(AssertionResultStatus || {});
1955
- var AssertionResultSchema = z29.object({
1956
- id: z29.string(),
1957
- assertionId: z29.string(),
1958
- assertionType: z29.string(),
1959
- assertionName: z29.string(),
1960
- status: z29.enum(AssertionResultStatus),
1961
- message: z29.string().optional(),
1962
- expected: z29.string().optional(),
1963
- actual: z29.string().optional(),
1964
- duration: z29.number().optional(),
1965
- details: z29.record(z29.string(), z29.unknown()).optional(),
1966
- llmTraceSteps: z29.array(LLMTraceStepSchema).optional()
1967
- });
1968
- var EvalRunResultSchema = z29.object({
1969
- id: z29.string(),
1970
- targetId: z29.string(),
1971
- targetName: z29.string().optional(),
2002
+ var AssertionResultSchema = z30.object({
2003
+ id: z30.string(),
2004
+ assertionId: z30.string(),
2005
+ assertionType: z30.string(),
2006
+ assertionName: z30.string(),
2007
+ status: z30.enum(AssertionResultStatus),
2008
+ message: z30.string().optional(),
2009
+ expected: z30.string().optional(),
2010
+ actual: z30.string().optional(),
2011
+ duration: z30.number().optional(),
2012
+ details: z30.record(z30.string(), z30.unknown()).optional(),
2013
+ llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
2014
+ });
2015
+ var EvalRunResultSchema = z30.object({
2016
+ id: z30.string(),
2017
+ targetId: z30.string(),
2018
+ targetName: z30.string().optional(),
1972
2019
  /** SkillVersion ID used for this evaluation (for version tracking) */
1973
- skillVersionId: z29.string().optional(),
2020
+ skillVersionId: z30.string().optional(),
1974
2021
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1975
- skillVersion: z29.string().optional(),
1976
- scenarioId: z29.string(),
1977
- scenarioName: z29.string(),
2022
+ skillVersion: z30.string().optional(),
2023
+ scenarioId: z30.string(),
2024
+ scenarioName: z30.string(),
1978
2025
  modelConfig: ModelConfigSchema.optional(),
1979
- assertionResults: z29.array(AssertionResultSchema),
2026
+ assertionResults: z30.array(AssertionResultSchema),
1980
2027
  metrics: EvalMetricsSchema.optional(),
1981
- passed: z29.number(),
1982
- failed: z29.number(),
1983
- passRate: z29.number(),
1984
- duration: z29.number(),
1985
- outputText: z29.string().optional(),
1986
- files: z29.array(ExpectedFileSchema).optional(),
1987
- fileDiffs: z29.array(DiffContentSchema).optional(),
2028
+ passed: z30.number(),
2029
+ failed: z30.number(),
2030
+ passRate: z30.number(),
2031
+ duration: z30.number(),
2032
+ outputText: z30.string().optional(),
2033
+ files: z30.array(ExpectedFileSchema).optional(),
2034
+ fileDiffs: z30.array(DiffContentSchema).optional(),
1988
2035
  /** Full template files after execution with status indicators */
1989
- templateFiles: z29.array(TemplateFileSchema).optional(),
1990
- startedAt: z29.string().optional(),
1991
- completedAt: z29.string().optional(),
1992
- llmTrace: LLMTraceSchema.optional()
1993
- });
1994
- var PromptResultSchema = z29.object({
1995
- text: z29.string(),
1996
- files: z29.array(z29.unknown()).optional(),
1997
- finishReason: z29.string().optional(),
1998
- reasoning: z29.string().optional(),
1999
- reasoningDetails: z29.unknown().optional(),
2000
- toolCalls: z29.array(z29.unknown()).optional(),
2001
- toolResults: z29.array(z29.unknown()).optional(),
2002
- warnings: z29.array(z29.unknown()).optional(),
2003
- sources: z29.array(z29.unknown()).optional(),
2004
- steps: z29.array(z29.unknown()),
2005
- generationTimeMs: z29.number(),
2006
- prompt: z29.string(),
2007
- systemPrompt: z29.string(),
2008
- usage: z29.object({
2009
- totalTokens: z29.number().optional(),
2010
- totalMicrocentsSpent: z29.number().optional()
2036
+ templateFiles: z30.array(TemplateFileSchema).optional(),
2037
+ startedAt: z30.string().optional(),
2038
+ completedAt: z30.string().optional(),
2039
+ llmTrace: LLMTraceSchema.optional(),
2040
+ /** Full conversation messages (only present in transit; stripped before DB storage) */
2041
+ conversation: z30.array(ConversationMessageSchema).optional()
2042
+ });
2043
+ var PromptResultSchema = z30.object({
2044
+ text: z30.string(),
2045
+ files: z30.array(z30.unknown()).optional(),
2046
+ finishReason: z30.string().optional(),
2047
+ reasoning: z30.string().optional(),
2048
+ reasoningDetails: z30.unknown().optional(),
2049
+ toolCalls: z30.array(z30.unknown()).optional(),
2050
+ toolResults: z30.array(z30.unknown()).optional(),
2051
+ warnings: z30.array(z30.unknown()).optional(),
2052
+ sources: z30.array(z30.unknown()).optional(),
2053
+ steps: z30.array(z30.unknown()),
2054
+ generationTimeMs: z30.number(),
2055
+ prompt: z30.string(),
2056
+ systemPrompt: z30.string(),
2057
+ usage: z30.object({
2058
+ totalTokens: z30.number().optional(),
2059
+ totalMicrocentsSpent: z30.number().optional()
2011
2060
  })
2012
2061
  });
2013
- var EvaluationResultSchema = z29.object({
2014
- id: z29.string(),
2015
- runId: z29.string(),
2016
- timestamp: z29.number(),
2062
+ var EvaluationResultSchema = z30.object({
2063
+ id: z30.string(),
2064
+ runId: z30.string(),
2065
+ timestamp: z30.number(),
2017
2066
  promptResult: PromptResultSchema,
2018
- testResults: z29.array(z29.unknown()),
2019
- tags: z29.array(z29.string()).optional(),
2020
- feedback: z29.string().optional(),
2021
- score: z29.number(),
2022
- suiteId: z29.string().optional()
2023
- });
2024
- var LeanEvaluationResultSchema = z29.object({
2025
- id: z29.string(),
2026
- runId: z29.string(),
2027
- timestamp: z29.number(),
2028
- tags: z29.array(z29.string()).optional(),
2029
- scenarioId: z29.string(),
2030
- scenarioVersion: z29.number().optional(),
2031
- targetId: z29.string(),
2032
- targetVersion: z29.number().optional(),
2033
- suiteId: z29.string().optional(),
2034
- score: z29.number(),
2035
- time: z29.number().optional(),
2036
- microcentsSpent: z29.number().optional()
2067
+ testResults: z30.array(z30.unknown()),
2068
+ tags: z30.array(z30.string()).optional(),
2069
+ feedback: z30.string().optional(),
2070
+ score: z30.number(),
2071
+ suiteId: z30.string().optional()
2072
+ });
2073
+ var LeanEvaluationResultSchema = z30.object({
2074
+ id: z30.string(),
2075
+ runId: z30.string(),
2076
+ timestamp: z30.number(),
2077
+ tags: z30.array(z30.string()).optional(),
2078
+ scenarioId: z30.string(),
2079
+ scenarioVersion: z30.number().optional(),
2080
+ targetId: z30.string(),
2081
+ targetVersion: z30.number().optional(),
2082
+ suiteId: z30.string().optional(),
2083
+ score: z30.number(),
2084
+ time: z30.number().optional(),
2085
+ microcentsSpent: z30.number().optional()
2037
2086
  });
2038
2087
 
2039
2088
  // src/project/project.ts
2040
- import { z as z30 } from "zod";
2089
+ import { z as z31 } from "zod";
2041
2090
  var ProjectSchema = BaseEntitySchema.extend({
2042
- appId: z30.string().optional().describe("The ID of the app in Dev Center"),
2043
- appSecret: z30.string().optional().describe("The secret of the app in Dev Center"),
2044
- useWixAuth: z30.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2045
- useBase44Auth: z30.boolean().optional().describe("Enable Base44 auth for evaluations")
2091
+ appId: z31.string().optional().describe("The ID of the app in Dev Center"),
2092
+ appSecret: z31.string().optional().describe("The secret of the app in Dev Center"),
2093
+ useWixAuth: z31.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
2094
+ useBase44Auth: z31.boolean().optional().describe("Enable Base44 auth for evaluations")
2046
2095
  });
2047
2096
  var CreateProjectInputSchema = ProjectSchema.omit({
2048
2097
  id: true,
@@ -2216,7 +2265,7 @@ function getSystemAssertion(id) {
2216
2265
  export {
2217
2266
  AGENT_TYPE_LABELS,
2218
2267
  ALL_AVAILABLE_MODEL_IDS,
2219
- AVAILABLE_MODEL_IDS,
2268
+ AVAILABLE_CLAUDE_MODEL_IDS,
2220
2269
  AVAILABLE_OPENAI_MODEL_IDS,
2221
2270
  AVAILABLE_RUN_COMMANDS,
2222
2271
  AVAILABLE_TOOL_NAMES,
@@ -2244,6 +2293,9 @@ export {
2244
2293
  ClaudeModelSchema,
2245
2294
  CommandExecutionSchema,
2246
2295
  CommandExecutionTestSchema,
2296
+ ConversationBlockSchema,
2297
+ ConversationMessageRoles,
2298
+ ConversationMessageSchema,
2247
2299
  CostAssertionSchema,
2248
2300
  CostConfigSchema,
2249
2301
  CreateAgentInputSchema,
@@ -2316,6 +2368,7 @@ export {
2316
2368
  SYSTEM_ASSERTIONS,
2317
2369
  SYSTEM_ASSERTION_IDS,
2318
2370
  ScenarioAssertionLinkSchema,
2371
+ ScenarioConversationSchema,
2319
2372
  SiteConfigTestSchema,
2320
2373
  SkillFileSchema,
2321
2374
  SkillMetadataSchema,
@@ -2340,12 +2393,16 @@ export {
2340
2393
  TestSuiteSchema,
2341
2394
  TestType,
2342
2395
  TestTypeSchema,
2396
+ TextBlockSchema,
2397
+ ThinkingBlockSchema,
2343
2398
  TimeAssertionSchema,
2344
2399
  TimeConfigSchema,
2345
2400
  TokenUsageSchema,
2346
2401
  ToolCalledWithParamAssertionSchema,
2347
2402
  ToolCalledWithParamConfigSchema,
2403
+ ToolResultBlockSchema,
2348
2404
  ToolTestSchema,
2405
+ ToolUseBlockSchema,
2349
2406
  TriggerMetadataSchema,
2350
2407
  TriggerSchema,
2351
2408
  TriggerType,