@wix/evalforge-types 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +145 -80
- package/build/index.js.map +4 -4
- package/build/index.mjs +137 -80
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/evaluation/conversation.d.ts +108 -0
- package/build/types/evaluation/eval-result.d.ts +25 -0
- package/build/types/evaluation/eval-run.d.ts +25 -0
- package/build/types/evaluation/index.d.ts +1 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -1697,7 +1697,7 @@ var LLMTraceSchema = z26.object({
|
|
|
1697
1697
|
});
|
|
1698
1698
|
|
|
1699
1699
|
// src/evaluation/eval-result.ts
|
|
1700
|
-
import { z as z29 } from "zod";
|
|
1700
|
+
import { z as z30 } from "zod";
|
|
1701
1701
|
|
|
1702
1702
|
// src/evaluation/eval-run.ts
|
|
1703
1703
|
import { z as z28 } from "zod";
|
|
@@ -1944,6 +1944,53 @@ var EvaluationLogSchema = z28.object({
|
|
|
1944
1944
|
});
|
|
1945
1945
|
var LLM_TIMEOUT = 12e4;
|
|
1946
1946
|
|
|
1947
|
+
// src/evaluation/conversation.ts
|
|
1948
|
+
import { z as z29 } from "zod";
|
|
1949
|
+
var TextBlockSchema = z29.object({
|
|
1950
|
+
type: z29.literal("text"),
|
|
1951
|
+
text: z29.string()
|
|
1952
|
+
});
|
|
1953
|
+
var ThinkingBlockSchema = z29.object({
|
|
1954
|
+
type: z29.literal("thinking"),
|
|
1955
|
+
thinking: z29.string()
|
|
1956
|
+
});
|
|
1957
|
+
var ToolUseBlockSchema = z29.object({
|
|
1958
|
+
type: z29.literal("tool_use"),
|
|
1959
|
+
toolName: z29.string(),
|
|
1960
|
+
toolId: z29.string(),
|
|
1961
|
+
input: z29.unknown()
|
|
1962
|
+
});
|
|
1963
|
+
var ToolResultBlockSchema = z29.object({
|
|
1964
|
+
type: z29.literal("tool_result"),
|
|
1965
|
+
toolUseId: z29.string(),
|
|
1966
|
+
content: z29.string(),
|
|
1967
|
+
isError: z29.boolean().optional()
|
|
1968
|
+
});
|
|
1969
|
+
var ConversationBlockSchema = z29.discriminatedUnion("type", [
|
|
1970
|
+
TextBlockSchema,
|
|
1971
|
+
ThinkingBlockSchema,
|
|
1972
|
+
ToolUseBlockSchema,
|
|
1973
|
+
ToolResultBlockSchema
|
|
1974
|
+
]);
|
|
1975
|
+
var ConversationMessageRoles = [
|
|
1976
|
+
"assistant",
|
|
1977
|
+
"user",
|
|
1978
|
+
"system"
|
|
1979
|
+
];
|
|
1980
|
+
var ConversationMessageSchema = z29.object({
|
|
1981
|
+
role: z29.enum(ConversationMessageRoles),
|
|
1982
|
+
content: z29.array(ConversationBlockSchema),
|
|
1983
|
+
timestamp: z29.string()
|
|
1984
|
+
});
|
|
1985
|
+
var ScenarioConversationSchema = z29.object({
|
|
1986
|
+
id: z29.string(),
|
|
1987
|
+
projectId: z29.string(),
|
|
1988
|
+
evalRunId: z29.string(),
|
|
1989
|
+
resultId: z29.string(),
|
|
1990
|
+
messages: z29.array(ConversationMessageSchema),
|
|
1991
|
+
createdAt: z29.string()
|
|
1992
|
+
});
|
|
1993
|
+
|
|
1947
1994
|
// src/evaluation/eval-result.ts
|
|
1948
1995
|
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
1949
1996
|
AssertionResultStatus2["PASSED"] = "passed";
|
|
@@ -1952,97 +1999,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1952
1999
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1953
2000
|
return AssertionResultStatus2;
|
|
1954
2001
|
})(AssertionResultStatus || {});
|
|
1955
|
-
var AssertionResultSchema =
|
|
1956
|
-
id:
|
|
1957
|
-
assertionId:
|
|
1958
|
-
assertionType:
|
|
1959
|
-
assertionName:
|
|
1960
|
-
status:
|
|
1961
|
-
message:
|
|
1962
|
-
expected:
|
|
1963
|
-
actual:
|
|
1964
|
-
duration:
|
|
1965
|
-
details:
|
|
1966
|
-
llmTraceSteps:
|
|
1967
|
-
});
|
|
1968
|
-
var EvalRunResultSchema =
|
|
1969
|
-
id:
|
|
1970
|
-
targetId:
|
|
1971
|
-
targetName:
|
|
2002
|
+
var AssertionResultSchema = z30.object({
|
|
2003
|
+
id: z30.string(),
|
|
2004
|
+
assertionId: z30.string(),
|
|
2005
|
+
assertionType: z30.string(),
|
|
2006
|
+
assertionName: z30.string(),
|
|
2007
|
+
status: z30.enum(AssertionResultStatus),
|
|
2008
|
+
message: z30.string().optional(),
|
|
2009
|
+
expected: z30.string().optional(),
|
|
2010
|
+
actual: z30.string().optional(),
|
|
2011
|
+
duration: z30.number().optional(),
|
|
2012
|
+
details: z30.record(z30.string(), z30.unknown()).optional(),
|
|
2013
|
+
llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
|
|
2014
|
+
});
|
|
2015
|
+
var EvalRunResultSchema = z30.object({
|
|
2016
|
+
id: z30.string(),
|
|
2017
|
+
targetId: z30.string(),
|
|
2018
|
+
targetName: z30.string().optional(),
|
|
1972
2019
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1973
|
-
skillVersionId:
|
|
2020
|
+
skillVersionId: z30.string().optional(),
|
|
1974
2021
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1975
|
-
skillVersion:
|
|
1976
|
-
scenarioId:
|
|
1977
|
-
scenarioName:
|
|
2022
|
+
skillVersion: z30.string().optional(),
|
|
2023
|
+
scenarioId: z30.string(),
|
|
2024
|
+
scenarioName: z30.string(),
|
|
1978
2025
|
modelConfig: ModelConfigSchema.optional(),
|
|
1979
|
-
assertionResults:
|
|
2026
|
+
assertionResults: z30.array(AssertionResultSchema),
|
|
1980
2027
|
metrics: EvalMetricsSchema.optional(),
|
|
1981
|
-
passed:
|
|
1982
|
-
failed:
|
|
1983
|
-
passRate:
|
|
1984
|
-
duration:
|
|
1985
|
-
outputText:
|
|
1986
|
-
files:
|
|
1987
|
-
fileDiffs:
|
|
2028
|
+
passed: z30.number(),
|
|
2029
|
+
failed: z30.number(),
|
|
2030
|
+
passRate: z30.number(),
|
|
2031
|
+
duration: z30.number(),
|
|
2032
|
+
outputText: z30.string().optional(),
|
|
2033
|
+
files: z30.array(ExpectedFileSchema).optional(),
|
|
2034
|
+
fileDiffs: z30.array(DiffContentSchema).optional(),
|
|
1988
2035
|
/** Full template files after execution with status indicators */
|
|
1989
|
-
templateFiles:
|
|
1990
|
-
startedAt:
|
|
1991
|
-
completedAt:
|
|
1992
|
-
llmTrace: LLMTraceSchema.optional()
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2036
|
+
templateFiles: z30.array(TemplateFileSchema).optional(),
|
|
2037
|
+
startedAt: z30.string().optional(),
|
|
2038
|
+
completedAt: z30.string().optional(),
|
|
2039
|
+
llmTrace: LLMTraceSchema.optional(),
|
|
2040
|
+
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
2041
|
+
conversation: z30.array(ConversationMessageSchema).optional()
|
|
2042
|
+
});
|
|
2043
|
+
var PromptResultSchema = z30.object({
|
|
2044
|
+
text: z30.string(),
|
|
2045
|
+
files: z30.array(z30.unknown()).optional(),
|
|
2046
|
+
finishReason: z30.string().optional(),
|
|
2047
|
+
reasoning: z30.string().optional(),
|
|
2048
|
+
reasoningDetails: z30.unknown().optional(),
|
|
2049
|
+
toolCalls: z30.array(z30.unknown()).optional(),
|
|
2050
|
+
toolResults: z30.array(z30.unknown()).optional(),
|
|
2051
|
+
warnings: z30.array(z30.unknown()).optional(),
|
|
2052
|
+
sources: z30.array(z30.unknown()).optional(),
|
|
2053
|
+
steps: z30.array(z30.unknown()),
|
|
2054
|
+
generationTimeMs: z30.number(),
|
|
2055
|
+
prompt: z30.string(),
|
|
2056
|
+
systemPrompt: z30.string(),
|
|
2057
|
+
usage: z30.object({
|
|
2058
|
+
totalTokens: z30.number().optional(),
|
|
2059
|
+
totalMicrocentsSpent: z30.number().optional()
|
|
2011
2060
|
})
|
|
2012
2061
|
});
|
|
2013
|
-
var EvaluationResultSchema =
|
|
2014
|
-
id:
|
|
2015
|
-
runId:
|
|
2016
|
-
timestamp:
|
|
2062
|
+
var EvaluationResultSchema = z30.object({
|
|
2063
|
+
id: z30.string(),
|
|
2064
|
+
runId: z30.string(),
|
|
2065
|
+
timestamp: z30.number(),
|
|
2017
2066
|
promptResult: PromptResultSchema,
|
|
2018
|
-
testResults:
|
|
2019
|
-
tags:
|
|
2020
|
-
feedback:
|
|
2021
|
-
score:
|
|
2022
|
-
suiteId:
|
|
2023
|
-
});
|
|
2024
|
-
var LeanEvaluationResultSchema =
|
|
2025
|
-
id:
|
|
2026
|
-
runId:
|
|
2027
|
-
timestamp:
|
|
2028
|
-
tags:
|
|
2029
|
-
scenarioId:
|
|
2030
|
-
scenarioVersion:
|
|
2031
|
-
targetId:
|
|
2032
|
-
targetVersion:
|
|
2033
|
-
suiteId:
|
|
2034
|
-
score:
|
|
2035
|
-
time:
|
|
2036
|
-
microcentsSpent:
|
|
2067
|
+
testResults: z30.array(z30.unknown()),
|
|
2068
|
+
tags: z30.array(z30.string()).optional(),
|
|
2069
|
+
feedback: z30.string().optional(),
|
|
2070
|
+
score: z30.number(),
|
|
2071
|
+
suiteId: z30.string().optional()
|
|
2072
|
+
});
|
|
2073
|
+
var LeanEvaluationResultSchema = z30.object({
|
|
2074
|
+
id: z30.string(),
|
|
2075
|
+
runId: z30.string(),
|
|
2076
|
+
timestamp: z30.number(),
|
|
2077
|
+
tags: z30.array(z30.string()).optional(),
|
|
2078
|
+
scenarioId: z30.string(),
|
|
2079
|
+
scenarioVersion: z30.number().optional(),
|
|
2080
|
+
targetId: z30.string(),
|
|
2081
|
+
targetVersion: z30.number().optional(),
|
|
2082
|
+
suiteId: z30.string().optional(),
|
|
2083
|
+
score: z30.number(),
|
|
2084
|
+
time: z30.number().optional(),
|
|
2085
|
+
microcentsSpent: z30.number().optional()
|
|
2037
2086
|
});
|
|
2038
2087
|
|
|
2039
2088
|
// src/project/project.ts
|
|
2040
|
-
import { z as z30 } from "zod";
|
|
2089
|
+
import { z as z31 } from "zod";
|
|
2041
2090
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
2042
|
-
appId:
|
|
2043
|
-
appSecret:
|
|
2044
|
-
useWixAuth:
|
|
2045
|
-
useBase44Auth:
|
|
2091
|
+
appId: z31.string().optional().describe("The ID of the app in Dev Center"),
|
|
2092
|
+
appSecret: z31.string().optional().describe("The secret of the app in Dev Center"),
|
|
2093
|
+
useWixAuth: z31.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
|
|
2094
|
+
useBase44Auth: z31.boolean().optional().describe("Enable Base44 auth for evaluations")
|
|
2046
2095
|
});
|
|
2047
2096
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
2048
2097
|
id: true,
|
|
@@ -2244,6 +2293,9 @@ export {
|
|
|
2244
2293
|
ClaudeModelSchema,
|
|
2245
2294
|
CommandExecutionSchema,
|
|
2246
2295
|
CommandExecutionTestSchema,
|
|
2296
|
+
ConversationBlockSchema,
|
|
2297
|
+
ConversationMessageRoles,
|
|
2298
|
+
ConversationMessageSchema,
|
|
2247
2299
|
CostAssertionSchema,
|
|
2248
2300
|
CostConfigSchema,
|
|
2249
2301
|
CreateAgentInputSchema,
|
|
@@ -2316,6 +2368,7 @@ export {
|
|
|
2316
2368
|
SYSTEM_ASSERTIONS,
|
|
2317
2369
|
SYSTEM_ASSERTION_IDS,
|
|
2318
2370
|
ScenarioAssertionLinkSchema,
|
|
2371
|
+
ScenarioConversationSchema,
|
|
2319
2372
|
SiteConfigTestSchema,
|
|
2320
2373
|
SkillFileSchema,
|
|
2321
2374
|
SkillMetadataSchema,
|
|
@@ -2340,12 +2393,16 @@ export {
|
|
|
2340
2393
|
TestSuiteSchema,
|
|
2341
2394
|
TestType,
|
|
2342
2395
|
TestTypeSchema,
|
|
2396
|
+
TextBlockSchema,
|
|
2397
|
+
ThinkingBlockSchema,
|
|
2343
2398
|
TimeAssertionSchema,
|
|
2344
2399
|
TimeConfigSchema,
|
|
2345
2400
|
TokenUsageSchema,
|
|
2346
2401
|
ToolCalledWithParamAssertionSchema,
|
|
2347
2402
|
ToolCalledWithParamConfigSchema,
|
|
2403
|
+
ToolResultBlockSchema,
|
|
2348
2404
|
ToolTestSchema,
|
|
2405
|
+
ToolUseBlockSchema,
|
|
2349
2406
|
TriggerMetadataSchema,
|
|
2350
2407
|
TriggerSchema,
|
|
2351
2408
|
TriggerType,
|