@wix/evalforge-types 0.44.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +145 -78
- package/build/index.js.map +4 -4
- package/build/index.mjs +137 -78
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/evaluation/conversation.d.ts +108 -0
- package/build/types/evaluation/eval-result.d.ts +25 -0
- package/build/types/evaluation/eval-run.d.ts +25 -0
- package/build/types/evaluation/index.d.ts +1 -0
- package/build/types/project/project.d.ts +6 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -1697,7 +1697,7 @@ var LLMTraceSchema = z26.object({
|
|
|
1697
1697
|
});
|
|
1698
1698
|
|
|
1699
1699
|
// src/evaluation/eval-result.ts
|
|
1700
|
-
import { z as z29 } from "zod";
|
|
1700
|
+
import { z as z30 } from "zod";
|
|
1701
1701
|
|
|
1702
1702
|
// src/evaluation/eval-run.ts
|
|
1703
1703
|
import { z as z28 } from "zod";
|
|
@@ -1944,6 +1944,53 @@ var EvaluationLogSchema = z28.object({
|
|
|
1944
1944
|
});
|
|
1945
1945
|
var LLM_TIMEOUT = 12e4;
|
|
1946
1946
|
|
|
1947
|
+
// src/evaluation/conversation.ts
|
|
1948
|
+
import { z as z29 } from "zod";
|
|
1949
|
+
var TextBlockSchema = z29.object({
|
|
1950
|
+
type: z29.literal("text"),
|
|
1951
|
+
text: z29.string()
|
|
1952
|
+
});
|
|
1953
|
+
var ThinkingBlockSchema = z29.object({
|
|
1954
|
+
type: z29.literal("thinking"),
|
|
1955
|
+
thinking: z29.string()
|
|
1956
|
+
});
|
|
1957
|
+
var ToolUseBlockSchema = z29.object({
|
|
1958
|
+
type: z29.literal("tool_use"),
|
|
1959
|
+
toolName: z29.string(),
|
|
1960
|
+
toolId: z29.string(),
|
|
1961
|
+
input: z29.unknown()
|
|
1962
|
+
});
|
|
1963
|
+
var ToolResultBlockSchema = z29.object({
|
|
1964
|
+
type: z29.literal("tool_result"),
|
|
1965
|
+
toolUseId: z29.string(),
|
|
1966
|
+
content: z29.string(),
|
|
1967
|
+
isError: z29.boolean().optional()
|
|
1968
|
+
});
|
|
1969
|
+
var ConversationBlockSchema = z29.discriminatedUnion("type", [
|
|
1970
|
+
TextBlockSchema,
|
|
1971
|
+
ThinkingBlockSchema,
|
|
1972
|
+
ToolUseBlockSchema,
|
|
1973
|
+
ToolResultBlockSchema
|
|
1974
|
+
]);
|
|
1975
|
+
var ConversationMessageRoles = [
|
|
1976
|
+
"assistant",
|
|
1977
|
+
"user",
|
|
1978
|
+
"system"
|
|
1979
|
+
];
|
|
1980
|
+
var ConversationMessageSchema = z29.object({
|
|
1981
|
+
role: z29.enum(ConversationMessageRoles),
|
|
1982
|
+
content: z29.array(ConversationBlockSchema),
|
|
1983
|
+
timestamp: z29.string()
|
|
1984
|
+
});
|
|
1985
|
+
var ScenarioConversationSchema = z29.object({
|
|
1986
|
+
id: z29.string(),
|
|
1987
|
+
projectId: z29.string(),
|
|
1988
|
+
evalRunId: z29.string(),
|
|
1989
|
+
resultId: z29.string(),
|
|
1990
|
+
messages: z29.array(ConversationMessageSchema),
|
|
1991
|
+
createdAt: z29.string()
|
|
1992
|
+
});
|
|
1993
|
+
|
|
1947
1994
|
// src/evaluation/eval-result.ts
|
|
1948
1995
|
var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
1949
1996
|
AssertionResultStatus2["PASSED"] = "passed";
|
|
@@ -1952,95 +1999,99 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1952
1999
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1953
2000
|
return AssertionResultStatus2;
|
|
1954
2001
|
})(AssertionResultStatus || {});
|
|
1955
|
-
var AssertionResultSchema = z29.object({
|
|
1956
|
-
id: z29.string(),
|
|
1957
|
-
assertionId: z29.string(),
|
|
1958
|
-
assertionType: z29.string(),
|
|
1959
|
-
assertionName: z29.string(),
|
|
1960
|
-
status: z29.enum(AssertionResultStatus),
|
|
1961
|
-
message: z29.string().optional(),
|
|
1962
|
-
expected: z29.string().optional(),
|
|
1963
|
-
actual: z29.string().optional(),
|
|
1964
|
-
duration: z29.number().optional(),
|
|
1965
|
-
details: z29.record(z29.string(), z29.unknown()).optional(),
|
|
1966
|
-
llmTraceSteps: z29.array(LLMTraceStepSchema).optional()
|
|
1967
|
-
});
|
|
1968
|
-
var EvalRunResultSchema = z29.object({
|
|
1969
|
-
id: z29.string(),
|
|
1970
|
-
targetId: z29.string(),
|
|
1971
|
-
targetName: z29.string().optional(),
|
|
2002
|
+
var AssertionResultSchema = z30.object({
|
|
2003
|
+
id: z30.string(),
|
|
2004
|
+
assertionId: z30.string(),
|
|
2005
|
+
assertionType: z30.string(),
|
|
2006
|
+
assertionName: z30.string(),
|
|
2007
|
+
status: z30.enum(AssertionResultStatus),
|
|
2008
|
+
message: z30.string().optional(),
|
|
2009
|
+
expected: z30.string().optional(),
|
|
2010
|
+
actual: z30.string().optional(),
|
|
2011
|
+
duration: z30.number().optional(),
|
|
2012
|
+
details: z30.record(z30.string(), z30.unknown()).optional(),
|
|
2013
|
+
llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
|
|
2014
|
+
});
|
|
2015
|
+
var EvalRunResultSchema = z30.object({
|
|
2016
|
+
id: z30.string(),
|
|
2017
|
+
targetId: z30.string(),
|
|
2018
|
+
targetName: z30.string().optional(),
|
|
1972
2019
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1973
|
-
skillVersionId: z29.string().optional(),
|
|
2020
|
+
skillVersionId: z30.string().optional(),
|
|
1974
2021
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1975
|
-
skillVersion: z29.string().optional(),
|
|
1976
|
-
scenarioId: z29.string(),
|
|
1977
|
-
scenarioName: z29.string(),
|
|
2022
|
+
skillVersion: z30.string().optional(),
|
|
2023
|
+
scenarioId: z30.string(),
|
|
2024
|
+
scenarioName: z30.string(),
|
|
1978
2025
|
modelConfig: ModelConfigSchema.optional(),
|
|
1979
|
-
assertionResults: z29.array(AssertionResultSchema),
|
|
2026
|
+
assertionResults: z30.array(AssertionResultSchema),
|
|
1980
2027
|
metrics: EvalMetricsSchema.optional(),
|
|
1981
|
-
passed: z29.number(),
|
|
1982
|
-
failed: z29.number(),
|
|
1983
|
-
passRate: z29.number(),
|
|
1984
|
-
duration: z29.number(),
|
|
1985
|
-
outputText: z29.string().optional(),
|
|
1986
|
-
files: z29.array(ExpectedFileSchema).optional(),
|
|
1987
|
-
fileDiffs: z29.array(DiffContentSchema).optional(),
|
|
2028
|
+
passed: z30.number(),
|
|
2029
|
+
failed: z30.number(),
|
|
2030
|
+
passRate: z30.number(),
|
|
2031
|
+
duration: z30.number(),
|
|
2032
|
+
outputText: z30.string().optional(),
|
|
2033
|
+
files: z30.array(ExpectedFileSchema).optional(),
|
|
2034
|
+
fileDiffs: z30.array(DiffContentSchema).optional(),
|
|
1988
2035
|
/** Full template files after execution with status indicators */
|
|
1989
|
-
templateFiles: z29.array(TemplateFileSchema).optional(),
|
|
1990
|
-
startedAt: z29.string().optional(),
|
|
1991
|
-
completedAt: z29.string().optional(),
|
|
1992
|
-
llmTrace: LLMTraceSchema.optional()
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2036
|
+
templateFiles: z30.array(TemplateFileSchema).optional(),
|
|
2037
|
+
startedAt: z30.string().optional(),
|
|
2038
|
+
completedAt: z30.string().optional(),
|
|
2039
|
+
llmTrace: LLMTraceSchema.optional(),
|
|
2040
|
+
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
2041
|
+
conversation: z30.array(ConversationMessageSchema).optional()
|
|
2042
|
+
});
|
|
2043
|
+
var PromptResultSchema = z30.object({
|
|
2044
|
+
text: z30.string(),
|
|
2045
|
+
files: z30.array(z30.unknown()).optional(),
|
|
2046
|
+
finishReason: z30.string().optional(),
|
|
2047
|
+
reasoning: z30.string().optional(),
|
|
2048
|
+
reasoningDetails: z30.unknown().optional(),
|
|
2049
|
+
toolCalls: z30.array(z30.unknown()).optional(),
|
|
2050
|
+
toolResults: z30.array(z30.unknown()).optional(),
|
|
2051
|
+
warnings: z30.array(z30.unknown()).optional(),
|
|
2052
|
+
sources: z30.array(z30.unknown()).optional(),
|
|
2053
|
+
steps: z30.array(z30.unknown()),
|
|
2054
|
+
generationTimeMs: z30.number(),
|
|
2055
|
+
prompt: z30.string(),
|
|
2056
|
+
systemPrompt: z30.string(),
|
|
2057
|
+
usage: z30.object({
|
|
2058
|
+
totalTokens: z30.number().optional(),
|
|
2059
|
+
totalMicrocentsSpent: z30.number().optional()
|
|
2011
2060
|
})
|
|
2012
2061
|
});
|
|
2013
|
-
var EvaluationResultSchema = z29.object({
|
|
2014
|
-
id: z29.string(),
|
|
2015
|
-
runId: z29.string(),
|
|
2016
|
-
timestamp: z29.number(),
|
|
2062
|
+
var EvaluationResultSchema = z30.object({
|
|
2063
|
+
id: z30.string(),
|
|
2064
|
+
runId: z30.string(),
|
|
2065
|
+
timestamp: z30.number(),
|
|
2017
2066
|
promptResult: PromptResultSchema,
|
|
2018
|
-
testResults: z29.array(z29.unknown()),
|
|
2019
|
-
tags: z29.array(z29.string()).optional(),
|
|
2020
|
-
feedback: z29.string().optional(),
|
|
2021
|
-
score: z29.number(),
|
|
2022
|
-
suiteId: z29.string().optional()
|
|
2023
|
-
});
|
|
2024
|
-
var LeanEvaluationResultSchema = z29.object({
|
|
2025
|
-
id: z29.string(),
|
|
2026
|
-
runId: z29.string(),
|
|
2027
|
-
timestamp: z29.number(),
|
|
2028
|
-
tags: z29.array(z29.string()).optional(),
|
|
2029
|
-
scenarioId: z29.string(),
|
|
2030
|
-
scenarioVersion: z29.number().optional(),
|
|
2031
|
-
targetId: z29.string(),
|
|
2032
|
-
targetVersion: z29.number().optional(),
|
|
2033
|
-
suiteId: z29.string().optional(),
|
|
2034
|
-
score: z29.number(),
|
|
2035
|
-
time: z29.number().optional(),
|
|
2036
|
-
microcentsSpent: z29.number().optional()
|
|
2067
|
+
testResults: z30.array(z30.unknown()),
|
|
2068
|
+
tags: z30.array(z30.string()).optional(),
|
|
2069
|
+
feedback: z30.string().optional(),
|
|
2070
|
+
score: z30.number(),
|
|
2071
|
+
suiteId: z30.string().optional()
|
|
2072
|
+
});
|
|
2073
|
+
var LeanEvaluationResultSchema = z30.object({
|
|
2074
|
+
id: z30.string(),
|
|
2075
|
+
runId: z30.string(),
|
|
2076
|
+
timestamp: z30.number(),
|
|
2077
|
+
tags: z30.array(z30.string()).optional(),
|
|
2078
|
+
scenarioId: z30.string(),
|
|
2079
|
+
scenarioVersion: z30.number().optional(),
|
|
2080
|
+
targetId: z30.string(),
|
|
2081
|
+
targetVersion: z30.number().optional(),
|
|
2082
|
+
suiteId: z30.string().optional(),
|
|
2083
|
+
score: z30.number(),
|
|
2084
|
+
time: z30.number().optional(),
|
|
2085
|
+
microcentsSpent: z30.number().optional()
|
|
2037
2086
|
});
|
|
2038
2087
|
|
|
2039
2088
|
// src/project/project.ts
|
|
2040
|
-
import { z as z30 } from "zod";
|
|
2089
|
+
import { z as z31 } from "zod";
|
|
2041
2090
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
2042
|
-
appId:
|
|
2043
|
-
appSecret:
|
|
2091
|
+
appId: z31.string().optional().describe("The ID of the app in Dev Center"),
|
|
2092
|
+
appSecret: z31.string().optional().describe("The secret of the app in Dev Center"),
|
|
2093
|
+
useWixAuth: z31.boolean().optional().describe("Enable Wix CLI/MCP auth for evaluations"),
|
|
2094
|
+
useBase44Auth: z31.boolean().optional().describe("Enable Base44 auth for evaluations")
|
|
2044
2095
|
});
|
|
2045
2096
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
2046
2097
|
id: true,
|
|
@@ -2242,6 +2293,9 @@ export {
|
|
|
2242
2293
|
ClaudeModelSchema,
|
|
2243
2294
|
CommandExecutionSchema,
|
|
2244
2295
|
CommandExecutionTestSchema,
|
|
2296
|
+
ConversationBlockSchema,
|
|
2297
|
+
ConversationMessageRoles,
|
|
2298
|
+
ConversationMessageSchema,
|
|
2245
2299
|
CostAssertionSchema,
|
|
2246
2300
|
CostConfigSchema,
|
|
2247
2301
|
CreateAgentInputSchema,
|
|
@@ -2314,6 +2368,7 @@ export {
|
|
|
2314
2368
|
SYSTEM_ASSERTIONS,
|
|
2315
2369
|
SYSTEM_ASSERTION_IDS,
|
|
2316
2370
|
ScenarioAssertionLinkSchema,
|
|
2371
|
+
ScenarioConversationSchema,
|
|
2317
2372
|
SiteConfigTestSchema,
|
|
2318
2373
|
SkillFileSchema,
|
|
2319
2374
|
SkillMetadataSchema,
|
|
@@ -2338,12 +2393,16 @@ export {
|
|
|
2338
2393
|
TestSuiteSchema,
|
|
2339
2394
|
TestType,
|
|
2340
2395
|
TestTypeSchema,
|
|
2396
|
+
TextBlockSchema,
|
|
2397
|
+
ThinkingBlockSchema,
|
|
2341
2398
|
TimeAssertionSchema,
|
|
2342
2399
|
TimeConfigSchema,
|
|
2343
2400
|
TokenUsageSchema,
|
|
2344
2401
|
ToolCalledWithParamAssertionSchema,
|
|
2345
2402
|
ToolCalledWithParamConfigSchema,
|
|
2403
|
+
ToolResultBlockSchema,
|
|
2346
2404
|
ToolTestSchema,
|
|
2405
|
+
ToolUseBlockSchema,
|
|
2347
2406
|
TriggerMetadataSchema,
|
|
2348
2407
|
TriggerSchema,
|
|
2349
2408
|
TriggerType,
|