@wix/evalforge-types 0.34.0 → 0.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +54 -2
- package/build/index.js.map +3 -3
- package/build/index.mjs +48 -2
- package/build/index.mjs.map +3 -3
- package/build/types/agent/adapter.d.ts +4 -3
- package/build/types/assertion/assertion.d.ts +26 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +12 -0
- package/build/types/scenario/test-scenario.d.ts +9 -0
- package/build/types/target/agent.d.ts +17 -3
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -983,9 +983,18 @@ var TargetSchema = TenantEntitySchema.extend({
|
|
|
983
983
|
|
|
984
984
|
// src/target/agent.ts
|
|
985
985
|
import { z as z5 } from "zod";
|
|
986
|
+
var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
|
|
987
|
+
AgentRunCommand2["CLAUDE"] = "claude";
|
|
988
|
+
return AgentRunCommand2;
|
|
989
|
+
})(AgentRunCommand || {});
|
|
990
|
+
var AVAILABLE_RUN_COMMANDS = Object.values(AgentRunCommand);
|
|
991
|
+
var RUN_COMMAND_LABELS = {
|
|
992
|
+
["claude" /* CLAUDE */]: "Claude Code"
|
|
993
|
+
};
|
|
994
|
+
var AgentRunCommandSchema = z5.nativeEnum(AgentRunCommand);
|
|
986
995
|
var AgentSchema = TargetSchema.extend({
|
|
987
996
|
/** Command to run the agent */
|
|
988
|
-
runCommand:
|
|
997
|
+
runCommand: AgentRunCommandSchema,
|
|
989
998
|
/** Optional model configuration override */
|
|
990
999
|
modelConfig: ModelConfigSchema.optional()
|
|
991
1000
|
});
|
|
@@ -1324,9 +1333,15 @@ var LlmJudgeAssertionSchema = z20.object({
|
|
|
1324
1333
|
maxTokens: z20.number().int().optional(),
|
|
1325
1334
|
temperature: z20.number().min(0).max(1).optional()
|
|
1326
1335
|
});
|
|
1336
|
+
var TimeAssertionSchema = z20.object({
|
|
1337
|
+
type: z20.literal("time_limit"),
|
|
1338
|
+
/** Maximum allowed duration in milliseconds */
|
|
1339
|
+
maxDurationMs: z20.number().int().positive()
|
|
1340
|
+
});
|
|
1327
1341
|
var AssertionSchema = z20.union([
|
|
1328
1342
|
SkillWasCalledAssertionSchema,
|
|
1329
1343
|
BuildPassedAssertionSchema,
|
|
1344
|
+
TimeAssertionSchema,
|
|
1330
1345
|
LlmJudgeAssertionSchema
|
|
1331
1346
|
]);
|
|
1332
1347
|
|
|
@@ -1372,6 +1387,7 @@ import { z as z22 } from "zod";
|
|
|
1372
1387
|
var AssertionTypeSchema = z22.enum([
|
|
1373
1388
|
"skill_was_called",
|
|
1374
1389
|
"build_passed",
|
|
1390
|
+
"time_limit",
|
|
1375
1391
|
"llm_judge"
|
|
1376
1392
|
]);
|
|
1377
1393
|
var AssertionParameterTypeSchema = z22.enum([
|
|
@@ -1412,6 +1428,10 @@ var BuildPassedConfigSchema = z22.strictObject({
|
|
|
1412
1428
|
/** Expected exit code (default: 0) */
|
|
1413
1429
|
expectedExitCode: z22.number().int().optional()
|
|
1414
1430
|
});
|
|
1431
|
+
var TimeConfigSchema = z22.strictObject({
|
|
1432
|
+
/** Maximum allowed duration in milliseconds */
|
|
1433
|
+
maxDurationMs: z22.number().int().positive()
|
|
1434
|
+
});
|
|
1415
1435
|
var LlmJudgeConfigSchema = z22.object({
|
|
1416
1436
|
/**
|
|
1417
1437
|
* Prompt template with placeholders:
|
|
@@ -1441,7 +1461,9 @@ var AssertionConfigSchema = z22.union([
|
|
|
1441
1461
|
LlmJudgeConfigSchema,
|
|
1442
1462
|
// requires prompt - check first
|
|
1443
1463
|
SkillWasCalledConfigSchema,
|
|
1444
|
-
// requires
|
|
1464
|
+
// requires skillNames
|
|
1465
|
+
TimeConfigSchema,
|
|
1466
|
+
// requires maxDurationMs, uses strictObject
|
|
1445
1467
|
BuildPassedConfigSchema,
|
|
1446
1468
|
// all optional, uses strictObject to reject unknown keys
|
|
1447
1469
|
z22.object({})
|
|
@@ -1466,6 +1488,8 @@ function validateAssertionConfig(type, config) {
|
|
|
1466
1488
|
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
1467
1489
|
case "build_passed":
|
|
1468
1490
|
return BuildPassedConfigSchema.safeParse(config).success;
|
|
1491
|
+
case "time_limit":
|
|
1492
|
+
return TimeConfigSchema.safeParse(config).success;
|
|
1469
1493
|
case "llm_judge":
|
|
1470
1494
|
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
1471
1495
|
default:
|
|
@@ -1969,6 +1993,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
1969
1993
|
var SYSTEM_ASSERTION_IDS = {
|
|
1970
1994
|
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
1971
1995
|
BUILD_PASSED: "system:build_passed",
|
|
1996
|
+
TIME_LIMIT: "system:time_limit",
|
|
1972
1997
|
LLM_JUDGE: "system:llm_judge"
|
|
1973
1998
|
};
|
|
1974
1999
|
function isSystemAssertionId(id) {
|
|
@@ -2025,6 +2050,21 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2025
2050
|
}
|
|
2026
2051
|
]
|
|
2027
2052
|
},
|
|
2053
|
+
[SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
|
|
2054
|
+
id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
|
|
2055
|
+
name: "Time Limit",
|
|
2056
|
+
description: "Check that the scenario completed within a maximum duration",
|
|
2057
|
+
type: "time_limit",
|
|
2058
|
+
parameters: [
|
|
2059
|
+
{
|
|
2060
|
+
name: "maxDurationMs",
|
|
2061
|
+
label: "Max Duration (ms)",
|
|
2062
|
+
type: "number",
|
|
2063
|
+
required: true,
|
|
2064
|
+
defaultValue: 3e5
|
|
2065
|
+
}
|
|
2066
|
+
]
|
|
2067
|
+
},
|
|
2028
2068
|
[SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
|
|
2029
2069
|
id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
|
|
2030
2070
|
name: "LLM Judge",
|
|
@@ -2076,6 +2116,9 @@ function getSystemAssertion(id) {
|
|
|
2076
2116
|
var export_ClaudeModel = import_types.ClaudeModel;
|
|
2077
2117
|
export {
|
|
2078
2118
|
AVAILABLE_MODEL_IDS,
|
|
2119
|
+
AVAILABLE_RUN_COMMANDS,
|
|
2120
|
+
AgentRunCommand,
|
|
2121
|
+
AgentRunCommandSchema,
|
|
2079
2122
|
AgentSchema,
|
|
2080
2123
|
AllowedCommands,
|
|
2081
2124
|
ApiCallSchema,
|
|
@@ -2153,6 +2196,7 @@ export {
|
|
|
2153
2196
|
PlaywrightNLTestSchema,
|
|
2154
2197
|
ProjectSchema,
|
|
2155
2198
|
PromptResultSchema,
|
|
2199
|
+
RUN_COMMAND_LABELS,
|
|
2156
2200
|
SEMVER_REGEX,
|
|
2157
2201
|
SKILL_FOLDER_NAME_REGEX,
|
|
2158
2202
|
SYSTEM_ASSERTIONS,
|
|
@@ -2182,6 +2226,8 @@ export {
|
|
|
2182
2226
|
TestSuiteSchema,
|
|
2183
2227
|
TestType,
|
|
2184
2228
|
TestTypeSchema,
|
|
2229
|
+
TimeAssertionSchema,
|
|
2230
|
+
TimeConfigSchema,
|
|
2185
2231
|
TokenUsageSchema,
|
|
2186
2232
|
ToolTestSchema,
|
|
2187
2233
|
TriggerMetadataSchema,
|