@wix/evalforge-types 0.34.0 → 0.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +54 -2
- package/build/index.js.map +3 -3
- package/build/index.mjs +48 -2
- package/build/index.mjs.map +3 -3
- package/build/types/agent/adapter.d.ts +4 -3
- package/build/types/assertion/assertion.d.ts +26 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +12 -0
- package/build/types/scenario/test-scenario.d.ts +9 -0
- package/build/types/target/agent.d.ts +17 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -915,6 +915,9 @@ var require_types_impl = __commonJS({
 var index_exports = {};
 __export(index_exports, {
 AVAILABLE_MODEL_IDS: () => AVAILABLE_MODEL_IDS,
+AVAILABLE_RUN_COMMANDS: () => AVAILABLE_RUN_COMMANDS,
+AgentRunCommand: () => AgentRunCommand,
+AgentRunCommandSchema: () => AgentRunCommandSchema,
 AgentSchema: () => AgentSchema,
 AllowedCommands: () => AllowedCommands,
 ApiCallSchema: () => ApiCallSchema,
@@ -992,6 +995,7 @@ __export(index_exports, {
 PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
 ProjectSchema: () => ProjectSchema,
 PromptResultSchema: () => PromptResultSchema,
+RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
 SEMVER_REGEX: () => SEMVER_REGEX,
 SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
 SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
@@ -1021,6 +1025,8 @@ __export(index_exports, {
 TestSuiteSchema: () => TestSuiteSchema,
 TestType: () => TestType,
 TestTypeSchema: () => TestTypeSchema,
+TimeAssertionSchema: () => TimeAssertionSchema,
+TimeConfigSchema: () => TimeConfigSchema,
 TokenUsageSchema: () => TokenUsageSchema,
 ToolTestSchema: () => ToolTestSchema,
 TriggerMetadataSchema: () => TriggerMetadataSchema,
@@ -1128,9 +1134,18 @@ var TargetSchema = TenantEntitySchema.extend({
 
 // src/target/agent.ts
 var import_zod5 = require("zod");
+var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
+AgentRunCommand2["CLAUDE"] = "claude";
+return AgentRunCommand2;
+})(AgentRunCommand || {});
+var AVAILABLE_RUN_COMMANDS = Object.values(AgentRunCommand);
+var RUN_COMMAND_LABELS = {
+["claude" /* CLAUDE */]: "Claude Code"
+};
+var AgentRunCommandSchema = import_zod5.z.nativeEnum(AgentRunCommand);
 var AgentSchema = TargetSchema.extend({
 /** Command to run the agent */
-runCommand:
+runCommand: AgentRunCommandSchema,
 /** Optional model configuration override */
 modelConfig: ModelConfigSchema.optional()
 });
@@ -1469,9 +1484,15 @@ var LlmJudgeAssertionSchema = import_zod20.z.object({
 maxTokens: import_zod20.z.number().int().optional(),
 temperature: import_zod20.z.number().min(0).max(1).optional()
 });
+var TimeAssertionSchema = import_zod20.z.object({
+type: import_zod20.z.literal("time_limit"),
+/** Maximum allowed duration in milliseconds */
+maxDurationMs: import_zod20.z.number().int().positive()
+});
 var AssertionSchema = import_zod20.z.union([
 SkillWasCalledAssertionSchema,
 BuildPassedAssertionSchema,
+TimeAssertionSchema,
 LlmJudgeAssertionSchema
 ]);
 
@@ -1517,6 +1538,7 @@ var import_zod22 = require("zod");
 var AssertionTypeSchema = import_zod22.z.enum([
 "skill_was_called",
 "build_passed",
+"time_limit",
 "llm_judge"
 ]);
 var AssertionParameterTypeSchema = import_zod22.z.enum([
@@ -1557,6 +1579,10 @@ var BuildPassedConfigSchema = import_zod22.z.strictObject({
 /** Expected exit code (default: 0) */
 expectedExitCode: import_zod22.z.number().int().optional()
 });
+var TimeConfigSchema = import_zod22.z.strictObject({
+/** Maximum allowed duration in milliseconds */
+maxDurationMs: import_zod22.z.number().int().positive()
+});
 var LlmJudgeConfigSchema = import_zod22.z.object({
 /**
 * Prompt template with placeholders:
@@ -1586,7 +1612,9 @@ var AssertionConfigSchema = import_zod22.z.union([
 LlmJudgeConfigSchema,
 // requires prompt - check first
 SkillWasCalledConfigSchema,
-// requires
+// requires skillNames
+TimeConfigSchema,
+// requires maxDurationMs, uses strictObject
 BuildPassedConfigSchema,
 // all optional, uses strictObject to reject unknown keys
 import_zod22.z.object({})
@@ -1611,6 +1639,8 @@ function validateAssertionConfig(type, config) {
 return SkillWasCalledConfigSchema.safeParse(config).success;
 case "build_passed":
 return BuildPassedConfigSchema.safeParse(config).success;
+case "time_limit":
+return TimeConfigSchema.safeParse(config).success;
 case "llm_judge":
 return LlmJudgeConfigSchema.safeParse(config).success;
 default:
@@ -2114,6 +2144,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
 var SYSTEM_ASSERTION_IDS = {
 SKILL_WAS_CALLED: "system:skill_was_called",
 BUILD_PASSED: "system:build_passed",
+TIME_LIMIT: "system:time_limit",
 LLM_JUDGE: "system:llm_judge"
 };
 function isSystemAssertionId(id) {
@@ -2170,6 +2201,21 @@ var SYSTEM_ASSERTIONS = {
 }
 ]
 },
+[SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
+id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
+name: "Time Limit",
+description: "Check that the scenario completed within a maximum duration",
+type: "time_limit",
+parameters: [
+{
+name: "maxDurationMs",
+label: "Max Duration (ms)",
+type: "number",
+required: true,
+defaultValue: 3e5
+}
+]
+},
 [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
 id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
 name: "LLM Judge",
@@ -2221,6 +2267,9 @@ function getSystemAssertion(id) {
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
 AVAILABLE_MODEL_IDS,
+AVAILABLE_RUN_COMMANDS,
+AgentRunCommand,
+AgentRunCommandSchema,
 AgentSchema,
 AllowedCommands,
 ApiCallSchema,
@@ -2298,6 +2347,7 @@ function getSystemAssertion(id) {
 PlaywrightNLTestSchema,
 ProjectSchema,
 PromptResultSchema,
+RUN_COMMAND_LABELS,
 SEMVER_REGEX,
 SKILL_FOLDER_NAME_REGEX,
 SYSTEM_ASSERTIONS,
@@ -2327,6 +2377,8 @@ function getSystemAssertion(id) {
 TestSuiteSchema,
 TestType,
 TestTypeSchema,
+TimeAssertionSchema,
+TimeConfigSchema,
 TokenUsageSchema,
 ToolTestSchema,
 TriggerMetadataSchema,