@wix/evalforge-types 0.78.0 → 0.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +111 -20
- package/build/index.js.map +4 -4
- package/build/index.mjs +102 -20
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +5 -3
- package/build/types/agent/agent-config.d.ts +118 -0
- package/build/types/agent/index.d.ts +2 -0
- package/build/types/target/agent.d.ts +3 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -46,6 +46,7 @@ __export(index_exports, {
|
|
|
46
46
|
AssertionSchema: () => AssertionSchema,
|
|
47
47
|
AssertionTypeSchema: () => AssertionTypeSchema,
|
|
48
48
|
BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
|
|
49
|
+
BaseAgentConfigSchema: () => BaseAgentConfigSchema,
|
|
49
50
|
BaseEntitySchema: () => BaseEntitySchema,
|
|
50
51
|
BaseTestSchema: () => BaseTestSchema,
|
|
51
52
|
BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
|
|
@@ -68,6 +69,7 @@ __export(index_exports, {
|
|
|
68
69
|
CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
|
|
69
70
|
CapabilityVersionSchema: () => CapabilityVersionSchema,
|
|
70
71
|
CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
|
|
72
|
+
ClaudeCodeConfigSchema: () => ClaudeCodeConfigSchema,
|
|
71
73
|
ClaudeModel: () => ClaudeModel,
|
|
72
74
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
73
75
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
@@ -100,6 +102,7 @@ __export(index_exports, {
|
|
|
100
102
|
DiffContentSchema: () => DiffContentSchema,
|
|
101
103
|
DiffLineSchema: () => DiffLineSchema,
|
|
102
104
|
DiffLineTypeSchema: () => DiffLineTypeSchema,
|
|
105
|
+
EffortLevelSchema: () => EffortLevelSchema,
|
|
103
106
|
EnvironmentSchema: () => EnvironmentSchema,
|
|
104
107
|
EvalMetricsSchema: () => EvalMetricsSchema,
|
|
105
108
|
EvalRunFolderMembershipSchema: () => EvalRunFolderMembershipSchema,
|
|
@@ -147,11 +150,15 @@ __export(index_exports, {
|
|
|
147
150
|
OPENAI_RESPONSES_MODEL_IDS: () => OPENAI_RESPONSES_MODEL_IDS,
|
|
148
151
|
OpenAIModel: () => OpenAIModel,
|
|
149
152
|
OpenAIModelSchema: () => OpenAIModelSchema,
|
|
153
|
+
OpenCodeConfigSchema: () => OpenCodeConfigSchema,
|
|
154
|
+
OpenCodePermissionSchema: () => OpenCodePermissionSchema,
|
|
155
|
+
PermissionValueSchema: () => PermissionValueSchema,
|
|
150
156
|
PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
|
|
151
157
|
PresetSchema: () => PresetSchema,
|
|
152
158
|
ProjectSchema: () => ProjectSchema,
|
|
153
159
|
PromptResultSchema: () => PromptResultSchema,
|
|
154
160
|
RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
|
|
161
|
+
ReasoningEffortSchema: () => ReasoningEffortSchema,
|
|
155
162
|
RuleSchema: () => RuleSchema,
|
|
156
163
|
RuleTypeSchema: () => RuleTypeSchema,
|
|
157
164
|
RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
|
|
@@ -162,6 +169,7 @@ __export(index_exports, {
|
|
|
162
169
|
SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
|
|
163
170
|
ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
|
|
164
171
|
ScenarioConversationSchema: () => ScenarioConversationSchema,
|
|
172
|
+
SimpleAgentConfigSchema: () => SimpleAgentConfigSchema,
|
|
165
173
|
SiteConfigTestSchema: () => SiteConfigTestSchema,
|
|
166
174
|
SkillFileSchema: () => SkillFileSchema,
|
|
167
175
|
SkillMetadataSchema: () => SkillMetadataSchema,
|
|
@@ -187,6 +195,7 @@ __export(index_exports, {
|
|
|
187
195
|
TestTypeSchema: () => TestTypeSchema,
|
|
188
196
|
TextBlockSchema: () => TextBlockSchema,
|
|
189
197
|
ThinkingBlockSchema: () => ThinkingBlockSchema,
|
|
198
|
+
ThinkingVariantSchema: () => ThinkingVariantSchema,
|
|
190
199
|
TimeAssertionSchema: () => TimeAssertionSchema,
|
|
191
200
|
TimeConfigSchema: () => TimeConfigSchema,
|
|
192
201
|
TokenUsageSchema: () => TokenUsageSchema,
|
|
@@ -387,7 +396,8 @@ var ModelConfigSchema = import_zod4.z.object({
|
|
|
387
396
|
import_zod4.z.number().min(0).max(1).optional()
|
|
388
397
|
),
|
|
389
398
|
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
|
|
390
|
-
|
|
399
|
+
/** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
|
|
400
|
+
maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
|
|
391
401
|
});
|
|
392
402
|
|
|
393
403
|
// src/common/rule.ts
|
|
@@ -459,11 +469,23 @@ var AgentSchema = TargetSchema.extend({
|
|
|
459
469
|
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
460
470
|
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
461
471
|
runCommand: AgentRunCommandSchema.optional(),
|
|
462
|
-
/**
|
|
472
|
+
/**
|
|
473
|
+
* @deprecated Use `config` bag instead. Retained for backward compatibility
|
|
474
|
+
* with existing DB rows.
|
|
475
|
+
*/
|
|
463
476
|
modelConfig: ModelConfigSchema.optional(),
|
|
464
477
|
systemPrompt: import_zod6.z.string().nullish().describe(
|
|
465
478
|
"Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
|
|
466
|
-
)
|
|
479
|
+
),
|
|
480
|
+
/**
|
|
481
|
+
* Unified agent configuration bag. Absorbs model params (model,
|
|
482
|
+
* temperature, maxTokens, maxTurns) plus agent-specific settings
|
|
483
|
+
* (permissions, thinking tokens, allowed tools, etc.).
|
|
484
|
+
*
|
|
485
|
+
* Adapters read from `config` first, falling back to `modelConfig`
|
|
486
|
+
* for backward compatibility with existing DB rows.
|
|
487
|
+
*/
|
|
488
|
+
config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional()
|
|
467
489
|
});
|
|
468
490
|
var CreateAgentInputSchema = AgentSchema.omit({
|
|
469
491
|
id: true,
|
|
@@ -473,7 +495,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
|
|
|
473
495
|
});
|
|
474
496
|
var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
475
497
|
modelConfig: ModelConfigSchema.optional().nullable(),
|
|
476
|
-
systemPrompt: import_zod6.z.string().optional().nullable()
|
|
498
|
+
systemPrompt: import_zod6.z.string().optional().nullable(),
|
|
499
|
+
config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional().nullable()
|
|
477
500
|
});
|
|
478
501
|
|
|
479
502
|
// src/target/skill.ts
|
|
@@ -2137,8 +2160,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
2137
2160
|
});
|
|
2138
2161
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
2139
2162
|
|
|
2140
|
-
// src/
|
|
2163
|
+
// src/agent/agent-config.ts
|
|
2141
2164
|
var import_zod35 = require("zod");
|
|
2165
|
+
var BaseAgentConfigSchema = import_zod35.z.object({
|
|
2166
|
+
/** Model ID (Claude or OpenAI). */
|
|
2167
|
+
model: AnyModelSchema.optional(),
|
|
2168
|
+
/** Sampling temperature (0–1). */
|
|
2169
|
+
temperature: import_zod35.z.number().min(0).max(1).optional(),
|
|
2170
|
+
/** Max output tokens per turn. */
|
|
2171
|
+
maxTokens: import_zod35.z.number().int().min(1).optional(),
|
|
2172
|
+
/** Number of agentic turns. 0 = unlimited. */
|
|
2173
|
+
maxTurns: import_zod35.z.number().int().min(0).optional(),
|
|
2174
|
+
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
2175
|
+
maxDurationMs: import_zod35.z.number().int().min(0).optional()
|
|
2176
|
+
});
|
|
2177
|
+
var EffortLevelSchema = import_zod35.z.enum(["low", "medium", "high", "max"]);
|
|
2178
|
+
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2179
|
+
/** Extended thinking token budget. */
|
|
2180
|
+
maxThinkingTokens: import_zod35.z.number().int().min(0).optional(),
|
|
2181
|
+
/** Override the default allowedTools list passed to the SDK. */
|
|
2182
|
+
allowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
|
|
2183
|
+
/** Tools to remove from the model's context entirely. */
|
|
2184
|
+
disallowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
|
|
2185
|
+
/** Controls thinking depth: low, medium, high, max. */
|
|
2186
|
+
effort: EffortLevelSchema.optional(),
|
|
2187
|
+
/** Maximum USD spend per run. Stops execution when reached. */
|
|
2188
|
+
maxBudgetUsd: import_zod35.z.number().min(0).optional()
|
|
2189
|
+
});
|
|
2190
|
+
var PermissionValueSchema = import_zod35.z.enum(["allow", "deny"]);
|
|
2191
|
+
var OpenCodePermissionSchema = import_zod35.z.record(
|
|
2192
|
+
import_zod35.z.string(),
|
|
2193
|
+
import_zod35.z.union([PermissionValueSchema, import_zod35.z.record(import_zod35.z.string(), PermissionValueSchema)])
|
|
2194
|
+
);
|
|
2195
|
+
var ThinkingVariantSchema = import_zod35.z.enum(["high", "low", "none"]);
|
|
2196
|
+
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2197
|
+
/** Permission overrides (defaults: allow-all). */
|
|
2198
|
+
permission: OpenCodePermissionSchema.optional(),
|
|
2199
|
+
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
2200
|
+
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
2201
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2202
|
+
topP: import_zod35.z.number().min(0).max(1).optional()
|
|
2203
|
+
}).omit({ maxTokens: true });
|
|
2204
|
+
var ReasoningEffortSchema = import_zod35.z.enum(["low", "medium", "high"]);
|
|
2205
|
+
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
2206
|
+
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
2207
|
+
thinkingBudgetTokens: import_zod35.z.number().int().min(0).optional(),
|
|
2208
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2209
|
+
topP: import_zod35.z.number().min(0).max(1).optional(),
|
|
2210
|
+
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
2211
|
+
seed: import_zod35.z.number().int().optional(),
|
|
2212
|
+
/** Stop sequences — model stops when generating any of these strings. */
|
|
2213
|
+
stopSequences: import_zod35.z.array(import_zod35.z.string()).optional(),
|
|
2214
|
+
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
2215
|
+
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
2216
|
+
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
2217
|
+
frequencyPenalty: import_zod35.z.number().min(-2).max(2).optional(),
|
|
2218
|
+
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
2219
|
+
presencePenalty: import_zod35.z.number().min(-2).max(2).optional()
|
|
2220
|
+
});
|
|
2221
|
+
|
|
2222
|
+
// src/schedule/eval-schedule.ts
|
|
2223
|
+
var import_zod36 = require("zod");
|
|
2142
2224
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
2143
2225
|
FrequencyType2["DAILY"] = "daily";
|
|
2144
2226
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -2148,29 +2230,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
2148
2230
|
})(FrequencyType || {});
|
|
2149
2231
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
2150
2232
|
/** Whether the schedule is active */
|
|
2151
|
-
enabled:
|
|
2233
|
+
enabled: import_zod36.z.boolean(),
|
|
2152
2234
|
/** Test suite to run */
|
|
2153
|
-
suiteId:
|
|
2235
|
+
suiteId: import_zod36.z.string(),
|
|
2154
2236
|
/** Preset that provides agent + entities for this schedule */
|
|
2155
|
-
presetId:
|
|
2237
|
+
presetId: import_zod36.z.string(),
|
|
2156
2238
|
/** How often to run */
|
|
2157
|
-
frequencyType:
|
|
2239
|
+
frequencyType: import_zod36.z.nativeEnum(FrequencyType),
|
|
2158
2240
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
2159
|
-
timeOfDay:
|
|
2241
|
+
timeOfDay: import_zod36.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
2160
2242
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
2161
|
-
dayOfWeek:
|
|
2243
|
+
dayOfWeek: import_zod36.z.number().min(0).max(6).optional(),
|
|
2162
2244
|
/** Day of month (1-31) for monthly schedules */
|
|
2163
|
-
dayOfMonth:
|
|
2245
|
+
dayOfMonth: import_zod36.z.number().min(1).max(31).optional(),
|
|
2164
2246
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
2165
|
-
timezone:
|
|
2247
|
+
timezone: import_zod36.z.string(),
|
|
2166
2248
|
/** ID of the last eval run created by this schedule */
|
|
2167
|
-
lastRunId:
|
|
2249
|
+
lastRunId: import_zod36.z.string().optional(),
|
|
2168
2250
|
/** Denormalized status of the last run */
|
|
2169
|
-
lastRunStatus:
|
|
2251
|
+
lastRunStatus: import_zod36.z.string().optional(),
|
|
2170
2252
|
/** ISO timestamp of the last run */
|
|
2171
|
-
lastRunAt:
|
|
2253
|
+
lastRunAt: import_zod36.z.string().optional(),
|
|
2172
2254
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
2173
|
-
nextRunAt:
|
|
2255
|
+
nextRunAt: import_zod36.z.string().optional()
|
|
2174
2256
|
});
|
|
2175
2257
|
function isValidTimezone(tz) {
|
|
2176
2258
|
try {
|
|
@@ -2183,14 +2265,14 @@ function isValidTimezone(tz) {
|
|
|
2183
2265
|
function validateScheduleFields(data, ctx, options) {
|
|
2184
2266
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
2185
2267
|
ctx.addIssue({
|
|
2186
|
-
code:
|
|
2268
|
+
code: import_zod36.z.ZodIssueCode.custom,
|
|
2187
2269
|
message: "dayOfWeek is required for weekly schedules",
|
|
2188
2270
|
path: ["dayOfWeek"]
|
|
2189
2271
|
});
|
|
2190
2272
|
}
|
|
2191
2273
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
2192
2274
|
ctx.addIssue({
|
|
2193
|
-
code:
|
|
2275
|
+
code: import_zod36.z.ZodIssueCode.custom,
|
|
2194
2276
|
message: "dayOfMonth is required for monthly schedules",
|
|
2195
2277
|
path: ["dayOfMonth"]
|
|
2196
2278
|
});
|
|
@@ -2198,7 +2280,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
2198
2280
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
2199
2281
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
2200
2282
|
ctx.addIssue({
|
|
2201
|
-
code:
|
|
2283
|
+
code: import_zod36.z.ZodIssueCode.custom,
|
|
2202
2284
|
message: "Invalid IANA timezone",
|
|
2203
2285
|
path: ["timezone"]
|
|
2204
2286
|
});
|
|
@@ -2249,6 +2331,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2249
2331
|
AssertionSchema,
|
|
2250
2332
|
AssertionTypeSchema,
|
|
2251
2333
|
BATCH_IMPORT_LIMITS,
|
|
2334
|
+
BaseAgentConfigSchema,
|
|
2252
2335
|
BaseEntitySchema,
|
|
2253
2336
|
BaseTestSchema,
|
|
2254
2337
|
BatchAssertionLinkSchema,
|
|
@@ -2271,6 +2354,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2271
2354
|
CapabilityVersionOriginSchema,
|
|
2272
2355
|
CapabilityVersionSchema,
|
|
2273
2356
|
CapabilityWithLatestVersionSchema,
|
|
2357
|
+
ClaudeCodeConfigSchema,
|
|
2274
2358
|
ClaudeModel,
|
|
2275
2359
|
ClaudeModelSchema,
|
|
2276
2360
|
CommandExecutionSchema,
|
|
@@ -2303,6 +2387,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2303
2387
|
DiffContentSchema,
|
|
2304
2388
|
DiffLineSchema,
|
|
2305
2389
|
DiffLineTypeSchema,
|
|
2390
|
+
EffortLevelSchema,
|
|
2306
2391
|
EnvironmentSchema,
|
|
2307
2392
|
EvalMetricsSchema,
|
|
2308
2393
|
EvalRunFolderMembershipSchema,
|
|
@@ -2350,11 +2435,15 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2350
2435
|
OPENAI_RESPONSES_MODEL_IDS,
|
|
2351
2436
|
OpenAIModel,
|
|
2352
2437
|
OpenAIModelSchema,
|
|
2438
|
+
OpenCodeConfigSchema,
|
|
2439
|
+
OpenCodePermissionSchema,
|
|
2440
|
+
PermissionValueSchema,
|
|
2353
2441
|
PlaywrightNLTestSchema,
|
|
2354
2442
|
PresetSchema,
|
|
2355
2443
|
ProjectSchema,
|
|
2356
2444
|
PromptResultSchema,
|
|
2357
2445
|
RUN_COMMAND_LABELS,
|
|
2446
|
+
ReasoningEffortSchema,
|
|
2358
2447
|
RuleSchema,
|
|
2359
2448
|
RuleTypeSchema,
|
|
2360
2449
|
RunAnalysisFindingSchema,
|
|
@@ -2365,6 +2454,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2365
2454
|
SYSTEM_ASSERTION_IDS,
|
|
2366
2455
|
ScenarioAssertionLinkSchema,
|
|
2367
2456
|
ScenarioConversationSchema,
|
|
2457
|
+
SimpleAgentConfigSchema,
|
|
2368
2458
|
SiteConfigTestSchema,
|
|
2369
2459
|
SkillFileSchema,
|
|
2370
2460
|
SkillMetadataSchema,
|
|
@@ -2390,6 +2480,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2390
2480
|
TestTypeSchema,
|
|
2391
2481
|
TextBlockSchema,
|
|
2392
2482
|
ThinkingBlockSchema,
|
|
2483
|
+
ThinkingVariantSchema,
|
|
2393
2484
|
TimeAssertionSchema,
|
|
2394
2485
|
TimeConfigSchema,
|
|
2395
2486
|
TokenUsageSchema,
|