@wix/evalforge-types 0.85.0 → 0.87.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +74 -51
- package/build/index.js.map +3 -3
- package/build/index.mjs +72 -51
- package/build/index.mjs.map +3 -3
- package/build/types/common/models.d.ts +3 -3
- package/build/types/evaluation/eval-result.d.ts +3 -3
- package/build/types/evaluation/eval-run.d.ts +6 -6
- package/build/types/target/agent.d.ts +9 -9
- package/build/types/template/template.d.ts +58 -14
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -117,6 +117,7 @@ __export(index_exports, {
|
|
|
117
117
|
EvaluationResultSchema: () => EvaluationResultSchema,
|
|
118
118
|
ExecutionTraceSchema: () => ExecutionTraceSchema,
|
|
119
119
|
ExpectedFileSchema: () => ExpectedFileSchema,
|
|
120
|
+
ExtraFileSchema: () => ExtraFileSchema,
|
|
120
121
|
FileContentCheckSchema: () => FileContentCheckSchema,
|
|
121
122
|
FileContentTestSchema: () => FileContentTestSchema,
|
|
122
123
|
FileModificationSchema: () => FileModificationSchema,
|
|
@@ -180,6 +181,7 @@ __export(index_exports, {
|
|
|
180
181
|
SkillWasCalledAssertionSchema: () => SkillWasCalledAssertionSchema,
|
|
181
182
|
SkillWasCalledConfigSchema: () => SkillWasCalledConfigSchema,
|
|
182
183
|
SkillWithLatestVersionSchema: () => SkillWithLatestVersionSchema,
|
|
184
|
+
SourceFileSchema: () => SourceFileSchema,
|
|
183
185
|
SubAgentSchema: () => SubAgentSchema,
|
|
184
186
|
TRACE_EVENT_PREFIX: () => TRACE_EVENT_PREFIX,
|
|
185
187
|
TargetSchema: () => TargetSchema,
|
|
@@ -392,13 +394,10 @@ function normalizeModelId(modelId) {
|
|
|
392
394
|
var nullToUndefined = (val) => val === null ? void 0 : val;
|
|
393
395
|
var ModelConfigSchema = import_zod4.z.object({
|
|
394
396
|
model: AnyModelSchema,
|
|
395
|
-
temperature: import_zod4.z.preprocess(
|
|
396
|
-
|
|
397
|
-
import_zod4.z.number().min(0).max(1).optional()
|
|
398
|
-
),
|
|
399
|
-
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
|
|
397
|
+
temperature: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(0).max(1).optional()).optional(),
|
|
398
|
+
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()).optional(),
|
|
400
399
|
/** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
|
|
401
|
-
maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
|
|
400
|
+
maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional()).optional()
|
|
402
401
|
});
|
|
403
402
|
|
|
404
403
|
// src/common/rule.ts
|
|
@@ -2190,79 +2189,101 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
2190
2189
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
2191
2190
|
|
|
2192
2191
|
// src/template/template.ts
|
|
2192
|
+
var import_zod35 = require("zod");
|
|
2193
|
+
var SourceFileSchema = import_zod35.z.object({
|
|
2194
|
+
path: import_zod35.z.string().min(1),
|
|
2195
|
+
content: import_zod35.z.string()
|
|
2196
|
+
});
|
|
2197
|
+
var ExtraFileSchema = import_zod35.z.object({
|
|
2198
|
+
path: import_zod35.z.string().min(1),
|
|
2199
|
+
content: import_zod35.z.string().optional(),
|
|
2200
|
+
gitSource: GitHubSourceSchema.optional()
|
|
2201
|
+
}).refine((ef) => ef.content !== void 0 || ef.gitSource !== void 0, {
|
|
2202
|
+
message: "ExtraFile must have either content or gitSource"
|
|
2203
|
+
});
|
|
2193
2204
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
2194
|
-
|
|
2195
|
-
|
|
2205
|
+
source: GitHubSourceSchema.optional(),
|
|
2206
|
+
sourceFiles: import_zod35.z.array(SourceFileSchema).optional(),
|
|
2207
|
+
extraFiles: import_zod35.z.array(ExtraFileSchema).optional()
|
|
2196
2208
|
});
|
|
2209
|
+
var singleSourceKind = (t) => !(t.source && t.sourceFiles?.length);
|
|
2210
|
+
var singleSourceKindError = {
|
|
2211
|
+
message: "Set source or sourceFiles, not both"
|
|
2212
|
+
};
|
|
2197
2213
|
var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
2198
2214
|
id: true,
|
|
2199
2215
|
createdAt: true,
|
|
2200
2216
|
updatedAt: true,
|
|
2201
2217
|
deleted: true
|
|
2202
|
-
});
|
|
2203
|
-
var UpdateTemplateInputSchema =
|
|
2218
|
+
}).refine(singleSourceKind, singleSourceKindError);
|
|
2219
|
+
var UpdateTemplateInputSchema = TemplateSchema.omit({
|
|
2220
|
+
id: true,
|
|
2221
|
+
createdAt: true,
|
|
2222
|
+
updatedAt: true,
|
|
2223
|
+
deleted: true
|
|
2224
|
+
}).partial().refine(singleSourceKind, singleSourceKindError);
|
|
2204
2225
|
|
|
2205
2226
|
// src/agent/agent-config.ts
|
|
2206
|
-
var
|
|
2207
|
-
var BaseAgentConfigSchema =
|
|
2227
|
+
var import_zod36 = require("zod");
|
|
2228
|
+
var BaseAgentConfigSchema = import_zod36.z.object({
|
|
2208
2229
|
/** Model ID (Claude or OpenAI). */
|
|
2209
2230
|
model: AnyModelSchema.optional(),
|
|
2210
2231
|
/** Sampling temperature (0–1). */
|
|
2211
|
-
temperature:
|
|
2232
|
+
temperature: import_zod36.z.number().min(0).max(1).optional(),
|
|
2212
2233
|
/** Max output tokens per turn. */
|
|
2213
|
-
maxTokens:
|
|
2234
|
+
maxTokens: import_zod36.z.number().int().min(1).optional(),
|
|
2214
2235
|
/** Number of agentic turns. 0 = unlimited. */
|
|
2215
|
-
maxTurns:
|
|
2236
|
+
maxTurns: import_zod36.z.number().int().min(0).optional(),
|
|
2216
2237
|
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
2217
|
-
maxDurationMs:
|
|
2238
|
+
maxDurationMs: import_zod36.z.number().int().min(0).optional()
|
|
2218
2239
|
});
|
|
2219
|
-
var EffortLevelSchema =
|
|
2240
|
+
var EffortLevelSchema = import_zod36.z.enum(["low", "medium", "high", "max"]);
|
|
2220
2241
|
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2221
2242
|
/** Extended thinking token budget. */
|
|
2222
|
-
maxThinkingTokens:
|
|
2243
|
+
maxThinkingTokens: import_zod36.z.number().int().min(0).optional(),
|
|
2223
2244
|
/** Override the default allowedTools list passed to the SDK. */
|
|
2224
|
-
allowedTools:
|
|
2245
|
+
allowedTools: import_zod36.z.array(import_zod36.z.string()).optional(),
|
|
2225
2246
|
/** Tools to remove from the model's context entirely. */
|
|
2226
|
-
disallowedTools:
|
|
2247
|
+
disallowedTools: import_zod36.z.array(import_zod36.z.string()).optional(),
|
|
2227
2248
|
/** Controls thinking depth: low, medium, high, max. */
|
|
2228
2249
|
effort: EffortLevelSchema.optional(),
|
|
2229
2250
|
/** Maximum USD spend per run. Stops execution when reached. */
|
|
2230
|
-
maxBudgetUsd:
|
|
2251
|
+
maxBudgetUsd: import_zod36.z.number().min(0).optional()
|
|
2231
2252
|
});
|
|
2232
|
-
var PermissionValueSchema =
|
|
2233
|
-
var OpenCodePermissionSchema =
|
|
2234
|
-
|
|
2235
|
-
|
|
2253
|
+
var PermissionValueSchema = import_zod36.z.enum(["allow", "deny"]);
|
|
2254
|
+
var OpenCodePermissionSchema = import_zod36.z.record(
|
|
2255
|
+
import_zod36.z.string(),
|
|
2256
|
+
import_zod36.z.union([PermissionValueSchema, import_zod36.z.record(import_zod36.z.string(), PermissionValueSchema)])
|
|
2236
2257
|
);
|
|
2237
|
-
var ThinkingVariantSchema =
|
|
2258
|
+
var ThinkingVariantSchema = import_zod36.z.enum(["high", "low", "none"]);
|
|
2238
2259
|
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2239
2260
|
/** Permission overrides (defaults: allow-all). */
|
|
2240
2261
|
permission: OpenCodePermissionSchema.optional(),
|
|
2241
2262
|
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
2242
2263
|
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
2243
2264
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2244
|
-
topP:
|
|
2265
|
+
topP: import_zod36.z.number().min(0).max(1).optional()
|
|
2245
2266
|
}).omit({ maxTokens: true });
|
|
2246
|
-
var ReasoningEffortSchema =
|
|
2267
|
+
var ReasoningEffortSchema = import_zod36.z.enum(["low", "medium", "high"]);
|
|
2247
2268
|
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
2248
2269
|
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
2249
|
-
thinkingBudgetTokens:
|
|
2270
|
+
thinkingBudgetTokens: import_zod36.z.number().int().min(0).optional(),
|
|
2250
2271
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2251
|
-
topP:
|
|
2272
|
+
topP: import_zod36.z.number().min(0).max(1).optional(),
|
|
2252
2273
|
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
2253
|
-
seed:
|
|
2274
|
+
seed: import_zod36.z.number().int().optional(),
|
|
2254
2275
|
/** Stop sequences — model stops when generating any of these strings. */
|
|
2255
|
-
stopSequences:
|
|
2276
|
+
stopSequences: import_zod36.z.array(import_zod36.z.string()).optional(),
|
|
2256
2277
|
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
2257
2278
|
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
2258
2279
|
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
2259
|
-
frequencyPenalty:
|
|
2280
|
+
frequencyPenalty: import_zod36.z.number().min(-2).max(2).optional(),
|
|
2260
2281
|
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
2261
|
-
presencePenalty:
|
|
2282
|
+
presencePenalty: import_zod36.z.number().min(-2).max(2).optional()
|
|
2262
2283
|
});
|
|
2263
2284
|
|
|
2264
2285
|
// src/schedule/eval-schedule.ts
|
|
2265
|
-
var
|
|
2286
|
+
var import_zod37 = require("zod");
|
|
2266
2287
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
2267
2288
|
FrequencyType2["DAILY"] = "daily";
|
|
2268
2289
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -2272,31 +2293,31 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
2272
2293
|
})(FrequencyType || {});
|
|
2273
2294
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
2274
2295
|
/** Whether the schedule is active */
|
|
2275
|
-
enabled:
|
|
2296
|
+
enabled: import_zod37.z.boolean(),
|
|
2276
2297
|
/** Test suite to run */
|
|
2277
|
-
suiteId:
|
|
2298
|
+
suiteId: import_zod37.z.string(),
|
|
2278
2299
|
/** Preset that provides agent + entities for this schedule */
|
|
2279
|
-
presetId:
|
|
2300
|
+
presetId: import_zod37.z.string(),
|
|
2280
2301
|
/** How often to run */
|
|
2281
|
-
frequencyType:
|
|
2302
|
+
frequencyType: import_zod37.z.nativeEnum(FrequencyType),
|
|
2282
2303
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
2283
|
-
timeOfDay:
|
|
2304
|
+
timeOfDay: import_zod37.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
2284
2305
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
2285
|
-
dayOfWeek:
|
|
2306
|
+
dayOfWeek: import_zod37.z.number().min(0).max(6).optional(),
|
|
2286
2307
|
/** Day of month (1-31) for monthly schedules */
|
|
2287
|
-
dayOfMonth:
|
|
2308
|
+
dayOfMonth: import_zod37.z.number().min(1).max(31).optional(),
|
|
2288
2309
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
2289
|
-
timezone:
|
|
2310
|
+
timezone: import_zod37.z.string(),
|
|
2290
2311
|
/** ID of the last eval run created by this schedule */
|
|
2291
|
-
lastRunId:
|
|
2312
|
+
lastRunId: import_zod37.z.string().optional(),
|
|
2292
2313
|
/** Denormalized status of the last run */
|
|
2293
|
-
lastRunStatus:
|
|
2314
|
+
lastRunStatus: import_zod37.z.string().optional(),
|
|
2294
2315
|
/** ISO timestamp of the last run */
|
|
2295
|
-
lastRunAt:
|
|
2316
|
+
lastRunAt: import_zod37.z.string().optional(),
|
|
2296
2317
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
2297
|
-
nextRunAt:
|
|
2318
|
+
nextRunAt: import_zod37.z.string().optional(),
|
|
2298
2319
|
/** Per-scenario variable values forwarded to runs triggered by this schedule (scenarioId → varName → value) */
|
|
2299
|
-
variables:
|
|
2320
|
+
variables: import_zod37.z.record(import_zod37.z.string(), import_zod37.z.record(import_zod37.z.string(), import_zod37.z.string())).optional()
|
|
2300
2321
|
});
|
|
2301
2322
|
function isValidTimezone(tz) {
|
|
2302
2323
|
try {
|
|
@@ -2309,14 +2330,14 @@ function isValidTimezone(tz) {
|
|
|
2309
2330
|
function validateScheduleFields(data, ctx, options) {
|
|
2310
2331
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
2311
2332
|
ctx.addIssue({
|
|
2312
|
-
code:
|
|
2333
|
+
code: import_zod37.z.ZodIssueCode.custom,
|
|
2313
2334
|
message: "dayOfWeek is required for weekly schedules",
|
|
2314
2335
|
path: ["dayOfWeek"]
|
|
2315
2336
|
});
|
|
2316
2337
|
}
|
|
2317
2338
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
2318
2339
|
ctx.addIssue({
|
|
2319
|
-
code:
|
|
2340
|
+
code: import_zod37.z.ZodIssueCode.custom,
|
|
2320
2341
|
message: "dayOfMonth is required for monthly schedules",
|
|
2321
2342
|
path: ["dayOfMonth"]
|
|
2322
2343
|
});
|
|
@@ -2324,7 +2345,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
2324
2345
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
2325
2346
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
2326
2347
|
ctx.addIssue({
|
|
2327
|
-
code:
|
|
2348
|
+
code: import_zod37.z.ZodIssueCode.custom,
|
|
2328
2349
|
message: "Invalid IANA timezone",
|
|
2329
2350
|
path: ["timezone"]
|
|
2330
2351
|
});
|
|
@@ -2446,6 +2467,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2446
2467
|
EvaluationResultSchema,
|
|
2447
2468
|
ExecutionTraceSchema,
|
|
2448
2469
|
ExpectedFileSchema,
|
|
2470
|
+
ExtraFileSchema,
|
|
2449
2471
|
FileContentCheckSchema,
|
|
2450
2472
|
FileContentTestSchema,
|
|
2451
2473
|
FileModificationSchema,
|
|
@@ -2509,6 +2531,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2509
2531
|
SkillWasCalledAssertionSchema,
|
|
2510
2532
|
SkillWasCalledConfigSchema,
|
|
2511
2533
|
SkillWithLatestVersionSchema,
|
|
2534
|
+
SourceFileSchema,
|
|
2512
2535
|
SubAgentSchema,
|
|
2513
2536
|
TRACE_EVENT_PREFIX,
|
|
2514
2537
|
TargetSchema,
|