@wix/evalforge-types 0.78.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -46,6 +46,7 @@ __export(index_exports, {
46
46
  AssertionSchema: () => AssertionSchema,
47
47
  AssertionTypeSchema: () => AssertionTypeSchema,
48
48
  BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
49
+ BaseAgentConfigSchema: () => BaseAgentConfigSchema,
49
50
  BaseEntitySchema: () => BaseEntitySchema,
50
51
  BaseTestSchema: () => BaseTestSchema,
51
52
  BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
@@ -68,6 +69,7 @@ __export(index_exports, {
68
69
  CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
69
70
  CapabilityVersionSchema: () => CapabilityVersionSchema,
70
71
  CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
72
+ ClaudeCodeConfigSchema: () => ClaudeCodeConfigSchema,
71
73
  ClaudeModel: () => ClaudeModel,
72
74
  ClaudeModelSchema: () => ClaudeModelSchema,
73
75
  CommandExecutionSchema: () => CommandExecutionSchema,
@@ -100,6 +102,7 @@ __export(index_exports, {
100
102
  DiffContentSchema: () => DiffContentSchema,
101
103
  DiffLineSchema: () => DiffLineSchema,
102
104
  DiffLineTypeSchema: () => DiffLineTypeSchema,
105
+ EffortLevelSchema: () => EffortLevelSchema,
103
106
  EnvironmentSchema: () => EnvironmentSchema,
104
107
  EvalMetricsSchema: () => EvalMetricsSchema,
105
108
  EvalRunFolderMembershipSchema: () => EvalRunFolderMembershipSchema,
@@ -147,11 +150,15 @@ __export(index_exports, {
147
150
  OPENAI_RESPONSES_MODEL_IDS: () => OPENAI_RESPONSES_MODEL_IDS,
148
151
  OpenAIModel: () => OpenAIModel,
149
152
  OpenAIModelSchema: () => OpenAIModelSchema,
153
+ OpenCodeConfigSchema: () => OpenCodeConfigSchema,
154
+ OpenCodePermissionSchema: () => OpenCodePermissionSchema,
155
+ PermissionValueSchema: () => PermissionValueSchema,
150
156
  PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
151
157
  PresetSchema: () => PresetSchema,
152
158
  ProjectSchema: () => ProjectSchema,
153
159
  PromptResultSchema: () => PromptResultSchema,
154
160
  RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
161
+ ReasoningEffortSchema: () => ReasoningEffortSchema,
155
162
  RuleSchema: () => RuleSchema,
156
163
  RuleTypeSchema: () => RuleTypeSchema,
157
164
  RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
@@ -162,6 +169,7 @@ __export(index_exports, {
162
169
  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
163
170
  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
164
171
  ScenarioConversationSchema: () => ScenarioConversationSchema,
172
+ SimpleAgentConfigSchema: () => SimpleAgentConfigSchema,
165
173
  SiteConfigTestSchema: () => SiteConfigTestSchema,
166
174
  SkillFileSchema: () => SkillFileSchema,
167
175
  SkillMetadataSchema: () => SkillMetadataSchema,
@@ -187,6 +195,7 @@ __export(index_exports, {
187
195
  TestTypeSchema: () => TestTypeSchema,
188
196
  TextBlockSchema: () => TextBlockSchema,
189
197
  ThinkingBlockSchema: () => ThinkingBlockSchema,
198
+ ThinkingVariantSchema: () => ThinkingVariantSchema,
190
199
  TimeAssertionSchema: () => TimeAssertionSchema,
191
200
  TimeConfigSchema: () => TimeConfigSchema,
192
201
  TokenUsageSchema: () => TokenUsageSchema,
@@ -387,7 +396,8 @@ var ModelConfigSchema = import_zod4.z.object({
387
396
  import_zod4.z.number().min(0).max(1).optional()
388
397
  ),
389
398
  maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
390
- maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(1).optional())
399
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
400
+ maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
391
401
  });
392
402
 
393
403
  // src/common/rule.ts
@@ -459,11 +469,23 @@ var AgentSchema = TargetSchema.extend({
459
469
  agentType: AgentTypeSchema.default(AgentType.CLI),
460
470
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
461
471
  runCommand: AgentRunCommandSchema.optional(),
462
- /** Optional model configuration override */
472
+ /**
473
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
474
+ * with existing DB rows.
475
+ */
463
476
  modelConfig: ModelConfigSchema.optional(),
464
477
  systemPrompt: import_zod6.z.string().nullish().describe(
465
478
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
466
- )
479
+ ),
480
+ /**
481
+ * Unified agent configuration bag. Absorbs model params (model,
482
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
483
+ * (permissions, thinking tokens, allowed tools, etc.).
484
+ *
485
+ * Adapters read from `config` first, falling back to `modelConfig`
486
+ * for backward compatibility with existing DB rows.
487
+ */
488
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional()
467
489
  });
468
490
  var CreateAgentInputSchema = AgentSchema.omit({
469
491
  id: true,
@@ -473,7 +495,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
473
495
  });
474
496
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
475
497
  modelConfig: ModelConfigSchema.optional().nullable(),
476
- systemPrompt: import_zod6.z.string().optional().nullable()
498
+ systemPrompt: import_zod6.z.string().optional().nullable(),
499
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional().nullable()
477
500
  });
478
501
 
479
502
  // src/target/skill.ts
@@ -2137,8 +2160,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
2137
2160
  });
2138
2161
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2139
2162
 
2140
- // src/schedule/eval-schedule.ts
2163
+ // src/agent/agent-config.ts
2141
2164
  var import_zod35 = require("zod");
2165
+ var BaseAgentConfigSchema = import_zod35.z.object({
2166
+ /** Model ID (Claude or OpenAI). */
2167
+ model: AnyModelSchema.optional(),
2168
+ /** Sampling temperature (0–1). */
2169
+ temperature: import_zod35.z.number().min(0).max(1).optional(),
2170
+ /** Max output tokens per turn. */
2171
+ maxTokens: import_zod35.z.number().int().min(1).optional(),
2172
+ /** Number of agentic turns. 0 = unlimited. */
2173
+ maxTurns: import_zod35.z.number().int().min(0).optional(),
2174
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
2175
+ maxDurationMs: import_zod35.z.number().int().min(0).optional()
2176
+ });
2177
+ var EffortLevelSchema = import_zod35.z.enum(["low", "medium", "high", "max"]);
2178
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
2179
+ /** Extended thinking token budget. */
2180
+ maxThinkingTokens: import_zod35.z.number().int().min(0).optional(),
2181
+ /** Override the default allowedTools list passed to the SDK. */
2182
+ allowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2183
+ /** Tools to remove from the model's context entirely. */
2184
+ disallowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2185
+ /** Controls thinking depth: low, medium, high, max. */
2186
+ effort: EffortLevelSchema.optional(),
2187
+ /** Maximum USD spend per run. Stops execution when reached. */
2188
+ maxBudgetUsd: import_zod35.z.number().min(0).optional()
2189
+ });
2190
+ var PermissionValueSchema = import_zod35.z.enum(["allow", "deny"]);
2191
+ var OpenCodePermissionSchema = import_zod35.z.record(
2192
+ import_zod35.z.string(),
2193
+ import_zod35.z.union([PermissionValueSchema, import_zod35.z.record(import_zod35.z.string(), PermissionValueSchema)])
2194
+ );
2195
+ var ThinkingVariantSchema = import_zod35.z.enum(["high", "low", "none"]);
2196
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
2197
+ /** Permission overrides (defaults: allow-all). */
2198
+ permission: OpenCodePermissionSchema.optional(),
2199
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
2200
+ thinkingVariant: ThinkingVariantSchema.optional(),
2201
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2202
+ topP: import_zod35.z.number().min(0).max(1).optional()
2203
+ }).omit({ maxTokens: true });
2204
+ var ReasoningEffortSchema = import_zod35.z.enum(["low", "medium", "high"]);
2205
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
2206
+ /** Anthropic thinking budget in tokens. Default: 10 000. */
2207
+ thinkingBudgetTokens: import_zod35.z.number().int().min(0).optional(),
2208
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2209
+ topP: import_zod35.z.number().min(0).max(1).optional(),
2210
+ /** Integer seed for deterministic/reproducible results (if model supports it). */
2211
+ seed: import_zod35.z.number().int().optional(),
2212
+ /** Stop sequences — model stops when generating any of these strings. */
2213
+ stopSequences: import_zod35.z.array(import_zod35.z.string()).optional(),
2214
+ /** OpenAI reasoning effort level. Default: 'high'. */
2215
+ reasoningEffort: ReasoningEffortSchema.optional(),
2216
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
2217
+ frequencyPenalty: import_zod35.z.number().min(-2).max(2).optional(),
2218
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
2219
+ presencePenalty: import_zod35.z.number().min(-2).max(2).optional()
2220
+ });
2221
+
2222
+ // src/schedule/eval-schedule.ts
2223
+ var import_zod36 = require("zod");
2142
2224
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2143
2225
  FrequencyType2["DAILY"] = "daily";
2144
2226
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -2148,29 +2230,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2148
2230
  })(FrequencyType || {});
2149
2231
  var EvalScheduleSchema = TenantEntitySchema.extend({
2150
2232
  /** Whether the schedule is active */
2151
- enabled: import_zod35.z.boolean(),
2233
+ enabled: import_zod36.z.boolean(),
2152
2234
  /** Test suite to run */
2153
- suiteId: import_zod35.z.string(),
2235
+ suiteId: import_zod36.z.string(),
2154
2236
  /** Preset that provides agent + entities for this schedule */
2155
- presetId: import_zod35.z.string(),
2237
+ presetId: import_zod36.z.string(),
2156
2238
  /** How often to run */
2157
- frequencyType: import_zod35.z.nativeEnum(FrequencyType),
2239
+ frequencyType: import_zod36.z.nativeEnum(FrequencyType),
2158
2240
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
2159
- timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2241
+ timeOfDay: import_zod36.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2160
2242
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
2161
- dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
2243
+ dayOfWeek: import_zod36.z.number().min(0).max(6).optional(),
2162
2244
  /** Day of month (1-31) for monthly schedules */
2163
- dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
2245
+ dayOfMonth: import_zod36.z.number().min(1).max(31).optional(),
2164
2246
  /** IANA timezone (e.g., 'America/New_York') */
2165
- timezone: import_zod35.z.string(),
2247
+ timezone: import_zod36.z.string(),
2166
2248
  /** ID of the last eval run created by this schedule */
2167
- lastRunId: import_zod35.z.string().optional(),
2249
+ lastRunId: import_zod36.z.string().optional(),
2168
2250
  /** Denormalized status of the last run */
2169
- lastRunStatus: import_zod35.z.string().optional(),
2251
+ lastRunStatus: import_zod36.z.string().optional(),
2170
2252
  /** ISO timestamp of the last run */
2171
- lastRunAt: import_zod35.z.string().optional(),
2253
+ lastRunAt: import_zod36.z.string().optional(),
2172
2254
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
2173
- nextRunAt: import_zod35.z.string().optional()
2255
+ nextRunAt: import_zod36.z.string().optional()
2174
2256
  });
2175
2257
  function isValidTimezone(tz) {
2176
2258
  try {
@@ -2183,14 +2265,14 @@ function isValidTimezone(tz) {
2183
2265
  function validateScheduleFields(data, ctx, options) {
2184
2266
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
2185
2267
  ctx.addIssue({
2186
- code: import_zod35.z.ZodIssueCode.custom,
2268
+ code: import_zod36.z.ZodIssueCode.custom,
2187
2269
  message: "dayOfWeek is required for weekly schedules",
2188
2270
  path: ["dayOfWeek"]
2189
2271
  });
2190
2272
  }
2191
2273
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
2192
2274
  ctx.addIssue({
2193
- code: import_zod35.z.ZodIssueCode.custom,
2275
+ code: import_zod36.z.ZodIssueCode.custom,
2194
2276
  message: "dayOfMonth is required for monthly schedules",
2195
2277
  path: ["dayOfMonth"]
2196
2278
  });
@@ -2198,7 +2280,7 @@ function validateScheduleFields(data, ctx, options) {
2198
2280
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
2199
2281
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
2200
2282
  ctx.addIssue({
2201
- code: import_zod35.z.ZodIssueCode.custom,
2283
+ code: import_zod36.z.ZodIssueCode.custom,
2202
2284
  message: "Invalid IANA timezone",
2203
2285
  path: ["timezone"]
2204
2286
  });
@@ -2249,6 +2331,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2249
2331
  AssertionSchema,
2250
2332
  AssertionTypeSchema,
2251
2333
  BATCH_IMPORT_LIMITS,
2334
+ BaseAgentConfigSchema,
2252
2335
  BaseEntitySchema,
2253
2336
  BaseTestSchema,
2254
2337
  BatchAssertionLinkSchema,
@@ -2271,6 +2354,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2271
2354
  CapabilityVersionOriginSchema,
2272
2355
  CapabilityVersionSchema,
2273
2356
  CapabilityWithLatestVersionSchema,
2357
+ ClaudeCodeConfigSchema,
2274
2358
  ClaudeModel,
2275
2359
  ClaudeModelSchema,
2276
2360
  CommandExecutionSchema,
@@ -2303,6 +2387,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2303
2387
  DiffContentSchema,
2304
2388
  DiffLineSchema,
2305
2389
  DiffLineTypeSchema,
2390
+ EffortLevelSchema,
2306
2391
  EnvironmentSchema,
2307
2392
  EvalMetricsSchema,
2308
2393
  EvalRunFolderMembershipSchema,
@@ -2350,11 +2435,15 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2350
2435
  OPENAI_RESPONSES_MODEL_IDS,
2351
2436
  OpenAIModel,
2352
2437
  OpenAIModelSchema,
2438
+ OpenCodeConfigSchema,
2439
+ OpenCodePermissionSchema,
2440
+ PermissionValueSchema,
2353
2441
  PlaywrightNLTestSchema,
2354
2442
  PresetSchema,
2355
2443
  ProjectSchema,
2356
2444
  PromptResultSchema,
2357
2445
  RUN_COMMAND_LABELS,
2446
+ ReasoningEffortSchema,
2358
2447
  RuleSchema,
2359
2448
  RuleTypeSchema,
2360
2449
  RunAnalysisFindingSchema,
@@ -2365,6 +2454,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2365
2454
  SYSTEM_ASSERTION_IDS,
2366
2455
  ScenarioAssertionLinkSchema,
2367
2456
  ScenarioConversationSchema,
2457
+ SimpleAgentConfigSchema,
2368
2458
  SiteConfigTestSchema,
2369
2459
  SkillFileSchema,
2370
2460
  SkillMetadataSchema,
@@ -2390,6 +2480,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2390
2480
  TestTypeSchema,
2391
2481
  TextBlockSchema,
2392
2482
  ThinkingBlockSchema,
2483
+ ThinkingVariantSchema,
2393
2484
  TimeAssertionSchema,
2394
2485
  TimeConfigSchema,
2395
2486
  TokenUsageSchema,