@wix/evalforge-types 0.78.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -46,6 +46,7 @@ __export(index_exports, {
46
46
  AssertionSchema: () => AssertionSchema,
47
47
  AssertionTypeSchema: () => AssertionTypeSchema,
48
48
  BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
49
+ BaseAgentConfigSchema: () => BaseAgentConfigSchema,
49
50
  BaseEntitySchema: () => BaseEntitySchema,
50
51
  BaseTestSchema: () => BaseTestSchema,
51
52
  BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
@@ -68,6 +69,7 @@ __export(index_exports, {
68
69
  CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
69
70
  CapabilityVersionSchema: () => CapabilityVersionSchema,
70
71
  CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
72
+ ClaudeCodeConfigSchema: () => ClaudeCodeConfigSchema,
71
73
  ClaudeModel: () => ClaudeModel,
72
74
  ClaudeModelSchema: () => ClaudeModelSchema,
73
75
  CommandExecutionSchema: () => CommandExecutionSchema,
@@ -100,6 +102,7 @@ __export(index_exports, {
100
102
  DiffContentSchema: () => DiffContentSchema,
101
103
  DiffLineSchema: () => DiffLineSchema,
102
104
  DiffLineTypeSchema: () => DiffLineTypeSchema,
105
+ EffortLevelSchema: () => EffortLevelSchema,
103
106
  EnvironmentSchema: () => EnvironmentSchema,
104
107
  EvalMetricsSchema: () => EvalMetricsSchema,
105
108
  EvalRunFolderMembershipSchema: () => EvalRunFolderMembershipSchema,
@@ -147,11 +150,15 @@ __export(index_exports, {
147
150
  OPENAI_RESPONSES_MODEL_IDS: () => OPENAI_RESPONSES_MODEL_IDS,
148
151
  OpenAIModel: () => OpenAIModel,
149
152
  OpenAIModelSchema: () => OpenAIModelSchema,
153
+ OpenCodeConfigSchema: () => OpenCodeConfigSchema,
154
+ OpenCodePermissionSchema: () => OpenCodePermissionSchema,
155
+ PermissionValueSchema: () => PermissionValueSchema,
150
156
  PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
151
157
  PresetSchema: () => PresetSchema,
152
158
  ProjectSchema: () => ProjectSchema,
153
159
  PromptResultSchema: () => PromptResultSchema,
154
160
  RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
161
+ ReasoningEffortSchema: () => ReasoningEffortSchema,
155
162
  RuleSchema: () => RuleSchema,
156
163
  RuleTypeSchema: () => RuleTypeSchema,
157
164
  RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
@@ -162,6 +169,7 @@ __export(index_exports, {
162
169
  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
163
170
  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
164
171
  ScenarioConversationSchema: () => ScenarioConversationSchema,
172
+ SimpleAgentConfigSchema: () => SimpleAgentConfigSchema,
165
173
  SiteConfigTestSchema: () => SiteConfigTestSchema,
166
174
  SkillFileSchema: () => SkillFileSchema,
167
175
  SkillMetadataSchema: () => SkillMetadataSchema,
@@ -187,6 +195,7 @@ __export(index_exports, {
187
195
  TestTypeSchema: () => TestTypeSchema,
188
196
  TextBlockSchema: () => TextBlockSchema,
189
197
  ThinkingBlockSchema: () => ThinkingBlockSchema,
198
+ ThinkingVariantSchema: () => ThinkingVariantSchema,
190
199
  TimeAssertionSchema: () => TimeAssertionSchema,
191
200
  TimeConfigSchema: () => TimeConfigSchema,
192
201
  TokenUsageSchema: () => TokenUsageSchema,
@@ -387,7 +396,8 @@ var ModelConfigSchema = import_zod4.z.object({
387
396
  import_zod4.z.number().min(0).max(1).optional()
388
397
  ),
389
398
  maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
390
- maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(1).optional())
399
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
400
+ maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
391
401
  });
392
402
 
393
403
  // src/common/rule.ts
@@ -459,11 +469,23 @@ var AgentSchema = TargetSchema.extend({
459
469
  agentType: AgentTypeSchema.default(AgentType.CLI),
460
470
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
461
471
  runCommand: AgentRunCommandSchema.optional(),
462
- /** Optional model configuration override */
472
+ /**
473
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
474
+ * with existing DB rows.
475
+ */
463
476
  modelConfig: ModelConfigSchema.optional(),
464
477
  systemPrompt: import_zod6.z.string().nullish().describe(
465
478
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
466
- )
479
+ ),
480
+ /**
481
+ * Unified agent configuration bag. Absorbs model params (model,
482
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
483
+ * (permissions, thinking tokens, allowed tools, etc.).
484
+ *
485
+ * Adapters read from `config` first, falling back to `modelConfig`
486
+ * for backward compatibility with existing DB rows.
487
+ */
488
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional()
467
489
  });
468
490
  var CreateAgentInputSchema = AgentSchema.omit({
469
491
  id: true,
@@ -473,7 +495,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
473
495
  });
474
496
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
475
497
  modelConfig: ModelConfigSchema.optional().nullable(),
476
- systemPrompt: import_zod6.z.string().optional().nullable()
498
+ systemPrompt: import_zod6.z.string().optional().nullable(),
499
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional().nullable()
477
500
  });
478
501
 
479
502
  // src/target/skill.ts
@@ -1871,14 +1894,18 @@ var EvalRunSchema = TenantEntitySchema.extend({
1871
1894
  agentType: AgentTypeSchema.optional(),
1872
1895
  runCommand: AgentRunCommandSchema.optional(),
1873
1896
  systemPrompt: import_zod30.z.string().nullable().optional(),
1874
- modelConfig: ModelConfigSchema.optional()
1897
+ /** @deprecated retained for backward compat with stored snapshots */
1898
+ modelConfig: ModelConfigSchema.optional(),
1899
+ config: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional()
1875
1900
  }).optional(),
1876
1901
  /** UUID linking all runs in a comparison group */
1877
1902
  comparisonGroupId: import_zod30.z.string().optional(),
1878
1903
  /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1879
1904
  comparisonLabel: import_zod30.z.string().optional(),
1880
1905
  /** LLM-generated analysis of the completed run */
1881
- runAnalysis: RunAnalysisSchema.optional()
1906
+ runAnalysis: RunAnalysisSchema.optional(),
1907
+ /** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
1908
+ folderIds: import_zod30.z.array(import_zod30.z.string()).optional()
1882
1909
  });
1883
1910
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1884
1911
  id: true,
@@ -2137,8 +2164,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
2137
2164
  });
2138
2165
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2139
2166
 
2140
- // src/schedule/eval-schedule.ts
2167
+ // src/agent/agent-config.ts
2141
2168
  var import_zod35 = require("zod");
2169
+ var BaseAgentConfigSchema = import_zod35.z.object({
2170
+ /** Model ID (Claude or OpenAI). */
2171
+ model: AnyModelSchema.optional(),
2172
+ /** Sampling temperature (0–1). */
2173
+ temperature: import_zod35.z.number().min(0).max(1).optional(),
2174
+ /** Max output tokens per turn. */
2175
+ maxTokens: import_zod35.z.number().int().min(1).optional(),
2176
+ /** Number of agentic turns. 0 = unlimited. */
2177
+ maxTurns: import_zod35.z.number().int().min(0).optional(),
2178
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
2179
+ maxDurationMs: import_zod35.z.number().int().min(0).optional()
2180
+ });
2181
+ var EffortLevelSchema = import_zod35.z.enum(["low", "medium", "high", "max"]);
2182
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
2183
+ /** Extended thinking token budget. */
2184
+ maxThinkingTokens: import_zod35.z.number().int().min(0).optional(),
2185
+ /** Override the default allowedTools list passed to the SDK. */
2186
+ allowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2187
+ /** Tools to remove from the model's context entirely. */
2188
+ disallowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2189
+ /** Controls thinking depth: low, medium, high, max. */
2190
+ effort: EffortLevelSchema.optional(),
2191
+ /** Maximum USD spend per run. Stops execution when reached. */
2192
+ maxBudgetUsd: import_zod35.z.number().min(0).optional()
2193
+ });
2194
+ var PermissionValueSchema = import_zod35.z.enum(["allow", "deny"]);
2195
+ var OpenCodePermissionSchema = import_zod35.z.record(
2196
+ import_zod35.z.string(),
2197
+ import_zod35.z.union([PermissionValueSchema, import_zod35.z.record(import_zod35.z.string(), PermissionValueSchema)])
2198
+ );
2199
+ var ThinkingVariantSchema = import_zod35.z.enum(["high", "low", "none"]);
2200
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
2201
+ /** Permission overrides (defaults: allow-all). */
2202
+ permission: OpenCodePermissionSchema.optional(),
2203
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
2204
+ thinkingVariant: ThinkingVariantSchema.optional(),
2205
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2206
+ topP: import_zod35.z.number().min(0).max(1).optional()
2207
+ }).omit({ maxTokens: true });
2208
+ var ReasoningEffortSchema = import_zod35.z.enum(["low", "medium", "high"]);
2209
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
2210
+ /** Anthropic thinking budget in tokens. Default: 10 000. */
2211
+ thinkingBudgetTokens: import_zod35.z.number().int().min(0).optional(),
2212
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2213
+ topP: import_zod35.z.number().min(0).max(1).optional(),
2214
+ /** Integer seed for deterministic/reproducible results (if model supports it). */
2215
+ seed: import_zod35.z.number().int().optional(),
2216
+ /** Stop sequences — model stops when generating any of these strings. */
2217
+ stopSequences: import_zod35.z.array(import_zod35.z.string()).optional(),
2218
+ /** OpenAI reasoning effort level. Default: 'high'. */
2219
+ reasoningEffort: ReasoningEffortSchema.optional(),
2220
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
2221
+ frequencyPenalty: import_zod35.z.number().min(-2).max(2).optional(),
2222
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
2223
+ presencePenalty: import_zod35.z.number().min(-2).max(2).optional()
2224
+ });
2225
+
2226
+ // src/schedule/eval-schedule.ts
2227
+ var import_zod36 = require("zod");
2142
2228
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2143
2229
  FrequencyType2["DAILY"] = "daily";
2144
2230
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -2148,29 +2234,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2148
2234
  })(FrequencyType || {});
2149
2235
  var EvalScheduleSchema = TenantEntitySchema.extend({
2150
2236
  /** Whether the schedule is active */
2151
- enabled: import_zod35.z.boolean(),
2237
+ enabled: import_zod36.z.boolean(),
2152
2238
  /** Test suite to run */
2153
- suiteId: import_zod35.z.string(),
2239
+ suiteId: import_zod36.z.string(),
2154
2240
  /** Preset that provides agent + entities for this schedule */
2155
- presetId: import_zod35.z.string(),
2241
+ presetId: import_zod36.z.string(),
2156
2242
  /** How often to run */
2157
- frequencyType: import_zod35.z.nativeEnum(FrequencyType),
2243
+ frequencyType: import_zod36.z.nativeEnum(FrequencyType),
2158
2244
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
2159
- timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2245
+ timeOfDay: import_zod36.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2160
2246
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
2161
- dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
2247
+ dayOfWeek: import_zod36.z.number().min(0).max(6).optional(),
2162
2248
  /** Day of month (1-31) for monthly schedules */
2163
- dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
2249
+ dayOfMonth: import_zod36.z.number().min(1).max(31).optional(),
2164
2250
  /** IANA timezone (e.g., 'America/New_York') */
2165
- timezone: import_zod35.z.string(),
2251
+ timezone: import_zod36.z.string(),
2166
2252
  /** ID of the last eval run created by this schedule */
2167
- lastRunId: import_zod35.z.string().optional(),
2253
+ lastRunId: import_zod36.z.string().optional(),
2168
2254
  /** Denormalized status of the last run */
2169
- lastRunStatus: import_zod35.z.string().optional(),
2255
+ lastRunStatus: import_zod36.z.string().optional(),
2170
2256
  /** ISO timestamp of the last run */
2171
- lastRunAt: import_zod35.z.string().optional(),
2257
+ lastRunAt: import_zod36.z.string().optional(),
2172
2258
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
2173
- nextRunAt: import_zod35.z.string().optional()
2259
+ nextRunAt: import_zod36.z.string().optional()
2174
2260
  });
2175
2261
  function isValidTimezone(tz) {
2176
2262
  try {
@@ -2183,14 +2269,14 @@ function isValidTimezone(tz) {
2183
2269
  function validateScheduleFields(data, ctx, options) {
2184
2270
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
2185
2271
  ctx.addIssue({
2186
- code: import_zod35.z.ZodIssueCode.custom,
2272
+ code: import_zod36.z.ZodIssueCode.custom,
2187
2273
  message: "dayOfWeek is required for weekly schedules",
2188
2274
  path: ["dayOfWeek"]
2189
2275
  });
2190
2276
  }
2191
2277
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
2192
2278
  ctx.addIssue({
2193
- code: import_zod35.z.ZodIssueCode.custom,
2279
+ code: import_zod36.z.ZodIssueCode.custom,
2194
2280
  message: "dayOfMonth is required for monthly schedules",
2195
2281
  path: ["dayOfMonth"]
2196
2282
  });
@@ -2198,7 +2284,7 @@ function validateScheduleFields(data, ctx, options) {
2198
2284
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
2199
2285
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
2200
2286
  ctx.addIssue({
2201
- code: import_zod35.z.ZodIssueCode.custom,
2287
+ code: import_zod36.z.ZodIssueCode.custom,
2202
2288
  message: "Invalid IANA timezone",
2203
2289
  path: ["timezone"]
2204
2290
  });
@@ -2249,6 +2335,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2249
2335
  AssertionSchema,
2250
2336
  AssertionTypeSchema,
2251
2337
  BATCH_IMPORT_LIMITS,
2338
+ BaseAgentConfigSchema,
2252
2339
  BaseEntitySchema,
2253
2340
  BaseTestSchema,
2254
2341
  BatchAssertionLinkSchema,
@@ -2271,6 +2358,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2271
2358
  CapabilityVersionOriginSchema,
2272
2359
  CapabilityVersionSchema,
2273
2360
  CapabilityWithLatestVersionSchema,
2361
+ ClaudeCodeConfigSchema,
2274
2362
  ClaudeModel,
2275
2363
  ClaudeModelSchema,
2276
2364
  CommandExecutionSchema,
@@ -2303,6 +2391,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2303
2391
  DiffContentSchema,
2304
2392
  DiffLineSchema,
2305
2393
  DiffLineTypeSchema,
2394
+ EffortLevelSchema,
2306
2395
  EnvironmentSchema,
2307
2396
  EvalMetricsSchema,
2308
2397
  EvalRunFolderMembershipSchema,
@@ -2350,11 +2439,15 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2350
2439
  OPENAI_RESPONSES_MODEL_IDS,
2351
2440
  OpenAIModel,
2352
2441
  OpenAIModelSchema,
2442
+ OpenCodeConfigSchema,
2443
+ OpenCodePermissionSchema,
2444
+ PermissionValueSchema,
2353
2445
  PlaywrightNLTestSchema,
2354
2446
  PresetSchema,
2355
2447
  ProjectSchema,
2356
2448
  PromptResultSchema,
2357
2449
  RUN_COMMAND_LABELS,
2450
+ ReasoningEffortSchema,
2358
2451
  RuleSchema,
2359
2452
  RuleTypeSchema,
2360
2453
  RunAnalysisFindingSchema,
@@ -2365,6 +2458,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2365
2458
  SYSTEM_ASSERTION_IDS,
2366
2459
  ScenarioAssertionLinkSchema,
2367
2460
  ScenarioConversationSchema,
2461
+ SimpleAgentConfigSchema,
2368
2462
  SiteConfigTestSchema,
2369
2463
  SkillFileSchema,
2370
2464
  SkillMetadataSchema,
@@ -2390,6 +2484,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2390
2484
  TestTypeSchema,
2391
2485
  TextBlockSchema,
2392
2486
  ThinkingBlockSchema,
2487
+ ThinkingVariantSchema,
2393
2488
  TimeAssertionSchema,
2394
2489
  TimeConfigSchema,
2395
2490
  TokenUsageSchema,