@wix/evalforge-types 0.77.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -24,6 +24,7 @@ __export(index_exports, {
24
24
  ALLOWED_BUILD_COMMANDS: () => ALLOWED_BUILD_COMMANDS,
25
25
  ALL_AVAILABLE_MODEL_IDS: () => ALL_AVAILABLE_MODEL_IDS,
26
26
  AVAILABLE_CLAUDE_MODEL_IDS: () => AVAILABLE_CLAUDE_MODEL_IDS,
27
+ AVAILABLE_GEMINI_MODEL_IDS: () => AVAILABLE_GEMINI_MODEL_IDS,
27
28
  AVAILABLE_OPENAI_MODEL_IDS: () => AVAILABLE_OPENAI_MODEL_IDS,
28
29
  AVAILABLE_RUN_COMMANDS: () => AVAILABLE_RUN_COMMANDS,
29
30
  AVAILABLE_TOOL_NAMES: () => AVAILABLE_TOOL_NAMES,
@@ -45,6 +46,7 @@ __export(index_exports, {
45
46
  AssertionSchema: () => AssertionSchema,
46
47
  AssertionTypeSchema: () => AssertionTypeSchema,
47
48
  BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
49
+ BaseAgentConfigSchema: () => BaseAgentConfigSchema,
48
50
  BaseEntitySchema: () => BaseEntitySchema,
49
51
  BaseTestSchema: () => BaseTestSchema,
50
52
  BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
@@ -67,6 +69,7 @@ __export(index_exports, {
67
69
  CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
68
70
  CapabilityVersionSchema: () => CapabilityVersionSchema,
69
71
  CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
72
+ ClaudeCodeConfigSchema: () => ClaudeCodeConfigSchema,
70
73
  ClaudeModel: () => ClaudeModel,
71
74
  ClaudeModelSchema: () => ClaudeModelSchema,
72
75
  CommandExecutionSchema: () => CommandExecutionSchema,
@@ -99,6 +102,7 @@ __export(index_exports, {
99
102
  DiffContentSchema: () => DiffContentSchema,
100
103
  DiffLineSchema: () => DiffLineSchema,
101
104
  DiffLineTypeSchema: () => DiffLineTypeSchema,
105
+ EffortLevelSchema: () => EffortLevelSchema,
102
106
  EnvironmentSchema: () => EnvironmentSchema,
103
107
  EvalMetricsSchema: () => EvalMetricsSchema,
104
108
  EvalRunFolderMembershipSchema: () => EvalRunFolderMembershipSchema,
@@ -118,6 +122,9 @@ __export(index_exports, {
118
122
  FileModificationSchema: () => FileModificationSchema,
119
123
  FilePresenceTestSchema: () => FilePresenceTestSchema,
120
124
  FrequencyType: () => FrequencyType,
125
+ GEMINI_THINKING_MODEL_IDS: () => GEMINI_THINKING_MODEL_IDS,
126
+ GeminiModel: () => GeminiModel,
127
+ GeminiModelSchema: () => GeminiModelSchema,
121
128
  GitHubSourceSchema: () => GitHubSourceSchema,
122
129
  InitialCapabilityVersionInputSchema: () => InitialCapabilityVersionInputSchema,
123
130
  InitialVersionInputSchema: () => InitialVersionInputSchema,
@@ -143,11 +150,15 @@ __export(index_exports, {
143
150
  OPENAI_RESPONSES_MODEL_IDS: () => OPENAI_RESPONSES_MODEL_IDS,
144
151
  OpenAIModel: () => OpenAIModel,
145
152
  OpenAIModelSchema: () => OpenAIModelSchema,
153
+ OpenCodeConfigSchema: () => OpenCodeConfigSchema,
154
+ OpenCodePermissionSchema: () => OpenCodePermissionSchema,
155
+ PermissionValueSchema: () => PermissionValueSchema,
146
156
  PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
147
157
  PresetSchema: () => PresetSchema,
148
158
  ProjectSchema: () => ProjectSchema,
149
159
  PromptResultSchema: () => PromptResultSchema,
150
160
  RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
161
+ ReasoningEffortSchema: () => ReasoningEffortSchema,
151
162
  RuleSchema: () => RuleSchema,
152
163
  RuleTypeSchema: () => RuleTypeSchema,
153
164
  RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
@@ -158,6 +169,7 @@ __export(index_exports, {
158
169
  SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
159
170
  ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
160
171
  ScenarioConversationSchema: () => ScenarioConversationSchema,
172
+ SimpleAgentConfigSchema: () => SimpleAgentConfigSchema,
161
173
  SiteConfigTestSchema: () => SiteConfigTestSchema,
162
174
  SkillFileSchema: () => SkillFileSchema,
163
175
  SkillMetadataSchema: () => SkillMetadataSchema,
@@ -183,6 +195,7 @@ __export(index_exports, {
183
195
  TestTypeSchema: () => TestTypeSchema,
184
196
  TextBlockSchema: () => TextBlockSchema,
185
197
  ThinkingBlockSchema: () => ThinkingBlockSchema,
198
+ ThinkingVariantSchema: () => ThinkingVariantSchema,
186
199
  TimeAssertionSchema: () => TimeAssertionSchema,
187
200
  TimeConfigSchema: () => TimeConfigSchema,
188
201
  TokenUsageSchema: () => TokenUsageSchema,
@@ -320,9 +333,31 @@ var OPENAI_RESPONSES_MODEL_IDS = /* @__PURE__ */ new Set([
320
333
  var OpenAIModelSchema = import_zod4.z.enum(
321
334
  AVAILABLE_OPENAI_MODEL_IDS
322
335
  );
336
+ var GeminiModel = { // Gemini model ID strings, keyed by constant name.
337
+ GEMINI_2_0_FLASH: "gemini-2.0-flash",
338
+ GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
339
+ GEMINI_2_5_PRO: "gemini-2.5-pro",
340
+ GEMINI_2_5_FLASH: "gemini-2.5-flash",
341
+ GEMINI_2_5_FLASH_LITE: "gemini-2.5-flash-lite",
342
+ GEMINI_3_0_PRO: "gemini-3-pro-preview", // note: the ID has no ".0" and carries a "-preview" suffix
343
+ GEMINI_3_0_FLASH: "gemini-3-flash-preview", // same naming pattern as GEMINI_3_0_PRO
344
+ GEMINI_3_1_PRO: "gemini-3.1-pro-preview"
345
+ };
346
+ var AVAILABLE_GEMINI_MODEL_IDS = Object.values(GeminiModel);
347
+ var GEMINI_THINKING_MODEL_IDS = /* @__PURE__ */ new Set([ // Subset of GeminiModel IDs; presumably the models that support thinking (inferred from the name).
348
+ GeminiModel.GEMINI_2_5_PRO,
349
+ GeminiModel.GEMINI_2_5_FLASH,
350
+ GeminiModel.GEMINI_3_0_PRO,
351
+ GeminiModel.GEMINI_3_0_FLASH,
352
+ GeminiModel.GEMINI_3_1_PRO
353
+ ]); // NOTE(review): both 2.0 models and gemini-2.5-flash-lite are not listed; confirm the flash-lite omission is intentional.
354
+ var GeminiModelSchema = import_zod4.z.enum(
355
+ AVAILABLE_GEMINI_MODEL_IDS
356
+ );
323
357
  var ALL_AVAILABLE_MODEL_IDS = [
324
358
  ...AVAILABLE_CLAUDE_MODEL_IDS,
325
- ...AVAILABLE_OPENAI_MODEL_IDS
359
+ ...AVAILABLE_OPENAI_MODEL_IDS,
360
+ ...AVAILABLE_GEMINI_MODEL_IDS
326
361
  ];
327
362
  var AnyModelSchema = import_zod4.z.enum(
328
363
  ALL_AVAILABLE_MODEL_IDS
@@ -361,7 +396,8 @@ var ModelConfigSchema = import_zod4.z.object({
361
396
  import_zod4.z.number().min(0).max(1).optional()
362
397
  ),
363
398
  maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
364
- maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(1).optional())
399
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
400
+ maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
365
401
  });
366
402
 
367
403
  // src/common/rule.ts
@@ -433,11 +469,23 @@ var AgentSchema = TargetSchema.extend({
433
469
  agentType: AgentTypeSchema.default(AgentType.CLI),
434
470
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
435
471
  runCommand: AgentRunCommandSchema.optional(),
436
- /** Optional model configuration override */
472
+ /**
473
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
474
+ * with existing DB rows.
475
+ */
437
476
  modelConfig: ModelConfigSchema.optional(),
438
477
  systemPrompt: import_zod6.z.string().nullish().describe(
439
478
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
440
- )
479
+ ),
480
+ /**
481
+ * Unified agent configuration bag. Absorbs model params (model,
482
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
483
+ * (permissions, thinking tokens, allowed tools, etc.).
484
+ *
485
+ * Adapters read from `config` first, falling back to `modelConfig`
486
+ * for backward compatibility with existing DB rows.
487
+ */
488
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional()
441
489
  });
442
490
  var CreateAgentInputSchema = AgentSchema.omit({
443
491
  id: true,
@@ -447,7 +495,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
447
495
  });
448
496
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
449
497
  modelConfig: ModelConfigSchema.optional().nullable(),
450
- systemPrompt: import_zod6.z.string().optional().nullable()
498
+ systemPrompt: import_zod6.z.string().optional().nullable(),
499
+ config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional().nullable()
451
500
  });
452
501
 
453
502
  // src/target/skill.ts
@@ -2111,8 +2160,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
2111
2160
  });
2112
2161
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2113
2162
 
2114
- // src/schedule/eval-schedule.ts
2163
+ // src/agent/agent-config.ts
2115
2164
  var import_zod35 = require("zod");
2165
+ var BaseAgentConfigSchema = import_zod35.z.object({ // Config fields shared by the agent-specific schemas that extend this one; every field is optional.
2166
+ /** Model ID: any value accepted by AnyModelSchema (Claude, OpenAI, or Gemini IDs). */
2167
+ model: AnyModelSchema.optional(),
2168
+ /** Sampling temperature (0–1). */
2169
+ temperature: import_zod35.z.number().min(0).max(1).optional(),
2170
+ /** Max output tokens per turn. Must be an integer >= 1. */
2171
+ maxTokens: import_zod35.z.number().int().min(1).optional(),
2172
+ /** Number of agentic turns. 0 = unlimited. */
2173
+ maxTurns: import_zod35.z.number().int().min(0).optional(),
2174
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. NOTE(review): 0 is accepted, but its meaning is not defined here; confirm. */
2175
+ maxDurationMs: import_zod35.z.number().int().min(0).optional()
2176
+ });
2177
+ var EffortLevelSchema = import_zod35.z.enum(["low", "medium", "high", "max"]);
2178
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({ // Base config plus thinking budget, tool allow/deny lists, effort level and a spend cap; all optional.
2179
+ /** Extended thinking token budget. */
2180
+ maxThinkingTokens: import_zod35.z.number().int().min(0).optional(),
2181
+ /** Override the default allowedTools list passed to the SDK. Entries are plain strings; this schema does not check them against a known tool list. */
2182
+ allowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2183
+ /** Tools to remove from the model's context entirely. Entries are plain strings, unvalidated here. */
2184
+ disallowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
2185
+ /** Controls thinking depth: low, medium, high, max. */
2186
+ effort: EffortLevelSchema.optional(),
2187
+ /** Maximum USD spend per run. Stops execution when reached. */
2188
+ maxBudgetUsd: import_zod35.z.number().min(0).optional()
2189
+ });
2190
+ var PermissionValueSchema = import_zod35.z.enum(["allow", "deny"]);
2191
+ var OpenCodePermissionSchema = import_zod35.z.record(
2192
+ import_zod35.z.string(),
2193
+ import_zod35.z.union([PermissionValueSchema, import_zod35.z.record(import_zod35.z.string(), PermissionValueSchema)])
2194
+ );
2195
+ var ThinkingVariantSchema = import_zod35.z.enum(["high", "low", "none"]);
2196
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({ // Base config plus permission overrides, thinking variant and topP; maxTokens is removed below.
2197
+ /** Permission overrides (defaults: allow-all). The default is not applied by this schema. */
2198
+ permission: OpenCodePermissionSchema.optional(),
2199
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. NOTE(review): this comment names two different flags (`--variant` and `--thinking`); confirm which one the adapter actually passes. The default is not applied by this schema. */
2200
+ thinkingVariant: ThinkingVariantSchema.optional(),
2201
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2202
+ topP: import_zod35.z.number().min(0).max(1).optional()
2203
+ }).omit({ maxTokens: true }); // Drops the maxTokens field inherited from BaseAgentConfigSchema from this schema's shape.
2204
+ var ReasoningEffortSchema = import_zod35.z.enum(["low", "medium", "high"]);
2205
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({ // Base config plus sampling and reasoning controls; all optional, and no defaults are set by this schema.
2206
+ /** Anthropic thinking budget in tokens. Default: 10 000 (not applied here; presumably set by the consumer, confirm). */
2207
+ thinkingBudgetTokens: import_zod35.z.number().int().min(0).optional(),
2208
+ /** Nucleus sampling (0–1). Alternative to temperature. */
2209
+ topP: import_zod35.z.number().min(0).max(1).optional(),
2210
+ /** Integer seed for deterministic/reproducible results (if model supports it). Negative integers are accepted. */
2211
+ seed: import_zod35.z.number().int().optional(),
2212
+ /** Stop sequences — model stops when generating any of these strings. */
2213
+ stopSequences: import_zod35.z.array(import_zod35.z.string()).optional(),
2214
+ /** OpenAI reasoning effort level. Default: 'high' (not applied here; presumably set by the consumer, confirm). */
2215
+ reasoningEffort: ReasoningEffortSchema.optional(),
2216
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
2217
+ frequencyPenalty: import_zod35.z.number().min(-2).max(2).optional(),
2218
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
2219
+ presencePenalty: import_zod35.z.number().min(-2).max(2).optional()
2220
+ });
2221
+
2222
+ // src/schedule/eval-schedule.ts
2223
+ var import_zod36 = require("zod");
2116
2224
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2117
2225
  FrequencyType2["DAILY"] = "daily";
2118
2226
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -2122,29 +2230,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
2122
2230
  })(FrequencyType || {});
2123
2231
  var EvalScheduleSchema = TenantEntitySchema.extend({
2124
2232
  /** Whether the schedule is active */
2125
- enabled: import_zod35.z.boolean(),
2233
+ enabled: import_zod36.z.boolean(),
2126
2234
  /** Test suite to run */
2127
- suiteId: import_zod35.z.string(),
2235
+ suiteId: import_zod36.z.string(),
2128
2236
  /** Preset that provides agent + entities for this schedule */
2129
- presetId: import_zod35.z.string(),
2237
+ presetId: import_zod36.z.string(),
2130
2238
  /** How often to run */
2131
- frequencyType: import_zod35.z.nativeEnum(FrequencyType),
2239
+ frequencyType: import_zod36.z.nativeEnum(FrequencyType),
2132
2240
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
2133
- timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2241
+ timeOfDay: import_zod36.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
2134
2242
  /** Day of week (0=Sun, 6=Sat) for weekly schedules. NOTE(review): there is no .int() check, so fractional values such as 2.5 pass validation. */
2135
- dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
2243
+ dayOfWeek: import_zod36.z.number().min(0).max(6).optional(),
2136
2244
  /** Day of month (1-31) for monthly schedules. NOTE(review): there is no .int() check, so fractional values such as 15.5 pass validation. */
2137
- dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
2245
+ dayOfMonth: import_zod36.z.number().min(1).max(31).optional(),
2138
2246
  /** IANA timezone (e.g., 'America/New_York'). This field only checks that the value is a string. */
2139
- timezone: import_zod35.z.string(),
2247
+ timezone: import_zod36.z.string(),
2140
2248
  /** ID of the last eval run created by this schedule */
2141
- lastRunId: import_zod35.z.string().optional(),
2249
+ lastRunId: import_zod36.z.string().optional(),
2142
2250
  /** Denormalized status of the last run */
2143
- lastRunStatus: import_zod35.z.string().optional(),
2251
+ lastRunStatus: import_zod36.z.string().optional(),
2144
2252
  /** ISO timestamp of the last run (the format is not validated by this schema) */
2145
- lastRunAt: import_zod35.z.string().optional(),
2253
+ lastRunAt: import_zod36.z.string().optional(),
2146
2254
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend). The format is not validated by this schema. */
2147
- nextRunAt: import_zod35.z.string().optional()
2255
+ nextRunAt: import_zod36.z.string().optional()
2148
2256
  });
2149
2257
  function isValidTimezone(tz) {
2150
2258
  try {
@@ -2157,14 +2265,14 @@ function isValidTimezone(tz) {
2157
2265
  function validateScheduleFields(data, ctx, options) {
2158
2266
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
2159
2267
  ctx.addIssue({
2160
- code: import_zod35.z.ZodIssueCode.custom,
2268
+ code: import_zod36.z.ZodIssueCode.custom,
2161
2269
  message: "dayOfWeek is required for weekly schedules",
2162
2270
  path: ["dayOfWeek"]
2163
2271
  });
2164
2272
  }
2165
2273
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
2166
2274
  ctx.addIssue({
2167
- code: import_zod35.z.ZodIssueCode.custom,
2275
+ code: import_zod36.z.ZodIssueCode.custom,
2168
2276
  message: "dayOfMonth is required for monthly schedules",
2169
2277
  path: ["dayOfMonth"]
2170
2278
  });
@@ -2172,7 +2280,7 @@ function validateScheduleFields(data, ctx, options) {
2172
2280
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
2173
2281
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
2174
2282
  ctx.addIssue({
2175
- code: import_zod35.z.ZodIssueCode.custom,
2283
+ code: import_zod36.z.ZodIssueCode.custom,
2176
2284
  message: "Invalid IANA timezone",
2177
2285
  path: ["timezone"]
2178
2286
  });
@@ -2201,6 +2309,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2201
2309
  ALLOWED_BUILD_COMMANDS,
2202
2310
  ALL_AVAILABLE_MODEL_IDS,
2203
2311
  AVAILABLE_CLAUDE_MODEL_IDS,
2312
+ AVAILABLE_GEMINI_MODEL_IDS,
2204
2313
  AVAILABLE_OPENAI_MODEL_IDS,
2205
2314
  AVAILABLE_RUN_COMMANDS,
2206
2315
  AVAILABLE_TOOL_NAMES,
@@ -2222,6 +2331,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2222
2331
  AssertionSchema,
2223
2332
  AssertionTypeSchema,
2224
2333
  BATCH_IMPORT_LIMITS,
2334
+ BaseAgentConfigSchema,
2225
2335
  BaseEntitySchema,
2226
2336
  BaseTestSchema,
2227
2337
  BatchAssertionLinkSchema,
@@ -2244,6 +2354,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2244
2354
  CapabilityVersionOriginSchema,
2245
2355
  CapabilityVersionSchema,
2246
2356
  CapabilityWithLatestVersionSchema,
2357
+ ClaudeCodeConfigSchema,
2247
2358
  ClaudeModel,
2248
2359
  ClaudeModelSchema,
2249
2360
  CommandExecutionSchema,
@@ -2276,6 +2387,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2276
2387
  DiffContentSchema,
2277
2388
  DiffLineSchema,
2278
2389
  DiffLineTypeSchema,
2390
+ EffortLevelSchema,
2279
2391
  EnvironmentSchema,
2280
2392
  EvalMetricsSchema,
2281
2393
  EvalRunFolderMembershipSchema,
@@ -2295,6 +2407,9 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2295
2407
  FileModificationSchema,
2296
2408
  FilePresenceTestSchema,
2297
2409
  FrequencyType,
2410
+ GEMINI_THINKING_MODEL_IDS,
2411
+ GeminiModel,
2412
+ GeminiModelSchema,
2298
2413
  GitHubSourceSchema,
2299
2414
  InitialCapabilityVersionInputSchema,
2300
2415
  InitialVersionInputSchema,
@@ -2320,11 +2435,15 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2320
2435
  OPENAI_RESPONSES_MODEL_IDS,
2321
2436
  OpenAIModel,
2322
2437
  OpenAIModelSchema,
2438
+ OpenCodeConfigSchema,
2439
+ OpenCodePermissionSchema,
2440
+ PermissionValueSchema,
2323
2441
  PlaywrightNLTestSchema,
2324
2442
  PresetSchema,
2325
2443
  ProjectSchema,
2326
2444
  PromptResultSchema,
2327
2445
  RUN_COMMAND_LABELS,
2446
+ ReasoningEffortSchema,
2328
2447
  RuleSchema,
2329
2448
  RuleTypeSchema,
2330
2449
  RunAnalysisFindingSchema,
@@ -2335,6 +2454,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2335
2454
  SYSTEM_ASSERTION_IDS,
2336
2455
  ScenarioAssertionLinkSchema,
2337
2456
  ScenarioConversationSchema,
2457
+ SimpleAgentConfigSchema,
2338
2458
  SiteConfigTestSchema,
2339
2459
  SkillFileSchema,
2340
2460
  SkillMetadataSchema,
@@ -2360,6 +2480,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
2360
2480
  TestTypeSchema,
2361
2481
  TextBlockSchema,
2362
2482
  ThinkingBlockSchema,
2483
+ ThinkingVariantSchema,
2363
2484
  TimeAssertionSchema,
2364
2485
  TimeConfigSchema,
2365
2486
  TokenUsageSchema,