@wix/evalforge-types 0.77.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -85,9 +85,31 @@ var OPENAI_RESPONSES_MODEL_IDS = /* @__PURE__ */ new Set([
85
85
  var OpenAIModelSchema = z4.enum(
86
86
  AVAILABLE_OPENAI_MODEL_IDS
87
87
  );
88
+ var GeminiModel = {
89
+ GEMINI_2_0_FLASH: "gemini-2.0-flash",
90
+ GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
91
+ GEMINI_2_5_PRO: "gemini-2.5-pro",
92
+ GEMINI_2_5_FLASH: "gemini-2.5-flash",
93
+ GEMINI_2_5_FLASH_LITE: "gemini-2.5-flash-lite",
94
+ GEMINI_3_0_PRO: "gemini-3-pro-preview",
95
+ GEMINI_3_0_FLASH: "gemini-3-flash-preview",
96
+ GEMINI_3_1_PRO: "gemini-3.1-pro-preview"
97
+ };
98
+ var AVAILABLE_GEMINI_MODEL_IDS = Object.values(GeminiModel);
99
+ var GEMINI_THINKING_MODEL_IDS = /* @__PURE__ */ new Set([
100
+ GeminiModel.GEMINI_2_5_PRO,
101
+ GeminiModel.GEMINI_2_5_FLASH,
102
+ GeminiModel.GEMINI_3_0_PRO,
103
+ GeminiModel.GEMINI_3_0_FLASH,
104
+ GeminiModel.GEMINI_3_1_PRO
105
+ ]);
106
+ var GeminiModelSchema = z4.enum(
107
+ AVAILABLE_GEMINI_MODEL_IDS
108
+ );
88
109
  var ALL_AVAILABLE_MODEL_IDS = [
89
110
  ...AVAILABLE_CLAUDE_MODEL_IDS,
90
- ...AVAILABLE_OPENAI_MODEL_IDS
111
+ ...AVAILABLE_OPENAI_MODEL_IDS,
112
+ ...AVAILABLE_GEMINI_MODEL_IDS
91
113
  ];
92
114
  var AnyModelSchema = z4.enum(
93
115
  ALL_AVAILABLE_MODEL_IDS
@@ -126,7 +148,8 @@ var ModelConfigSchema = z4.object({
126
148
  z4.number().min(0).max(1).optional()
127
149
  ),
128
150
  maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
129
- maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(1).optional())
151
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
152
+ maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
130
153
  });
131
154
 
132
155
  // src/common/rule.ts
@@ -198,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
198
221
  agentType: AgentTypeSchema.default(AgentType.CLI),
199
222
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
200
223
  runCommand: AgentRunCommandSchema.optional(),
201
- /** Optional model configuration override */
224
+ /**
225
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
226
+ * with existing DB rows.
227
+ */
202
228
  modelConfig: ModelConfigSchema.optional(),
203
229
  systemPrompt: z6.string().nullish().describe(
204
230
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
205
- )
231
+ ),
232
+ /**
233
+ * Unified agent configuration bag. Absorbs model params (model,
234
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
235
+ * (permissions, thinking tokens, allowed tools, etc.).
236
+ *
237
+ * Adapters read from `config` first, falling back to `modelConfig`
238
+ * for backward compatibility with existing DB rows.
239
+ */
240
+ config: z6.record(z6.string(), z6.unknown()).optional()
206
241
  });
207
242
  var CreateAgentInputSchema = AgentSchema.omit({
208
243
  id: true,
@@ -212,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
212
247
  });
213
248
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
214
249
  modelConfig: ModelConfigSchema.optional().nullable(),
215
- systemPrompt: z6.string().optional().nullable()
250
+ systemPrompt: z6.string().optional().nullable(),
251
+ config: z6.record(z6.string(), z6.unknown()).optional().nullable()
216
252
  });
217
253
 
218
254
  // src/target/skill.ts
@@ -1876,8 +1912,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1876
1912
  });
1877
1913
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1878
1914
 
1879
- // src/schedule/eval-schedule.ts
1915
+ // src/agent/agent-config.ts
1880
1916
  import { z as z35 } from "zod";
1917
+ var BaseAgentConfigSchema = z35.object({
1918
+ /** Model ID (Claude, OpenAI, or Gemini). */
1919
+ model: AnyModelSchema.optional(),
1920
+ /** Sampling temperature (0–1). */
1921
+ temperature: z35.number().min(0).max(1).optional(),
1922
+ /** Max output tokens per turn. */
1923
+ maxTokens: z35.number().int().min(1).optional(),
1924
+ /** Number of agentic turns. 0 = unlimited. */
1925
+ maxTurns: z35.number().int().min(0).optional(),
1926
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
1927
+ maxDurationMs: z35.number().int().min(0).optional()
1928
+ });
1929
+ var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
1930
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
1931
+ /** Extended thinking token budget. */
1932
+ maxThinkingTokens: z35.number().int().min(0).optional(),
1933
+ /** Override the default allowedTools list passed to the SDK. */
1934
+ allowedTools: z35.array(z35.string()).optional(),
1935
+ /** Tools to remove from the model's context entirely. */
1936
+ disallowedTools: z35.array(z35.string()).optional(),
1937
+ /** Controls thinking depth: low, medium, high, max. */
1938
+ effort: EffortLevelSchema.optional(),
1939
+ /** Maximum USD spend per run. Stops execution when reached. */
1940
+ maxBudgetUsd: z35.number().min(0).optional()
1941
+ });
1942
+ var PermissionValueSchema = z35.enum(["allow", "deny"]);
1943
+ var OpenCodePermissionSchema = z35.record(
1944
+ z35.string(),
1945
+ z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
1946
+ );
1947
+ var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
1948
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
1949
+ /** Permission overrides (defaults: allow-all). */
1950
+ permission: OpenCodePermissionSchema.optional(),
1951
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
1952
+ thinkingVariant: ThinkingVariantSchema.optional(),
1953
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1954
+ topP: z35.number().min(0).max(1).optional()
1955
+ }).omit({ maxTokens: true });
1956
+ var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
1957
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
1958
+ /** Anthropic thinking budget in tokens. Default: 10 000. */
1959
+ thinkingBudgetTokens: z35.number().int().min(0).optional(),
1960
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1961
+ topP: z35.number().min(0).max(1).optional(),
1962
+ /** Integer seed for deterministic/reproducible results (if model supports it). */
1963
+ seed: z35.number().int().optional(),
1964
+ /** Stop sequences — model stops when generating any of these strings. */
1965
+ stopSequences: z35.array(z35.string()).optional(),
1966
+ /** OpenAI reasoning effort level. Default: 'high'. */
1967
+ reasoningEffort: ReasoningEffortSchema.optional(),
1968
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
1969
+ frequencyPenalty: z35.number().min(-2).max(2).optional(),
1970
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
1971
+ presencePenalty: z35.number().min(-2).max(2).optional()
1972
+ });
1973
+
1974
+ // src/schedule/eval-schedule.ts
1975
+ import { z as z36 } from "zod";
1881
1976
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1882
1977
  FrequencyType2["DAILY"] = "daily";
1883
1978
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1887,29 +1982,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1887
1982
  })(FrequencyType || {});
1888
1983
  var EvalScheduleSchema = TenantEntitySchema.extend({
1889
1984
  /** Whether the schedule is active */
1890
- enabled: z35.boolean(),
1985
+ enabled: z36.boolean(),
1891
1986
  /** Test suite to run */
1892
- suiteId: z35.string(),
1987
+ suiteId: z36.string(),
1893
1988
  /** Preset that provides agent + entities for this schedule */
1894
- presetId: z35.string(),
1989
+ presetId: z36.string(),
1895
1990
  /** How often to run */
1896
- frequencyType: z35.nativeEnum(FrequencyType),
1991
+ frequencyType: z36.nativeEnum(FrequencyType),
1897
1992
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1898
- timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1993
+ timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1899
1994
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1900
- dayOfWeek: z35.number().min(0).max(6).optional(),
1995
+ dayOfWeek: z36.number().min(0).max(6).optional(),
1901
1996
  /** Day of month (1-31) for monthly schedules */
1902
- dayOfMonth: z35.number().min(1).max(31).optional(),
1997
+ dayOfMonth: z36.number().min(1).max(31).optional(),
1903
1998
  /** IANA timezone (e.g., 'America/New_York') */
1904
- timezone: z35.string(),
1999
+ timezone: z36.string(),
1905
2000
  /** ID of the last eval run created by this schedule */
1906
- lastRunId: z35.string().optional(),
2001
+ lastRunId: z36.string().optional(),
1907
2002
  /** Denormalized status of the last run */
1908
- lastRunStatus: z35.string().optional(),
2003
+ lastRunStatus: z36.string().optional(),
1909
2004
  /** ISO timestamp of the last run */
1910
- lastRunAt: z35.string().optional(),
2005
+ lastRunAt: z36.string().optional(),
1911
2006
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1912
- nextRunAt: z35.string().optional()
2007
+ nextRunAt: z36.string().optional()
1913
2008
  });
1914
2009
  function isValidTimezone(tz) {
1915
2010
  try {
@@ -1922,14 +2017,14 @@ function isValidTimezone(tz) {
1922
2017
  function validateScheduleFields(data, ctx, options) {
1923
2018
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1924
2019
  ctx.addIssue({
1925
- code: z35.ZodIssueCode.custom,
2020
+ code: z36.ZodIssueCode.custom,
1926
2021
  message: "dayOfWeek is required for weekly schedules",
1927
2022
  path: ["dayOfWeek"]
1928
2023
  });
1929
2024
  }
1930
2025
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1931
2026
  ctx.addIssue({
1932
- code: z35.ZodIssueCode.custom,
2027
+ code: z36.ZodIssueCode.custom,
1933
2028
  message: "dayOfMonth is required for monthly schedules",
1934
2029
  path: ["dayOfMonth"]
1935
2030
  });
@@ -1937,7 +2032,7 @@ function validateScheduleFields(data, ctx, options) {
1937
2032
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1938
2033
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1939
2034
  ctx.addIssue({
1940
- code: z35.ZodIssueCode.custom,
2035
+ code: z36.ZodIssueCode.custom,
1941
2036
  message: "Invalid IANA timezone",
1942
2037
  path: ["timezone"]
1943
2038
  });
@@ -1965,6 +2060,7 @@ export {
1965
2060
  ALLOWED_BUILD_COMMANDS,
1966
2061
  ALL_AVAILABLE_MODEL_IDS,
1967
2062
  AVAILABLE_CLAUDE_MODEL_IDS,
2063
+ AVAILABLE_GEMINI_MODEL_IDS,
1968
2064
  AVAILABLE_OPENAI_MODEL_IDS,
1969
2065
  AVAILABLE_RUN_COMMANDS,
1970
2066
  AVAILABLE_TOOL_NAMES,
@@ -1986,6 +2082,7 @@ export {
1986
2082
  AssertionSchema,
1987
2083
  AssertionTypeSchema,
1988
2084
  BATCH_IMPORT_LIMITS,
2085
+ BaseAgentConfigSchema,
1989
2086
  BaseEntitySchema,
1990
2087
  BaseTestSchema,
1991
2088
  BatchAssertionLinkSchema,
@@ -2008,6 +2105,7 @@ export {
2008
2105
  CapabilityVersionOriginSchema,
2009
2106
  CapabilityVersionSchema,
2010
2107
  CapabilityWithLatestVersionSchema,
2108
+ ClaudeCodeConfigSchema,
2011
2109
  ClaudeModel,
2012
2110
  ClaudeModelSchema,
2013
2111
  CommandExecutionSchema,
@@ -2040,6 +2138,7 @@ export {
2040
2138
  DiffContentSchema,
2041
2139
  DiffLineSchema,
2042
2140
  DiffLineTypeSchema,
2141
+ EffortLevelSchema,
2043
2142
  EnvironmentSchema,
2044
2143
  EvalMetricsSchema,
2045
2144
  EvalRunFolderMembershipSchema,
@@ -2059,6 +2158,9 @@ export {
2059
2158
  FileModificationSchema,
2060
2159
  FilePresenceTestSchema,
2061
2160
  FrequencyType,
2161
+ GEMINI_THINKING_MODEL_IDS,
2162
+ GeminiModel,
2163
+ GeminiModelSchema,
2062
2164
  GitHubSourceSchema,
2063
2165
  InitialCapabilityVersionInputSchema,
2064
2166
  InitialVersionInputSchema,
@@ -2084,11 +2186,15 @@ export {
2084
2186
  OPENAI_RESPONSES_MODEL_IDS,
2085
2187
  OpenAIModel,
2086
2188
  OpenAIModelSchema,
2189
+ OpenCodeConfigSchema,
2190
+ OpenCodePermissionSchema,
2191
+ PermissionValueSchema,
2087
2192
  PlaywrightNLTestSchema,
2088
2193
  PresetSchema,
2089
2194
  ProjectSchema,
2090
2195
  PromptResultSchema,
2091
2196
  RUN_COMMAND_LABELS,
2197
+ ReasoningEffortSchema,
2092
2198
  RuleSchema,
2093
2199
  RuleTypeSchema,
2094
2200
  RunAnalysisFindingSchema,
@@ -2099,6 +2205,7 @@ export {
2099
2205
  SYSTEM_ASSERTION_IDS,
2100
2206
  ScenarioAssertionLinkSchema,
2101
2207
  ScenarioConversationSchema,
2208
+ SimpleAgentConfigSchema,
2102
2209
  SiteConfigTestSchema,
2103
2210
  SkillFileSchema,
2104
2211
  SkillMetadataSchema,
@@ -2124,6 +2231,7 @@ export {
2124
2231
  TestTypeSchema,
2125
2232
  TextBlockSchema,
2126
2233
  ThinkingBlockSchema,
2234
+ ThinkingVariantSchema,
2127
2235
  TimeAssertionSchema,
2128
2236
  TimeConfigSchema,
2129
2237
  TokenUsageSchema,