@wix/evalforge-types 0.78.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -148,7 +148,8 @@ var ModelConfigSchema = z4.object({
148
148
  z4.number().min(0).max(1).optional()
149
149
  ),
150
150
  maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
151
- maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(1).optional())
151
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
152
+ maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
152
153
  });
153
154
 
154
155
  // src/common/rule.ts
@@ -220,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
220
221
  agentType: AgentTypeSchema.default(AgentType.CLI),
221
222
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
222
223
  runCommand: AgentRunCommandSchema.optional(),
223
- /** Optional model configuration override */
224
+ /**
225
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
226
+ * with existing DB rows.
227
+ */
224
228
  modelConfig: ModelConfigSchema.optional(),
225
229
  systemPrompt: z6.string().nullish().describe(
226
230
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
227
- )
231
+ ),
232
+ /**
233
+ * Unified agent configuration bag. Absorbs model params (model,
234
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
235
+ * (permissions, thinking tokens, allowed tools, etc.).
236
+ *
237
+ * Adapters read from `config` first, falling back to `modelConfig`
238
+ * for backward compatibility with existing DB rows.
239
+ */
240
+ config: z6.record(z6.string(), z6.unknown()).optional()
228
241
  });
229
242
  var CreateAgentInputSchema = AgentSchema.omit({
230
243
  id: true,
@@ -234,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
234
247
  });
235
248
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
236
249
  modelConfig: ModelConfigSchema.optional().nullable(),
237
- systemPrompt: z6.string().optional().nullable()
250
+ systemPrompt: z6.string().optional().nullable(),
251
+ config: z6.record(z6.string(), z6.unknown()).optional().nullable()
238
252
  });
239
253
 
240
254
  // src/target/skill.ts
@@ -1898,8 +1912,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1898
1912
  });
1899
1913
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1900
1914
 
1901
- // src/schedule/eval-schedule.ts
1915
+ // src/agent/agent-config.ts
1902
1916
  import { z as z35 } from "zod";
1917
+ var BaseAgentConfigSchema = z35.object({
1918
+ /** Model ID (Claude or OpenAI). */
1919
+ model: AnyModelSchema.optional(),
1920
+ /** Sampling temperature (0–1). */
1921
+ temperature: z35.number().min(0).max(1).optional(),
1922
+ /** Max output tokens per turn. */
1923
+ maxTokens: z35.number().int().min(1).optional(),
1924
+ /** Number of agentic turns. 0 = unlimited. */
1925
+ maxTurns: z35.number().int().min(0).optional(),
1926
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
1927
+ maxDurationMs: z35.number().int().min(0).optional()
1928
+ });
1929
+ var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
1930
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
1931
+ /** Extended thinking token budget. */
1932
+ maxThinkingTokens: z35.number().int().min(0).optional(),
1933
+ /** Override the default allowedTools list passed to the SDK. */
1934
+ allowedTools: z35.array(z35.string()).optional(),
1935
+ /** Tools to remove from the model's context entirely. */
1936
+ disallowedTools: z35.array(z35.string()).optional(),
1937
+ /** Controls thinking depth: low, medium, high, max. */
1938
+ effort: EffortLevelSchema.optional(),
1939
+ /** Maximum USD spend per run. Stops execution when reached. */
1940
+ maxBudgetUsd: z35.number().min(0).optional()
1941
+ });
1942
+ var PermissionValueSchema = z35.enum(["allow", "deny"]);
1943
+ var OpenCodePermissionSchema = z35.record(
1944
+ z35.string(),
1945
+ z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
1946
+ );
1947
+ var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
1948
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
1949
+ /** Permission overrides (defaults: allow-all). */
1950
+ permission: OpenCodePermissionSchema.optional(),
1951
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
1952
+ thinkingVariant: ThinkingVariantSchema.optional(),
1953
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1954
+ topP: z35.number().min(0).max(1).optional()
1955
+ }).omit({ maxTokens: true });
1956
+ var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
1957
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
1958
+ /** Anthropic thinking budget in tokens. Default: 10 000. */
1959
+ thinkingBudgetTokens: z35.number().int().min(0).optional(),
1960
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1961
+ topP: z35.number().min(0).max(1).optional(),
1962
+ /** Integer seed for deterministic/reproducible results (if model supports it). */
1963
+ seed: z35.number().int().optional(),
1964
+ /** Stop sequences — model stops when generating any of these strings. */
1965
+ stopSequences: z35.array(z35.string()).optional(),
1966
+ /** OpenAI reasoning effort level. Default: 'high'. */
1967
+ reasoningEffort: ReasoningEffortSchema.optional(),
1968
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
1969
+ frequencyPenalty: z35.number().min(-2).max(2).optional(),
1970
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
1971
+ presencePenalty: z35.number().min(-2).max(2).optional()
1972
+ });
1973
+
1974
+ // src/schedule/eval-schedule.ts
1975
+ import { z as z36 } from "zod";
1903
1976
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1904
1977
  FrequencyType2["DAILY"] = "daily";
1905
1978
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1909,29 +1982,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1909
1982
  })(FrequencyType || {});
1910
1983
  var EvalScheduleSchema = TenantEntitySchema.extend({
1911
1984
  /** Whether the schedule is active */
1912
- enabled: z35.boolean(),
1985
+ enabled: z36.boolean(),
1913
1986
  /** Test suite to run */
1914
- suiteId: z35.string(),
1987
+ suiteId: z36.string(),
1915
1988
  /** Preset that provides agent + entities for this schedule */
1916
- presetId: z35.string(),
1989
+ presetId: z36.string(),
1917
1990
  /** How often to run */
1918
- frequencyType: z35.nativeEnum(FrequencyType),
1991
+ frequencyType: z36.nativeEnum(FrequencyType),
1919
1992
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1920
- timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1993
+ timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1921
1994
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1922
- dayOfWeek: z35.number().min(0).max(6).optional(),
1995
+ dayOfWeek: z36.number().min(0).max(6).optional(),
1923
1996
  /** Day of month (1-31) for monthly schedules */
1924
- dayOfMonth: z35.number().min(1).max(31).optional(),
1997
+ dayOfMonth: z36.number().min(1).max(31).optional(),
1925
1998
  /** IANA timezone (e.g., 'America/New_York') */
1926
- timezone: z35.string(),
1999
+ timezone: z36.string(),
1927
2000
  /** ID of the last eval run created by this schedule */
1928
- lastRunId: z35.string().optional(),
2001
+ lastRunId: z36.string().optional(),
1929
2002
  /** Denormalized status of the last run */
1930
- lastRunStatus: z35.string().optional(),
2003
+ lastRunStatus: z36.string().optional(),
1931
2004
  /** ISO timestamp of the last run */
1932
- lastRunAt: z35.string().optional(),
2005
+ lastRunAt: z36.string().optional(),
1933
2006
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1934
- nextRunAt: z35.string().optional()
2007
+ nextRunAt: z36.string().optional()
1935
2008
  });
1936
2009
  function isValidTimezone(tz) {
1937
2010
  try {
@@ -1944,14 +2017,14 @@ function isValidTimezone(tz) {
1944
2017
  function validateScheduleFields(data, ctx, options) {
1945
2018
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1946
2019
  ctx.addIssue({
1947
- code: z35.ZodIssueCode.custom,
2020
+ code: z36.ZodIssueCode.custom,
1948
2021
  message: "dayOfWeek is required for weekly schedules",
1949
2022
  path: ["dayOfWeek"]
1950
2023
  });
1951
2024
  }
1952
2025
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1953
2026
  ctx.addIssue({
1954
- code: z35.ZodIssueCode.custom,
2027
+ code: z36.ZodIssueCode.custom,
1955
2028
  message: "dayOfMonth is required for monthly schedules",
1956
2029
  path: ["dayOfMonth"]
1957
2030
  });
@@ -1959,7 +2032,7 @@ function validateScheduleFields(data, ctx, options) {
1959
2032
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1960
2033
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1961
2034
  ctx.addIssue({
1962
- code: z35.ZodIssueCode.custom,
2035
+ code: z36.ZodIssueCode.custom,
1963
2036
  message: "Invalid IANA timezone",
1964
2037
  path: ["timezone"]
1965
2038
  });
@@ -2009,6 +2082,7 @@ export {
2009
2082
  AssertionSchema,
2010
2083
  AssertionTypeSchema,
2011
2084
  BATCH_IMPORT_LIMITS,
2085
+ BaseAgentConfigSchema,
2012
2086
  BaseEntitySchema,
2013
2087
  BaseTestSchema,
2014
2088
  BatchAssertionLinkSchema,
@@ -2031,6 +2105,7 @@ export {
2031
2105
  CapabilityVersionOriginSchema,
2032
2106
  CapabilityVersionSchema,
2033
2107
  CapabilityWithLatestVersionSchema,
2108
+ ClaudeCodeConfigSchema,
2034
2109
  ClaudeModel,
2035
2110
  ClaudeModelSchema,
2036
2111
  CommandExecutionSchema,
@@ -2063,6 +2138,7 @@ export {
2063
2138
  DiffContentSchema,
2064
2139
  DiffLineSchema,
2065
2140
  DiffLineTypeSchema,
2141
+ EffortLevelSchema,
2066
2142
  EnvironmentSchema,
2067
2143
  EvalMetricsSchema,
2068
2144
  EvalRunFolderMembershipSchema,
@@ -2110,11 +2186,15 @@ export {
2110
2186
  OPENAI_RESPONSES_MODEL_IDS,
2111
2187
  OpenAIModel,
2112
2188
  OpenAIModelSchema,
2189
+ OpenCodeConfigSchema,
2190
+ OpenCodePermissionSchema,
2191
+ PermissionValueSchema,
2113
2192
  PlaywrightNLTestSchema,
2114
2193
  PresetSchema,
2115
2194
  ProjectSchema,
2116
2195
  PromptResultSchema,
2117
2196
  RUN_COMMAND_LABELS,
2197
+ ReasoningEffortSchema,
2118
2198
  RuleSchema,
2119
2199
  RuleTypeSchema,
2120
2200
  RunAnalysisFindingSchema,
@@ -2125,6 +2205,7 @@ export {
2125
2205
  SYSTEM_ASSERTION_IDS,
2126
2206
  ScenarioAssertionLinkSchema,
2127
2207
  ScenarioConversationSchema,
2208
+ SimpleAgentConfigSchema,
2128
2209
  SiteConfigTestSchema,
2129
2210
  SkillFileSchema,
2130
2211
  SkillMetadataSchema,
@@ -2150,6 +2231,7 @@ export {
2150
2231
  TestTypeSchema,
2151
2232
  TextBlockSchema,
2152
2233
  ThinkingBlockSchema,
2234
+ ThinkingVariantSchema,
2153
2235
  TimeAssertionSchema,
2154
2236
  TimeConfigSchema,
2155
2237
  TokenUsageSchema,