@wix/evalforge-types 0.78.0 → 0.80.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -148,7 +148,8 @@ var ModelConfigSchema = z4.object({
148
148
  z4.number().min(0).max(1).optional()
149
149
  ),
150
150
  maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
151
- maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(1).optional())
151
+ /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
152
+ maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
152
153
  });
153
154
 
154
155
  // src/common/rule.ts
@@ -220,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
220
221
  agentType: AgentTypeSchema.default(AgentType.CLI),
221
222
  /** Command to run the agent (required for CLI agents, absent for SDK agents) */
222
223
  runCommand: AgentRunCommandSchema.optional(),
223
- /** Optional model configuration override */
224
+ /**
225
+ * @deprecated Use `config` bag instead. Retained for backward compatibility
226
+ * with existing DB rows.
227
+ */
224
228
  modelConfig: ModelConfigSchema.optional(),
225
229
  systemPrompt: z6.string().nullish().describe(
226
230
  "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
227
- )
231
+ ),
232
+ /**
233
+ * Unified agent configuration bag. Absorbs model params (model,
234
+ * temperature, maxTokens, maxTurns) plus agent-specific settings
235
+ * (permissions, thinking tokens, allowed tools, etc.).
236
+ *
237
+ * Adapters read from `config` first, falling back to `modelConfig`
238
+ * for backward compatibility with existing DB rows.
239
+ */
240
+ config: z6.record(z6.string(), z6.unknown()).optional()
228
241
  });
229
242
  var CreateAgentInputSchema = AgentSchema.omit({
230
243
  id: true,
@@ -234,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
234
247
  });
235
248
  var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
236
249
  modelConfig: ModelConfigSchema.optional().nullable(),
237
- systemPrompt: z6.string().optional().nullable()
250
+ systemPrompt: z6.string().optional().nullable(),
251
+ config: z6.record(z6.string(), z6.unknown()).optional().nullable()
238
252
  });
239
253
 
240
254
  // src/target/skill.ts
@@ -1632,14 +1646,18 @@ var EvalRunSchema = TenantEntitySchema.extend({
1632
1646
  agentType: AgentTypeSchema.optional(),
1633
1647
  runCommand: AgentRunCommandSchema.optional(),
1634
1648
  systemPrompt: z30.string().nullable().optional(),
1635
- modelConfig: ModelConfigSchema.optional()
1649
+ /** @deprecated retained for backward compat with stored snapshots */
1650
+ modelConfig: ModelConfigSchema.optional(),
1651
+ config: z30.record(z30.string(), z30.unknown()).optional()
1636
1652
  }).optional(),
1637
1653
  /** UUID linking all runs in a comparison group */
1638
1654
  comparisonGroupId: z30.string().optional(),
1639
1655
  /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1640
1656
  comparisonLabel: z30.string().optional(),
1641
1657
  /** LLM-generated analysis of the completed run */
1642
- runAnalysis: RunAnalysisSchema.optional()
1658
+ runAnalysis: RunAnalysisSchema.optional(),
1659
+ /** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
1660
+ folderIds: z30.array(z30.string()).optional()
1643
1661
  });
1644
1662
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1645
1663
  id: true,
@@ -1898,8 +1916,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1898
1916
  });
1899
1917
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1900
1918
 
1901
- // src/schedule/eval-schedule.ts
1919
+ // src/agent/agent-config.ts
1902
1920
  import { z as z35 } from "zod";
1921
+ var BaseAgentConfigSchema = z35.object({
1922
+ /** Model ID (Claude or OpenAI). */
1923
+ model: AnyModelSchema.optional(),
1924
+ /** Sampling temperature (0–1). */
1925
+ temperature: z35.number().min(0).max(1).optional(),
1926
+ /** Max output tokens per turn. */
1927
+ maxTokens: z35.number().int().min(1).optional(),
1928
+ /** Number of agentic turns. 0 = unlimited. */
1929
+ maxTurns: z35.number().int().min(0).optional(),
1930
+ /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
1931
+ maxDurationMs: z35.number().int().min(0).optional()
1932
+ });
1933
+ var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
1934
+ var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
1935
+ /** Extended thinking token budget. */
1936
+ maxThinkingTokens: z35.number().int().min(0).optional(),
1937
+ /** Override the default allowedTools list passed to the SDK. */
1938
+ allowedTools: z35.array(z35.string()).optional(),
1939
+ /** Tools to remove from the model's context entirely. */
1940
+ disallowedTools: z35.array(z35.string()).optional(),
1941
+ /** Controls thinking depth: low, medium, high, max. */
1942
+ effort: EffortLevelSchema.optional(),
1943
+ /** Maximum USD spend per run. Stops execution when reached. */
1944
+ maxBudgetUsd: z35.number().min(0).optional()
1945
+ });
1946
+ var PermissionValueSchema = z35.enum(["allow", "deny"]);
1947
+ var OpenCodePermissionSchema = z35.record(
1948
+ z35.string(),
1949
+ z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
1950
+ );
1951
+ var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
1952
+ var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
1953
+ /** Permission overrides (defaults: allow-all). */
1954
+ permission: OpenCodePermissionSchema.optional(),
1955
+ /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
1956
+ thinkingVariant: ThinkingVariantSchema.optional(),
1957
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1958
+ topP: z35.number().min(0).max(1).optional()
1959
+ }).omit({ maxTokens: true });
1960
+ var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
1961
+ var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
1962
+ /** Anthropic thinking budget in tokens. Default: 10 000. */
1963
+ thinkingBudgetTokens: z35.number().int().min(0).optional(),
1964
+ /** Nucleus sampling (0–1). Alternative to temperature. */
1965
+ topP: z35.number().min(0).max(1).optional(),
1966
+ /** Integer seed for deterministic/reproducible results (if model supports it). */
1967
+ seed: z35.number().int().optional(),
1968
+ /** Stop sequences — model stops when generating any of these strings. */
1969
+ stopSequences: z35.array(z35.string()).optional(),
1970
+ /** OpenAI reasoning effort level. Default: 'high'. */
1971
+ reasoningEffort: ReasoningEffortSchema.optional(),
1972
+ /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
1973
+ frequencyPenalty: z35.number().min(-2).max(2).optional(),
1974
+ /** Presence penalty (−2 to 2). Encourages topic diversity. */
1975
+ presencePenalty: z35.number().min(-2).max(2).optional()
1976
+ });
1977
+
1978
+ // src/schedule/eval-schedule.ts
1979
+ import { z as z36 } from "zod";
1903
1980
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1904
1981
  FrequencyType2["DAILY"] = "daily";
1905
1982
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1909,29 +1986,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1909
1986
  })(FrequencyType || {});
1910
1987
  var EvalScheduleSchema = TenantEntitySchema.extend({
1911
1988
  /** Whether the schedule is active */
1912
- enabled: z35.boolean(),
1989
+ enabled: z36.boolean(),
1913
1990
  /** Test suite to run */
1914
- suiteId: z35.string(),
1991
+ suiteId: z36.string(),
1915
1992
  /** Preset that provides agent + entities for this schedule */
1916
- presetId: z35.string(),
1993
+ presetId: z36.string(),
1917
1994
  /** How often to run */
1918
- frequencyType: z35.nativeEnum(FrequencyType),
1995
+ frequencyType: z36.nativeEnum(FrequencyType),
1919
1996
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1920
- timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1997
+ timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1921
1998
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1922
- dayOfWeek: z35.number().min(0).max(6).optional(),
1999
+ dayOfWeek: z36.number().min(0).max(6).optional(),
1923
2000
  /** Day of month (1-31) for monthly schedules */
1924
- dayOfMonth: z35.number().min(1).max(31).optional(),
2001
+ dayOfMonth: z36.number().min(1).max(31).optional(),
1925
2002
  /** IANA timezone (e.g., 'America/New_York') */
1926
- timezone: z35.string(),
2003
+ timezone: z36.string(),
1927
2004
  /** ID of the last eval run created by this schedule */
1928
- lastRunId: z35.string().optional(),
2005
+ lastRunId: z36.string().optional(),
1929
2006
  /** Denormalized status of the last run */
1930
- lastRunStatus: z35.string().optional(),
2007
+ lastRunStatus: z36.string().optional(),
1931
2008
  /** ISO timestamp of the last run */
1932
- lastRunAt: z35.string().optional(),
2009
+ lastRunAt: z36.string().optional(),
1933
2010
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1934
- nextRunAt: z35.string().optional()
2011
+ nextRunAt: z36.string().optional()
1935
2012
  });
1936
2013
  function isValidTimezone(tz) {
1937
2014
  try {
@@ -1944,14 +2021,14 @@ function isValidTimezone(tz) {
1944
2021
  function validateScheduleFields(data, ctx, options) {
1945
2022
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1946
2023
  ctx.addIssue({
1947
- code: z35.ZodIssueCode.custom,
2024
+ code: z36.ZodIssueCode.custom,
1948
2025
  message: "dayOfWeek is required for weekly schedules",
1949
2026
  path: ["dayOfWeek"]
1950
2027
  });
1951
2028
  }
1952
2029
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1953
2030
  ctx.addIssue({
1954
- code: z35.ZodIssueCode.custom,
2031
+ code: z36.ZodIssueCode.custom,
1955
2032
  message: "dayOfMonth is required for monthly schedules",
1956
2033
  path: ["dayOfMonth"]
1957
2034
  });
@@ -1959,7 +2036,7 @@ function validateScheduleFields(data, ctx, options) {
1959
2036
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1960
2037
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1961
2038
  ctx.addIssue({
1962
- code: z35.ZodIssueCode.custom,
2039
+ code: z36.ZodIssueCode.custom,
1963
2040
  message: "Invalid IANA timezone",
1964
2041
  path: ["timezone"]
1965
2042
  });
@@ -2009,6 +2086,7 @@ export {
2009
2086
  AssertionSchema,
2010
2087
  AssertionTypeSchema,
2011
2088
  BATCH_IMPORT_LIMITS,
2089
+ BaseAgentConfigSchema,
2012
2090
  BaseEntitySchema,
2013
2091
  BaseTestSchema,
2014
2092
  BatchAssertionLinkSchema,
@@ -2031,6 +2109,7 @@ export {
2031
2109
  CapabilityVersionOriginSchema,
2032
2110
  CapabilityVersionSchema,
2033
2111
  CapabilityWithLatestVersionSchema,
2112
+ ClaudeCodeConfigSchema,
2034
2113
  ClaudeModel,
2035
2114
  ClaudeModelSchema,
2036
2115
  CommandExecutionSchema,
@@ -2063,6 +2142,7 @@ export {
2063
2142
  DiffContentSchema,
2064
2143
  DiffLineSchema,
2065
2144
  DiffLineTypeSchema,
2145
+ EffortLevelSchema,
2066
2146
  EnvironmentSchema,
2067
2147
  EvalMetricsSchema,
2068
2148
  EvalRunFolderMembershipSchema,
@@ -2110,11 +2190,15 @@ export {
2110
2190
  OPENAI_RESPONSES_MODEL_IDS,
2111
2191
  OpenAIModel,
2112
2192
  OpenAIModelSchema,
2193
+ OpenCodeConfigSchema,
2194
+ OpenCodePermissionSchema,
2195
+ PermissionValueSchema,
2113
2196
  PlaywrightNLTestSchema,
2114
2197
  PresetSchema,
2115
2198
  ProjectSchema,
2116
2199
  PromptResultSchema,
2117
2200
  RUN_COMMAND_LABELS,
2201
+ ReasoningEffortSchema,
2118
2202
  RuleSchema,
2119
2203
  RuleTypeSchema,
2120
2204
  RunAnalysisFindingSchema,
@@ -2125,6 +2209,7 @@ export {
2125
2209
  SYSTEM_ASSERTION_IDS,
2126
2210
  ScenarioAssertionLinkSchema,
2127
2211
  ScenarioConversationSchema,
2212
+ SimpleAgentConfigSchema,
2128
2213
  SiteConfigTestSchema,
2129
2214
  SkillFileSchema,
2130
2215
  SkillMetadataSchema,
@@ -2150,6 +2235,7 @@ export {
2150
2235
  TestTypeSchema,
2151
2236
  TextBlockSchema,
2152
2237
  ThinkingBlockSchema,
2238
+ ThinkingVariantSchema,
2153
2239
  TimeAssertionSchema,
2154
2240
  TimeConfigSchema,
2155
2241
  TokenUsageSchema,