npm - @wix/evalforge-types - Versions diffs - 0.78.0 → 0.80.0 - Mend

@wix/evalforge-types 0.78.0 → 0.80.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10) hide show

package/build/index.js +117 -22
package/build/index.js.map +4 -4
package/build/index.mjs +108 -22
package/build/index.mjs.map +4 -4
package/build/types/agent/adapter.d.ts +5 -3
package/build/types/agent/agent-config.d.ts +118 -0
package/build/types/agent/index.d.ts +2 -0
package/build/types/evaluation/eval-run.d.ts +3 -0
package/build/types/target/agent.d.ts +3 -0
package/package.json +2 -2

package/build/index.js CHANGED Viewed

@@ -46,6 +46,7 @@ __export(index_exports, {
   AssertionSchema: () => AssertionSchema,
   AssertionTypeSchema: () => AssertionTypeSchema,
   BATCH_IMPORT_LIMITS: () => BATCH_IMPORT_LIMITS,
+  BaseAgentConfigSchema: () => BaseAgentConfigSchema,
   BaseEntitySchema: () => BaseEntitySchema,
   BaseTestSchema: () => BaseTestSchema,
   BatchAssertionLinkSchema: () => BatchAssertionLinkSchema,
@@ -68,6 +69,7 @@ __export(index_exports, {
   CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
   CapabilityVersionSchema: () => CapabilityVersionSchema,
   CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
+  ClaudeCodeConfigSchema: () => ClaudeCodeConfigSchema,
   ClaudeModel: () => ClaudeModel,
   ClaudeModelSchema: () => ClaudeModelSchema,
   CommandExecutionSchema: () => CommandExecutionSchema,
@@ -100,6 +102,7 @@ __export(index_exports, {
   DiffContentSchema: () => DiffContentSchema,
   DiffLineSchema: () => DiffLineSchema,
   DiffLineTypeSchema: () => DiffLineTypeSchema,
+  EffortLevelSchema: () => EffortLevelSchema,
   EnvironmentSchema: () => EnvironmentSchema,
   EvalMetricsSchema: () => EvalMetricsSchema,
   EvalRunFolderMembershipSchema: () => EvalRunFolderMembershipSchema,
@@ -147,11 +150,15 @@ __export(index_exports, {
   OPENAI_RESPONSES_MODEL_IDS: () => OPENAI_RESPONSES_MODEL_IDS,
   OpenAIModel: () => OpenAIModel,
   OpenAIModelSchema: () => OpenAIModelSchema,
+  OpenCodeConfigSchema: () => OpenCodeConfigSchema,
+  OpenCodePermissionSchema: () => OpenCodePermissionSchema,
+  PermissionValueSchema: () => PermissionValueSchema,
   PlaywrightNLTestSchema: () => PlaywrightNLTestSchema,
   PresetSchema: () => PresetSchema,
   ProjectSchema: () => ProjectSchema,
   PromptResultSchema: () => PromptResultSchema,
   RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
+  ReasoningEffortSchema: () => ReasoningEffortSchema,
   RuleSchema: () => RuleSchema,
   RuleTypeSchema: () => RuleTypeSchema,
   RunAnalysisFindingSchema: () => RunAnalysisFindingSchema,
@@ -162,6 +169,7 @@ __export(index_exports, {
   SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
   ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
   ScenarioConversationSchema: () => ScenarioConversationSchema,
+  SimpleAgentConfigSchema: () => SimpleAgentConfigSchema,
   SiteConfigTestSchema: () => SiteConfigTestSchema,
   SkillFileSchema: () => SkillFileSchema,
   SkillMetadataSchema: () => SkillMetadataSchema,
@@ -187,6 +195,7 @@ __export(index_exports, {
   TestTypeSchema: () => TestTypeSchema,
   TextBlockSchema: () => TextBlockSchema,
   ThinkingBlockSchema: () => ThinkingBlockSchema,
+  ThinkingVariantSchema: () => ThinkingVariantSchema,
   TimeAssertionSchema: () => TimeAssertionSchema,
   TimeConfigSchema: () => TimeConfigSchema,
   TokenUsageSchema: () => TokenUsageSchema,
@@ -387,7 +396,8 @@ var ModelConfigSchema = import_zod4.z.object({
     import_zod4.z.number().min(0).max(1).optional()
   ),
   maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional()),
-  maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(1).optional())
+  /** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
+  maxTurns: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().int().min(0).optional())
 });
 // src/common/rule.ts
@@ -459,11 +469,23 @@ var AgentSchema = TargetSchema.extend({
   agentType: AgentTypeSchema.default(AgentType.CLI),
   /** Command to run the agent (required for CLI agents, absent for SDK agents) */
   runCommand: AgentRunCommandSchema.optional(),
-  /** Optional model configuration override */
+  /**
+   * @deprecated Use `config` bag instead. Retained for backward compatibility
+   * with existing DB rows.
+   */
   modelConfig: ModelConfigSchema.optional(),
   systemPrompt: import_zod6.z.string().nullish().describe(
     "Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
-  )
+  ),
+  /**
+   * Unified agent configuration bag. Absorbs model params (model,
+   * temperature, maxTokens, maxTurns) plus agent-specific settings
+   * (permissions, thinking tokens, allowed tools, etc.).
+   *
+   * Adapters read from `config` first, falling back to `modelConfig`
+   * for backward compatibility with existing DB rows.
+   */
+  config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional()
 });
 var CreateAgentInputSchema = AgentSchema.omit({
   id: true,
@@ -473,7 +495,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
 });
 var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
   modelConfig: ModelConfigSchema.optional().nullable(),
-  systemPrompt: import_zod6.z.string().optional().nullable()
+  systemPrompt: import_zod6.z.string().optional().nullable(),
+  config: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.unknown()).optional().nullable()
 });
 // src/target/skill.ts
@@ -1871,14 +1894,18 @@ var EvalRunSchema = TenantEntitySchema.extend({
     agentType: AgentTypeSchema.optional(),
     runCommand: AgentRunCommandSchema.optional(),
     systemPrompt: import_zod30.z.string().nullable().optional(),
-    modelConfig: ModelConfigSchema.optional()
+    /** @deprecated retained for backward compat with stored snapshots */
+    modelConfig: ModelConfigSchema.optional(),
+    config: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.unknown()).optional()
   }).optional(),
   /** UUID linking all runs in a comparison group */
   comparisonGroupId: import_zod30.z.string().optional(),
   /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
   comparisonLabel: import_zod30.z.string().optional(),
   /** LLM-generated analysis of the completed run */
-  runAnalysis: RunAnalysisSchema.optional()
+  runAnalysis: RunAnalysisSchema.optional(),
+  /** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
+  folderIds: import_zod30.z.array(import_zod30.z.string()).optional()
 });
 var CreateEvalRunInputSchema = EvalRunSchema.omit({
   id: true,
@@ -2137,8 +2164,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
 });
 var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
-// src/schedule/eval-schedule.ts
+// src/agent/agent-config.ts
 var import_zod35 = require("zod");
+var BaseAgentConfigSchema = import_zod35.z.object({
+  /** Model ID (Claude or OpenAI). */
+  model: AnyModelSchema.optional(),
+  /** Sampling temperature (0–1). */
+  temperature: import_zod35.z.number().min(0).max(1).optional(),
+  /** Max output tokens per turn. */
+  maxTokens: import_zod35.z.number().int().min(1).optional(),
+  /** Number of agentic turns. 0 = unlimited. */
+  maxTurns: import_zod35.z.number().int().min(0).optional(),
+  /** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
+  maxDurationMs: import_zod35.z.number().int().min(0).optional()
+});
+var EffortLevelSchema = import_zod35.z.enum(["low", "medium", "high", "max"]);
+var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
+  /** Extended thinking token budget. */
+  maxThinkingTokens: import_zod35.z.number().int().min(0).optional(),
+  /** Override the default allowedTools list passed to the SDK. */
+  allowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
+  /** Tools to remove from the model's context entirely. */
+  disallowedTools: import_zod35.z.array(import_zod35.z.string()).optional(),
+  /** Controls thinking depth: low, medium, high, max. */
+  effort: EffortLevelSchema.optional(),
+  /** Maximum USD spend per run. Stops execution when reached. */
+  maxBudgetUsd: import_zod35.z.number().min(0).optional()
+});
+var PermissionValueSchema = import_zod35.z.enum(["allow", "deny"]);
+var OpenCodePermissionSchema = import_zod35.z.record(
+  import_zod35.z.string(),
+  import_zod35.z.union([PermissionValueSchema, import_zod35.z.record(import_zod35.z.string(), PermissionValueSchema)])
+);
+var ThinkingVariantSchema = import_zod35.z.enum(["high", "low", "none"]);
+var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
+  /** Permission overrides (defaults: allow-all). */
+  permission: OpenCodePermissionSchema.optional(),
+  /** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
+  thinkingVariant: ThinkingVariantSchema.optional(),
+  /** Nucleus sampling (0–1). Alternative to temperature. */
+  topP: import_zod35.z.number().min(0).max(1).optional()
+}).omit({ maxTokens: true });
+var ReasoningEffortSchema = import_zod35.z.enum(["low", "medium", "high"]);
+var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
+  /** Anthropic thinking budget in tokens. Default: 10 000. */
+  thinkingBudgetTokens: import_zod35.z.number().int().min(0).optional(),
+  /** Nucleus sampling (0–1). Alternative to temperature. */
+  topP: import_zod35.z.number().min(0).max(1).optional(),
+  /** Integer seed for deterministic/reproducible results (if model supports it). */
+  seed: import_zod35.z.number().int().optional(),
+  /** Stop sequences — model stops when generating any of these strings. */
+  stopSequences: import_zod35.z.array(import_zod35.z.string()).optional(),
+  /** OpenAI reasoning effort level. Default: 'high'. */
+  reasoningEffort: ReasoningEffortSchema.optional(),
+  /** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
+  frequencyPenalty: import_zod35.z.number().min(-2).max(2).optional(),
+  /** Presence penalty (−2 to 2). Encourages topic diversity. */
+  presencePenalty: import_zod35.z.number().min(-2).max(2).optional()
+});
+// src/schedule/eval-schedule.ts
+var import_zod36 = require("zod");
 var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
   FrequencyType2["DAILY"] = "daily";
   FrequencyType2["WEEKDAY"] = "weekday";
@@ -2148,29 +2234,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
 })(FrequencyType || {});
 var EvalScheduleSchema = TenantEntitySchema.extend({
   /** Whether the schedule is active */
-  enabled: import_zod35.z.boolean(),
+  enabled: import_zod36.z.boolean(),
   /** Test suite to run */
-  suiteId: import_zod35.z.string(),
+  suiteId: import_zod36.z.string(),
   /** Preset that provides agent + entities for this schedule */
-  presetId: import_zod35.z.string(),
+  presetId: import_zod36.z.string(),
   /** How often to run */
-  frequencyType: import_zod35.z.nativeEnum(FrequencyType),
+  frequencyType: import_zod36.z.nativeEnum(FrequencyType),
   /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
-  timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
+  timeOfDay: import_zod36.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
   /** Day of week (0=Sun, 6=Sat) for weekly schedules */
-  dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
+  dayOfWeek: import_zod36.z.number().min(0).max(6).optional(),
   /** Day of month (1-31) for monthly schedules */
-  dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
+  dayOfMonth: import_zod36.z.number().min(1).max(31).optional(),
   /** IANA timezone (e.g., 'America/New_York') */
-  timezone: import_zod35.z.string(),
+  timezone: import_zod36.z.string(),
   /** ID of the last eval run created by this schedule */
-  lastRunId: import_zod35.z.string().optional(),
+  lastRunId: import_zod36.z.string().optional(),
   /** Denormalized status of the last run */
-  lastRunStatus: import_zod35.z.string().optional(),
+  lastRunStatus: import_zod36.z.string().optional(),
   /** ISO timestamp of the last run */
-  lastRunAt: import_zod35.z.string().optional(),
+  lastRunAt: import_zod36.z.string().optional(),
   /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
-  nextRunAt: import_zod35.z.string().optional()
+  nextRunAt: import_zod36.z.string().optional()
 });
 function isValidTimezone(tz) {
   try {
@@ -2183,14 +2269,14 @@ function isValidTimezone(tz) {
 function validateScheduleFields(data, ctx, options) {
   if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
     ctx.addIssue({
-      code: import_zod35.z.ZodIssueCode.custom,
+      code: import_zod36.z.ZodIssueCode.custom,
       message: "dayOfWeek is required for weekly schedules",
       path: ["dayOfWeek"]
     });
   }
   if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
     ctx.addIssue({
-      code: import_zod35.z.ZodIssueCode.custom,
+      code: import_zod36.z.ZodIssueCode.custom,
       message: "dayOfMonth is required for monthly schedules",
       path: ["dayOfMonth"]
     });
@@ -2198,7 +2284,7 @@ function validateScheduleFields(data, ctx, options) {
   const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
   if (shouldValidateTz && !isValidTimezone(data.timezone)) {
     ctx.addIssue({
-      code: import_zod35.z.ZodIssueCode.custom,
+      code: import_zod36.z.ZodIssueCode.custom,
       message: "Invalid IANA timezone",
       path: ["timezone"]
     });
@@ -2249,6 +2335,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   AssertionSchema,
   AssertionTypeSchema,
   BATCH_IMPORT_LIMITS,
+  BaseAgentConfigSchema,
   BaseEntitySchema,
   BaseTestSchema,
   BatchAssertionLinkSchema,
@@ -2271,6 +2358,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   CapabilityVersionOriginSchema,
   CapabilityVersionSchema,
   CapabilityWithLatestVersionSchema,
+  ClaudeCodeConfigSchema,
   ClaudeModel,
   ClaudeModelSchema,
   CommandExecutionSchema,
@@ -2303,6 +2391,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   DiffContentSchema,
   DiffLineSchema,
   DiffLineTypeSchema,
+  EffortLevelSchema,
   EnvironmentSchema,
   EvalMetricsSchema,
   EvalRunFolderMembershipSchema,
@@ -2350,11 +2439,15 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   OPENAI_RESPONSES_MODEL_IDS,
   OpenAIModel,
   OpenAIModelSchema,
+  OpenCodeConfigSchema,
+  OpenCodePermissionSchema,
+  PermissionValueSchema,
   PlaywrightNLTestSchema,
   PresetSchema,
   ProjectSchema,
   PromptResultSchema,
   RUN_COMMAND_LABELS,
+  ReasoningEffortSchema,
   RuleSchema,
   RuleTypeSchema,
   RunAnalysisFindingSchema,
@@ -2365,6 +2458,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   SYSTEM_ASSERTION_IDS,
   ScenarioAssertionLinkSchema,
   ScenarioConversationSchema,
+  SimpleAgentConfigSchema,
   SiteConfigTestSchema,
   SkillFileSchema,
   SkillMetadataSchema,
@@ -2390,6 +2484,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
   TestTypeSchema,
   TextBlockSchema,
   ThinkingBlockSchema,
+  ThinkingVariantSchema,
   TimeAssertionSchema,
   TimeConfigSchema,
   TokenUsageSchema,