@wix/evalforge-types 0.78.0 → 0.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +117 -22
- package/build/index.js.map +4 -4
- package/build/index.mjs +108 -22
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +5 -3
- package/build/types/agent/agent-config.d.ts +118 -0
- package/build/types/agent/index.d.ts +2 -0
- package/build/types/evaluation/eval-run.d.ts +3 -0
- package/build/types/target/agent.d.ts +3 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -148,7 +148,8 @@ var ModelConfigSchema = z4.object({
|
|
|
148
148
|
z4.number().min(0).max(1).optional()
|
|
149
149
|
),
|
|
150
150
|
maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
|
|
151
|
-
|
|
151
|
+
/** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
|
|
152
|
+
maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
|
|
152
153
|
});
|
|
153
154
|
|
|
154
155
|
// src/common/rule.ts
|
|
@@ -220,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
|
|
|
220
221
|
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
221
222
|
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
222
223
|
runCommand: AgentRunCommandSchema.optional(),
|
|
223
|
-
/**
|
|
224
|
+
/**
|
|
225
|
+
* @deprecated Use `config` bag instead. Retained for backward compatibility
|
|
226
|
+
* with existing DB rows.
|
|
227
|
+
*/
|
|
224
228
|
modelConfig: ModelConfigSchema.optional(),
|
|
225
229
|
systemPrompt: z6.string().nullish().describe(
|
|
226
230
|
"Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
|
|
227
|
-
)
|
|
231
|
+
),
|
|
232
|
+
/**
|
|
233
|
+
* Unified agent configuration bag. Absorbs model params (model,
|
|
234
|
+
* temperature, maxTokens, maxTurns) plus agent-specific settings
|
|
235
|
+
* (permissions, thinking tokens, allowed tools, etc.).
|
|
236
|
+
*
|
|
237
|
+
* Adapters read from `config` first, falling back to `modelConfig`
|
|
238
|
+
* for backward compatibility with existing DB rows.
|
|
239
|
+
*/
|
|
240
|
+
config: z6.record(z6.string(), z6.unknown()).optional()
|
|
228
241
|
});
|
|
229
242
|
var CreateAgentInputSchema = AgentSchema.omit({
|
|
230
243
|
id: true,
|
|
@@ -234,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
|
|
|
234
247
|
});
|
|
235
248
|
/**
 * Input schema for updating an agent. Starts from CreateAgentInputSchema with
 * every field made optional via .partial(), then re-declares modelConfig,
 * systemPrompt and config so that each also accepts an explicit null.
 * NOTE(review): null presumably means "clear the stored value" — confirm
 * against the update handler.
 */
var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
236
249
|
modelConfig: ModelConfigSchema.optional().nullable(),
|
|
237
|
-
systemPrompt: z6.string().optional().nullable()
|
|
250
|
+
systemPrompt: z6.string().optional().nullable(),
|
|
251
|
+
config: z6.record(z6.string(), z6.unknown()).optional().nullable()
|
|
238
252
|
});
|
|
239
253
|
|
|
240
254
|
// src/target/skill.ts
|
|
@@ -1632,14 +1646,18 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1632
1646
|
agentType: AgentTypeSchema.optional(),
|
|
1633
1647
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1634
1648
|
systemPrompt: z30.string().nullable().optional(),
|
|
1635
|
-
|
|
1649
|
+
/** @deprecated retained for backward compat with stored snapshots */
|
|
1650
|
+
modelConfig: ModelConfigSchema.optional(),
|
|
1651
|
+
config: z30.record(z30.string(), z30.unknown()).optional()
|
|
1636
1652
|
}).optional(),
|
|
1637
1653
|
/** UUID linking all runs in a comparison group */
|
|
1638
1654
|
comparisonGroupId: z30.string().optional(),
|
|
1639
1655
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1640
1656
|
comparisonLabel: z30.string().optional(),
|
|
1641
1657
|
/** LLM-generated analysis of the completed run */
|
|
1642
|
-
runAnalysis: RunAnalysisSchema.optional()
|
|
1658
|
+
runAnalysis: RunAnalysisSchema.optional(),
|
|
1659
|
+
/** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
|
|
1660
|
+
folderIds: z30.array(z30.string()).optional()
|
|
1643
1661
|
});
|
|
1644
1662
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1645
1663
|
id: true,
|
|
@@ -1898,8 +1916,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1898
1916
|
});
|
|
1899
1917
|
/** Update input for templates: CreateTemplateInputSchema with every field made optional via .partial(). */
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1900
1918
|
|
|
1901
|
-
// src/
|
|
1919
|
+
// src/agent/agent-config.ts
|
|
1902
1920
|
import { z as z35 } from "zod";
|
|
1921
|
+
/**
 * Fields shared by the agent config schemas in this module; extended by
 * ClaudeCodeConfigSchema, OpenCodeConfigSchema and SimpleAgentConfigSchema.
 * Every field is optional.
 */
var BaseAgentConfigSchema = z35.object({
|
|
1922
|
+
/** Model ID (Claude or OpenAI); validated by AnyModelSchema. */
|
|
1923
|
+
model: AnyModelSchema.optional(),
|
|
1924
|
+
/** Sampling temperature; must lie within 0–1 inclusive. */
|
|
1925
|
+
temperature: z35.number().min(0).max(1).optional(),
|
|
1926
|
+
/** Max output tokens per turn; integer >= 1. */
|
|
1927
|
+
maxTokens: z35.number().int().min(1).optional(),
|
|
1928
|
+
/** Maximum number of agentic turns; integer >= 0. 0 = unlimited. */
|
|
1929
|
+
maxTurns: z35.number().int().min(0).optional(),
|
|
1930
|
+
/** Execution timeout in milliseconds; integer >= 0. Overrides the default maxTurns-based calculation. NOTE(review): 0 passes validation — confirm how adapters treat a zero timeout. */
|
|
1931
|
+
maxDurationMs: z35.number().int().min(0).optional()
|
|
1932
|
+
});
|
|
1933
|
+
var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
|
|
1934
|
+
/**
 * Claude Code configuration: all BaseAgentConfigSchema fields plus the
 * optional settings declared below.
 */
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1935
|
+
/** Extended thinking token budget; integer >= 0. */
|
|
1936
|
+
maxThinkingTokens: z35.number().int().min(0).optional(),
|
|
1937
|
+
/** Override the default allowedTools list passed to the SDK (array of tool-name strings). */
|
|
1938
|
+
allowedTools: z35.array(z35.string()).optional(),
|
|
1939
|
+
/** Tools to remove from the model's context entirely (array of tool-name strings). */
|
|
1940
|
+
disallowedTools: z35.array(z35.string()).optional(),
|
|
1941
|
+
/** Controls thinking depth; one of the EffortLevelSchema values: low, medium, high, max. */
|
|
1942
|
+
effort: EffortLevelSchema.optional(),
|
|
1943
|
+
/** Maximum USD spend per run; number >= 0 (fractions allowed). Stops execution when reached. */
|
|
1944
|
+
maxBudgetUsd: z35.number().min(0).optional()
|
|
1945
|
+
});
|
|
1946
|
+
var PermissionValueSchema = z35.enum(["allow", "deny"]);
|
|
1947
|
+
/**
 * String-keyed permission map. Each value is either a single allow/deny
 * decision or a nested string-keyed record of allow/deny decisions.
 * NOTE(review): outer keys are presumably permission/tool names and nested
 * keys finer-grained patterns — confirm against the OpenCode adapter.
 */
var OpenCodePermissionSchema = z35.record(
|
|
1948
|
+
z35.string(),
|
|
1949
|
+
z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
|
|
1950
|
+
);
|
|
1951
|
+
var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
|
|
1952
|
+
/**
 * OpenCode configuration: BaseAgentConfigSchema fields plus the optional
 * settings below, with `maxTokens` removed by the trailing .omit() call.
 */
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1953
|
+
/** Permission overrides (defaults: allow-all). The schema declares no default; any default is applied by the consumer. */
|
|
1954
|
+
permission: OpenCodePermissionSchema.optional(),
|
|
1955
|
+
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high' (not declared by this schema). NOTE(review): this comment names both --variant and --thinking — confirm which flag is actually emitted. */
|
|
1956
|
+
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
1957
|
+
/** Nucleus sampling; must lie within 0–1 inclusive. Alternative to temperature. */
|
|
1958
|
+
topP: z35.number().min(0).max(1).optional()
|
|
1959
|
+
}).omit({ maxTokens: true });
|
|
1960
|
+
var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
|
|
1961
|
+
/**
 * Simple-agent configuration: all BaseAgentConfigSchema fields plus the
 * optional sampling and reasoning settings below. Defaults mentioned in the
 * field comments are not declared by this schema (no .default() calls).
 */
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
1962
|
+
/** Anthropic thinking budget in tokens; integer >= 0. Default: 10 000. */
|
|
1963
|
+
thinkingBudgetTokens: z35.number().int().min(0).optional(),
|
|
1964
|
+
/** Nucleus sampling; must lie within 0–1 inclusive. Alternative to temperature. */
|
|
1965
|
+
topP: z35.number().min(0).max(1).optional(),
|
|
1966
|
+
/** Integer seed for deterministic/reproducible results (if model supports it). Negative integers are accepted. */
|
|
1967
|
+
seed: z35.number().int().optional(),
|
|
1968
|
+
/** Stop sequences: the model stops when generating any of these strings. */
|
|
1969
|
+
stopSequences: z35.array(z35.string()).optional(),
|
|
1970
|
+
/** OpenAI reasoning effort level; one of the ReasoningEffortSchema values. Default: 'high'. */
|
|
1971
|
+
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
1972
|
+
/** Frequency penalty; must lie within -2 to 2 inclusive. Reduces repetition of same tokens. */
|
|
1973
|
+
frequencyPenalty: z35.number().min(-2).max(2).optional(),
|
|
1974
|
+
/** Presence penalty; must lie within -2 to 2 inclusive. Encourages topic diversity. */
|
|
1975
|
+
presencePenalty: z35.number().min(-2).max(2).optional()
|
|
1976
|
+
});
|
|
1977
|
+
|
|
1978
|
+
// src/schedule/eval-schedule.ts
|
|
1979
|
+
import { z as z36 } from "zod";
|
|
1903
1980
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1904
1981
|
FrequencyType2["DAILY"] = "daily";
|
|
1905
1982
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1909,29 +1986,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1909
1986
|
})(FrequencyType || {});
|
|
1910
1987
|
/**
 * Schema for a recurring eval schedule; extends TenantEntitySchema with the
 * fields below. This object schema alone does not require dayOfWeek or
 * dayOfMonth for particular frequency types, nor check that timezone is a
 * real IANA name; validateScheduleFields (defined later in this file)
 * contains those checks.
 */
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1911
1988
|
/** Whether the schedule is active (required boolean) */
|
|
1912
|
-
enabled:
|
|
1989
|
+
enabled: z36.boolean(),
|
|
1913
1990
|
/** ID of the test suite to run (required) */
|
|
1914
|
-
suiteId:
|
|
1991
|
+
suiteId: z36.string(),
|
|
1915
1992
|
/** ID of the preset that provides agent + entities for this schedule (required) */
|
|
1916
|
-
presetId:
|
|
1993
|
+
presetId: z36.string(),
|
|
1917
1994
|
/** How often to run; one of the FrequencyType enum values */
|
|
1918
|
-
frequencyType:
|
|
1995
|
+
frequencyType: z36.nativeEnum(FrequencyType),
|
|
1919
1996
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59; enforced by the regex, so both parts must be two digits */
|
|
1920
|
-
timeOfDay:
|
|
1997
|
+
timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1921
1998
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules. NOTE(review): there is no .int() constraint, so fractional values such as 2.5 pass — confirm whether integers should be enforced. */
|
|
1922
|
-
dayOfWeek:
|
|
1999
|
+
dayOfWeek: z36.number().min(0).max(6).optional(),
|
|
1923
2000
|
/** Day of month (1-31) for monthly schedules. NOTE(review): there is no .int() constraint, so fractional values such as 1.5 pass — confirm whether integers should be enforced. */
|
|
1924
|
-
dayOfMonth:
|
|
2001
|
+
dayOfMonth: z36.number().min(1).max(31).optional(),
|
|
1925
2002
|
/** IANA timezone (e.g., 'America/New_York'); any string passes at this level */
|
|
1926
|
-
timezone:
|
|
2003
|
+
timezone: z36.string(),
|
|
1927
2004
|
/** ID of the last eval run created by this schedule */
|
|
1928
|
-
lastRunId:
|
|
2005
|
+
lastRunId: z36.string().optional(),
|
|
1929
2006
|
/** Denormalized status of the last run (typed as a free-form string here) */
|
|
1930
|
-
lastRunStatus:
|
|
2007
|
+
lastRunStatus: z36.string().optional(),
|
|
1931
2008
|
/** ISO timestamp of the last run (typed as a plain string; the format is not validated here) */
|
|
1932
|
-
lastRunAt:
|
|
2009
|
+
lastRunAt: z36.string().optional(),
|
|
1933
2010
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend); typed as a plain string */
|
|
1934
|
-
nextRunAt:
|
|
2011
|
+
nextRunAt: z36.string().optional()
|
|
1935
2012
|
});
|
|
1936
2013
|
function isValidTimezone(tz) {
|
|
1937
2014
|
try {
|
|
@@ -1944,14 +2021,14 @@ function isValidTimezone(tz) {
|
|
|
1944
2021
|
function validateScheduleFields(data, ctx, options) {
|
|
1945
2022
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1946
2023
|
ctx.addIssue({
|
|
1947
|
-
code:
|
|
2024
|
+
code: z36.ZodIssueCode.custom,
|
|
1948
2025
|
message: "dayOfWeek is required for weekly schedules",
|
|
1949
2026
|
path: ["dayOfWeek"]
|
|
1950
2027
|
});
|
|
1951
2028
|
}
|
|
1952
2029
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1953
2030
|
ctx.addIssue({
|
|
1954
|
-
code:
|
|
2031
|
+
code: z36.ZodIssueCode.custom,
|
|
1955
2032
|
message: "dayOfMonth is required for monthly schedules",
|
|
1956
2033
|
path: ["dayOfMonth"]
|
|
1957
2034
|
});
|
|
@@ -1959,7 +2036,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1959
2036
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1960
2037
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1961
2038
|
ctx.addIssue({
|
|
1962
|
-
code:
|
|
2039
|
+
code: z36.ZodIssueCode.custom,
|
|
1963
2040
|
message: "Invalid IANA timezone",
|
|
1964
2041
|
path: ["timezone"]
|
|
1965
2042
|
});
|
|
@@ -2009,6 +2086,7 @@ export {
|
|
|
2009
2086
|
AssertionSchema,
|
|
2010
2087
|
AssertionTypeSchema,
|
|
2011
2088
|
BATCH_IMPORT_LIMITS,
|
|
2089
|
+
BaseAgentConfigSchema,
|
|
2012
2090
|
BaseEntitySchema,
|
|
2013
2091
|
BaseTestSchema,
|
|
2014
2092
|
BatchAssertionLinkSchema,
|
|
@@ -2031,6 +2109,7 @@ export {
|
|
|
2031
2109
|
CapabilityVersionOriginSchema,
|
|
2032
2110
|
CapabilityVersionSchema,
|
|
2033
2111
|
CapabilityWithLatestVersionSchema,
|
|
2112
|
+
ClaudeCodeConfigSchema,
|
|
2034
2113
|
ClaudeModel,
|
|
2035
2114
|
ClaudeModelSchema,
|
|
2036
2115
|
CommandExecutionSchema,
|
|
@@ -2063,6 +2142,7 @@ export {
|
|
|
2063
2142
|
DiffContentSchema,
|
|
2064
2143
|
DiffLineSchema,
|
|
2065
2144
|
DiffLineTypeSchema,
|
|
2145
|
+
EffortLevelSchema,
|
|
2066
2146
|
EnvironmentSchema,
|
|
2067
2147
|
EvalMetricsSchema,
|
|
2068
2148
|
EvalRunFolderMembershipSchema,
|
|
@@ -2110,11 +2190,15 @@ export {
|
|
|
2110
2190
|
OPENAI_RESPONSES_MODEL_IDS,
|
|
2111
2191
|
OpenAIModel,
|
|
2112
2192
|
OpenAIModelSchema,
|
|
2193
|
+
OpenCodeConfigSchema,
|
|
2194
|
+
OpenCodePermissionSchema,
|
|
2195
|
+
PermissionValueSchema,
|
|
2113
2196
|
PlaywrightNLTestSchema,
|
|
2114
2197
|
PresetSchema,
|
|
2115
2198
|
ProjectSchema,
|
|
2116
2199
|
PromptResultSchema,
|
|
2117
2200
|
RUN_COMMAND_LABELS,
|
|
2201
|
+
ReasoningEffortSchema,
|
|
2118
2202
|
RuleSchema,
|
|
2119
2203
|
RuleTypeSchema,
|
|
2120
2204
|
RunAnalysisFindingSchema,
|
|
@@ -2125,6 +2209,7 @@ export {
|
|
|
2125
2209
|
SYSTEM_ASSERTION_IDS,
|
|
2126
2210
|
ScenarioAssertionLinkSchema,
|
|
2127
2211
|
ScenarioConversationSchema,
|
|
2212
|
+
SimpleAgentConfigSchema,
|
|
2128
2213
|
SiteConfigTestSchema,
|
|
2129
2214
|
SkillFileSchema,
|
|
2130
2215
|
SkillMetadataSchema,
|
|
@@ -2150,6 +2235,7 @@ export {
|
|
|
2150
2235
|
TestTypeSchema,
|
|
2151
2236
|
TextBlockSchema,
|
|
2152
2237
|
ThinkingBlockSchema,
|
|
2238
|
+
ThinkingVariantSchema,
|
|
2153
2239
|
TimeAssertionSchema,
|
|
2154
2240
|
TimeConfigSchema,
|
|
2155
2241
|
TokenUsageSchema,
|