@wix/evalforge-types 0.77.0 → 0.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +142 -21
- package/build/index.js.map +4 -4
- package/build/index.mjs +129 -21
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +5 -3
- package/build/types/agent/agent-config.d.ts +118 -0
- package/build/types/agent/index.d.ts +2 -0
- package/build/types/common/models.d.ts +20 -0
- package/build/types/target/agent.d.ts +3 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -85,9 +85,31 @@ var OPENAI_RESPONSES_MODEL_IDS = /* @__PURE__ */ new Set([
|
|
|
85
85
|
var OpenAIModelSchema = z4.enum(
|
|
86
86
|
AVAILABLE_OPENAI_MODEL_IDS
|
|
87
87
|
);
|
|
88
|
+
var GeminiModel = {
|
|
89
|
+
GEMINI_2_0_FLASH: "gemini-2.0-flash",
|
|
90
|
+
GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
|
|
91
|
+
GEMINI_2_5_PRO: "gemini-2.5-pro",
|
|
92
|
+
GEMINI_2_5_FLASH: "gemini-2.5-flash",
|
|
93
|
+
GEMINI_2_5_FLASH_LITE: "gemini-2.5-flash-lite",
|
|
94
|
+
GEMINI_3_0_PRO: "gemini-3-pro-preview",
|
|
95
|
+
GEMINI_3_0_FLASH: "gemini-3-flash-preview",
|
|
96
|
+
GEMINI_3_1_PRO: "gemini-3.1-pro-preview"
|
|
97
|
+
};
|
|
98
|
+
var AVAILABLE_GEMINI_MODEL_IDS = Object.values(GeminiModel);
|
|
99
|
+
var GEMINI_THINKING_MODEL_IDS = /* @__PURE__ */ new Set([
|
|
100
|
+
GeminiModel.GEMINI_2_5_PRO,
|
|
101
|
+
GeminiModel.GEMINI_2_5_FLASH,
|
|
102
|
+
GeminiModel.GEMINI_3_0_PRO,
|
|
103
|
+
GeminiModel.GEMINI_3_0_FLASH,
|
|
104
|
+
GeminiModel.GEMINI_3_1_PRO
|
|
105
|
+
]);
|
|
106
|
+
var GeminiModelSchema = z4.enum(
|
|
107
|
+
AVAILABLE_GEMINI_MODEL_IDS
|
|
108
|
+
);
|
|
88
109
|
var ALL_AVAILABLE_MODEL_IDS = [
|
|
89
110
|
...AVAILABLE_CLAUDE_MODEL_IDS,
|
|
90
|
-
...AVAILABLE_OPENAI_MODEL_IDS
|
|
111
|
+
...AVAILABLE_OPENAI_MODEL_IDS,
|
|
112
|
+
...AVAILABLE_GEMINI_MODEL_IDS
|
|
91
113
|
];
|
|
92
114
|
var AnyModelSchema = z4.enum(
|
|
93
115
|
ALL_AVAILABLE_MODEL_IDS
|
|
@@ -126,7 +148,8 @@ var ModelConfigSchema = z4.object({
|
|
|
126
148
|
z4.number().min(0).max(1).optional()
|
|
127
149
|
),
|
|
128
150
|
maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
|
|
129
|
-
|
|
151
|
+
/** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
|
|
152
|
+
maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
|
|
130
153
|
});
|
|
131
154
|
|
|
132
155
|
// src/common/rule.ts
|
|
@@ -198,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
|
|
|
198
221
|
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
199
222
|
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
200
223
|
runCommand: AgentRunCommandSchema.optional(),
|
|
201
|
-
/**
|
|
224
|
+
/**
|
|
225
|
+
* @deprecated Use `config` bag instead. Retained for backward compatibility
|
|
226
|
+
* with existing DB rows.
|
|
227
|
+
*/
|
|
202
228
|
modelConfig: ModelConfigSchema.optional(),
|
|
203
229
|
systemPrompt: z6.string().nullish().describe(
|
|
204
230
|
"Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
|
|
205
|
-
)
|
|
231
|
+
),
|
|
232
|
+
/**
|
|
233
|
+
* Unified agent configuration bag. Absorbs model params (model,
|
|
234
|
+
* temperature, maxTokens, maxTurns) plus agent-specific settings
|
|
235
|
+
* (permissions, thinking tokens, allowed tools, etc.).
|
|
236
|
+
*
|
|
237
|
+
* Adapters read from `config` first, falling back to `modelConfig`
|
|
238
|
+
* for backward compatibility with existing DB rows.
|
|
239
|
+
*/
|
|
240
|
+
config: z6.record(z6.string(), z6.unknown()).optional()
|
|
206
241
|
});
|
|
207
242
|
var CreateAgentInputSchema = AgentSchema.omit({
|
|
208
243
|
id: true,
|
|
@@ -212,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
|
|
|
212
247
|
});
|
|
213
248
|
var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
214
249
|
modelConfig: ModelConfigSchema.optional().nullable(),
|
|
215
|
-
systemPrompt: z6.string().optional().nullable()
|
|
250
|
+
systemPrompt: z6.string().optional().nullable(),
|
|
251
|
+
config: z6.record(z6.string(), z6.unknown()).optional().nullable()
|
|
216
252
|
});
|
|
217
253
|
|
|
218
254
|
// src/target/skill.ts
|
|
@@ -1876,8 +1912,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1876
1912
|
});
|
|
1877
1913
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1878
1914
|
|
|
1879
|
-
// src/
|
|
1915
|
+
// src/agent/agent-config.ts
|
|
1880
1916
|
import { z as z35 } from "zod";
|
|
1917
|
+
var BaseAgentConfigSchema = z35.object({
|
|
1918
|
+
/** Model ID (Claude, OpenAI, or Gemini). */
|
|
1919
|
+
model: AnyModelSchema.optional(),
|
|
1920
|
+
/** Sampling temperature (0–1). */
|
|
1921
|
+
temperature: z35.number().min(0).max(1).optional(),
|
|
1922
|
+
/** Max output tokens per turn. */
|
|
1923
|
+
maxTokens: z35.number().int().min(1).optional(),
|
|
1924
|
+
/** Number of agentic turns. 0 = unlimited. */
|
|
1925
|
+
maxTurns: z35.number().int().min(0).optional(),
|
|
1926
|
+
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
1927
|
+
maxDurationMs: z35.number().int().min(0).optional()
|
|
1928
|
+
});
|
|
1929
|
+
var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
|
|
1930
|
+
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1931
|
+
/** Extended thinking token budget. */
|
|
1932
|
+
maxThinkingTokens: z35.number().int().min(0).optional(),
|
|
1933
|
+
/** Override the default allowedTools list passed to the SDK. */
|
|
1934
|
+
allowedTools: z35.array(z35.string()).optional(),
|
|
1935
|
+
/** Tools to remove from the model's context entirely. */
|
|
1936
|
+
disallowedTools: z35.array(z35.string()).optional(),
|
|
1937
|
+
/** Controls thinking depth: low, medium, high, max. */
|
|
1938
|
+
effort: EffortLevelSchema.optional(),
|
|
1939
|
+
/** Maximum USD spend per run. Stops execution when reached. */
|
|
1940
|
+
maxBudgetUsd: z35.number().min(0).optional()
|
|
1941
|
+
});
|
|
1942
|
+
var PermissionValueSchema = z35.enum(["allow", "deny"]);
|
|
1943
|
+
var OpenCodePermissionSchema = z35.record(
|
|
1944
|
+
z35.string(),
|
|
1945
|
+
z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
|
|
1946
|
+
);
|
|
1947
|
+
var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
|
|
1948
|
+
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1949
|
+
/** Permission overrides (defaults: allow-all). */
|
|
1950
|
+
permission: OpenCodePermissionSchema.optional(),
|
|
1951
|
+
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
1952
|
+
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
1953
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
1954
|
+
topP: z35.number().min(0).max(1).optional()
|
|
1955
|
+
}).omit({ maxTokens: true });
|
|
1956
|
+
var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
|
|
1957
|
+
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
1958
|
+
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
1959
|
+
thinkingBudgetTokens: z35.number().int().min(0).optional(),
|
|
1960
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
1961
|
+
topP: z35.number().min(0).max(1).optional(),
|
|
1962
|
+
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
1963
|
+
seed: z35.number().int().optional(),
|
|
1964
|
+
/** Stop sequences — model stops when generating any of these strings. */
|
|
1965
|
+
stopSequences: z35.array(z35.string()).optional(),
|
|
1966
|
+
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
1967
|
+
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
1968
|
+
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
1969
|
+
frequencyPenalty: z35.number().min(-2).max(2).optional(),
|
|
1970
|
+
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
1971
|
+
presencePenalty: z35.number().min(-2).max(2).optional()
|
|
1972
|
+
});
|
|
1973
|
+
|
|
1974
|
+
// src/schedule/eval-schedule.ts
|
|
1975
|
+
import { z as z36 } from "zod";
|
|
1881
1976
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1882
1977
|
FrequencyType2["DAILY"] = "daily";
|
|
1883
1978
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1887,29 +1982,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1887
1982
|
})(FrequencyType || {});
|
|
1888
1983
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1889
1984
|
/** Whether the schedule is active */
|
|
1890
|
-
enabled:
|
|
1985
|
+
enabled: z36.boolean(),
|
|
1891
1986
|
/** Test suite to run */
|
|
1892
|
-
suiteId:
|
|
1987
|
+
suiteId: z36.string(),
|
|
1893
1988
|
/** Preset that provides agent + entities for this schedule */
|
|
1894
|
-
presetId:
|
|
1989
|
+
presetId: z36.string(),
|
|
1895
1990
|
/** How often to run */
|
|
1896
|
-
frequencyType:
|
|
1991
|
+
frequencyType: z36.nativeEnum(FrequencyType),
|
|
1897
1992
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1898
|
-
timeOfDay:
|
|
1993
|
+
timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1899
1994
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1900
|
-
dayOfWeek:
|
|
1995
|
+
dayOfWeek: z36.number().min(0).max(6).optional(),
|
|
1901
1996
|
/** Day of month (1-31) for monthly schedules */
|
|
1902
|
-
dayOfMonth:
|
|
1997
|
+
dayOfMonth: z36.number().min(1).max(31).optional(),
|
|
1903
1998
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1904
|
-
timezone:
|
|
1999
|
+
timezone: z36.string(),
|
|
1905
2000
|
/** ID of the last eval run created by this schedule */
|
|
1906
|
-
lastRunId:
|
|
2001
|
+
lastRunId: z36.string().optional(),
|
|
1907
2002
|
/** Denormalized status of the last run */
|
|
1908
|
-
lastRunStatus:
|
|
2003
|
+
lastRunStatus: z36.string().optional(),
|
|
1909
2004
|
/** ISO timestamp of the last run */
|
|
1910
|
-
lastRunAt:
|
|
2005
|
+
lastRunAt: z36.string().optional(),
|
|
1911
2006
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1912
|
-
nextRunAt:
|
|
2007
|
+
nextRunAt: z36.string().optional()
|
|
1913
2008
|
});
|
|
1914
2009
|
function isValidTimezone(tz) {
|
|
1915
2010
|
try {
|
|
@@ -1922,14 +2017,14 @@ function isValidTimezone(tz) {
|
|
|
1922
2017
|
function validateScheduleFields(data, ctx, options) {
|
|
1923
2018
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1924
2019
|
ctx.addIssue({
|
|
1925
|
-
code:
|
|
2020
|
+
code: z36.ZodIssueCode.custom,
|
|
1926
2021
|
message: "dayOfWeek is required for weekly schedules",
|
|
1927
2022
|
path: ["dayOfWeek"]
|
|
1928
2023
|
});
|
|
1929
2024
|
}
|
|
1930
2025
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1931
2026
|
ctx.addIssue({
|
|
1932
|
-
code:
|
|
2027
|
+
code: z36.ZodIssueCode.custom,
|
|
1933
2028
|
message: "dayOfMonth is required for monthly schedules",
|
|
1934
2029
|
path: ["dayOfMonth"]
|
|
1935
2030
|
});
|
|
@@ -1937,7 +2032,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1937
2032
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1938
2033
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1939
2034
|
ctx.addIssue({
|
|
1940
|
-
code:
|
|
2035
|
+
code: z36.ZodIssueCode.custom,
|
|
1941
2036
|
message: "Invalid IANA timezone",
|
|
1942
2037
|
path: ["timezone"]
|
|
1943
2038
|
});
|
|
@@ -1965,6 +2060,7 @@ export {
|
|
|
1965
2060
|
ALLOWED_BUILD_COMMANDS,
|
|
1966
2061
|
ALL_AVAILABLE_MODEL_IDS,
|
|
1967
2062
|
AVAILABLE_CLAUDE_MODEL_IDS,
|
|
2063
|
+
AVAILABLE_GEMINI_MODEL_IDS,
|
|
1968
2064
|
AVAILABLE_OPENAI_MODEL_IDS,
|
|
1969
2065
|
AVAILABLE_RUN_COMMANDS,
|
|
1970
2066
|
AVAILABLE_TOOL_NAMES,
|
|
@@ -1986,6 +2082,7 @@ export {
|
|
|
1986
2082
|
AssertionSchema,
|
|
1987
2083
|
AssertionTypeSchema,
|
|
1988
2084
|
BATCH_IMPORT_LIMITS,
|
|
2085
|
+
BaseAgentConfigSchema,
|
|
1989
2086
|
BaseEntitySchema,
|
|
1990
2087
|
BaseTestSchema,
|
|
1991
2088
|
BatchAssertionLinkSchema,
|
|
@@ -2008,6 +2105,7 @@ export {
|
|
|
2008
2105
|
CapabilityVersionOriginSchema,
|
|
2009
2106
|
CapabilityVersionSchema,
|
|
2010
2107
|
CapabilityWithLatestVersionSchema,
|
|
2108
|
+
ClaudeCodeConfigSchema,
|
|
2011
2109
|
ClaudeModel,
|
|
2012
2110
|
ClaudeModelSchema,
|
|
2013
2111
|
CommandExecutionSchema,
|
|
@@ -2040,6 +2138,7 @@ export {
|
|
|
2040
2138
|
DiffContentSchema,
|
|
2041
2139
|
DiffLineSchema,
|
|
2042
2140
|
DiffLineTypeSchema,
|
|
2141
|
+
EffortLevelSchema,
|
|
2043
2142
|
EnvironmentSchema,
|
|
2044
2143
|
EvalMetricsSchema,
|
|
2045
2144
|
EvalRunFolderMembershipSchema,
|
|
@@ -2059,6 +2158,9 @@ export {
|
|
|
2059
2158
|
FileModificationSchema,
|
|
2060
2159
|
FilePresenceTestSchema,
|
|
2061
2160
|
FrequencyType,
|
|
2161
|
+
GEMINI_THINKING_MODEL_IDS,
|
|
2162
|
+
GeminiModel,
|
|
2163
|
+
GeminiModelSchema,
|
|
2062
2164
|
GitHubSourceSchema,
|
|
2063
2165
|
InitialCapabilityVersionInputSchema,
|
|
2064
2166
|
InitialVersionInputSchema,
|
|
@@ -2084,11 +2186,15 @@ export {
|
|
|
2084
2186
|
OPENAI_RESPONSES_MODEL_IDS,
|
|
2085
2187
|
OpenAIModel,
|
|
2086
2188
|
OpenAIModelSchema,
|
|
2189
|
+
OpenCodeConfigSchema,
|
|
2190
|
+
OpenCodePermissionSchema,
|
|
2191
|
+
PermissionValueSchema,
|
|
2087
2192
|
PlaywrightNLTestSchema,
|
|
2088
2193
|
PresetSchema,
|
|
2089
2194
|
ProjectSchema,
|
|
2090
2195
|
PromptResultSchema,
|
|
2091
2196
|
RUN_COMMAND_LABELS,
|
|
2197
|
+
ReasoningEffortSchema,
|
|
2092
2198
|
RuleSchema,
|
|
2093
2199
|
RuleTypeSchema,
|
|
2094
2200
|
RunAnalysisFindingSchema,
|
|
@@ -2099,6 +2205,7 @@ export {
|
|
|
2099
2205
|
SYSTEM_ASSERTION_IDS,
|
|
2100
2206
|
ScenarioAssertionLinkSchema,
|
|
2101
2207
|
ScenarioConversationSchema,
|
|
2208
|
+
SimpleAgentConfigSchema,
|
|
2102
2209
|
SiteConfigTestSchema,
|
|
2103
2210
|
SkillFileSchema,
|
|
2104
2211
|
SkillMetadataSchema,
|
|
@@ -2124,6 +2231,7 @@ export {
|
|
|
2124
2231
|
TestTypeSchema,
|
|
2125
2232
|
TextBlockSchema,
|
|
2126
2233
|
ThinkingBlockSchema,
|
|
2234
|
+
ThinkingVariantSchema,
|
|
2127
2235
|
TimeAssertionSchema,
|
|
2128
2236
|
TimeConfigSchema,
|
|
2129
2237
|
TokenUsageSchema,
|