@wix/evalforge-types 0.78.0 → 0.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +111 -20
- package/build/index.js.map +4 -4
- package/build/index.mjs +102 -20
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +5 -3
- package/build/types/agent/agent-config.d.ts +118 -0
- package/build/types/agent/index.d.ts +2 -0
- package/build/types/target/agent.d.ts +3 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -148,7 +148,8 @@ var ModelConfigSchema = z4.object({
|
|
|
148
148
|
z4.number().min(0).max(1).optional()
|
|
149
149
|
),
|
|
150
150
|
maxTokens: z4.preprocess(nullToUndefined, z4.number().min(1).optional()),
|
|
151
|
-
|
|
151
|
+
/** Number of agentic turns. 0 = unlimited (agent runs until done or timeout). */
|
|
152
|
+
maxTurns: z4.preprocess(nullToUndefined, z4.number().int().min(0).optional())
|
|
152
153
|
});
|
|
153
154
|
|
|
154
155
|
// src/common/rule.ts
|
|
@@ -220,11 +221,23 @@ var AgentSchema = TargetSchema.extend({
|
|
|
220
221
|
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
221
222
|
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
222
223
|
runCommand: AgentRunCommandSchema.optional(),
|
|
223
|
-
/**
|
|
224
|
+
/**
|
|
225
|
+
* @deprecated Use `config` bag instead. Retained for backward compatibility
|
|
226
|
+
* with existing DB rows.
|
|
227
|
+
*/
|
|
224
228
|
modelConfig: ModelConfigSchema.optional(),
|
|
225
229
|
systemPrompt: z6.string().nullish().describe(
|
|
226
230
|
"Override for eval runs. undefined=default instructions, null=raw agent, string=append to claude_code preset. See https://docs.anthropic.com/en/docs/claude-code/sdk/modifying-system-prompts"
|
|
227
|
-
)
|
|
231
|
+
),
|
|
232
|
+
/**
|
|
233
|
+
* Unified agent configuration bag. Absorbs model params (model,
|
|
234
|
+
* temperature, maxTokens, maxTurns) plus agent-specific settings
|
|
235
|
+
* (permissions, thinking tokens, allowed tools, etc.).
|
|
236
|
+
*
|
|
237
|
+
* Adapters read from `config` first, falling back to `modelConfig`
|
|
238
|
+
* for backward compatibility with existing DB rows.
|
|
239
|
+
*/
|
|
240
|
+
config: z6.record(z6.string(), z6.unknown()).optional()
|
|
228
241
|
});
|
|
229
242
|
var CreateAgentInputSchema = AgentSchema.omit({
|
|
230
243
|
id: true,
|
|
@@ -234,7 +247,8 @@ var CreateAgentInputSchema = AgentSchema.omit({
|
|
|
234
247
|
});
|
|
235
248
|
var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
236
249
|
modelConfig: ModelConfigSchema.optional().nullable(),
|
|
237
|
-
systemPrompt: z6.string().optional().nullable()
|
|
250
|
+
systemPrompt: z6.string().optional().nullable(),
|
|
251
|
+
config: z6.record(z6.string(), z6.unknown()).optional().nullable()
|
|
238
252
|
});
|
|
239
253
|
|
|
240
254
|
// src/target/skill.ts
|
|
@@ -1898,8 +1912,67 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1898
1912
|
});
|
|
1899
1913
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1900
1914
|
|
|
1901
|
-
// src/schedule/eval-schedule.ts
|
|
1915
|
+
// src/agent/agent-config.ts
|
|
1902
1916
|
import { z as z35 } from "zod";
|
|
1917
|
+
var BaseAgentConfigSchema = z35.object({
|
|
1918
|
+
/** Model ID (Claude or OpenAI). */
|
|
1919
|
+
model: AnyModelSchema.optional(),
|
|
1920
|
+
/** Sampling temperature (0–1). */
|
|
1921
|
+
temperature: z35.number().min(0).max(1).optional(),
|
|
1922
|
+
/** Max output tokens per turn. */
|
|
1923
|
+
maxTokens: z35.number().int().min(1).optional(),
|
|
1924
|
+
/** Number of agentic turns. 0 = unlimited. */
|
|
1925
|
+
maxTurns: z35.number().int().min(0).optional(),
|
|
1926
|
+
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
1927
|
+
maxDurationMs: z35.number().int().min(0).optional()
|
|
1928
|
+
});
|
|
1929
|
+
var EffortLevelSchema = z35.enum(["low", "medium", "high", "max"]);
|
|
1930
|
+
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1931
|
+
/** Extended thinking token budget. */
|
|
1932
|
+
maxThinkingTokens: z35.number().int().min(0).optional(),
|
|
1933
|
+
/** Override the default allowedTools list passed to the SDK. */
|
|
1934
|
+
allowedTools: z35.array(z35.string()).optional(),
|
|
1935
|
+
/** Tools to remove from the model's context entirely. */
|
|
1936
|
+
disallowedTools: z35.array(z35.string()).optional(),
|
|
1937
|
+
/** Controls thinking depth: low, medium, high, max. */
|
|
1938
|
+
effort: EffortLevelSchema.optional(),
|
|
1939
|
+
/** Maximum USD spend per run. Stops execution when reached. */
|
|
1940
|
+
maxBudgetUsd: z35.number().min(0).optional()
|
|
1941
|
+
});
|
|
1942
|
+
var PermissionValueSchema = z35.enum(["allow", "deny"]);
|
|
1943
|
+
var OpenCodePermissionSchema = z35.record(
|
|
1944
|
+
z35.string(),
|
|
1945
|
+
z35.union([PermissionValueSchema, z35.record(z35.string(), PermissionValueSchema)])
|
|
1946
|
+
);
|
|
1947
|
+
var ThinkingVariantSchema = z35.enum(["high", "low", "none"]);
|
|
1948
|
+
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
1949
|
+
/** Permission overrides (defaults: allow-all). */
|
|
1950
|
+
permission: OpenCodePermissionSchema.optional(),
|
|
1951
|
+
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
1952
|
+
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
1953
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
1954
|
+
topP: z35.number().min(0).max(1).optional()
|
|
1955
|
+
}).omit({ maxTokens: true });
|
|
1956
|
+
var ReasoningEffortSchema = z35.enum(["low", "medium", "high"]);
|
|
1957
|
+
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
1958
|
+
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
1959
|
+
thinkingBudgetTokens: z35.number().int().min(0).optional(),
|
|
1960
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
1961
|
+
topP: z35.number().min(0).max(1).optional(),
|
|
1962
|
+
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
1963
|
+
seed: z35.number().int().optional(),
|
|
1964
|
+
/** Stop sequences — model stops when generating any of these strings. */
|
|
1965
|
+
stopSequences: z35.array(z35.string()).optional(),
|
|
1966
|
+
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
1967
|
+
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
1968
|
+
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
1969
|
+
frequencyPenalty: z35.number().min(-2).max(2).optional(),
|
|
1970
|
+
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
1971
|
+
presencePenalty: z35.number().min(-2).max(2).optional()
|
|
1972
|
+
});
|
|
1973
|
+
|
|
1974
|
+
// src/schedule/eval-schedule.ts
|
|
1975
|
+
import { z as z36 } from "zod";
|
|
1903
1976
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1904
1977
|
FrequencyType2["DAILY"] = "daily";
|
|
1905
1978
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1909,29 +1982,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1909
1982
|
})(FrequencyType || {});
|
|
1910
1983
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1911
1984
|
/** Whether the schedule is active */
|
|
1912
|
-
enabled: z35.boolean(),
|
|
1985
|
+
enabled: z36.boolean(),
|
|
1913
1986
|
/** Test suite to run */
|
|
1914
|
-
suiteId: z35.string(),
|
|
1987
|
+
suiteId: z36.string(),
|
|
1915
1988
|
/** Preset that provides agent + entities for this schedule */
|
|
1916
|
-
presetId: z35.string(),
|
|
1989
|
+
presetId: z36.string(),
|
|
1917
1990
|
/** How often to run */
|
|
1918
|
-
frequencyType: z35.nativeEnum(FrequencyType),
|
|
1991
|
+
frequencyType: z36.nativeEnum(FrequencyType),
|
|
1919
1992
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1920
|
-
timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1993
|
+
timeOfDay: z36.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1921
1994
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1922
|
-
dayOfWeek: z35.number().min(0).max(6).optional(),
|
|
1995
|
+
dayOfWeek: z36.number().min(0).max(6).optional(),
|
|
1923
1996
|
/** Day of month (1-31) for monthly schedules */
|
|
1924
|
-
dayOfMonth: z35.number().min(1).max(31).optional(),
|
|
1997
|
+
dayOfMonth: z36.number().min(1).max(31).optional(),
|
|
1925
1998
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1926
|
-
timezone: z35.string(),
|
|
1999
|
+
timezone: z36.string(),
|
|
1927
2000
|
/** ID of the last eval run created by this schedule */
|
|
1928
|
-
lastRunId: z35.string().optional(),
|
|
2001
|
+
lastRunId: z36.string().optional(),
|
|
1929
2002
|
/** Denormalized status of the last run */
|
|
1930
|
-
lastRunStatus: z35.string().optional(),
|
|
2003
|
+
lastRunStatus: z36.string().optional(),
|
|
1931
2004
|
/** ISO timestamp of the last run */
|
|
1932
|
-
lastRunAt: z35.string().optional(),
|
|
2005
|
+
lastRunAt: z36.string().optional(),
|
|
1933
2006
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1934
|
-
nextRunAt: z35.string().optional()
|
|
2007
|
+
nextRunAt: z36.string().optional()
|
|
1935
2008
|
});
|
|
1936
2009
|
function isValidTimezone(tz) {
|
|
1937
2010
|
try {
|
|
@@ -1944,14 +2017,14 @@ function isValidTimezone(tz) {
|
|
|
1944
2017
|
function validateScheduleFields(data, ctx, options) {
|
|
1945
2018
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1946
2019
|
ctx.addIssue({
|
|
1947
|
-
code: z35.ZodIssueCode.custom,
|
|
2020
|
+
code: z36.ZodIssueCode.custom,
|
|
1948
2021
|
message: "dayOfWeek is required for weekly schedules",
|
|
1949
2022
|
path: ["dayOfWeek"]
|
|
1950
2023
|
});
|
|
1951
2024
|
}
|
|
1952
2025
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1953
2026
|
ctx.addIssue({
|
|
1954
|
-
code: z35.ZodIssueCode.custom,
|
|
2027
|
+
code: z36.ZodIssueCode.custom,
|
|
1955
2028
|
message: "dayOfMonth is required for monthly schedules",
|
|
1956
2029
|
path: ["dayOfMonth"]
|
|
1957
2030
|
});
|
|
@@ -1959,7 +2032,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1959
2032
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1960
2033
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1961
2034
|
ctx.addIssue({
|
|
1962
|
-
code: z35.ZodIssueCode.custom,
|
|
2035
|
+
code: z36.ZodIssueCode.custom,
|
|
1963
2036
|
message: "Invalid IANA timezone",
|
|
1964
2037
|
path: ["timezone"]
|
|
1965
2038
|
});
|
|
@@ -2009,6 +2082,7 @@ export {
|
|
|
2009
2082
|
AssertionSchema,
|
|
2010
2083
|
AssertionTypeSchema,
|
|
2011
2084
|
BATCH_IMPORT_LIMITS,
|
|
2085
|
+
BaseAgentConfigSchema,
|
|
2012
2086
|
BaseEntitySchema,
|
|
2013
2087
|
BaseTestSchema,
|
|
2014
2088
|
BatchAssertionLinkSchema,
|
|
@@ -2031,6 +2105,7 @@ export {
|
|
|
2031
2105
|
CapabilityVersionOriginSchema,
|
|
2032
2106
|
CapabilityVersionSchema,
|
|
2033
2107
|
CapabilityWithLatestVersionSchema,
|
|
2108
|
+
ClaudeCodeConfigSchema,
|
|
2034
2109
|
ClaudeModel,
|
|
2035
2110
|
ClaudeModelSchema,
|
|
2036
2111
|
CommandExecutionSchema,
|
|
@@ -2063,6 +2138,7 @@ export {
|
|
|
2063
2138
|
DiffContentSchema,
|
|
2064
2139
|
DiffLineSchema,
|
|
2065
2140
|
DiffLineTypeSchema,
|
|
2141
|
+
EffortLevelSchema,
|
|
2066
2142
|
EnvironmentSchema,
|
|
2067
2143
|
EvalMetricsSchema,
|
|
2068
2144
|
EvalRunFolderMembershipSchema,
|
|
@@ -2110,11 +2186,15 @@ export {
|
|
|
2110
2186
|
OPENAI_RESPONSES_MODEL_IDS,
|
|
2111
2187
|
OpenAIModel,
|
|
2112
2188
|
OpenAIModelSchema,
|
|
2189
|
+
OpenCodeConfigSchema,
|
|
2190
|
+
OpenCodePermissionSchema,
|
|
2191
|
+
PermissionValueSchema,
|
|
2113
2192
|
PlaywrightNLTestSchema,
|
|
2114
2193
|
PresetSchema,
|
|
2115
2194
|
ProjectSchema,
|
|
2116
2195
|
PromptResultSchema,
|
|
2117
2196
|
RUN_COMMAND_LABELS,
|
|
2197
|
+
ReasoningEffortSchema,
|
|
2118
2198
|
RuleSchema,
|
|
2119
2199
|
RuleTypeSchema,
|
|
2120
2200
|
RunAnalysisFindingSchema,
|
|
@@ -2125,6 +2205,7 @@ export {
|
|
|
2125
2205
|
SYSTEM_ASSERTION_IDS,
|
|
2126
2206
|
ScenarioAssertionLinkSchema,
|
|
2127
2207
|
ScenarioConversationSchema,
|
|
2208
|
+
SimpleAgentConfigSchema,
|
|
2128
2209
|
SiteConfigTestSchema,
|
|
2129
2210
|
SkillFileSchema,
|
|
2130
2211
|
SkillMetadataSchema,
|
|
@@ -2150,6 +2231,7 @@ export {
|
|
|
2150
2231
|
TestTypeSchema,
|
|
2151
2232
|
TextBlockSchema,
|
|
2152
2233
|
ThinkingBlockSchema,
|
|
2234
|
+
ThinkingVariantSchema,
|
|
2153
2235
|
TimeAssertionSchema,
|
|
2154
2236
|
TimeConfigSchema,
|
|
2155
2237
|
TokenUsageSchema,
|