@wix/evalforge-types 0.42.0 → 0.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +38 -10
- package/build/index.js.map +2 -2
- package/build/index.mjs +33 -10
- package/build/index.mjs.map +3 -3
- package/build/types/agent/adapter.d.ts +13 -7
- package/build/types/common/models.d.ts +4 -0
- package/build/types/scenario/assertions.d.ts +1 -1
- package/build/types/target/agent.d.ts +35 -7
- package/build/types/test/llm.d.ts +2 -2
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -949,9 +949,16 @@ var AVAILABLE_OPENAI_MODEL_IDS = Object.values(
|
|
|
949
949
|
var OpenAIModelSchema = z4.enum(
|
|
950
950
|
AVAILABLE_OPENAI_MODEL_IDS
|
|
951
951
|
);
|
|
952
|
+
var ALL_AVAILABLE_MODEL_IDS = [
|
|
953
|
+
...AVAILABLE_MODEL_IDS,
|
|
954
|
+
...AVAILABLE_OPENAI_MODEL_IDS
|
|
955
|
+
];
|
|
956
|
+
var AnyModelSchema = z4.enum(
|
|
957
|
+
ALL_AVAILABLE_MODEL_IDS
|
|
958
|
+
);
|
|
952
959
|
var nullToUndefined = (val) => val === null ? void 0 : val;
|
|
953
960
|
var ModelConfigSchema = z4.object({
|
|
954
|
-
model:
|
|
961
|
+
model: AnyModelSchema,
|
|
955
962
|
temperature: z4.preprocess(
|
|
956
963
|
nullToUndefined,
|
|
957
964
|
z4.number().min(0).max(1).optional()
|
|
@@ -1003,6 +1010,15 @@ var DEFAULT_EVALUATOR_SYSTEM_PROMPT = `IMPORTANT: This is an automated evaluatio
|
|
|
1003
1010
|
6. If you encounter an error, fix it directly rather than starting over.
|
|
1004
1011
|
7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
|
|
1005
1012
|
8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
|
|
1013
|
+
var AgentType = {
|
|
1014
|
+
CLI: "cli",
|
|
1015
|
+
SDK: "sdk"
|
|
1016
|
+
};
|
|
1017
|
+
var AgentTypeSchema = z6.enum([AgentType.CLI, AgentType.SDK]);
|
|
1018
|
+
var AGENT_TYPE_LABELS = {
|
|
1019
|
+
[AgentType.CLI]: "CLI Agent",
|
|
1020
|
+
[AgentType.SDK]: "Simple Agent"
|
|
1021
|
+
};
|
|
1006
1022
|
var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
|
|
1007
1023
|
AgentRunCommand2["CLAUDE"] = "claude";
|
|
1008
1024
|
return AgentRunCommand2;
|
|
@@ -1013,8 +1029,10 @@ var RUN_COMMAND_LABELS = {
|
|
|
1013
1029
|
};
|
|
1014
1030
|
var AgentRunCommandSchema = z6.nativeEnum(AgentRunCommand);
|
|
1015
1031
|
var AgentSchema = TargetSchema.extend({
|
|
1016
|
-
/**
|
|
1017
|
-
|
|
1032
|
+
/** Agent type: 'cli' for external CLI tools, 'sdk' for in-process SDK agents */
|
|
1033
|
+
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
1034
|
+
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
1035
|
+
runCommand: AgentRunCommandSchema.optional(),
|
|
1018
1036
|
/** Optional model configuration override */
|
|
1019
1037
|
modelConfig: ModelConfigSchema.optional(),
|
|
1020
1038
|
systemPrompt: z6.string().nullish().describe(
|
|
@@ -1360,8 +1378,8 @@ var LlmJudgeAssertionSchema = z21.object({
|
|
|
1360
1378
|
type: z21.literal("llm_judge"),
|
|
1361
1379
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
1362
1380
|
prompt: z21.string(),
|
|
1363
|
-
/** Minimum score to pass (0-
|
|
1364
|
-
minScore: z21.number().int().min(0).max(
|
|
1381
|
+
/** Minimum score to pass (0-10, default 7) */
|
|
1382
|
+
minScore: z21.number().int().min(0).max(10).optional(),
|
|
1365
1383
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
1366
1384
|
model: z21.string().optional(),
|
|
1367
1385
|
maxTokens: z21.number().int().optional(),
|
|
@@ -1492,8 +1510,8 @@ var LlmJudgeConfigSchema = z23.object({
|
|
|
1492
1510
|
* - Custom parameters defined in the parameters array
|
|
1493
1511
|
*/
|
|
1494
1512
|
prompt: z23.string().min(1),
|
|
1495
|
-
/** Minimum score to pass (0-
|
|
1496
|
-
minScore: z23.number().int().min(0).max(
|
|
1513
|
+
/** Minimum score to pass (0-10, default 7) */
|
|
1514
|
+
minScore: z23.number().int().min(0).max(10).optional(),
|
|
1497
1515
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
1498
1516
|
model: z23.string().optional(),
|
|
1499
1517
|
/** Max output tokens */
|
|
@@ -2161,7 +2179,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2161
2179
|
[SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
|
|
2162
2180
|
id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
|
|
2163
2181
|
name: "LLM Judge",
|
|
2164
|
-
description: "LLM evaluates the output and assigns a score (0-
|
|
2182
|
+
description: "LLM evaluates the output and assigns a score (0-10)",
|
|
2165
2183
|
type: "llm_judge",
|
|
2166
2184
|
parameters: [
|
|
2167
2185
|
{
|
|
@@ -2173,10 +2191,10 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2173
2191
|
},
|
|
2174
2192
|
{
|
|
2175
2193
|
name: "minScore",
|
|
2176
|
-
label: "Minimum Score (0-
|
|
2194
|
+
label: "Minimum Score (0-10)",
|
|
2177
2195
|
type: "number",
|
|
2178
2196
|
required: false,
|
|
2179
|
-
defaultValue:
|
|
2197
|
+
defaultValue: 7
|
|
2180
2198
|
},
|
|
2181
2199
|
{
|
|
2182
2200
|
name: "model",
|
|
@@ -2194,6 +2212,8 @@ function getSystemAssertion(id) {
|
|
|
2194
2212
|
return SYSTEM_ASSERTIONS[id];
|
|
2195
2213
|
}
|
|
2196
2214
|
export {
|
|
2215
|
+
AGENT_TYPE_LABELS,
|
|
2216
|
+
ALL_AVAILABLE_MODEL_IDS,
|
|
2197
2217
|
AVAILABLE_MODEL_IDS,
|
|
2198
2218
|
AVAILABLE_OPENAI_MODEL_IDS,
|
|
2199
2219
|
AVAILABLE_RUN_COMMANDS,
|
|
@@ -2201,7 +2221,10 @@ export {
|
|
|
2201
2221
|
AgentRunCommand,
|
|
2202
2222
|
AgentRunCommandSchema,
|
|
2203
2223
|
AgentSchema,
|
|
2224
|
+
AgentType,
|
|
2225
|
+
AgentTypeSchema,
|
|
2204
2226
|
AllowedCommands,
|
|
2227
|
+
AnyModelSchema,
|
|
2205
2228
|
ApiCallSchema,
|
|
2206
2229
|
AssertionConfigSchema,
|
|
2207
2230
|
AssertionParameterSchema,
|