@wix/evalforge-types 0.42.0 → 0.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +38 -10
- package/build/index.js.map +2 -2
- package/build/index.mjs +33 -10
- package/build/index.mjs.map +3 -3
- package/build/types/agent/adapter.d.ts +13 -7
- package/build/types/common/models.d.ts +4 -0
- package/build/types/scenario/assertions.d.ts +1 -1
- package/build/types/target/agent.d.ts +35 -7
- package/build/types/test/llm.d.ts +2 -2
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -20,6 +20,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
AGENT_TYPE_LABELS: () => AGENT_TYPE_LABELS,
|
|
24
|
+
ALL_AVAILABLE_MODEL_IDS: () => ALL_AVAILABLE_MODEL_IDS,
|
|
23
25
|
AVAILABLE_MODEL_IDS: () => AVAILABLE_MODEL_IDS,
|
|
24
26
|
AVAILABLE_OPENAI_MODEL_IDS: () => AVAILABLE_OPENAI_MODEL_IDS,
|
|
25
27
|
AVAILABLE_RUN_COMMANDS: () => AVAILABLE_RUN_COMMANDS,
|
|
@@ -27,7 +29,10 @@ __export(index_exports, {
|
|
|
27
29
|
AgentRunCommand: () => AgentRunCommand,
|
|
28
30
|
AgentRunCommandSchema: () => AgentRunCommandSchema,
|
|
29
31
|
AgentSchema: () => AgentSchema,
|
|
32
|
+
AgentType: () => AgentType,
|
|
33
|
+
AgentTypeSchema: () => AgentTypeSchema,
|
|
30
34
|
AllowedCommands: () => AllowedCommands,
|
|
35
|
+
AnyModelSchema: () => AnyModelSchema,
|
|
31
36
|
ApiCallSchema: () => ApiCallSchema,
|
|
32
37
|
AssertionConfigSchema: () => AssertionConfigSchema,
|
|
33
38
|
AssertionParameterSchema: () => AssertionParameterSchema,
|
|
@@ -1126,9 +1131,16 @@ var AVAILABLE_OPENAI_MODEL_IDS = Object.values(
|
|
|
1126
1131
|
var OpenAIModelSchema = import_zod4.z.enum(
|
|
1127
1132
|
AVAILABLE_OPENAI_MODEL_IDS
|
|
1128
1133
|
);
|
|
1134
|
+
var ALL_AVAILABLE_MODEL_IDS = [
|
|
1135
|
+
...AVAILABLE_MODEL_IDS,
|
|
1136
|
+
...AVAILABLE_OPENAI_MODEL_IDS
|
|
1137
|
+
];
|
|
1138
|
+
var AnyModelSchema = import_zod4.z.enum(
|
|
1139
|
+
ALL_AVAILABLE_MODEL_IDS
|
|
1140
|
+
);
|
|
1129
1141
|
var nullToUndefined = (val) => val === null ? void 0 : val;
|
|
1130
1142
|
var ModelConfigSchema = import_zod4.z.object({
|
|
1131
|
-
model:
|
|
1143
|
+
model: AnyModelSchema,
|
|
1132
1144
|
temperature: import_zod4.z.preprocess(
|
|
1133
1145
|
nullToUndefined,
|
|
1134
1146
|
import_zod4.z.number().min(0).max(1).optional()
|
|
@@ -1180,6 +1192,15 @@ var DEFAULT_EVALUATOR_SYSTEM_PROMPT = `IMPORTANT: This is an automated evaluatio
|
|
|
1180
1192
|
6. If you encounter an error, fix it directly rather than starting over.
|
|
1181
1193
|
7. Your project root is the current working directory. Always create and modify source code files relative to the project root, NOT inside .claude/skills/ directories.
|
|
1182
1194
|
8. Before finishing, run the project's package manager install command (e.g. \`npm install\`, \`yarn install\`, or \`pnpm install\` depending on the lockfile present) to ensure all dependencies are installed and the project is ready to build.`;
|
|
1195
|
+
var AgentType = {
|
|
1196
|
+
CLI: "cli",
|
|
1197
|
+
SDK: "sdk"
|
|
1198
|
+
};
|
|
1199
|
+
var AgentTypeSchema = import_zod6.z.enum([AgentType.CLI, AgentType.SDK]);
|
|
1200
|
+
var AGENT_TYPE_LABELS = {
|
|
1201
|
+
[AgentType.CLI]: "CLI Agent",
|
|
1202
|
+
[AgentType.SDK]: "Simple Agent"
|
|
1203
|
+
};
|
|
1183
1204
|
var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
|
|
1184
1205
|
AgentRunCommand2["CLAUDE"] = "claude";
|
|
1185
1206
|
return AgentRunCommand2;
|
|
@@ -1190,8 +1211,10 @@ var RUN_COMMAND_LABELS = {
|
|
|
1190
1211
|
};
|
|
1191
1212
|
var AgentRunCommandSchema = import_zod6.z.nativeEnum(AgentRunCommand);
|
|
1192
1213
|
var AgentSchema = TargetSchema.extend({
|
|
1193
|
-
/**
|
|
1194
|
-
|
|
1214
|
+
/** Agent type: 'cli' for external CLI tools, 'sdk' for in-process SDK agents */
|
|
1215
|
+
agentType: AgentTypeSchema.default(AgentType.CLI),
|
|
1216
|
+
/** Command to run the agent (required for CLI agents, absent for SDK agents) */
|
|
1217
|
+
runCommand: AgentRunCommandSchema.optional(),
|
|
1195
1218
|
/** Optional model configuration override */
|
|
1196
1219
|
modelConfig: ModelConfigSchema.optional(),
|
|
1197
1220
|
systemPrompt: import_zod6.z.string().nullish().describe(
|
|
@@ -1537,8 +1560,8 @@ var LlmJudgeAssertionSchema = import_zod21.z.object({
|
|
|
1537
1560
|
type: import_zod21.z.literal("llm_judge"),
|
|
1538
1561
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
1539
1562
|
prompt: import_zod21.z.string(),
|
|
1540
|
-
/** Minimum score to pass (0-
|
|
1541
|
-
minScore: import_zod21.z.number().int().min(0).max(
|
|
1563
|
+
/** Minimum score to pass (0-10, default 7) */
|
|
1564
|
+
minScore: import_zod21.z.number().int().min(0).max(10).optional(),
|
|
1542
1565
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
1543
1566
|
model: import_zod21.z.string().optional(),
|
|
1544
1567
|
maxTokens: import_zod21.z.number().int().optional(),
|
|
@@ -1669,8 +1692,8 @@ var LlmJudgeConfigSchema = import_zod23.z.object({
|
|
|
1669
1692
|
* - Custom parameters defined in the parameters array
|
|
1670
1693
|
*/
|
|
1671
1694
|
prompt: import_zod23.z.string().min(1),
|
|
1672
|
-
/** Minimum score to pass (0-
|
|
1673
|
-
minScore: import_zod23.z.number().int().min(0).max(
|
|
1695
|
+
/** Minimum score to pass (0-10, default 7) */
|
|
1696
|
+
minScore: import_zod23.z.number().int().min(0).max(10).optional(),
|
|
1674
1697
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
1675
1698
|
model: import_zod23.z.string().optional(),
|
|
1676
1699
|
/** Max output tokens */
|
|
@@ -2338,7 +2361,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2338
2361
|
[SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
|
|
2339
2362
|
id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
|
|
2340
2363
|
name: "LLM Judge",
|
|
2341
|
-
description: "LLM evaluates the output and assigns a score (0-
|
|
2364
|
+
description: "LLM evaluates the output and assigns a score (0-10)",
|
|
2342
2365
|
type: "llm_judge",
|
|
2343
2366
|
parameters: [
|
|
2344
2367
|
{
|
|
@@ -2350,10 +2373,10 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2350
2373
|
},
|
|
2351
2374
|
{
|
|
2352
2375
|
name: "minScore",
|
|
2353
|
-
label: "Minimum Score (0-
|
|
2376
|
+
label: "Minimum Score (0-10)",
|
|
2354
2377
|
type: "number",
|
|
2355
2378
|
required: false,
|
|
2356
|
-
defaultValue:
|
|
2379
|
+
defaultValue: 7
|
|
2357
2380
|
},
|
|
2358
2381
|
{
|
|
2359
2382
|
name: "model",
|
|
@@ -2372,6 +2395,8 @@ function getSystemAssertion(id) {
|
|
|
2372
2395
|
}
|
|
2373
2396
|
// Annotate the CommonJS export names for ESM import in node:
|
|
2374
2397
|
0 && (module.exports = {
|
|
2398
|
+
AGENT_TYPE_LABELS,
|
|
2399
|
+
ALL_AVAILABLE_MODEL_IDS,
|
|
2375
2400
|
AVAILABLE_MODEL_IDS,
|
|
2376
2401
|
AVAILABLE_OPENAI_MODEL_IDS,
|
|
2377
2402
|
AVAILABLE_RUN_COMMANDS,
|
|
@@ -2379,7 +2404,10 @@ function getSystemAssertion(id) {
|
|
|
2379
2404
|
AgentRunCommand,
|
|
2380
2405
|
AgentRunCommandSchema,
|
|
2381
2406
|
AgentSchema,
|
|
2407
|
+
AgentType,
|
|
2408
|
+
AgentTypeSchema,
|
|
2382
2409
|
AllowedCommands,
|
|
2410
|
+
AnyModelSchema,
|
|
2383
2411
|
ApiCallSchema,
|
|
2384
2412
|
AssertionConfigSchema,
|
|
2385
2413
|
AssertionParameterSchema,
|