@wix/evalforge-types 0.39.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +0 -17
- package/build/index.js.map +2 -2
- package/build/index.mjs +0 -17
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +0 -5
- package/build/types/scenario/assertions.d.ts +0 -2
- package/build/types/scenario/test-scenario.d.ts +0 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -1523,8 +1523,6 @@ var LlmJudgeAssertionSchema = import_zod21.z.object({
|
|
|
1523
1523
|
type: import_zod21.z.literal("llm_judge"),
|
|
1524
1524
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
1525
1525
|
prompt: import_zod21.z.string(),
|
|
1526
|
-
/** Optional system prompt for the judge (default asks for JSON with score) */
|
|
1527
|
-
systemPrompt: import_zod21.z.string().optional(),
|
|
1528
1526
|
/** Minimum score to pass (0-100, default 70) */
|
|
1529
1527
|
minScore: import_zod21.z.number().int().min(0).max(100).optional(),
|
|
1530
1528
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
@@ -1657,8 +1655,6 @@ var LlmJudgeConfigSchema = import_zod23.z.object({
|
|
|
1657
1655
|
* - Custom parameters defined in the parameters array
|
|
1658
1656
|
*/
|
|
1659
1657
|
prompt: import_zod23.z.string().min(1),
|
|
1660
|
-
/** Optional system prompt for the judge */
|
|
1661
|
-
systemPrompt: import_zod23.z.string().optional(),
|
|
1662
1658
|
/** Minimum score to pass (0-100, default 70) */
|
|
1663
1659
|
minScore: import_zod23.z.number().int().min(0).max(100).optional(),
|
|
1664
1660
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
@@ -2338,19 +2334,6 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2338
2334
|
required: true,
|
|
2339
2335
|
defaultValue: "Verify the output meets the acceptance criteria."
|
|
2340
2336
|
},
|
|
2341
|
-
{
|
|
2342
|
-
name: "systemPrompt",
|
|
2343
|
-
label: "System Prompt (optional)",
|
|
2344
|
-
type: "string",
|
|
2345
|
-
required: false,
|
|
2346
|
-
defaultValue: `You are judging a scenario run. Use these values:
|
|
2347
|
-
- {{output}}: the agent's final output
|
|
2348
|
-
- {{cwd}}: working directory
|
|
2349
|
-
- {{changedFiles}}: list of files changed (or "No files were changed")
|
|
2350
|
-
- {{trace}}: step-by-step trace (tool calls, completions) to check e.g. which tools were called and how many times
|
|
2351
|
-
|
|
2352
|
-
Judge how well the output meets the acceptance criteria stated in the user prompt.`
|
|
2353
|
-
},
|
|
2354
2337
|
{
|
|
2355
2338
|
name: "minScore",
|
|
2356
2339
|
label: "Minimum Score (0-100)",
|