@mastra/evals 1.2.4 → 1.3.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/{chunk-XOXUFZEG.js → chunk-BE5F2OUQ.js} +5 -4
- package/dist/chunk-BE5F2OUQ.js.map +1 -0
- package/dist/{chunk-BULMCHKJ.cjs → chunk-UNQXHPOD.cjs} +5 -4
- package/dist/{chunk-XOXUFZEG.js.map → chunk-UNQXHPOD.cjs.map} +1 -1
- package/dist/docs/SKILL.md +2 -1
- package/dist/docs/assets/SOURCE_MAP.json +1 -1
- package/dist/docs/references/reference-evals-rubric.md +113 -0
- package/dist/docs/references/reference-evals-trajectory-accuracy.md +3 -3
- package/dist/scorers/llm/index.d.ts +1 -0
- package/dist/scorers/llm/index.d.ts.map +1 -1
- package/dist/scorers/llm/rubric/index.d.ts +71 -0
- package/dist/scorers/llm/rubric/index.d.ts.map +1 -0
- package/dist/scorers/llm/rubric/prompts.d.ts +37 -0
- package/dist/scorers/llm/rubric/prompts.d.ts.map +1 -0
- package/dist/scorers/prebuilt/index.cjs +276 -78
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +203 -6
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/dist/scorers/utils.cjs +25 -25
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +9 -8
- package/dist/chunk-BULMCHKJ.cjs.map +0 -1
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Instruction text for the rubric grader. It tells the grader to judge every criterion
 * strictly and independently, to mark a criterion as satisfied only when the output clearly
 * and fully meets it (NOT satisfied when in doubt), and to ground its reasoning in evidence
 * from the output rather than in effort or intent.
 * NOTE(review): how this constant is passed to the judge is not visible in this declaration file.
 */
export declare const RUBRIC_INSTRUCTIONS = "You are an exacting grader. Your job is to judge whether an agent's output satisfies each criterion in a rubric.\n\nA rubric is a checklist of criteria. For each criterion you must decide, strictly and independently, whether the output satisfies it.\n\nGrading guidelines:\n- Judge each criterion on its own merits. Do not let one criterion's verdict influence another.\n- A criterion is \"satisfied\" only when the output clearly and fully meets it. When in doubt, mark it as NOT satisfied.\n- Base your judgement on evidence in the output (and the original task for context). Do not assume facts that are not present.\n- Be concise but specific in your reasoning: say what is present or missing.\n- Do not reward effort, intent, or partial progress. Only the actual output counts.";
|
|
2
|
+
/**
 * The verdict for one rubric criterion: the criterion text, whether the output satisfied it,
 * whether it is a required criterion, and a short justification.
 */
export interface RubricAnalysisCriterion {
|
|
3
|
+
/** The criterion text, exactly as provided in the rubric. */
|
|
4
|
+
criterion: string;
|
|
5
|
+
/** Whether the output satisfies this criterion. */
|
|
6
|
+
satisfied: boolean;
|
|
7
|
+
/** Whether this criterion is required for the task to be considered complete. */
|
|
8
|
+
required: boolean;
|
|
9
|
+
/** Short explanation of why the criterion is or is not satisfied. */
|
|
10
|
+
reasoning: string;
|
|
11
|
+
}
|
|
12
|
+
/**
 * The result of a rubric analysis: a list of per-criterion verdicts plus a free-text
 * overall assessment.
 */
export interface RubricAnalysisResult {
|
|
13
|
+
/** Per-criterion verdicts. NOTE(review): presumably one entry per rubric criterion — confirm against the implementation. */
criteria: RubricAnalysisCriterion[];
|
|
14
|
+
/** Overall assessment text. NOTE(review): expected content and length are not specified in this declaration. */
overallAssessment: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* A single rubric criterion as provided to the prompt builder.
|
|
18
|
+
*/
|
|
19
|
+
export interface RubricCriterionInput {
|
|
20
|
+
/** The criterion text. */
criterion: string;
|
|
21
|
+
/** Whether the criterion is required. NOTE(review): presumably the same meaning as `RubricAnalysisCriterion.required` — confirm. */
required: boolean;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Creates the analysis prompt, returned as a single string, from the original task, the
 * output under evaluation, and the rubric criteria.
 * NOTE(review): the prompt wording and layout are defined in the implementation, which is
 * not visible in this declaration file.
 *
 * Takes one options object with `originalTask`, `output`, and `criteria` fields.
 *
 * @returns The prompt text.
 */
export declare function createAnalyzePrompt({ originalTask, output, criteria, }: {
|
|
24
|
+
/** The original task text. */
originalTask: string;
|
|
25
|
+
/** The output text to be evaluated. */
output: string;
|
|
26
|
+
/** The rubric criteria, each with its text and `required` flag. */
criteria: RubricCriterionInput[];
|
|
27
|
+
}): string;
|
|
28
|
+
/**
|
|
29
|
+
* Format a human-readable, per-criterion explanation of the rubric result. This text is what
|
|
30
|
+
* `isTaskComplete` injects back into the conversation as feedback, so it must clearly tell the
|
|
31
|
+
* agent which criteria are unmet and why.
|
|
32
|
+
*/
|
|
33
|
+
export declare function formatRubricReason({ score, analysis }: {
|
|
34
|
+
/** The rubric score being explained. NOTE(review): scale and range are not visible in this declaration — confirm. */
score: number;
|
|
35
|
+
/** The rubric analysis (per-criterion verdicts and overall assessment) to describe. */
analysis: RubricAnalysisResult;
|
|
36
|
+
}): string;
|
|
37
|
+
//# sourceMappingURL=prompts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/rubric/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,mBAAmB,mxBASoD,CAAC;AAErF,MAAM,WAAW,uBAAuB;IACtC,6DAA6D;IAC7D,SAAS,EAAE,MAAM,CAAC;IAClB,mDAAmD;IACnD,SAAS,EAAE,OAAO,CAAC;IACnB,iFAAiF;IACjF,QAAQ,EAAE,OAAO,CAAC;IAClB,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,EAAE,uBAAuB,EAAE,CAAC;IACpC,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,wBAAgB,mBAAmB,CAAC,EAClC,YAAY,EACZ,MAAM,EACN,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,oBAAoB,EAAE,CAAC;CAClC,GAAG,MAAM,CA8BT;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,oBAAoB,CAAA;CAAE,GAAG,MAAM,CAoBjH"}
|