npm - @mastra/evals - Versions diffs - 0.13.2 → 0.13.3-alpha.1 - Mend

@mastra/evals 0.13.2 → 0.13.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/dist/scorers/llm/noise-sensitivity/index.d.ts ADDED Viewed

@@ -0,0 +1,36 @@
+import type { MastraLanguageModel } from '@mastra/core/agent';
+import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
+export interface NoiseSensitivityOptions {
+    baselineResponse: string;
+    noisyQuery: string;
+    noiseType?: string;
+    scoring?: {
+        impactWeights?: {
+            none?: number;
+            minimal?: number;
+            moderate?: number;
+            significant?: number;
+            severe?: number;
+        };
+        penalties?: {
+            majorIssuePerItem?: number;
+            maxMajorIssuePenalty?: number;
+        };
+        discrepancyThreshold?: number;
+    };
+}
+export declare function createNoiseSensitivityScorerLLM({ model, options, }: {
+    model: MastraLanguageModel;
+    options: NoiseSensitivityOptions;
+}): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
+    overallAssessment: string;
+    dimensions: {
+        dimension: string;
+        impactLevel: "none" | "minimal" | "moderate" | "significant" | "severe";
+        specificChanges: string;
+        noiseInfluence: string;
+    }[];
+    robustnessScore: number;
+    majorIssues?: string[] | undefined;
+}> & Record<"generateScoreStepResult", number> & Record<"generateReasonStepResult", string>>;
+//# sourceMappingURL=index.d.ts.map

package/dist/scorers/llm/noise-sensitivity/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/noise-sensitivity/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE;QACR,aAAa,CAAC,EAAE;YACd,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;YAClB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,MAAM,CAAC,EAAE,MAAM,CAAC;SACjB,CAAC;QACF,SAAS,CAAC,EAAE;YACV,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,oBAAoB,CAAC,EAAE,MAAM,CAAC;SAC/B,CAAC;QACF,oBAAoB,CAAC,EAAE,MAAM,CAAC;KAC/B,CAAC;CACH;AA+BD,wBAAgB,+BAA+B,CAAC,EAC9C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,uBAAuB,CAAC;CAClC;;;;;;;;;;6FAoHA"}

package/dist/scorers/llm/noise-sensitivity/prompts.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+export declare const NOISE_SENSITIVITY_INSTRUCTIONS = "You are an expert noise sensitivity evaluator. Your job is to analyze how much irrelevant, distracting, or misleading information (noise) affected the agent's response quality and accuracy.\n\nKey Evaluation Criteria:\n1. **Response Consistency**: How similar are the baseline and noisy responses in content and correctness?\n2. **Information Integrity**: Did the agent maintain accuracy despite noise, or was it misled?\n3. **Focus Preservation**: Did the agent stay on topic or get distracted by irrelevant information?\n4. **Hallucination Resistance**: Did noise cause the agent to generate false or fabricated information?\n5. **Completeness**: Did noise cause the agent to miss important parts of the original query?\n\nNoise Impact Assessment:\n- **No Impact (1.0)**: Response is virtually identical in quality, accuracy, and completeness\n- **Minimal Impact (0.8-0.9)**: Slight changes in phrasing but maintains correctness and completeness  \n- **Moderate Impact (0.5-0.7)**: Noticeable changes that affect quality but core information remains correct\n- **Significant Impact (0.2-0.4)**: Major degradation in quality, accuracy, or completeness\n- **Severe Impact (0.0-0.1)**: Response is substantially worse, incorrect, or completely derailed\n\nBe thorough in comparing both responses and identifying specific ways the noise affected the agent's performance.";
+export declare function createAnalyzePrompt({ userQuery, baselineResponse, noisyQuery, noisyResponse, noiseType, }: {
+    userQuery: string;
+    baselineResponse: string;
+    noisyQuery: string;
+    noisyResponse: string;
+    noiseType?: string;
+}): string;
+export declare function createReasonPrompt({ userQuery, score, dimensions, majorIssues, overallAssessment, }: {
+    userQuery: string;
+    score: number;
+    dimensions: Array<{
+        dimension: string;
+        impactLevel: string;
+        specificChanges: string;
+        noiseInfluence: string;
+    }>;
+    majorIssues: string[];
+    overallAssessment: string;
+}): string;
+//# sourceMappingURL=prompts.d.ts.map

package/dist/scorers/llm/noise-sensitivity/prompts.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/noise-sensitivity/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,8BAA8B,61CAgBuE,CAAC;AAEnH,wBAAgB,mBAAmB,CAAC,EAClC,SAAS,EACT,gBAAgB,EAChB,UAAU,EACV,aAAa,EACb,SAAS,GACV,EAAE;IACD,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,UAgHA;AAED,wBAAgB,kBAAkB,CAAC,EACjC,SAAS,EACT,KAAK,EACL,UAAU,EACV,WAAW,EACX,iBAAiB,GAClB,EAAE;IACD,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,KAAK,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,MAAM,CAAC;QACxB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC,CAAC;IACH,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,UA6CA"}

package/dist/scorers/llm/prompt-alignment/index.d.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import type { MastraLanguageModel } from '@mastra/core/agent';
+import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
+export interface PromptAlignmentOptions {
+    scale?: number;
+    evaluationMode?: 'user' | 'system' | 'both';
+}
+export declare function createPromptAlignmentScorerLLM({ model, options, }: {
+    model: MastraLanguageModel;
+    options?: PromptAlignmentOptions;
+}): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
+    overallAssessment: string;
+    intentAlignment: {
+        reasoning: string;
+        score: number;
+        primaryIntent: string;
+        isAddressed: boolean;
+    };
+    requirementsFulfillment: {
+        requirements: {
+            reasoning: string;
+            requirement: string;
+            isFulfilled: boolean;
+        }[];
+        overallScore: number;
+    };
+    completeness: {
+        reasoning: string;
+        score: number;
+        missingElements: string[];
+    };
+    responseAppropriateness: {
+        reasoning: string;
+        score: number;
+        formatAlignment: boolean;
+        toneAlignment: boolean;
+    };
+}> & Record<"generateScoreStepResult", number> & Record<"generateReasonStepResult", string>>;
+//# sourceMappingURL=index.d.ts.map

package/dist/scorers/llm/prompt-alignment/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/prompt-alignment/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,sBAAsB;IACrC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC7C;AAsDD,wBAAgB,8BAA8B,CAAC,EAC7C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,CAAC,EAAE,sBAAsB,CAAC;CAClC;;;;;;;;;;;;;;;;;;;;;;;;;;;6FAoHA"}

package/dist/scorers/llm/prompt-alignment/prompts.d.ts ADDED Viewed

@@ -0,0 +1,44 @@
+export declare const PROMPT_ALIGNMENT_INSTRUCTIONS = "You are an expert prompt-response alignment evaluator. Your job is to analyze how well an agent's response aligns with the user's prompt in terms of intent, requirements, completeness, and appropriateness.\n\nKey Evaluation Dimensions:\n1. **Intent Alignment**: Does the response address the core purpose of the prompt?\n2. **Requirements Fulfillment**: Are all explicit and implicit requirements met?\n3. **Completeness**: Is the response comprehensive and thorough?\n4. **Response Appropriateness**: Does the format, tone, and style match expectations?\n\nEvaluation Guidelines:\n- Identify the primary intent and any secondary intents in the prompt\n- Extract all explicit requirements (specific tasks, constraints, formats)\n- Consider implicit requirements based on context and standard expectations\n- Assess whether the response fully addresses the prompt or leaves gaps\n- Evaluate if the response format and tone are appropriate for the request\n- Be objective and focus on alignment rather than response quality\n\nScore each dimension from 0.0 (completely misaligned) to 1.0 (perfectly aligned).";
+export declare function createAnalyzePrompt({ userPrompt, systemPrompt, agentResponse, evaluationMode, }: {
+    userPrompt: string;
+    systemPrompt?: string;
+    agentResponse: string;
+    evaluationMode: 'user' | 'system' | 'both';
+}): string;
+export type AnalysisResult = {
+    intentAlignment: {
+        score: number;
+        primaryIntent: string;
+        isAddressed: boolean;
+        reasoning: string;
+    };
+    requirementsFulfillment: {
+        requirements: Array<{
+            requirement: string;
+            isFulfilled: boolean;
+            reasoning: string;
+        }>;
+        overallScore: number;
+    };
+    completeness: {
+        score: number;
+        missingElements: string[];
+        reasoning: string;
+    };
+    responseAppropriateness: {
+        score: number;
+        formatAlignment: boolean;
+        toneAlignment: boolean;
+        reasoning: string;
+    };
+    overallAssessment: string;
+};
+export declare function createReasonPrompt({ userPrompt, systemPrompt, score, scale, analysis, evaluationMode, }: {
+    userPrompt: string;
+    systemPrompt?: string;
+    score: number;
+    scale: number;
+    analysis: AnalysisResult;
+    evaluationMode: 'user' | 'system' | 'both';
+}): string;
+//# sourceMappingURL=prompts.d.ts.map

package/dist/scorers/llm/prompt-alignment/prompts.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/prompt-alignment/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,6BAA6B,ulCAgBwC,CAAC;AAEnF,wBAAgB,mBAAmB,CAAC,EAClC,UAAU,EACV,YAAY,EACZ,aAAa,EACb,cAAc,GACf,EAAE;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC5C,UA6KA;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,eAAe,EAAE;QACf,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;QACtB,WAAW,EAAE,OAAO,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,uBAAuB,EAAE;QACvB,YAAY,EAAE,KAAK,CAAC;YAClB,WAAW,EAAE,MAAM,CAAC;YACpB,WAAW,EAAE,OAAO,CAAC;YACrB,SAAS,EAAE,MAAM,CAAC;SACnB,CAAC,CAAC;QACH,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,YAAY,EAAE;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,eAAe,EAAE,MAAM,EAAE,CAAC;QAC1B,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,uBAAuB,EAAE;QACvB,KAAK,EAAE,MAAM,CAAC;QACd,eAAe,EAAE,OAAO,CAAC;QACzB,aAAa,EAAE,OAAO,CAAC;QACvB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,wBAAgB,kBAAkB,CAAC,EACjC,UAAU,EACV,YAAY,EACZ,KAAK,EACL,KAAK,EACL,QAAQ,EACR,cAAc,GACf,EAAE;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,cAAc,CAAC;IACzB,cAAc,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC5C,UAkEA"}

package/dist/scorers/llm/tool-call-accuracy/index.d.ts CHANGED Viewed

@@ -1,11 +1,9 @@
+import type { Tool } from '@mastra/core';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
 export interface ToolCallAccuracyOptions {
     model: MastraLanguageModel;
-    availableTools: Array<{
-        name: string;
-        description: string;
-    }>;
+    availableTools: Tool[];
 }
 export declare function createToolCallAccuracyScorerLLM({ model, availableTools }: ToolCallAccuracyOptions): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", {
     actualTools: string[];

package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/tool-call-accuracy/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,mBAAmB,CAAC;IAC3B,cAAc,EAAE,~~KAAK,CAAC;QAAE,~~IAAI,EAAE,~~MAAM,~~CAAC;~~QAAC,WAAW,EAAE,MAAM,CAAA~~;~~KAAE,CAAC,CAAC;CAC9D;~~AAaD,wBAAgB,+BAA+B,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,uBAAuB;;;;;;;;;;;6FA0EjG"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/tool-call-accuracy/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,mBAAmB,CAAC;IAC3B,cAAc,EAAE,IAAI,EAAE,CAAC;CACxB;AAaD,wBAAgB,+BAA+B,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,uBAAuB;;;;;;;;;;;6FA0EjG"}

package/dist/scorers/utils.d.ts CHANGED Viewed

@@ -16,6 +16,8 @@ export type TestCaseWithContext = TestCase & {
 };
 export declare const createTestRun: (input: string, output: string, context?: string[]) => ScoringInput;
 export declare const getUserMessageFromRunInput: (input?: ScorerRunInputForAgent) => string | undefined;
+export declare const getSystemMessagesFromRunInput: (input?: ScorerRunInputForAgent) => string[];
+export declare const getCombinedSystemPrompt: (input?: ScorerRunInputForAgent) => string;
 export declare const getAssistantMessageFromRunOutput: (output?: ScorerRunOutputForAgent) => string | undefined;
 export declare const createToolInvocation: ({ toolCallId, toolName, args, result, state, }: {
     toolCallId: string;

package/dist/scorers/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACzG,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpD,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED,MAAM,MAAM,QAAQ,GAAG;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,aAAa,GAAI,OAAO,MAAM,EAAE,QAAQ,MAAM,EAAE,UAAU,MAAM,EAAE,KAAG,YAOjF,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,uBAExE,CAAC;AAEF,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAEhF,CAAC;AAEF,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;CACjC,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,yCAK7B;IACD,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,KAAG,SAQH,CAAC;AAEF,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAuBpH;AAED,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACzG,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpD,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED,MAAM,MAAM,QAAQ,GAAG;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE;QACd,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,aAAa,GAAI,OAAO,MAAM,EAAE,QAAQ,MAAM,EAAE,UAAU,MAAM,EAAE,KAAG,YAOjF,CAAC;AAEF,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,uBAExE,CAAC;AAEF,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAEhF,CAAC;AAEF,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;CACjC,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,yCAK7B;IACD,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,KAAG,SAQH,CAAC;AAEF,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAuBpH;AAED,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC"}

package/package.json CHANGED Viewed

@@ -1,11 +1,21 @@
 {
   "name": "@mastra/evals",
-  "version": "0.13.2",
+  "version": "0.13.3-alpha.1",
   "description": "",
   "type": "module",
   "files": [
-    "dist"
+    "dist",
+    "CHANGELOG.md"
   ],
+  "homepage": "https://mastra.ai",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/mastra-ai/mastra.git",
+    "directory": "packages/evals"
+  },
+  "bugs": {
+    "url": "https://github.com/mastra-ai/mastra/issues"
+  },
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "exports": {
@@ -100,10 +110,10 @@
     "tsup": "^8.5.0",
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
-    "zod": "^3.25.67",
-    "@mastra/core": "0.15.2",
+    "zod": "^3.25.76",
+    "@internal/lint": "0.0.34",
     "@internal/types-builder": "0.0.9",
-    "@internal/lint": "0.0.34"
+    "@mastra/core": "0.15.3-alpha.5"
   },
   "scripts": {
     "check": "tsc --noEmit",

package/dist/chunk-5CVZXIFW.js.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";AAIO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AA4BO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAmC;AAC5E,EAAA,OAAO,KAAA,EAAO,cAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA,EAAG,OAAA;AACnE;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,OAAO,MAAA,EAAQ,KAAK,CAAC,EAAE,MAAK,KAAM,IAAA,KAAS,WAAW,CAAA,EAAG,OAAA;AAC3D;AA4FO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AACnC,IAAA,IAAI,SAAS,eAAA,EAAiB;AAC5B,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACjG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAC1D,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C","file":"chunk-5CVZXIFW.js","sourcesContent":["import { RuntimeContext } from '@mastra/core/runtime-context';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/scores';\nimport type { ToolInvocation, UIMessage } from 'ai';\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (input: string, output: string, context?: string[]): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: { context },\n runtimeContext: {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent) => {\n return input?.inputMessages.find(({ role }) => role === 'user')?.content;\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n return output?.find(({ role }) => role === 'assistant')?.content;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\nexport const createUIMessage = ({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n id: string;\n role: 'user' | 'assistant' | 'system';\n content: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): UIMessage => {\n return {\n id,\n role,\n content,\n parts: [{ type: 'text', text: content }],\n toolInvocations,\n };\n};\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n runtimeContext = new RuntimeContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n runtimeContext?: RuntimeContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n runtimeContext: RuntimeContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n runtimeContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n if (message?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {\n const invocation = message.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => msg.content) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => msg.content);\n};\n"]}

package/dist/chunk-QVZBKGOE.cjs.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AAIO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AA4BO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAmC;AAC5E,EAAA,OAAO,KAAA,EAAO,cAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA,EAAG,OAAA;AACnE;AAEO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,OAAO,MAAA,EAAQ,KAAK,CAAC,EAAE,MAAK,KAAM,IAAA,KAAS,WAAW,CAAA,EAAG,OAAA;AAC3D;AA4FO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AACnC,IAAA,IAAI,SAAS,eAAA,EAAiB;AAC5B,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACjG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAC1D,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C","file":"chunk-QVZBKGOE.cjs","sourcesContent":["import { RuntimeContext } from '@mastra/core/runtime-context';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/scores';\nimport type { ToolInvocation, UIMessage } from 'ai';\n\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n\nexport const createTestRun = (input: string, output: string, context?: string[]): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: { context },\n runtimeContext: {},\n };\n};\n\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent) => {\n return input?.inputMessages.find(({ role }) => role === 'user')?.content;\n};\n\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n return output?.find(({ role }) => role === 'assistant')?.content;\n};\n\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: ToolInvocation['state'];\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\nexport const createUIMessage = ({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n id: string;\n role: 'user' | 'assistant' | 'system';\n content: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): UIMessage => {\n return {\n id,\n role,\n content,\n parts: [{ type: 'text', text: content }],\n toolInvocations,\n };\n};\n\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n runtimeContext = new RuntimeContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n runtimeContext?: RuntimeContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n runtimeContext: RuntimeContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n runtimeContext,\n runId,\n };\n};\n\nexport type ToolCallInfo = {\n toolName: string;\n toolCallId: string;\n messageIndex: number;\n invocationIndex: number;\n};\n\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n if (message?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.toolInvocations.length; invocationIndex++) {\n const invocation = message.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => msg.content) || [];\n};\n\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => msg.content);\n};\n"]}