@mastra/evals 1.1.2 → 1.2.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/CHANGELOG.md +50 -2
  2. package/LICENSE.md +15 -0
  3. package/dist/chunk-EVBNIL5M.js +606 -0
  4. package/dist/chunk-EVBNIL5M.js.map +1 -0
  5. package/dist/chunk-XRUR5PBK.cjs +632 -0
  6. package/dist/chunk-XRUR5PBK.cjs.map +1 -0
  7. package/dist/docs/SKILL.md +20 -19
  8. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  9. package/dist/docs/references/docs-evals-built-in-scorers.md +2 -1
  10. package/dist/docs/references/docs-evals-overview.md +11 -16
  11. package/dist/docs/references/reference-evals-answer-relevancy.md +25 -25
  12. package/dist/docs/references/reference-evals-answer-similarity.md +33 -35
  13. package/dist/docs/references/reference-evals-bias.md +24 -24
  14. package/dist/docs/references/reference-evals-completeness.md +19 -20
  15. package/dist/docs/references/reference-evals-content-similarity.md +20 -20
  16. package/dist/docs/references/reference-evals-context-precision.md +36 -36
  17. package/dist/docs/references/reference-evals-context-relevance.md +136 -141
  18. package/dist/docs/references/reference-evals-faithfulness.md +24 -24
  19. package/dist/docs/references/reference-evals-hallucination.md +52 -69
  20. package/dist/docs/references/reference-evals-keyword-coverage.md +18 -18
  21. package/dist/docs/references/reference-evals-noise-sensitivity.md +167 -177
  22. package/dist/docs/references/reference-evals-prompt-alignment.md +111 -116
  23. package/dist/docs/references/reference-evals-scorer-utils.md +285 -105
  24. package/dist/docs/references/reference-evals-textual-difference.md +18 -18
  25. package/dist/docs/references/reference-evals-tone-consistency.md +19 -19
  26. package/dist/docs/references/reference-evals-tool-call-accuracy.md +165 -165
  27. package/dist/docs/references/reference-evals-toxicity.md +21 -21
  28. package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
  29. package/dist/scorers/code/index.d.ts +1 -0
  30. package/dist/scorers/code/index.d.ts.map +1 -1
  31. package/dist/scorers/code/trajectory/index.d.ts +147 -0
  32. package/dist/scorers/code/trajectory/index.d.ts.map +1 -0
  33. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  34. package/dist/scorers/llm/context-precision/index.d.ts +2 -2
  35. package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
  36. package/dist/scorers/llm/faithfulness/index.d.ts +1 -1
  37. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  38. package/dist/scorers/llm/index.d.ts +1 -0
  39. package/dist/scorers/llm/index.d.ts.map +1 -1
  40. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  41. package/dist/scorers/llm/prompt-alignment/index.d.ts +5 -5
  42. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
  43. package/dist/scorers/llm/toxicity/index.d.ts +1 -1
  44. package/dist/scorers/llm/trajectory/index.d.ts +58 -0
  45. package/dist/scorers/llm/trajectory/index.d.ts.map +1 -0
  46. package/dist/scorers/llm/trajectory/prompts.d.ts +20 -0
  47. package/dist/scorers/llm/trajectory/prompts.d.ts.map +1 -0
  48. package/dist/scorers/prebuilt/index.cjs +638 -59
  49. package/dist/scorers/prebuilt/index.cjs.map +1 -1
  50. package/dist/scorers/prebuilt/index.js +578 -2
  51. package/dist/scorers/prebuilt/index.js.map +1 -1
  52. package/dist/scorers/utils.cjs +41 -17
  53. package/dist/scorers/utils.d.ts +171 -1
  54. package/dist/scorers/utils.d.ts.map +1 -1
  55. package/dist/scorers/utils.js +1 -1
  56. package/package.json +14 -11
  57. package/dist/chunk-OEOE7ZHN.js +0 -195
  58. package/dist/chunk-OEOE7ZHN.js.map +0 -1
  59. package/dist/chunk-W3U7MMDX.cjs +0 -212
  60. package/dist/chunk-W3U7MMDX.cjs.map +0 -1
@@ -1,72 +1,96 @@
1
1
  'use strict';
2
2
 
3
- var chunkW3U7MMDX_cjs = require('../chunk-W3U7MMDX.cjs');
3
+ var chunkXRUR5PBK_cjs = require('../chunk-XRUR5PBK.cjs');
4
4
 
5
5
 
6
6
 
7
+ Object.defineProperty(exports, "analyzeToolFailures", {
8
+ enumerable: true,
9
+ get: function () { return chunkXRUR5PBK_cjs.analyzeToolFailures; }
10
+ });
11
+ Object.defineProperty(exports, "checkTrajectoryBlacklist", {
12
+ enumerable: true,
13
+ get: function () { return chunkXRUR5PBK_cjs.checkTrajectoryBlacklist; }
14
+ });
15
+ Object.defineProperty(exports, "checkTrajectoryEfficiency", {
16
+ enumerable: true,
17
+ get: function () { return chunkXRUR5PBK_cjs.checkTrajectoryEfficiency; }
18
+ });
19
+ Object.defineProperty(exports, "compareTrajectories", {
20
+ enumerable: true,
21
+ get: function () { return chunkXRUR5PBK_cjs.compareTrajectories; }
22
+ });
7
23
  Object.defineProperty(exports, "createAgentTestRun", {
8
24
  enumerable: true,
9
- get: function () { return chunkW3U7MMDX_cjs.createAgentTestRun; }
25
+ get: function () { return chunkXRUR5PBK_cjs.createAgentTestRun; }
10
26
  });
11
27
  Object.defineProperty(exports, "createTestMessage", {
12
28
  enumerable: true,
13
- get: function () { return chunkW3U7MMDX_cjs.createTestMessage; }
29
+ get: function () { return chunkXRUR5PBK_cjs.createTestMessage; }
14
30
  });
15
31
  Object.defineProperty(exports, "createTestRun", {
16
32
  enumerable: true,
17
- get: function () { return chunkW3U7MMDX_cjs.createTestRun; }
33
+ get: function () { return chunkXRUR5PBK_cjs.createTestRun; }
18
34
  });
19
35
  Object.defineProperty(exports, "createToolInvocation", {
20
36
  enumerable: true,
21
- get: function () { return chunkW3U7MMDX_cjs.createToolInvocation; }
37
+ get: function () { return chunkXRUR5PBK_cjs.createToolInvocation; }
38
+ });
39
+ Object.defineProperty(exports, "createTrajectoryTestRun", {
40
+ enumerable: true,
41
+ get: function () { return chunkXRUR5PBK_cjs.createTrajectoryTestRun; }
22
42
  });
23
43
  Object.defineProperty(exports, "extractAgentResponseMessages", {
24
44
  enumerable: true,
25
- get: function () { return chunkW3U7MMDX_cjs.extractAgentResponseMessages; }
45
+ get: function () { return chunkXRUR5PBK_cjs.extractAgentResponseMessages; }
26
46
  });
27
47
  Object.defineProperty(exports, "extractInputMessages", {
28
48
  enumerable: true,
29
- get: function () { return chunkW3U7MMDX_cjs.extractInputMessages; }
49
+ get: function () { return chunkXRUR5PBK_cjs.extractInputMessages; }
30
50
  });
31
51
  Object.defineProperty(exports, "extractToolCalls", {
32
52
  enumerable: true,
33
- get: function () { return chunkW3U7MMDX_cjs.extractToolCalls; }
53
+ get: function () { return chunkXRUR5PBK_cjs.extractToolCalls; }
34
54
  });
35
55
  Object.defineProperty(exports, "extractToolResults", {
36
56
  enumerable: true,
37
- get: function () { return chunkW3U7MMDX_cjs.extractToolResults; }
57
+ get: function () { return chunkXRUR5PBK_cjs.extractToolResults; }
58
+ });
59
+ Object.defineProperty(exports, "extractTrajectory", {
60
+ enumerable: true,
61
+ get: function () { return chunkXRUR5PBK_cjs.extractTrajectory; }
38
62
  });
39
63
  Object.defineProperty(exports, "getAssistantMessageFromRunOutput", {
40
64
  enumerable: true,
41
- get: function () { return chunkW3U7MMDX_cjs.getAssistantMessageFromRunOutput; }
65
+ get: function () { return chunkXRUR5PBK_cjs.getAssistantMessageFromRunOutput; }
42
66
  });
43
67
  Object.defineProperty(exports, "getCombinedSystemPrompt", {
44
68
  enumerable: true,
45
- get: function () { return chunkW3U7MMDX_cjs.getCombinedSystemPrompt; }
69
+ get: function () { return chunkXRUR5PBK_cjs.getCombinedSystemPrompt; }
46
70
  });
47
71
  Object.defineProperty(exports, "getReasoningFromRunOutput", {
48
72
  enumerable: true,
49
- get: function () { return chunkW3U7MMDX_cjs.getReasoningFromRunOutput; }
73
+ get: function () { return chunkXRUR5PBK_cjs.getReasoningFromRunOutput; }
50
74
  });
51
75
  Object.defineProperty(exports, "getSystemMessagesFromRunInput", {
52
76
  enumerable: true,
53
- get: function () { return chunkW3U7MMDX_cjs.getSystemMessagesFromRunInput; }
77
+ get: function () { return chunkXRUR5PBK_cjs.getSystemMessagesFromRunInput; }
54
78
  });
55
79
  Object.defineProperty(exports, "getTextContentFromMastraDBMessage", {
56
80
  enumerable: true,
57
- get: function () { return chunkW3U7MMDX_cjs.getTextContentFromMastraDBMessage; }
81
+ get: function () { return chunkXRUR5PBK_cjs.getTextContentFromMastraDBMessage; }
58
82
  });
59
83
  Object.defineProperty(exports, "getUserMessageFromRunInput", {
60
84
  enumerable: true,
61
- get: function () { return chunkW3U7MMDX_cjs.getUserMessageFromRunInput; }
85
+ get: function () { return chunkXRUR5PBK_cjs.getUserMessageFromRunInput; }
62
86
  });
63
87
  Object.defineProperty(exports, "isCloserTo", {
64
88
  enumerable: true,
65
- get: function () { return chunkW3U7MMDX_cjs.isCloserTo; }
89
+ get: function () { return chunkXRUR5PBK_cjs.isCloserTo; }
66
90
  });
67
91
  Object.defineProperty(exports, "roundToTwoDecimals", {
68
92
  enumerable: true,
69
- get: function () { return chunkW3U7MMDX_cjs.roundToTwoDecimals; }
93
+ get: function () { return chunkXRUR5PBK_cjs.roundToTwoDecimals; }
70
94
  });
71
95
  //# sourceMappingURL=utils.cjs.map
72
96
  //# sourceMappingURL=utils.cjs.map
@@ -1,5 +1,5 @@
1
1
  import type { MastraDBMessage } from '@mastra/core/agent';
2
- import type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';
2
+ import type { ExpectedStep, ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput, TrajectoryExpectation, Trajectory } from '@mastra/core/evals';
3
3
  import { RequestContext } from '@mastra/core/request-context';
4
4
  /**
5
5
  * Extracts text content from a MastraDBMessage.
@@ -322,6 +322,38 @@ export declare const createAgentTestRun: ({ inputMessages, output, rememberedMes
322
322
  requestContext: RequestContext;
323
323
  runId: string;
324
324
  };
325
+ /**
326
+ * Creates a test run for trajectory scorers where `output` is a `Trajectory`
327
+ * (pre-extracted by the `runEvals` pipeline).
328
+ *
329
+ * @example
330
+ * ```ts
331
+ * const testRun = createTrajectoryTestRun({
332
+ * inputMessages: [createTestMessage({ content: 'Do X', role: 'user', id: 'u1' })],
333
+ * trajectory: {
334
+ * steps: [
335
+ * { stepType: 'tool_call', name: 'search', toolArgs: { q: 'test' } },
336
+ * ],
337
+ * },
338
+ * });
339
+ * ```
340
+ */
341
+ export declare const createTrajectoryTestRun: ({ inputMessages, trajectory, rememberedMessages, systemMessages, taggedSystemMessages, requestContext, runId, expectedTrajectory, }: {
342
+ inputMessages?: ScorerRunInputForAgent["inputMessages"];
343
+ trajectory: Trajectory;
344
+ rememberedMessages?: ScorerRunInputForAgent["rememberedMessages"];
345
+ systemMessages?: ScorerRunInputForAgent["systemMessages"];
346
+ taggedSystemMessages?: ScorerRunInputForAgent["taggedSystemMessages"];
347
+ requestContext?: RequestContext;
348
+ runId?: string;
349
+ expectedTrajectory?: TrajectoryExpectation;
350
+ }) => {
351
+ input: ScorerRunInputForAgent;
352
+ output: Trajectory;
353
+ requestContext: RequestContext;
354
+ runId: string;
355
+ expectedTrajectory?: TrajectoryExpectation;
356
+ };
325
357
  /**
326
358
  * Information about a tool call extracted from scorer output.
327
359
  */
@@ -433,4 +465,142 @@ export type ToolResultInfo = {
433
465
  * ```
434
466
  */
435
467
  export declare function extractToolResults(output: ScorerRunOutputForAgent): ToolResultInfo[];
468
+ export { extractTrajectory } from '@mastra/core/evals';
469
+ /**
470
+ * Compares two trajectories and returns detailed comparison results.
471
+ *
472
+ * This is the core comparison logic used by trajectory scorers. It supports
473
+ * strict and non-strict ordering, optional step data comparison, and loop detection.
474
+ *
475
+ * @param actual - The trajectory the agent actually took
476
+ * @param expected - The expected trajectory to compare against
477
+ * @param options - Comparison configuration options
478
+ * @returns Detailed comparison results including match scores and diagnostics
479
+ *
480
+ * @example
481
+ * ```ts
482
+ * const result = compareTrajectories(
483
+ * { steps: [{ stepType: 'tool_call', name: 'search' }, { stepType: 'tool_call', name: 'summarize' }] },
484
+ * { steps: [{ stepType: 'tool_call', name: 'search' }, { stepType: 'tool_call', name: 'summarize' }] },
485
+ * { strictOrder: true }
486
+ * );
487
+ * // result.score = 1.0
488
+ * ```
489
+ */
490
+ export declare function compareTrajectories(actual: Trajectory, expected: Trajectory | {
491
+ steps: ExpectedStep[];
492
+ }, options?: {
493
+ ordering?: 'strict' | 'relaxed' | 'unordered';
494
+ /** @deprecated Use ordering: 'strict' instead */
495
+ strictOrder?: boolean;
496
+ compareStepData?: boolean;
497
+ allowRepeatedSteps?: boolean;
498
+ }): TrajectoryComparisonResult;
499
+ /**
500
+ * Result of comparing two trajectories.
501
+ */
502
+ export type TrajectoryComparisonResult = {
503
+ /** Overall match score from 0 to 1 */
504
+ score: number;
505
+ /** Number of expected steps that were matched */
506
+ matchedSteps: number;
507
+ /** Total number of expected steps */
508
+ totalExpectedSteps: number;
509
+ /** Total number of actual steps taken */
510
+ totalActualSteps: number;
511
+ /** Expected steps that were not found in the actual trajectory */
512
+ missingSteps: string[];
513
+ /** Actual steps that were not in the expected trajectory */
514
+ extraSteps: string[];
515
+ /** Steps that appear but not in the expected position */
516
+ outOfOrderSteps: string[];
517
+ /** Steps that were repeated (appeared more than once) */
518
+ repeatedSteps: string[];
519
+ };
520
+ /**
521
+ * Result of checking trajectory efficiency.
522
+ */
523
+ export type TrajectoryEfficiencyResult = {
524
+ /** Overall efficiency score from 0 to 1 */
525
+ score: number;
526
+ /** Total number of steps taken */
527
+ totalSteps: number;
528
+ /** Whether the step budget was exceeded */
529
+ overStepBudget: boolean;
530
+ /** Total tokens used across model_generation steps */
531
+ totalTokens: number;
532
+ /** Whether the token budget was exceeded */
533
+ overTokenBudget: boolean;
534
+ /** Total duration in milliseconds */
535
+ totalDurationMs: number;
536
+ /** Whether the duration budget was exceeded */
537
+ overDurationBudget: boolean;
538
+ /** Redundant calls detected (same tool + same args consecutively) */
539
+ redundantCalls: Array<{
540
+ name: string;
541
+ index: number;
542
+ }>;
543
+ };
544
+ /**
545
+ * Evaluate trajectory efficiency against budgets and redundancy checks.
546
+ */
547
+ export declare function checkTrajectoryEfficiency(trajectory: Trajectory, options?: {
548
+ maxSteps?: number;
549
+ maxTotalTokens?: number;
550
+ maxTotalDurationMs?: number;
551
+ noRedundantCalls?: boolean;
552
+ }): TrajectoryEfficiencyResult;
553
+ /**
554
+ * Result of checking trajectory against a blacklist.
555
+ */
556
+ export type TrajectoryBlacklistResult = {
557
+ /** Score: 1.0 if clean, 0.0 if any violation found */
558
+ score: number;
559
+ /** Individual blacklisted tools that were found */
560
+ violatedTools: string[];
561
+ /** Blacklisted sequences that were found */
562
+ violatedSequences: string[][];
563
+ };
564
+ /**
565
+ * Check if a trajectory violates any blacklist rules.
566
+ * Returns score 0.0 if any violation is found (hard fail).
567
+ */
568
+ export declare function checkTrajectoryBlacklist(trajectory: Trajectory, options?: {
569
+ blacklistedTools?: string[];
570
+ blacklistedSequences?: string[][];
571
+ }): TrajectoryBlacklistResult;
572
+ /**
573
+ * A detected tool failure pattern in the trajectory.
574
+ */
575
+ export type ToolFailurePattern = {
576
+ /** The tool name that experienced failure */
577
+ toolName: string;
578
+ /** Number of consecutive retries (same tool, same or similar args) */
579
+ retryCount: number;
580
+ /** Whether the agent fell back to a different tool after failures */
581
+ fellBackToAlternative: boolean;
582
+ /** The alternative tool used, if any */
583
+ alternativeTool?: string;
584
+ /** Whether any retry eventually succeeded */
585
+ eventuallySucceeded: boolean;
586
+ };
587
+ /**
588
+ * Result of analyzing tool failure patterns in a trajectory.
589
+ */
590
+ export type ToolFailureAnalysisResult = {
591
+ /** Score from 0 to 1 (lower = more failures/retries) */
592
+ score: number;
593
+ /** Tool failure patterns detected */
594
+ patterns: ToolFailurePattern[];
595
+ /** Total number of retries across all tools */
596
+ totalRetries: number;
597
+ /** Tools that exceeded the retry threshold */
598
+ excessiveRetryTools: string[];
599
+ };
600
+ /**
601
+ * Analyze tool failure and retry patterns in a trajectory.
602
+ */
603
+ export declare function analyzeToolFailures(trajectory: Trajectory, options?: {
604
+ maxRetriesPerTool?: number;
605
+ }): ToolFailureAnalysisResult;
436
606
  //# sourceMappingURL=utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACxG,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CAUlF;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACrB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,cAAc,EAAE;QACd,yBAAyB;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,mCAAmC;QACnC,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,oDAAoD;IACpD,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,aAAa,GACxB,OAAO,MAAM,EACb,QAAQ,MAAM,EACd,oBAAoB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACvC,iBAAiB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACnC,YAOF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,GAAG,SAGpF,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAGhF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,eAAO,MAAM,yBAAyB,GAAI,SAAS,uBAAuB,KAAG,MAAM,GAAG,SAgCrF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,cAAc,GAAG,QAAQ,CAAC;CAC5C,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,iBAAiB,CAAC,EAChC,OAAO,EACP,IAAI,EACJ,EAAmB,EACnB,eAAoB,GACrB,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,GAAG,eAAe,CAoBlB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG;IACzB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,oEAAoE;IACpE,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAwBpH;AAED;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG;IAC3B,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,kCAAkC;IAClC,MAAM,EAAE,GAAG,CAAC;CACb,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,uBAAuB,GAAG,cAAc,EAAE,CAoBpF"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/scorers/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EACV,YAAY,EACZ,sBAAsB,EACtB,uBAAuB,EACvB,YAAY,EACZ,qBAAqB,EAErB,UAAU,EACX,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,cAAc,EAAE,MAAM,8BAA8B,CAAC;AAE9D;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CAUlF;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,WAE7C,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAEnF;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACrB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,4CAA4C;IAC5C,cAAc,EAAE;QACd,yBAAyB;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,mCAAmC;QACnC,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C,oDAAoD;IACpD,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,aAAa,GACxB,OAAO,MAAM,EACb,QAAQ,MAAM,EACd,oBAAoB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACvC,iBAAiB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACnC,YAOF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,0BAA0B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,GAAG,SAGpF,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,6BAA6B,GAAI,QAAQ,sBAAsB,KAAG,MAAM,EAoCpF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,uBAAuB,GAAI,QAAQ,sBAAsB,KAAG,MAGxE,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,gCAAgC,GAAI,SAAS,uBAAuB,uBAGhF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,eAAO,MAAM,yBAAyB,GAAI,SAAS,uBAAuB,KAAG,MAAM,GAAG,SAgCrF,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,oBAAoB,GAAI,gDAMlC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,cAAc,GAAG,QAAQ,CAAC;CAC5C,KAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAQhH,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,iBAAiB,CAAC,EAChC,OAAO,EACP,IAAI,EACJ,EAAmB,EACnB,eAAoB,GACrB,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,QAAQ,CAAC;IACtC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,KAAK,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC5B,KAAK,EAAE,GAAG,CAAC;KACZ,CAAC,CAAC;CACJ,GAAG,eAAe,CAoBlB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,eAAO,MAAM,kBAAkB,GAAI,6GAQhC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,MAAM,EAAE,uBAAuB,CAAC;IAChC,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,uBAAuB,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;CAaf,CAAC;AAEF;;;;;;;;;;;;;;;GAeG;AACH,eAAO,MAAM,uBAAuB,GAAI,qIASrC;IACD,aAAa,CAAC,EAAE,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACxD,UAAU,EAAE,UAAU,CAAC;IACvB,kBAAkB,CAAC,EAAE,sBAAsB,CAAC,oBAAoB,CAAC,CAAC;IAClE,cAAc,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAC1D,oBAAoB,CAAC,EAAE,sBAAsB,CAAC,sBAAsB,CAAC,CAAC;IACtE,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kBAAkB,CAAC,EAAE,qBAAqB,CAAC;CAC5C,KAAG;IACF,KAAK,EAAE,sBAAsB,CAAC;IAC9B,MAAM,EAAE,UAAU,CAAC;IACnB,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB,CAAC,EAAE,qBAAqB,CAAC;CAc5C,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG;IACzB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,YAAY,EAAE,MAAM,CAAC;IACrB,oEAAoE;IACpE,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,uBAAuB,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,aAAa,EAAE,YAAY,EAAE,CAAA;CAAE,CAwBpH;AAED;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,oBAAoB,GAAI,UAAU,sBAAsB,GAAG,SAAS,KAAG,MAAM,EAEzF,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,4BAA4B,GAAI,WAAW,uBAAuB,KAAG,MAAM,EAEvF,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG;IAC3B,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,kCAAkC;IAClC,MAAM,EAAE,GAAG,CAAC;CACb,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,uBAAuB,GAAG,cAAc,EAAE,CAoBpF;AAID,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEvD;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,UAAU,EAClB,QAAQ,EAAE,UAAU,GAAG;IAAE,KAAK,EAAE,YAAY,EAAE,CAAA;CAAE,EAChD,OAAO,GAAE;IACP,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,GAAG,WAAW,CAAC;IAC9C,iDAAiD;IACjD,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,kBAAkB,CAAC,EAAE,OAAO,CAAC;CACzB,GACL,0BAA0B,CAsF5B;AAED;;GAEG;AACH,MAAM,MAAM,0BAA0B,GAAG;IACvC,sCAAsC;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,YAAY,EAAE,MAAM,CAAC;IACrB,qCAAqC;IACrC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,yCAAyC;IACzC,gBAAgB,EAAE,MAAM,CAAC;IACzB,kEAAkE;IAClE,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,4DAA4D;IAC5D,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,yDAAyD;IACzD,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,yDAAyD;IACzD,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB,CAAC;AAqPF;;GAEG;AACH,MAAM,MAAM,0BAA0B,GAAG;IACvC,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,2CAA2C;IAC3C,cAAc,EAAE,OAAO,CAAC;IACxB,sDAAsD;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,4CAA4C;IAC5C,eAAe,EAAE,OAAO,CAAC;IACzB,qCAAqC;IACrC,eAAe,EAAE,MAAM,CAAC;IACxB,+CAA+C;IAC/C,kBAAkB,EAAE,OAAO,CAAC;IAC5B,qEAAqE;IACrE,cAAc,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACxD,CAAC;AAEF;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,UAAU,EAAE,UAAU,EACtB,OAAO,GAAE;IACP,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,gBAAgB,CAAC,EAAE,OAAO,CAAC;CACvB,GACL,0BAA0B,CA2E5B;AAID;;GAEG;AACH,MAAM,MAAM,yBAAyB,GAAG;IACtC,sDAAsD;IACtD,KAAK,EAAE,MAAM,CAAC;IACd,mDAAmD;IACnD,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,4CAA4C;IAC5C,iBAAiB,EAAE,MAAM,EAAE,EAAE,CAAC;CAC/B,CAAC;AAEF;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,UAAU,EAAE,UAAU,EACtB,OAAO,GAAE;IACP,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,oBAAoB,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;CAC9B,GACL,yBAAyB,CAuC3B;AAID;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG;IAC/B,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,sEAAsE;IACtE,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,qBAAqB,EAAE,OAAO,CAAC;IAC/B,wCAAwC;IACxC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,mBAAmB,EAAE,OAAO,CAAC;CAC9B,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,yBAAyB,GAAG;IACtC,wDAAwD;IACxD,KAAK,EAAE,MAAM,CAAC;IACd,qCAAqC;IACrC,QAAQ,EAAE,kBAAkB,EAAE,CAAC;IAC/B,+CAA+C;IAC/C,YAAY,EAAE,MAAM,CAAC;IACrB,8CAA8C;IAC9C,mBAAmB,EAAE,MAAM,EAAE,CAAC;CAC/B,CAAC;AAEF;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,UAAU,EAAE,UAAU,EACtB,OAAO,GAAE;IACP,iBAAiB,CAAC,EAAE,MAAM,CAAC;CACvB,GACL,yBAAyB,CAgE3B"}
@@ -1,3 +1,3 @@
1
- export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, extractToolResults, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-OEOE7ZHN.js';
1
+ export { analyzeToolFailures, checkTrajectoryBlacklist, checkTrajectoryEfficiency, compareTrajectories, createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, createTrajectoryTestRun, extractAgentResponseMessages, extractInputMessages, extractToolCalls, extractToolResults, extractTrajectory, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals } from '../chunk-EVBNIL5M.js';
2
2
  //# sourceMappingURL=utils.js.map
3
3
  //# sourceMappingURL=utils.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "1.1.2",
3
+ "version": "1.2.0-alpha.0",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "files": [
@@ -55,7 +55,7 @@
55
55
  "author": "",
56
56
  "license": "Apache-2.0",
57
57
  "dependencies": {
58
- "compromise": "^14.14.5",
58
+ "compromise": "^14.15.0",
59
59
  "keyword-extractor": "^0.0.28",
60
60
  "sentiment": "^5.0.2",
61
61
  "string-similarity": "^4.0.4"
@@ -68,17 +68,20 @@
68
68
  "@ai-sdk/openai": "^1.3.24",
69
69
  "@types/sentiment": "^5.0.4",
70
70
  "@types/string-similarity": "^4.0.2",
71
- "@vitest/coverage-v8": "4.0.12",
72
- "@vitest/ui": "4.0.12",
73
- "dotenv": "^17.2.3",
74
- "eslint": "^9.37.0",
71
+ "@vitest/coverage-v8": "4.0.18",
72
+ "@vitest/ui": "4.0.18",
73
+ "dotenv": "^17.3.1",
74
+ "eslint": "^9.39.4",
75
75
  "tsup": "^8.5.1",
76
76
  "typescript": "^5.9.3",
77
- "vitest": "4.0.16",
78
- "zod": "^3.25.76",
79
- "@internal/lint": "0.0.60",
80
- "@mastra/core": "1.5.0",
81
- "@internal/types-builder": "0.0.35"
77
+ "vitest": "4.0.18",
78
+ "zod": "^4.3.6",
79
+ "@internal/lint": "0.0.74",
80
+ "@internal/llm-recorder": "0.0.10",
81
+ "@internal/ai-sdk-v5": "0.0.21",
82
+ "@internal/test-utils": "0.0.10",
83
+ "@internal/types-builder": "0.0.49",
84
+ "@mastra/core": "1.18.0-alpha.1"
82
85
  },
83
86
  "engines": {
84
87
  "node": ">=22.13.0"
@@ -1,195 +0,0 @@
1
- import { RequestContext } from '@mastra/core/request-context';
2
-
3
- // src/scorers/utils.ts
4
- function getTextContentFromMastraDBMessage(message) {
5
- if (typeof message.content.content === "string" && message.content.content !== "") {
6
- return message.content.content;
7
- }
8
- if (message.content.parts && Array.isArray(message.content.parts)) {
9
- const textParts = message.content.parts.filter((p) => p.type === "text");
10
- return textParts.length > 0 ? textParts[textParts.length - 1]?.text || "" : "";
11
- }
12
- return "";
13
- }
14
- var roundToTwoDecimals = (num) => {
15
- return Math.round((num + Number.EPSILON) * 100) / 100;
16
- };
17
- function isCloserTo(value, target1, target2) {
18
- return Math.abs(value - target1) < Math.abs(value - target2);
19
- }
20
- var createTestRun = (input, output, additionalContext, requestContext) => {
21
- return {
22
- input: [{ role: "user", content: input }],
23
- output: { role: "assistant", text: output },
24
- additionalContext: additionalContext ?? {},
25
- requestContext: requestContext ?? {}
26
- };
27
- };
28
- var getUserMessageFromRunInput = (input) => {
29
- const message = input?.inputMessages.find(({ role }) => role === "user");
30
- return message ? getTextContentFromMastraDBMessage(message) : void 0;
31
- };
32
- var getSystemMessagesFromRunInput = (input) => {
33
- const systemMessages = [];
34
- if (input?.systemMessages) {
35
- systemMessages.push(
36
- ...input.systemMessages.map((msg) => {
37
- if (typeof msg.content === "string") {
38
- return msg.content;
39
- } else if (Array.isArray(msg.content)) {
40
- return msg.content.filter((part) => part.type === "text").map((part) => part.text || "").join(" ");
41
- }
42
- return "";
43
- }).filter((content) => content)
44
- );
45
- }
46
- if (input?.taggedSystemMessages) {
47
- Object.values(input.taggedSystemMessages).forEach((messages) => {
48
- messages.forEach((msg) => {
49
- if (typeof msg.content === "string") {
50
- systemMessages.push(msg.content);
51
- }
52
- });
53
- });
54
- }
55
- return systemMessages;
56
- };
57
- var getCombinedSystemPrompt = (input) => {
58
- const systemMessages = getSystemMessagesFromRunInput(input);
59
- return systemMessages.join("\n\n");
60
- };
61
- var getAssistantMessageFromRunOutput = (output) => {
62
- const message = output?.find(({ role }) => role === "assistant");
63
- return message ? getTextContentFromMastraDBMessage(message) : void 0;
64
- };
65
- var getReasoningFromRunOutput = (output) => {
66
- if (!output) return void 0;
67
- const message = output.find(({ role }) => role === "assistant");
68
- if (!message) return void 0;
69
- if (message.content.reasoning) {
70
- return message.content.reasoning;
71
- }
72
- const reasoningParts = message.content.parts?.filter((p) => p.type === "reasoning");
73
- if (reasoningParts && reasoningParts.length > 0) {
74
- const reasoningTexts = reasoningParts.map((p) => {
75
- if (p.details && Array.isArray(p.details)) {
76
- return p.details.filter((d) => d.type === "text").map((d) => d.text).join("");
77
- }
78
- return p.reasoning || "";
79
- }).filter(Boolean);
80
- return reasoningTexts.length > 0 ? reasoningTexts.join("\n") : void 0;
81
- }
82
- return void 0;
83
- };
84
- var createToolInvocation = ({
85
- toolCallId,
86
- toolName,
87
- args,
88
- result,
89
- state = "result"
90
- }) => {
91
- return {
92
- toolCallId,
93
- toolName,
94
- args,
95
- result,
96
- state
97
- };
98
- };
99
- function createTestMessage({
100
- content,
101
- role,
102
- id = "test-message",
103
- toolInvocations = []
104
- }) {
105
- return {
106
- id,
107
- role,
108
- content: {
109
- format: 2,
110
- parts: [{ type: "text", text: content }],
111
- content,
112
- ...toolInvocations.length > 0 && {
113
- toolInvocations: toolInvocations.map((ti) => ({
114
- toolCallId: ti.toolCallId,
115
- toolName: ti.toolName,
116
- args: ti.args,
117
- result: ti.result,
118
- state: ti.state
119
- }))
120
- }
121
- },
122
- createdAt: /* @__PURE__ */ new Date()
123
- };
124
- }
125
- var createAgentTestRun = ({
126
- inputMessages = [],
127
- output,
128
- rememberedMessages = [],
129
- systemMessages = [],
130
- taggedSystemMessages = {},
131
- requestContext = new RequestContext(),
132
- runId = crypto.randomUUID()
133
- }) => {
134
- return {
135
- input: {
136
- inputMessages,
137
- rememberedMessages,
138
- systemMessages,
139
- taggedSystemMessages
140
- },
141
- output,
142
- requestContext,
143
- runId
144
- };
145
- };
146
- function extractToolCalls(output) {
147
- const toolCalls = [];
148
- const toolCallInfos = [];
149
- for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {
150
- const message = output[messageIndex];
151
- if (message?.content?.toolInvocations) {
152
- for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {
153
- const invocation = message.content.toolInvocations[invocationIndex];
154
- if (invocation && invocation.toolName && (invocation.state === "result" || invocation.state === "call")) {
155
- toolCalls.push(invocation.toolName);
156
- toolCallInfos.push({
157
- toolName: invocation.toolName,
158
- toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,
159
- messageIndex,
160
- invocationIndex
161
- });
162
- }
163
- }
164
- }
165
- }
166
- return { tools: toolCalls, toolCallInfos };
167
- }
168
- var extractInputMessages = (runInput) => {
169
- return runInput?.inputMessages?.map((msg) => getTextContentFromMastraDBMessage(msg)) || [];
170
- };
171
- var extractAgentResponseMessages = (runOutput) => {
172
- return runOutput.filter((msg) => msg.role === "assistant").map((msg) => getTextContentFromMastraDBMessage(msg));
173
- };
174
- function extractToolResults(output) {
175
- const results = [];
176
- for (const message of output) {
177
- const toolInvocations = message?.content?.toolInvocations;
178
- if (!toolInvocations) continue;
179
- for (const invocation of toolInvocations) {
180
- if (invocation.state === "result" && invocation.result !== void 0) {
181
- results.push({
182
- toolName: invocation.toolName,
183
- toolCallId: invocation.toolCallId || "",
184
- args: invocation.args || {},
185
- result: invocation.result
186
- });
187
- }
188
- }
189
- }
190
- return results;
191
- }
192
-
193
- export { createAgentTestRun, createTestMessage, createTestRun, createToolInvocation, extractAgentResponseMessages, extractInputMessages, extractToolCalls, extractToolResults, getAssistantMessageFromRunOutput, getCombinedSystemPrompt, getReasoningFromRunOutput, getSystemMessagesFromRunInput, getTextContentFromMastraDBMessage, getUserMessageFromRunInput, isCloserTo, roundToTwoDecimals };
194
- //# sourceMappingURL=chunk-OEOE7ZHN.js.map
195
- //# sourceMappingURL=chunk-OEOE7ZHN.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/scorers/utils.ts"],"names":[],"mappings":";;;AAyBO,SAAS,kCAAkC,OAAA,EAAkC;AAClF,EAAA,IAAI,OAAO,QAAQ,OAAA,CAAQ,OAAA,KAAY,YAAY,OAAA,CAAQ,OAAA,CAAQ,YAAY,EAAA,EAAI;AACjF,IAAA,OAAO,QAAQ,OAAA,CAAQ,OAAA;AAAA,EACzB;AACA,EAAA,IAAI,OAAA,CAAQ,QAAQ,KAAA,IAAS,KAAA,CAAM,QAAQ,OAAA,CAAQ,OAAA,CAAQ,KAAK,CAAA,EAAG;AAEjE,IAAA,MAAM,SAAA,GAAY,QAAQ,OAAA,CAAQ,KAAA,CAAM,OAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,MAAM,CAAA;AACrE,IAAA,OAAO,SAAA,CAAU,SAAS,CAAA,GAAI,SAAA,CAAU,UAAU,MAAA,GAAS,CAAC,CAAA,EAAG,IAAA,IAAQ,EAAA,GAAK,EAAA;AAAA,EAC9E;AACA,EAAA,OAAO,EAAA;AACT;AAgBO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD;AAgBO,SAAS,UAAA,CAAW,KAAA,EAAe,OAAA,EAAiB,OAAA,EAA0B;AACnF,EAAA,OAAO,IAAA,CAAK,IAAI,KAAA,GAAQ,OAAO,IAAI,IAAA,CAAK,GAAA,CAAI,QAAQ,OAAO,CAAA;AAC7D;AA6CO,IAAM,aAAA,GAAgB,CAC3B,KAAA,EACA,MAAA,EACA,mBACA,cAAA,KACiB;AACjB,EAAA,OAAO;AAAA,IACL,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,OAAA,EAAS,OAAO,CAAA;AAAA,IACxC,MAAA,EAAQ,EAAE,IAAA,EAAM,WAAA,EAAa,MAAM,MAAA,EAAO;AAAA,IAC1C,iBAAA,EAAmB,qBAAqB,EAAC;AAAA,IACzC,cAAA,EAAgB,kBAAkB;AAAC,GACrC;AACF;AAmBO,IAAM,0BAAA,GAA6B,CAAC,KAAA,KAAuD;AAChG,EAAA,MAAM,OAAA,GAAU,OAAO,aAAA,CAAc,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,IAAA,KAAS,MAAM,CAAA;AACvE,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAoBO,IAAM,6BAAA,GAAgC,CAAC,KAAA,KAA6C;AACzF,EAAA,MAAM,iBAA2B,EAAC;AAGlC,EAAA,IAAI,OAAO,cAAA,EAAgB;AACzB,IAAA,cAAA,CAAe,IAAA;AAAA,MACb,GAAG,KAAA,CAAM,cAAA,CACN,GAAA,CAAI,CAAA,GAAA,KAAO;AAEV,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,OAAO,GAAA,CAAI,OAAA;AAAA,QACb,CAAA,MAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,GAAA,CAAI,OAAO,CAAA,EAAG;AAErC,UAAA,OAAO,IAAI,OAAA,CACR,MAAA,CAAO,CAAC,IAAA,KAAc,KAAK,IAAA,KAAS,MAAM,CAAA,CAC1C,GAAA,CAAI,CAAC,IAAA,KAAc,IAAA,CAAK,QAAQ,EAAE,CAAA,CAClC,KAAK,GAAG,CAAA;AAAA,QACb;AACA,QAAA,OAAO,EAAA;AAAA,MACT,CAAC,CAAA,CACA,MAAA,CAAO,CAAA,OAAA,KAAW,OAAO;AAAA,KAC9B;AAAA,EACF;AAGA,EAAA,IAAI,OAAO,oBAAA,EAAsB;AAC/B,IAAA,MAAA,CAAO,MAAA,CAAO,KAAA,CAAM,oBAAoB,CAAA,CAAE,QAAQ,CAAA,QAAA,KAAY;AAC5D,MAAA,QAAA,CAAS,QAAQ,CAAA,GAAA,KAAO;AACtB,QAAA,IAAI,OAAO,GAAA,CAAI,OAAA,KAAY,QAAA,EAAU;AACnC,UAAA,cAAA,CAAe,IAAA,CAAK,IAAI,OAAO,CAAA;AAAA,QACjC;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAC,CAAA;AAAA,EACH;AAEA,EAAA,OAAO,cAAA;AACT;AAmBO,IAAM,uBAAA,GAA0B,CAAC,KAAA,KAA2C;AACjF,EAAA,MAAM,cAAA,GAAiB,8BAA8B,KAAK,CAAA;AAC1D,EAAA,OAAO,cAAA,CAAe,KAAK,MAAM,CAAA;AACnC;AAmBO,IAAM,gCAAA,GAAmC,CAAC,MAAA,KAAqC;AACpF,EAAA,MAAM,OAAA,GAAU,QAAQ,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC/D,EAAA,OAAO,OAAA,GAAU,iCAAA,CAAkC,OAAO,CAAA,GAAI,MAAA;AAChE;AAiCO,IAAM,yBAAA,GAA4B,CAAC,MAAA,KAAyD;AACjG,EAAA,IAAI,CAAC,QAAQ,OAAO,MAAA;AAEpB,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,CAAC,EAAE,IAAA,EAAK,KAAM,SAAS,WAAW,CAAA;AAC9D,EAAA,IAAI,CAAC,SAAS,OAAO,MAAA;AAGrB,EAAA,IAAI,OAAA,CAAQ,QAAQ,SAAA,EAAW;AAC7B,IAAA,OAAO,QAAQ,OAAA,CAAQ,SAAA;AAAA,EACzB;AAIA,EAAA,MAAM,cAAA,GAAiB,QAAQ,OAAA,CAAQ,KAAA,EAAO,OAAO,CAAC,CAAA,KAAW,CAAA,CAAE,IAAA,KAAS,WAAW,CAAA;AACvF,EAAA,IAAI,cAAA,IAAkB,cAAA,CAAe,MAAA,GAAS,CAAA,EAAG;AAC/C,IAAA,MAAM,cAAA,GAAiB,cAAA,CACpB,GAAA,CAAI,CAAC,CAAA,KAAW;AAEf,MAAA,IAAI,EAAE,OAAA,IAAW,KAAA,CAAM,OAAA,CAAQ,CAAA,CAAE,OAAO,CAAA,EAAG;AACzC,QAAA,OAAO,EAAE,OAAA,CACN,MAAA,CAAO,CAAC,CAAA,KAAW,EAAE,IAAA,KAAS,MAAM,CAAA,CACpC,GAAA,CAAI,CAAC,CAAA,KAAW,CAAA,CAAE,IAAI,CAAA,CACtB,KAAK,EAAE,CAAA;AAAA,MACZ;AACA,MAAA,OAAO,EAAE,SAAA,IAAa,EAAA;AAAA,IACxB,CAAC,CAAA,CACA,MAAA,CAAO,OAAO,CAAA;AAEjB,IAAA,OAAO,eAAe,MAAA,GAAS,CAAA,GAAI,cAAA,CAAe,IAAA,CAAK,IAAI,CAAA,GAAI,MAAA;AAAA,EACjE;AAEA,EAAA,OAAO,MAAA;AACT;AAuBO,IAAM,uBAAuB,CAAC;AAAA,EACnC,UAAA;AAAA,EACA,QAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA,GAAQ;AACV,CAAA,KAMuH;AACrH,EAAA,OAAO;AAAA,IACL,UAAA;AAAA,IACA,QAAA;AAAA,IACA,IAAA;AAAA,IACA,MAAA;AAAA,IACA;AAAA,GACF;AACF;AAmCO,SAAS,iBAAA,CAAkB;AAAA,EAChC,OAAA;AAAA,EACA,IAAA;AAAA,EACA,EAAA,GAAK,cAAA;AAAA,EACL,kBAAkB;AACpB,CAAA,EAWoB;AAClB,EAAA,OAAO;AAAA,IACL,EAAA;AAAA,IACA,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,MACP,MAAA,EAAQ,CAAA;AAAA,MACR,OAAO,CAAC,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,SAAS,CAAA;AAAA,MACvC,OAAA;AAAA,MACA,GAAI,eAAA,CAAgB,MAAA,GAAS,CAAA,IAAK;AAAA,QAChC,eAAA,EAAiB,eAAA,CAAgB,GAAA,CAAI,CAAA,EAAA,MAAO;AAAA,UAC1C,YAAY,EAAA,CAAG,UAAA;AAAA,UACf,UAAU,EAAA,CAAG,QAAA;AAAA,UACb,MAAM,EAAA,CAAG,IAAA;AAAA,UACT,QAAQ,EAAA,CAAG,MAAA;AAAA,UACX,OAAO,EAAA,CAAG;AAAA,SACZ,CAAE;AAAA;AACJ,KACF;AAAA,IACA,SAAA,sBAAe,IAAA;AAAK,GACtB;AACF;AA+BO,IAAM,qBAAqB,CAAC;AAAA,EACjC,gBAAgB,EAAC;AAAA,EACjB,MAAA;AAAA,EACA,qBAAqB,EAAC;AAAA,EACtB,iBAAiB,EAAC;AAAA,EAClB,uBAAuB,EAAC;AAAA,EACxB,cAAA,GAAiB,IAAI,cAAA,EAAe;AAAA,EACpC,KAAA,GAAQ,OAAO,UAAA;AACjB,CAAA,KAaK;AACH,EAAA,OAAO;AAAA,IACL,KAAA,EAAO;AAAA,MACL,aAAA;AAAA,MACA,kBAAA;AAAA,MACA,cAAA;AAAA,MACA;AAAA,KACF;AAAA,IACA,MAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF;AACF;AAqCO,SAAS,iBAAiB,MAAA,EAAqF;AACpH,EAAA,MAAM,YAAsB,EAAC;AAC7B,EAAA,MAAM,gBAAgC,EAAC;AAEvC,EAAA,KAAA,IAAS,YAAA,GAAe,CAAA,EAAG,YAAA,GAAe,MAAA,CAAO,QAAQ,YAAA,EAAA,EAAgB;AACvE,IAAA,MAAM,OAAA,GAAU,OAAO,YAAY,CAAA;AAEnC,IAAA,IAAI,OAAA,EAAS,SAAS,eAAA,EAAiB;AACrC,MAAA,KAAA,IAAS,kBAAkB,CAAA,EAAG,eAAA,GAAkB,QAAQ,OAAA,CAAQ,eAAA,CAAgB,QAAQ,eAAA,EAAA,EAAmB;AACzG,QAAA,MAAM,UAAA,GAAa,OAAA,CAAQ,OAAA,CAAQ,eAAA,CAAgB,eAAe,CAAA;AAClE,QAAA,IAAI,UAAA,IAAc,WAAW,QAAA,KAAa,UAAA,CAAW,UAAU,QAAA,IAAY,UAAA,CAAW,UAAU,MAAA,CAAA,EAAS;AACvG,UAAA,SAAA,CAAU,IAAA,CAAK,WAAW,QAAQ,CAAA;AAClC,UAAA,aAAA,CAAc,IAAA,CAAK;AAAA,YACjB,UAAU,UAAA,CAAW,QAAA;AAAA,YACrB,YAAY,UAAA,CAAW,UAAA,IAAc,CAAA,EAAG,YAAY,IAAI,eAAe,CAAA,CAAA;AAAA,YACvE,YAAA;AAAA,YACA;AAAA,WACD,CAAA;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,SAAA,EAAW,aAAA,EAAc;AAC3C;AAiBO,IAAM,oBAAA,GAAuB,CAAC,QAAA,KAA2D;AAC9F,EAAA,OAAO,QAAA,EAAU,eAAe,GAAA,CAAI,CAAA,GAAA,KAAO,kCAAkC,GAAG,CAAC,KAAK,EAAC;AACzF;AAmBO,IAAM,4BAAA,GAA+B,CAAC,SAAA,KAAiD;AAC5F,EAAA,OAAO,SAAA,CAAU,MAAA,CAAO,CAAA,GAAA,KAAO,GAAA,CAAI,IAAA,KAAS,WAAW,CAAA,CAAE,GAAA,CAAI,CAAA,GAAA,KAAO,iCAAA,CAAkC,GAAG,CAAC,CAAA;AAC5G;AAyCO,SAAS,mBAAmB,MAAA,EAAmD;AACpF,EAAA,MAAM,UAA4B,EAAC;AAEnC,EAAA,KAAA,MAAW,WAAW,MAAA,EAAQ;AAC5B,IAAA,MAAM,eAAA,GAAkB,SAAS,OAAA,EAAS,eAAA;AAC1C,IAAA,IAAI,CAAC,eAAA,EAAiB;AAEtB,IAAA,KAAA,MAAW,cAAc,eAAA,EAAiB;AACxC,MAAA,IAAI,UAAA,CAAW,KAAA,KAAU,QAAA,IAAY,UAAA,CAAW,WAAW,MAAA,EAAW;AACpE,QAAA,OAAA,CAAQ,IAAA,CAAK;AAAA,UACX,UAAU,UAAA,CAAW,QAAA;AAAA,UACrB,UAAA,EAAY,WAAW,UAAA,IAAc,EAAA;AAAA,UACrC,IAAA,EAAM,UAAA,CAAW,IAAA,IAAQ,EAAC;AAAA,UAC1B,QAAQ,UAAA,CAAW;AAAA,SACpB,CAAA;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,OAAA;AACT","file":"chunk-OEOE7ZHN.js","sourcesContent":["import type { MastraDBMessage } from '@mastra/core/agent';\nimport type { ScorerRunInputForAgent, ScorerRunOutputForAgent, ScoringInput } from '@mastra/core/evals';\nimport { RequestContext } from '@mastra/core/request-context';\n\n/**\n * Extracts text content from a MastraDBMessage.\n *\n * This function matches the logic used in `MessageList.mastraDBMessageToAIV4UIMessage`.\n * It first checks for a string `content.content` field, then falls back to extracting\n * text from the `parts` array (returning only the last text part, like AI SDK does).\n *\n * @param message - The MastraDBMessage to extract text from\n * @returns The extracted text content, or an empty string if no text is found\n *\n * @example\n * ```ts\n * const message: MastraDBMessage = {\n * id: 'msg-1',\n * role: 'assistant',\n * content: { format: 2, parts: [{ type: 'text', text: 'Hello!' }] },\n * createdAt: new Date(),\n * };\n * const text = getTextContentFromMastraDBMessage(message); // 'Hello!'\n * ```\n */\nexport function getTextContentFromMastraDBMessage(message: MastraDBMessage): string {\n if (typeof message.content.content === 'string' && message.content.content !== '') {\n return message.content.content;\n }\n if (message.content.parts && Array.isArray(message.content.parts)) {\n // Return only the last text part like AI SDK does\n const textParts = message.content.parts.filter(p => p.type === 'text');\n return textParts.length > 0 ? textParts[textParts.length - 1]?.text || '' : '';\n }\n return '';\n}\n\n/**\n * Rounds a number to two decimal places.\n *\n * Uses `Number.EPSILON` to handle floating-point precision issues.\n *\n * @param num - The number to round\n * @returns The number rounded to two decimal places\n *\n * @example\n * ```ts\n * roundToTwoDecimals(0.1 + 0.2); // 0.3\n * roundToTwoDecimals(1.005); // 1.01\n * ```\n */\nexport const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\n/**\n * Determines if a value is closer to the first target than the second.\n *\n * @param value - The value to compare\n * @param target1 - The first target value\n * @param target2 - The second target value\n * @returns `true` if `value` is closer to `target1` than `target2`\n *\n * @example\n * ```ts\n * isCloserTo(0.6, 1, 0); // true (0.6 is closer to 1)\n * isCloserTo(0.3, 1, 0); // false (0.3 is closer to 0)\n * ```\n */\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\n/**\n * Represents a test case for scorer evaluation.\n */\nexport type TestCase = {\n /** The input text to evaluate */\n input: string;\n /** The output text to evaluate */\n output: string;\n /** The expected result of the evaluation */\n expectedResult: {\n /** The expected score */\n score: number;\n /** The optional expected reason */\n reason?: string;\n };\n};\n\n/**\n * Represents a test case with additional context for scorer evaluation.\n */\nexport type TestCaseWithContext = TestCase & {\n /** Additional context strings for the evaluation */\n context: string[];\n};\n\n/**\n * Creates a scoring input object for testing purposes.\n *\n * @param input - The user input text\n * @param output - The assistant output text\n * @param additionalContext - Optional additional context data\n * @param requestContext - Optional request context data\n * @returns A ScoringInput object ready for use in scorer tests\n *\n * @example\n * ```ts\n * const run = createTestRun(\n * 'What is 2+2?',\n * 'The answer is 4.',\n * { topic: 'math' }\n * );\n * ```\n */\nexport const createTestRun = (\n input: string,\n output: string,\n additionalContext?: Record<string, any>,\n requestContext?: Record<string, any>,\n): ScoringInput => {\n return {\n input: [{ role: 'user', content: input }],\n output: { role: 'assistant', text: output },\n additionalContext: additionalContext ?? {},\n requestContext: requestContext ?? {},\n };\n};\n\n/**\n * Extracts the user message text from a scorer run input.\n *\n * Finds the first message with role 'user' and extracts its text content.\n *\n * @param input - The scorer run input containing input messages\n * @returns The user message text, or `undefined` if no user message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const userText = getUserMessageFromRunInput(run.input);\n * return { userText };\n * });\n * ```\n */\nexport const getUserMessageFromRunInput = (input?: ScorerRunInputForAgent): string | undefined => {\n const message = input?.inputMessages.find(({ role }) => role === 'user');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts all system messages from a scorer run input.\n *\n * Collects text from both standard system messages and tagged system messages\n * (specialized system prompts like memory instructions).\n *\n * @param input - The scorer run input containing system messages\n * @returns An array of system message strings\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemMessages = getSystemMessagesFromRunInput(run.input);\n * return { systemPrompt: systemMessages.join('\\n') };\n * });\n * ```\n */\nexport const getSystemMessagesFromRunInput = (input?: ScorerRunInputForAgent): string[] => {\n const systemMessages: string[] = [];\n\n // Add standard system messages\n if (input?.systemMessages) {\n systemMessages.push(\n ...input.systemMessages\n .map(msg => {\n // Handle different content types - extract text if it's an array of parts\n if (typeof msg.content === 'string') {\n return msg.content;\n } else if (Array.isArray(msg.content)) {\n // Extract text from parts array\n return msg.content\n .filter((part: any) => part.type === 'text')\n .map((part: any) => part.text || '')\n .join(' ');\n }\n return '';\n })\n .filter(content => content),\n );\n }\n\n // Add tagged system messages (these are specialized system prompts)\n if (input?.taggedSystemMessages) {\n Object.values(input.taggedSystemMessages).forEach(messages => {\n messages.forEach(msg => {\n if (typeof msg.content === 'string') {\n systemMessages.push(msg.content);\n }\n });\n });\n }\n\n return systemMessages;\n};\n\n/**\n * Combines all system messages into a single prompt string.\n *\n * Joins all system messages (standard and tagged) with double newlines.\n *\n * @param input - The scorer run input containing system messages\n * @returns A combined system prompt string\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const systemPrompt = getCombinedSystemPrompt(run.input);\n * return { systemPrompt };\n * });\n * ```\n */\nexport const getCombinedSystemPrompt = (input?: ScorerRunInputForAgent): string => {\n const systemMessages = getSystemMessagesFromRunInput(input);\n return systemMessages.join('\\n\\n');\n};\n\n/**\n * Extracts the assistant message text from a scorer run output.\n *\n * Finds the first message with role 'assistant' and extracts its text content.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The assistant message text, or `undefined` if no assistant message is found\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { response };\n * });\n * ```\n */\nexport const getAssistantMessageFromRunOutput = (output?: ScorerRunOutputForAgent) => {\n const message = output?.find(({ role }) => role === 'assistant');\n return message ? getTextContentFromMastraDBMessage(message) : undefined;\n};\n\n/**\n * Extracts reasoning text from a scorer run output.\n *\n * This function extracts reasoning content from assistant messages, which is\n * produced by reasoning models like `deepseek-reasoner`. The reasoning can be\n * stored in two places:\n * 1. `content.reasoning` - a string field on the message content\n * 2. `content.parts` - as parts with `type: 'reasoning'` containing `details`\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns The reasoning text, or `undefined` if no reasoning is present\n *\n * @example\n * ```ts\n * const reasoningScorer = createScorer({\n * id: 'reasoning-scorer',\n * name: 'Reasoning Quality',\n * description: 'Evaluates the quality of model reasoning',\n * type: 'agent',\n * })\n * .preprocess(({ run }) => {\n * const reasoning = getReasoningFromRunOutput(run.output);\n * const response = getAssistantMessageFromRunOutput(run.output);\n * return { reasoning, response };\n * })\n * .generateScore(({ results }) => {\n * // Score based on reasoning quality\n * return results.preprocessStepResult?.reasoning ? 1 : 0;\n * });\n * ```\n */\nexport const getReasoningFromRunOutput = (output?: ScorerRunOutputForAgent): string | undefined => {\n if (!output) return undefined;\n\n const message = output.find(({ role }) => role === 'assistant');\n if (!message) return undefined;\n\n // Check for reasoning in content.reasoning (string format)\n if (message.content.reasoning) {\n return message.content.reasoning;\n }\n\n // Check for reasoning in parts with type 'reasoning'\n // Reasoning models store reasoning in parts as { type: 'reasoning', details: [{ type: 'text', text: '...' }] }\n const reasoningParts = message.content.parts?.filter((p: any) => p.type === 'reasoning');\n if (reasoningParts && reasoningParts.length > 0) {\n const reasoningTexts = reasoningParts\n .map((p: any) => {\n // The reasoning text can be in p.reasoning or in p.details[].text\n if (p.details && Array.isArray(p.details)) {\n return p.details\n .filter((d: any) => d.type === 'text')\n .map((d: any) => d.text)\n .join('');\n }\n return p.reasoning || '';\n })\n .filter(Boolean);\n\n return reasoningTexts.length > 0 ? reasoningTexts.join('\\n') : undefined;\n }\n\n return undefined;\n};\n\n/**\n * Creates a tool invocation object for testing purposes.\n *\n * @param options - The tool invocation configuration\n * @param options.toolCallId - Unique identifier for the tool call\n * @param options.toolName - Name of the tool being called\n * @param options.args - Arguments passed to the tool\n * @param options.result - Result returned by the tool\n * @param options.state - State of the invocation (default: 'result')\n * @returns A tool invocation object\n *\n * @example\n * ```ts\n * const invocation = createToolInvocation({\n * toolCallId: 'call-123',\n * toolName: 'weatherTool',\n * args: { location: 'London' },\n * result: { temperature: 20, condition: 'sunny' },\n * });\n * ```\n */\nexport const createToolInvocation = ({\n toolCallId,\n toolName,\n args,\n result,\n state = 'result',\n}: {\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state?: 'call' | 'partial-call' | 'result';\n}): { toolCallId: string; toolName: string; args: Record<string, any>; result: Record<string, any>; state: string } => {\n return {\n toolCallId,\n toolName,\n args,\n result,\n state,\n };\n};\n\n/**\n * Creates a MastraDBMessage object for testing purposes.\n *\n * Supports optional tool invocations for testing tool call scenarios.\n *\n * @param options - The message configuration\n * @param options.content - The text content of the message\n * @param options.role - The role of the message sender ('user', 'assistant', or 'system')\n * @param options.id - Optional message ID (default: 'test-message')\n * @param options.toolInvocations - Optional array of tool invocations\n * @returns A MastraDBMessage object\n *\n * @example\n * ```ts\n * const message = createTestMessage({\n * content: 'Hello, how can I help?',\n * role: 'assistant',\n * });\n *\n * // With tool invocations\n * const messageWithTools = createTestMessage({\n * content: 'Let me check the weather.',\n * role: 'assistant',\n * toolInvocations: [{\n * toolCallId: 'call-1',\n * toolName: 'weatherTool',\n * args: { location: 'Paris' },\n * result: { temp: 22 },\n * state: 'result',\n * }],\n * });\n * ```\n */\nexport function createTestMessage({\n content,\n role,\n id = 'test-message',\n toolInvocations = [],\n}: {\n content: string;\n role: 'user' | 'assistant' | 'system';\n id?: string;\n toolInvocations?: Array<{\n toolCallId: string;\n toolName: string;\n args: Record<string, any>;\n result: Record<string, any>;\n state: any;\n }>;\n}): MastraDBMessage {\n return {\n id,\n role,\n content: {\n format: 2,\n parts: [{ type: 'text', text: content }],\n content,\n ...(toolInvocations.length > 0 && {\n toolInvocations: toolInvocations.map(ti => ({\n toolCallId: ti.toolCallId,\n toolName: ti.toolName,\n args: ti.args,\n result: ti.result,\n state: ti.state,\n })),\n }),\n },\n createdAt: new Date(),\n };\n}\n\n/**\n * Creates a complete agent test run object for testing scorers.\n *\n * Provides a convenient way to construct the full run object that scorers receive,\n * including input messages, output, system messages, and request context.\n *\n * @param options - The test run configuration\n * @param options.inputMessages - Array of input messages (default: [])\n * @param options.output - The output messages (required)\n * @param options.rememberedMessages - Array of remembered messages from memory (default: [])\n * @param options.systemMessages - Array of system messages (default: [])\n * @param options.taggedSystemMessages - Tagged system messages map (default: {})\n * @param options.requestContext - Request context (default: new RequestContext())\n * @param options.runId - Unique run ID (default: random UUID)\n * @returns A complete test run object\n *\n * @example\n * ```ts\n * const testRun = createAgentTestRun({\n * inputMessages: [createTestMessage({ content: 'Hello', role: 'user' })],\n * output: [createTestMessage({ content: 'Hi there!', role: 'assistant' })],\n * });\n *\n * const result = await scorer.run({\n * input: testRun.input,\n * output: testRun.output,\n * });\n * ```\n */\nexport const createAgentTestRun = ({\n inputMessages = [],\n output,\n rememberedMessages = [],\n systemMessages = [],\n taggedSystemMessages = {},\n requestContext = new RequestContext(),\n runId = crypto.randomUUID(),\n}: {\n inputMessages?: ScorerRunInputForAgent['inputMessages'];\n output: ScorerRunOutputForAgent;\n rememberedMessages?: ScorerRunInputForAgent['rememberedMessages'];\n systemMessages?: ScorerRunInputForAgent['systemMessages'];\n taggedSystemMessages?: ScorerRunInputForAgent['taggedSystemMessages'];\n requestContext?: RequestContext;\n runId?: string;\n}): {\n input: ScorerRunInputForAgent;\n output: ScorerRunOutputForAgent;\n requestContext: RequestContext;\n runId: string;\n} => {\n return {\n input: {\n inputMessages,\n rememberedMessages,\n systemMessages,\n taggedSystemMessages,\n },\n output,\n requestContext,\n runId,\n };\n};\n\n/**\n * Information about a tool call extracted from scorer output.\n */\nexport type ToolCallInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Index of the message containing this tool call */\n messageIndex: number;\n /** Index of the invocation within the message's tool invocations */\n invocationIndex: number;\n};\n\n/**\n * Extracts all tool calls from a scorer run output.\n *\n * Iterates through all messages and their tool invocations to collect\n * information about tools that were called (with state 'result' or 'call').\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An object containing tool names and detailed tool call info\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const { tools, toolCallInfos } = extractToolCalls(run.output);\n * return {\n * toolsUsed: tools,\n * toolCount: tools.length,\n * };\n * });\n * ```\n */\nexport function extractToolCalls(output: ScorerRunOutputForAgent): { tools: string[]; toolCallInfos: ToolCallInfo[] } {\n const toolCalls: string[] = [];\n const toolCallInfos: ToolCallInfo[] = [];\n\n for (let messageIndex = 0; messageIndex < output.length; messageIndex++) {\n const message = output[messageIndex];\n // Tool invocations are now nested under content\n if (message?.content?.toolInvocations) {\n for (let invocationIndex = 0; invocationIndex < message.content.toolInvocations.length; invocationIndex++) {\n const invocation = message.content.toolInvocations[invocationIndex];\n if (invocation && invocation.toolName && (invocation.state === 'result' || invocation.state === 'call')) {\n toolCalls.push(invocation.toolName);\n toolCallInfos.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || `${messageIndex}-${invocationIndex}`,\n messageIndex,\n invocationIndex,\n });\n }\n }\n }\n }\n\n return { tools: toolCalls, toolCallInfos };\n}\n\n/**\n * Extracts text content from all input messages.\n *\n * @param runInput - The scorer run input\n * @returns An array of text strings from each input message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const messages = extractInputMessages(run.input);\n * return { allUserMessages: messages.join('\\n') };\n * });\n * ```\n */\nexport const extractInputMessages = (runInput: ScorerRunInputForAgent | undefined): string[] => {\n return runInput?.inputMessages?.map(msg => getTextContentFromMastraDBMessage(msg)) || [];\n};\n\n/**\n * Extracts text content from all assistant response messages.\n *\n * Filters for messages with role 'assistant' and extracts their text content.\n *\n * @param runOutput - The scorer run output (array of MastraDBMessage)\n * @returns An array of text strings from each assistant message\n *\n * @example\n * ```ts\n * const scorer = createScorer({ ... })\n * .preprocess(({ run }) => {\n * const responses = extractAgentResponseMessages(run.output);\n * return { allResponses: responses.join('\\n') };\n * });\n * ```\n */\nexport const extractAgentResponseMessages = (runOutput: ScorerRunOutputForAgent): string[] => {\n return runOutput.filter(msg => msg.role === 'assistant').map(msg => getTextContentFromMastraDBMessage(msg));\n};\n\n/**\n * Information about a tool result extracted from scorer output.\n */\nexport type ToolResultInfo = {\n /** Name of the tool that was called */\n toolName: string;\n /** Unique identifier for the tool call */\n toolCallId: string;\n /** Arguments passed to the tool */\n args: Record<string, any>;\n /** Result returned by the tool */\n result: any;\n};\n\n/**\n * Extracts tool results from a scorer run output.\n *\n * Returns structured objects that can be used with the hallucination scorer's\n * `getContext` hook or for other scorer logic.\n *\n * @param output - The scorer run output (array of MastraDBMessage)\n * @returns An array of ToolResultInfo objects\n *\n * @example\n * ```ts\n * import { extractToolResults } from '@mastra/evals/scorers';\n * import { createHallucinationScorer } from '@mastra/evals/scorers/prebuilt';\n *\n * const scorer = createHallucinationScorer({\n * model: openai('gpt-4o'),\n * options: {\n * getContext: (run) => {\n * const toolResults = extractToolResults(run.output);\n * return toolResults.map(t => JSON.stringify({ tool: t.toolName, result: t.result }));\n * },\n * },\n * });\n * ```\n */\nexport function extractToolResults(output: ScorerRunOutputForAgent): ToolResultInfo[] {\n const results: ToolResultInfo[] = [];\n\n for (const message of output) {\n const toolInvocations = message?.content?.toolInvocations;\n if (!toolInvocations) continue;\n\n for (const invocation of toolInvocations) {\n if (invocation.state === 'result' && invocation.result !== undefined) {\n results.push({\n toolName: invocation.toolName,\n toolCallId: invocation.toolCallId || '',\n args: invocation.args || {},\n result: invocation.result,\n });\n }\n }\n }\n\n return results;\n}\n"]}