@mastra/core 0.15.3 → 0.16.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/dist/agent/index.cjs +8 -8
- package/dist/agent/index.d.ts.map +1 -1
- package/dist/agent/index.js +1 -1
- package/dist/agent/input-processor/index.cjs +6 -6
- package/dist/agent/input-processor/index.js +1 -1
- package/dist/chunk-2BFCQRWV.cjs +673 -0
- package/dist/chunk-2BFCQRWV.cjs.map +1 -0
- package/dist/{chunk-K6UMYGK5.cjs → chunk-3CJXUAA2.cjs} +2 -2
- package/dist/{chunk-K6UMYGK5.cjs.map → chunk-3CJXUAA2.cjs.map} +1 -1
- package/dist/{chunk-EZCMCWSC.cjs → chunk-7GYN2OE3.cjs} +131 -61
- package/dist/chunk-7GYN2OE3.cjs.map +1 -0
- package/dist/{chunk-AWES6LTC.cjs → chunk-7NKIBBCV.cjs} +4 -4
- package/dist/{chunk-AWES6LTC.cjs.map → chunk-7NKIBBCV.cjs.map} +1 -1
- package/dist/{chunk-PDODMDSQ.js → chunk-A5VGUE2H.js} +3 -3
- package/dist/{chunk-PDODMDSQ.js.map → chunk-A5VGUE2H.js.map} +1 -1
- package/dist/chunk-ABJOUEVA.cjs +10 -0
- package/dist/chunk-ABJOUEVA.cjs.map +1 -0
- package/dist/{chunk-VZYIKGJ6.cjs → chunk-AHJY7RHO.cjs} +4 -4
- package/dist/{chunk-VZYIKGJ6.cjs.map → chunk-AHJY7RHO.cjs.map} +1 -1
- package/dist/{chunk-Z637KSJF.cjs → chunk-BHB4PDNV.cjs} +4 -4
- package/dist/{chunk-Z637KSJF.cjs.map → chunk-BHB4PDNV.cjs.map} +1 -1
- package/dist/{chunk-AEE54ND4.js → chunk-D2Y5SXZN.js} +13 -7
- package/dist/chunk-D2Y5SXZN.js.map +1 -0
- package/dist/{chunk-65ZPZGFH.cjs → chunk-DRMT4EQA.cjs} +4 -4
- package/dist/{chunk-65ZPZGFH.cjs.map → chunk-DRMT4EQA.cjs.map} +1 -1
- package/dist/chunk-IHVB4C5U.cjs +4 -0
- package/dist/{chunk-X4RMXTXF.cjs.map → chunk-IHVB4C5U.cjs.map} +1 -1
- package/dist/{chunk-QQO4SF3C.js → chunk-JT2QXHUD.js} +16 -6
- package/dist/chunk-JT2QXHUD.js.map +1 -0
- package/dist/{chunk-YGW2WEJ5.js → chunk-JTU7FZ5O.js} +2 -2
- package/dist/{chunk-YGW2WEJ5.js.map → chunk-JTU7FZ5O.js.map} +1 -1
- package/dist/chunk-MMYGYTJK.js +665 -0
- package/dist/chunk-MMYGYTJK.js.map +1 -0
- package/dist/chunk-NLNKQD2T.js +7 -0
- package/dist/chunk-NLNKQD2T.js.map +1 -0
- package/dist/{chunk-4VU6A5XE.js → chunk-QQIBOVFQ.js} +3 -3
- package/dist/{chunk-4VU6A5XE.js.map → chunk-QQIBOVFQ.js.map} +1 -1
- package/dist/chunk-S43VACTO.js +3 -0
- package/dist/{chunk-GXSERFAG.js.map → chunk-S43VACTO.js.map} +1 -1
- package/dist/{chunk-FTAXL6TR.cjs → chunk-SLDGELU7.cjs} +25 -15
- package/dist/chunk-SLDGELU7.cjs.map +1 -0
- package/dist/{chunk-R7K2QO7M.js → chunk-TLJPVRO5.js} +4 -4
- package/dist/{chunk-R7K2QO7M.js.map → chunk-TLJPVRO5.js.map} +1 -1
- package/dist/{chunk-ZHFYYSVY.cjs → chunk-YEZD4ZLX.cjs} +17 -11
- package/dist/chunk-YEZD4ZLX.cjs.map +1 -0
- package/dist/{chunk-VZS4UVKF.js → chunk-YFAHGS45.js} +3 -3
- package/dist/{chunk-VZS4UVKF.js.map → chunk-YFAHGS45.js.map} +1 -1
- package/dist/{chunk-AVPW677Z.js → chunk-YILDTNQM.js} +117 -47
- package/dist/chunk-YILDTNQM.js.map +1 -0
- package/dist/index.cjs +31 -31
- package/dist/index.js +8 -8
- package/dist/loop/index.cjs +2 -2
- package/dist/loop/index.js +1 -1
- package/dist/loop/workflow/llm-execution.d.ts +4 -4
- package/dist/loop/workflow/outer-llm-step.d.ts +2 -2
- package/dist/loop/workflow/schema.d.ts +2 -2
- package/dist/mastra/hooks.d.ts +3 -1
- package/dist/mastra/hooks.d.ts.map +1 -1
- package/dist/mastra/index.cjs +2 -2
- package/dist/mastra/index.js +1 -1
- package/dist/network/index.cjs +2 -2
- package/dist/network/index.js +1 -1
- package/dist/network/vNext/index.cjs +15 -15
- package/dist/network/vNext/index.js +2 -2
- package/dist/processors/index.cjs +8 -8
- package/dist/processors/index.js +2 -2
- package/dist/relevance/index.cjs +4 -4
- package/dist/relevance/index.js +1 -1
- package/dist/scores/hooks.d.ts +3 -1
- package/dist/scores/hooks.d.ts.map +1 -1
- package/dist/scores/index.cjs +29 -638
- package/dist/scores/index.cjs.map +1 -1
- package/dist/scores/index.js +1 -636
- package/dist/scores/index.js.map +1 -1
- package/dist/scores/types.d.ts +80 -0
- package/dist/scores/types.d.ts.map +1 -1
- package/dist/server/index.cjs +2 -2
- package/dist/server/index.js +1 -1
- package/dist/storage/base.d.ts +2 -2
- package/dist/storage/base.d.ts.map +1 -1
- package/dist/storage/domains/memory/inmemory.d.ts.map +1 -1
- package/dist/storage/index.cjs +8 -6
- package/dist/storage/index.cjs.map +1 -1
- package/dist/storage/index.js +4 -2
- package/dist/storage/index.js.map +1 -1
- package/dist/stream/base/output.d.ts.map +1 -1
- package/dist/stream/index.cjs +3 -3
- package/dist/stream/index.js +1 -1
- package/dist/telemetry/index.cjs +7 -7
- package/dist/telemetry/index.js +1 -1
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/ui-types.d.ts +39 -0
- package/dist/tools/ui-types.d.ts.map +1 -0
- package/dist/tts/index.cjs +2 -2
- package/dist/tts/index.js +1 -1
- package/dist/voice/index.cjs +4 -4
- package/dist/voice/index.js +1 -1
- package/dist/workflows/constants.cjs +6 -2
- package/dist/workflows/constants.d.ts +1 -0
- package/dist/workflows/constants.d.ts.map +1 -1
- package/dist/workflows/constants.js +1 -1
- package/dist/workflows/default.d.ts +6 -3
- package/dist/workflows/default.d.ts.map +1 -1
- package/dist/workflows/evented/execution-engine.d.ts +1 -0
- package/dist/workflows/evented/execution-engine.d.ts.map +1 -1
- package/dist/workflows/evented/index.cjs +10 -10
- package/dist/workflows/evented/index.js +1 -1
- package/dist/workflows/evented/step-executor.d.ts.map +1 -1
- package/dist/workflows/execution-engine.d.ts +1 -0
- package/dist/workflows/execution-engine.d.ts.map +1 -1
- package/dist/workflows/index.cjs +10 -10
- package/dist/workflows/index.js +1 -1
- package/dist/workflows/legacy/index.cjs +22 -22
- package/dist/workflows/legacy/index.js +1 -1
- package/dist/workflows/step.d.ts +2 -1
- package/dist/workflows/step.d.ts.map +1 -1
- package/dist/workflows/types.d.ts +13 -1
- package/dist/workflows/types.d.ts.map +1 -1
- package/dist/workflows/workflow.d.ts +9 -1
- package/dist/workflows/workflow.d.ts.map +1 -1
- package/package.json +3 -3
- package/dist/chunk-AEE54ND4.js.map +0 -1
- package/dist/chunk-AVPW677Z.js.map +0 -1
- package/dist/chunk-EZCMCWSC.cjs.map +0 -1
- package/dist/chunk-FTAXL6TR.cjs.map +0 -1
- package/dist/chunk-GK5V7YTQ.js +0 -6
- package/dist/chunk-GK5V7YTQ.js.map +0 -1
- package/dist/chunk-GXSERFAG.js +0 -3
- package/dist/chunk-NFXTYMWZ.cjs +0 -8
- package/dist/chunk-NFXTYMWZ.cjs.map +0 -1
- package/dist/chunk-QQO4SF3C.js.map +0 -1
- package/dist/chunk-X4RMXTXF.cjs +0 -4
- package/dist/chunk-ZHFYYSVY.cjs.map +0 -1
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
import { createStep, createWorkflow, Agent, Workflow } from './chunk-YILDTNQM.js';
|
|
2
|
+
import { MastraError } from './chunk-MCOVMKIS.js';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { randomUUID } from 'crypto';
|
|
5
|
+
|
|
6
|
+
// Builders for the two optional field shapes that recur throughout the schemas below.
// Each call returns a fresh schema instance.
var optionalAnyRecord = () => z.record(z.string(), z.any()).optional();
var optionalString = () => z.string().optional();

// Optional string-keyed record of arbitrary values.
var scoringExtractStepResultSchema = optionalAnyRecord();

// A score is any number.
var scoringValueSchema = z.number();

// Shape of a single scoring result: a required numeric score plus an optional
// result record and optional prompt text.
var scoreResultSchema = z.object({
  result: optionalAnyRecord(),
  score: scoringValueSchema,
  prompt: optionalString()
});

// Shape of the payload validated when a score is saved. Required fields:
// runId, scorerId, entityId, score, source; everything else is optional
// (`output` is `z.any()`, so it accepts any value including undefined).
var saveScorePayloadSchema = z.object({
  runId: z.string(),
  scorerId: z.string(),
  entityId: z.string(),
  score: z.number(),
  input: z.any().optional(),
  output: z.any(),
  source: z.enum(["LIVE", "TEST"]),
  entityType: z.enum(["AGENT", "WORKFLOW"]).optional(),
  traceId: optionalString(),
  scorer: optionalAnyRecord(),
  preprocessStepResult: optionalAnyRecord(),
  extractStepResult: optionalAnyRecord(),
  analyzeStepResult: optionalAnyRecord(),
  reason: optionalString(),
  metadata: optionalAnyRecord(),
  preprocessPrompt: optionalString(),
  extractPrompt: optionalString(),
  generateScorePrompt: optionalString(),
  generateReasonPrompt: optionalString(),
  analyzePrompt: optionalString(),
  additionalContext: optionalAnyRecord(),
  runtimeContext: optionalAnyRecord(),
  entity: optionalAnyRecord(),
  resourceId: optionalString(),
  threadId: optionalString()
});
|
|
40
|
+
/**
 * Immutable-style builder for a scoring pipeline.
 *
 * Each of `preprocess`, `analyze`, `generateScore` and `generateReason`
 * returns a NEW scorer with one more step appended; the receiver is left
 * untouched. A step definition is either a function (called with the scorer
 * context) or a "prompt object" (an object carrying `description` and
 * `createPrompt`, optionally `outputSchema` / `judge`) that is answered by a
 * judge `Agent`.
 */
var MastraScorer = class _MastraScorer {
  /**
   * @param config Scorer configuration; this class reads `name`, `description` and `judge`.
   * @param steps Ordered step descriptors `{ name, definition, isPromptObject }`.
   * @param originalPromptObjects Map from step name to the prompt object supplied for that step.
   */
  constructor(config, steps = [], originalPromptObjects = /* @__PURE__ */ new Map()) {
    this.config = config;
    this.steps = steps;
    this.originalPromptObjects = originalPromptObjects;
  }

  get name() {
    return this.config.name;
  }

  get description() {
    return this.config.description;
  }

  get judge() {
    return this.config.judge;
  }

  /**
   * Shared implementation of the four builder methods.
   *
   * Copies the prompt-object map BEFORE registering the new prompt object so
   * the receiver's map is never mutated (scorers derived from the same base
   * must not see each other's prompt objects).
   *
   * @param name Step name.
   * @param stepDef Function or prompt object for the step.
   * @param keepPromptDefinition When true, a prompt object is also stored as the
   *   step's `definition` (what `preprocess` has always done); otherwise prompt
   *   steps get `definition: undefined`.
   * @returns A new scorer with the step appended.
   */
  #appendStep(name, stepDef, keepPromptDefinition) {
    const isPromptObj = this.isPromptObject(stepDef);
    const promptObjects = new Map(this.originalPromptObjects);
    if (isPromptObj) {
      promptObjects.set(name, stepDef);
    }
    const definition = isPromptObj && !keepPromptDefinition ? void 0 : stepDef;
    return new _MastraScorer(
      this.config,
      [...this.steps, { name, definition, isPromptObject: isPromptObj }],
      promptObjects
    );
  }

  /** Returns a new scorer with a "preprocess" step appended. */
  preprocess(stepDef) {
    return this.#appendStep("preprocess", stepDef, true);
  }

  /** Returns a new scorer with an "analyze" step appended. */
  analyze(stepDef) {
    return this.#appendStep("analyze", stepDef, false);
  }

  /** Returns a new scorer with a "generateScore" step appended. */
  generateScore(stepDef) {
    return this.#appendStep("generateScore", stepDef, false);
  }

  /** Returns a new scorer with a "generateReason" step appended. */
  generateReason(stepDef) {
    return this.#appendStep("generateReason", stepDef, false);
  }

  /** True when a step named "generateScore" has been added. */
  get hasGenerateScore() {
    return this.steps.some((step) => step.name === "generateScore");
  }

  /**
   * Executes the pipeline as a workflow and returns the flattened scorer result.
   *
   * @param input Run input; `runId` is generated with `randomUUID()` when falsy,
   *   and `tracingContext` is forwarded to the workflow run.
   * @returns The input (with `runId`) merged with score, reason, step results and prompts.
   * @throws MastraError when no generateScore step exists or the workflow status is "failed".
   */
  async run(input) {
    if (!this.hasGenerateScore) {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_MISSING_GENERATE_SCORE",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Cannot execute pipeline without generateScore() step`,
        details: {
          scorerId: this.config.name,
          steps: this.steps.map((s) => s.name).join(", ")
        }
      });
    }
    const { tracingContext } = input;
    const runId = input.runId || randomUUID();
    const run = { ...input, runId };
    const workflow = this.toMastraWorkflow();
    const workflowRun = await workflow.createRunAsync();
    const workflowResult = await workflowRun.start({
      inputData: {
        run
      },
      tracingContext
    });
    if (workflowResult.status === "failed") {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_WORKFLOW_FAILED",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Scorer Run Failed: ${workflowResult.error}`,
        details: {
          scorerId: this.config.name,
          steps: this.steps.map((s) => s.name).join(", ")
        }
      });
    }
    return this.transformToScorerResult({ workflowResult, originalInput: run });
  }

  /**
   * Tells prompt objects apart from function steps.
   *
   * A prompt object is a non-null object exposing both `description` and
   * `createPrompt` (with or without `outputSchema`). The explicit null check
   * matters: `typeof null === "object"` and `in` throws on null.
   *
   * @returns {boolean}
   */
  isPromptObject(stepDef) {
    return stepDef !== null && typeof stepDef === "object" && "description" in stepDef && "createPrompt" in stepDef;
  }

  /**
   * Summarizes the configured steps.
   *
   * For prompt steps the description comes from the registered prompt object,
   * because `analyze` / `generateScore` / `generateReason` store
   * `definition: undefined` for prompt objects.
   *
   * @returns Array of `{ name, type: "prompt" | "function", description }`.
   */
  getSteps() {
    return this.steps.map((step) => {
      const descriptionSource = step.isPromptObject ? this.originalPromptObjects.get(step.name) ?? step.definition : step.definition;
      return {
        name: step.name,
        type: step.isPromptObject ? "prompt" : "function",
        description: descriptionSource?.description
      };
    });
  }

  /**
   * Builds a committed workflow with one workflow step per scorer step, chained in order.
   * Every step passes `{ stepResult, accumulatedResults, generatedPrompts }` to the next.
   */
  toMastraWorkflow() {
    const workflowSteps = this.steps.map((scorerStep) => {
      return createStep({
        id: scorerStep.name,
        description: `Scorer step: ${scorerStep.name}`,
        inputSchema: z.any(),
        outputSchema: z.any(),
        execute: async ({ inputData, getInitData, tracingContext }) => {
          // The first step receives the workflow input ({ run }), so both
          // accumulators default to empty objects there.
          const { accumulatedResults = {}, generatedPrompts = {} } = inputData;
          const { run } = getInitData();
          const context = this.createScorerContext(scorerStep.name, run, accumulatedResults);
          let stepResult;
          let newGeneratedPrompts = generatedPrompts;
          if (scorerStep.isPromptObject) {
            const { result, prompt } = await this.executePromptStep(scorerStep, tracingContext, context);
            stepResult = result;
            newGeneratedPrompts = {
              ...generatedPrompts,
              [`${scorerStep.name}Prompt`]: prompt
            };
          } else {
            stepResult = await this.executeFunctionStep(scorerStep, context);
          }
          const newAccumulatedResults = {
            ...accumulatedResults,
            [`${scorerStep.name}StepResult`]: stepResult
          };
          return {
            stepResult,
            accumulatedResults: newAccumulatedResults,
            generatedPrompts: newGeneratedPrompts
          };
        }
      });
    });
    const workflow = createWorkflow({
      id: `scorer-${this.config.name}`,
      description: this.config.description,
      inputSchema: z.object({
        run: z.any()
        // ScorerRun
      }),
      outputSchema: z.object({
        run: z.any(),
        score: z.number(),
        reason: z.string().optional(),
        preprocessResult: z.any().optional(),
        analyzeResult: z.any().optional(),
        preprocessPrompt: z.string().optional(),
        analyzePrompt: z.string().optional(),
        generateScorePrompt: z.string().optional(),
        generateReasonPrompt: z.string().optional()
      })
    });
    let chainedWorkflow = workflow;
    for (const step of workflowSteps) {
      chainedWorkflow = chainedWorkflow.then(step);
    }
    return chainedWorkflow.commit();
  }

  /**
   * Builds the context handed to a step. The "generateReason" step additionally
   * receives `score`, taken from the accumulated generateScore result.
   */
  createScorerContext(stepName, run, accumulatedResults) {
    if (stepName === "generateReason") {
      const score = accumulatedResults.generateScoreStepResult;
      return { run, results: accumulatedResults, score };
    }
    return { run, results: accumulatedResults };
  }

  /** Runs a function step by calling its definition with the context. */
  async executeFunctionStep(scorerStep, context) {
    return await scorerStep.definition(context);
  }

  /**
   * Runs a prompt step through a judge agent.
   *
   * The model and instructions come from the step's own `judge`, falling back
   * to the scorer-level `judge`. Models whose `specificationVersion` is "v2"
   * are called via `generateVNext`, others via `generate`.
   *
   * @returns `{ result, prompt }` where `result` is the numeric score for
   *   "generateScore", the response text for "generateReason", and the
   *   structured object (per the step's `outputSchema`) for any other step.
   * @throws Error when no prompt object is registered for the step.
   * @throws MastraError when no model or instructions can be resolved.
   */
  async executePromptStep(scorerStep, tracingContext, context) {
    const originalStep = this.originalPromptObjects.get(scorerStep.name);
    if (!originalStep) {
      throw new Error(`Step "${scorerStep.name}" is not a prompt object`);
    }
    const prompt = await originalStep.createPrompt(context);
    const model = originalStep.judge?.model ?? this.config.judge?.model;
    const instructions = originalStep.judge?.instructions ?? this.config.judge?.instructions;
    if (!model || !instructions) {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_MISSING_MODEL_OR_INSTRUCTIONS",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Step "${scorerStep.name}" requires a model and instructions`,
        details: {
          scorerId: this.config.name,
          step: scorerStep.name
        }
      });
    }
    const judge = new Agent({ name: "judge", model, instructions });
    const useVNext = model.specificationVersion === "v2";
    // Single dispatch point for the v2 / legacy generate APIs.
    const askJudge = (options) => useVNext ? judge.generateVNext(prompt, options) : judge.generate(prompt, options);

    if (scorerStep.name === "generateScore") {
      const result = await askJudge({
        output: z.object({ score: z.number() }),
        tracingContext
      });
      return { result: result.object.score, prompt };
    }
    if (scorerStep.name === "generateReason") {
      const result = await askJudge({ tracingContext });
      return { result: result.text, prompt };
    }
    const result = await askJudge({
      output: originalStep.outputSchema,
      tracingContext
    });
    return { result: result.object, prompt };
  }

  /**
   * Flattens the final workflow step output into the scorer result:
   * the original input plus score, reason, per-step results and generated prompts.
   */
  transformToScorerResult({
    workflowResult,
    originalInput
  }) {
    const finalStepResult = workflowResult.result;
    const accumulatedResults = finalStepResult?.accumulatedResults || {};
    const generatedPrompts = finalStepResult?.generatedPrompts || {};
    return {
      ...originalInput,
      score: accumulatedResults.generateScoreStepResult,
      generateScorePrompt: generatedPrompts.generateScorePrompt,
      reason: accumulatedResults.generateReasonStepResult,
      generateReasonPrompt: generatedPrompts.generateReasonPrompt,
      preprocessStepResult: accumulatedResults.preprocessStepResult,
      preprocessPrompt: generatedPrompts.preprocessPrompt,
      analyzeStepResult: accumulatedResults.analyzeStepResult,
      analyzePrompt: generatedPrompts.analyzePrompt
    };
  }
};
|
|
339
|
+
/**
 * Creates an empty scorer (no steps yet) from the given settings.
 * Only `name`, `description` and `judge` are carried over from `config`.
 *
 * @param config Object providing `name`, `description` and `judge`.
 * @returns A new MastraScorer instance.
 */
function createScorer(config) {
  const { name, description, judge } = config;
  return new MastraScorer({ name, description, judge });
}
|
|
346
|
+
|
|
347
|
+
// src/scores/run-experiment/scorerAccumulator.ts
|
|
348
|
+
/**
 * Collects per-item scorer results and reports their averages.
 *
 * Two result shapes are accepted by `addScores`:
 *  - flat:   `{ [scorerName]: { score } }`
 *  - nested: `{ workflow?: { [scorerName]: { score } }, steps?: { [stepId]: { [scorerName]: { score } } } }`
 */
var ScoreAccumulator = class {
  // scorerName -> scores
  flatScores = {};
  // scorerName -> scores (workflow-level scorers)
  workflowScores = {};
  // stepId -> scorerName -> scores
  stepScores = {};

  /**
   * Decides whether a result object uses the nested shape.
   *
   * A `steps` key always means nested. A `workflow` key means nested when its
   * value is an object that is not itself a score result (no numeric `score`);
   * this keeps a flat scorer that happens to be named "workflow" on the flat
   * path while correctly handling results that contain only workflow-level
   * scorers (no `steps` key), which would otherwise average to NaN.
   */
  #isNested(scorerResults) {
    if ("steps" in scorerResults) {
      return true;
    }
    const workflowEntry = scorerResults.workflow;
    return workflowEntry !== null && typeof workflowEntry === "object" && typeof workflowEntry.score !== "number";
  }

  /** Appends `result.score` for every scorer in `results` to the matching list in `bucket`. */
  #collect(bucket, results) {
    for (const [scorerName, result] of Object.entries(results)) {
      if (!bucket[scorerName]) {
        bucket[scorerName] = [];
      }
      bucket[scorerName].push(result.score);
    }
  }

  /** Records one item's scorer results, dispatching on flat vs nested shape. */
  addScores(scorerResults) {
    if (this.#isNested(scorerResults)) {
      this.addNestedScores(scorerResults);
    } else {
      this.addFlatScores(scorerResults);
    }
  }

  /** Records flat results: `{ [scorerName]: { score } }`. */
  addFlatScores(scorerResults) {
    this.#collect(this.flatScores, scorerResults);
  }

  /** Records nested results; missing or falsy `workflow` / `steps` sections are ignored. */
  addNestedScores(scorerResults) {
    if ("workflow" in scorerResults && scorerResults.workflow) {
      this.#collect(this.workflowScores, scorerResults.workflow);
    }
    if ("steps" in scorerResults && scorerResults.steps) {
      this.addStepScores(scorerResults.steps);
    }
  }

  /** Records step-level results: `{ [stepId]: { [scorerName]: { score } } }`. */
  addStepScores(stepScorerResults) {
    for (const [stepId, stepResults] of Object.entries(stepScorerResults)) {
      if (!this.stepScores[stepId]) {
        this.stepScores[stepId] = {};
      }
      this.#collect(this.stepScores[stepId], stepResults);
    }
  }

  /**
   * @returns Averages keyed like the inputs: flat scorer names at the top level,
   *   plus `workflow` and `steps` sections when any such scores were recorded.
   */
  getAverageScores() {
    const result = {};
    for (const [scorerName, scoreArray] of Object.entries(this.flatScores)) {
      result[scorerName] = this.getAverageScore(scoreArray);
    }
    if (Object.keys(this.workflowScores).length > 0) {
      result.workflow = {};
      for (const [scorerName, scoreArray] of Object.entries(this.workflowScores)) {
        result.workflow[scorerName] = this.getAverageScore(scoreArray);
      }
    }
    if (Object.keys(this.stepScores).length > 0) {
      result.steps = {};
      for (const [stepId, stepScorers] of Object.entries(this.stepScores)) {
        result.steps[stepId] = {};
        for (const [scorerName, scoreArray] of Object.entries(stepScorers)) {
          result.steps[stepId][scorerName] = this.getAverageScore(scoreArray);
        }
      }
    }
    return result;
  }

  /** Arithmetic mean of `scoreArray`; 0 for an empty array. */
  getAverageScore(scoreArray) {
    if (scoreArray.length === 0) {
      return 0;
    }
    return scoreArray.reduce((a, b) => a + b, 0) / scoreArray.length;
  }
};
|
|
434
|
+
|
|
435
|
+
// src/scores/run-experiment/index.ts
|
|
436
|
+
/**
 * Runs `target` over every item in `data`, scores each result, and returns
 * the averaged scores.
 *
 * @param config `{ data, scorers, target, onItemComplete?, concurrency = 1 }`.
 *   `onItemComplete`, when provided, is awaited after each item with
 *   `{ item, targetResult, scorerResults }`.
 * @returns `{ scores, summary: { totalItems } }` where `totalItems` counts the
 *   items whose processing completed.
 */
async function runExperiment(config) {
  const { data, scorers, target, onItemComplete, concurrency = 1 } = config;
  validateExperimentInputs(data, scorers, target);

  let totalItems = 0;
  const scoreAccumulator = new ScoreAccumulator();

  // Full per-item pipeline: run the target, score it, record, notify.
  const processItem = async (item) => {
    const targetResult = await executeTarget(target, item);
    const scorerResults = await runScorers(scorers, targetResult, item);
    scoreAccumulator.addScores(scorerResults);
    if (onItemComplete) {
      await onItemComplete({ item, targetResult, scorerResults });
    }
    totalItems++;
  };

  // p-map is loaded lazily, only when an experiment actually runs.
  const { default: pMap } = await import('p-map');
  await pMap(data, processItem, { concurrency });

  const scores = scoreAccumulator.getAverageScores();
  return { scores, summary: { totalItems } };
}
|
|
466
|
+
/**
 * @param target Experiment target.
 * @returns {boolean} True when `target` is an instance of `Workflow`.
 */
function isWorkflow(target) {
  const targetIsWorkflow = target instanceof Workflow;
  return targetIsWorkflow;
}
|
|
469
|
+
/**
 * Checks whether `scorers` is a workflow-style scorer config, i.e. a
 * non-array object that has a `workflow` and/or `steps` key.
 *
 * `null` is rejected explicitly: `typeof null === "object"`, and applying the
 * `in` operator to null would throw a TypeError.
 *
 * @param scorers Value to inspect.
 * @returns {boolean}
 */
function isWorkflowScorerConfig(scorers) {
  if (scorers === null || typeof scorers !== "object" || Array.isArray(scorers)) {
    return false;
  }
  return "workflow" in scorers || "steps" in scorers;
}
|
|
472
|
+
/**
 * Validates the arguments of an experiment run before any work starts.
 *
 * Rules enforced:
 *  - `data` must be non-empty and every item must be an object with an `input` key;
 *  - an array of scorers must be non-empty;
 *  - a non-array scorer config is only allowed for workflow targets, and must
 *    contain at least one workflow-level scorer or one entry under `steps`.
 *    A config with neither key (e.g. `{}`, `null`, `undefined`) is rejected
 *    rather than silently producing an experiment with no scores.
 *
 * @param data Array of experiment items.
 * @param scorers Array of scorers, or `{ workflow?, steps? }` for workflow targets.
 * @param target Agent or workflow under test.
 * @throws MastraError (domain "SCORER", category "USER") on any violation.
 */
function validateExperimentInputs(data, scorers, target) {
  if (data.length === 0) {
    throw new MastraError({
      domain: "SCORER",
      id: "RUN_EXPERIMENT_FAILED_NO_DATA_PROVIDED",
      category: "USER",
      text: "Failed to run experiment: Data array is empty"
    });
  }
  for (let i = 0; i < data.length; i++) {
    const item = data[i];
    if (!item || typeof item !== "object" || !("input" in item)) {
      throw new MastraError({
        domain: "SCORER",
        id: "INVALID_DATA_ITEM",
        category: "USER",
        text: `Invalid data item at index ${i}: must have 'input' properties`
      });
    }
  }

  if (Array.isArray(scorers)) {
    if (scorers.length === 0) {
      throw new MastraError({
        domain: "SCORER",
        id: "NO_SCORERS_PROVIDED",
        category: "USER",
        text: "At least one scorer must be provided"
      });
    }
    return;
  }

  // From here on `scorers` is not an array, which only workflow targets accept.
  if (!isWorkflow(target)) {
    throw new MastraError({
      domain: "SCORER",
      id: "INVALID_AGENT_SCORERS",
      category: "USER",
      text: "Agent scorers must be an array of scorers"
    });
  }

  const hasWorkflowScorers = Boolean(scorers?.workflow) && scorers.workflow.length > 0;
  const hasStepScorers = Boolean(scorers?.steps) && Object.keys(scorers.steps).length > 0;
  if (!hasWorkflowScorers && !hasStepScorers) {
    throw new MastraError({
      domain: "SCORER",
      id: "NO_SCORERS_PROVIDED",
      category: "USER",
      text: "At least one workflow or step scorer must be provided"
    });
  }
}
|
|
520
|
+
/**
 * Runs the experiment target for one item, dispatching to the workflow or
 * agent executor depending on the target's type.
 *
 * @param target Workflow or agent under test.
 * @param item Experiment item passed through to the executor.
 * @returns The executor's result.
 * @throws MastraError wrapping any error raised while executing the target;
 *   the item is attached (JSON-stringified) in `details`.
 */
async function executeTarget(target, item) {
  try {
    const targetResult = isWorkflow(target)
      ? await executeWorkflow(target, item)
      : await executeAgent(target, item);
    return targetResult;
  } catch (cause) {
    const errorDefinition = {
      domain: "SCORER",
      id: "RUN_EXPERIMENT_TARGET_FAILED_TO_GENERATE_RESULT",
      category: "USER",
      text: "Failed to run experiment: Error generating result from target",
      details: {
        item: JSON.stringify(item)
      }
    };
    throw new MastraError(errorDefinition, cause);
  }
}
|
|
542
|
+
/**
 * Runs a workflow target for one experiment item, with scorers disabled on
 * the run, and packages the outcome for scoring.
 *
 * @param target Workflow exposing `createRun`.
 * @param item Experiment item; `input` becomes the run's `inputData` and
 *   `runtimeContext` is forwarded.
 * @returns `{ scoringData: { input, output, stepResults } }`, where `output`
 *   is the workflow result only when the run status is "success"
 *   (otherwise undefined) and `stepResults` is the run's `steps`.
 */
async function executeWorkflow(target, item) {
  const workflowRun = target.createRun({ disableScorers: true });
  const startOptions = {
    inputData: item.input,
    runtimeContext: item.runtimeContext
  };
  const outcome = await workflowRun.start(startOptions);

  const scoringData = {
    input: item.input,
    output: outcome.status === "success" ? outcome.result : void 0,
    stepResults: outcome.steps
  };
  return { scoringData };
}
|
|
556
|
+
/**
 * Runs an agent target for one experiment item.
 *
 * The agent's model is resolved first; models whose `specificationVersion`
 * is "v2" are invoked through `generateVNext`, all others through `generate`.
 * Both calls receive the same options: an empty `scorers` object,
 * `returnScorerData: true`, and the item's `runtimeContext`.
 *
 * @param agent Agent exposing `getModel`, `generate` and `generateVNext`.
 * @param item Experiment item; `input` is the prompt passed to the agent.
 * @returns Whatever the selected generate method resolves to.
 */
async function executeAgent(agent, item) {
  const { specificationVersion } = await agent.getModel();
  const generateMethod = specificationVersion === "v2" ? "generateVNext" : "generate";
  return await agent[generateMethod](item.input, {
    scorers: {},
    returnScorerData: true,
    runtimeContext: item.runtimeContext
  });
}
|
|
572
|
+
/**
 * Runs the configured scorers against one target result.
 *
 * @param scorers Either an array of scorers (flat mode) or a config object
 *   `{ workflow?: Scorer[], steps?: { [stepId]: Scorer[] } }`.
 * @param targetResult Target output; scorers read `scoringData.input`,
 *   `scoringData.output` and, for step scorers, `scoringData.stepResults`.
 * @param item Experiment item; its `groundTruth`, `runtimeContext` and
 *   `tracingContext` are forwarded to every scorer.
 * @returns Flat mode: `{ [scorerName]: result }`. Config mode: an object with
 *   a `workflow` section and/or a `steps` section; a section is omitted when
 *   it would be empty.
 * @throws MastraError wrapping any scorer failure (original error as cause).
 */
async function runScorers(scorers, targetResult, item) {
  // Every scorer receives the same item-level context; only input/output vary.
  const runScorer = (scorer, input, output) => scorer.run({
    input,
    output,
    groundTruth: item.groundTruth,
    runtimeContext: item.runtimeContext,
    tracingContext: item.tracingContext
  });
  // Error used for failures of scorers that score the target's overall result.
  const resultScoringError = (scorer, error) => new MastraError(
    {
      domain: "SCORER",
      id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_RESULT",
      category: "USER",
      text: `Failed to run experiment: Error running scorer ${scorer.name}`,
      details: {
        scorerName: scorer.name,
        item: JSON.stringify(item)
      }
    },
    error
  );

  const scorerResults = {};

  if (Array.isArray(scorers)) {
    for (const scorer of scorers) {
      try {
        const score = await runScorer(scorer, targetResult.scoringData?.input, targetResult.scoringData?.output);
        scorerResults[scorer.name] = score;
      } catch (error) {
        throw resultScoringError(scorer, error);
      }
    }
    return scorerResults;
  }

  if (scorers.workflow) {
    const workflowScorerResults = {};
    for (const scorer of scorers.workflow) {
      // Wrapped like the array and step paths so callers see one error type.
      try {
        const score = await runScorer(scorer, targetResult.scoringData.input, targetResult.scoringData.output);
        workflowScorerResults[scorer.name] = score;
      } catch (error) {
        throw resultScoringError(scorer, error);
      }
    }
    if (Object.keys(workflowScorerResults).length > 0) {
      scorerResults.workflow = workflowScorerResults;
    }
  }

  if (scorers.steps) {
    const stepScorerResults = {};
    for (const [stepId, stepScorers] of Object.entries(scorers.steps)) {
      const stepResult = targetResult.scoringData.stepResults?.[stepId];
      // Only successful steps that actually have a payload and an output are
      // scored. Compare against null/undefined rather than truthiness so that
      // legitimate falsy values (0, "", false) are still scored.
      const isScorable = stepResult?.status === "success" && stepResult.payload != null && stepResult.output != null;
      if (!isScorable) {
        continue;
      }
      const stepResults = {};
      for (const scorer of stepScorers) {
        try {
          const score = await runScorer(scorer, stepResult.payload, stepResult.output);
          stepResults[scorer.name] = score;
        } catch (error) {
          throw new MastraError(
            {
              domain: "SCORER",
              id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_STEP_RESULT",
              category: "USER",
              text: `Failed to run experiment: Error running scorer ${scorer.name} on step ${stepId}`,
              details: {
                scorerName: scorer.name,
                stepId
              }
            },
            error
          );
        }
      }
      if (Object.keys(stepResults).length > 0) {
        stepScorerResults[stepId] = stepResults;
      }
    }
    if (Object.keys(stepScorerResults).length > 0) {
      scorerResults.steps = stepScorerResults;
    }
  }

  return scorerResults;
}
|
|
662
|
+
|
|
663
|
+
export { MastraScorer, createScorer, runExperiment, saveScorePayloadSchema, scoreResultSchema, scoringExtractStepResultSchema, scoringValueSchema };
|
|
664
|
+
//# sourceMappingURL=chunk-MMYGYTJK.js.map
|
|
665
|
+
//# sourceMappingURL=chunk-MMYGYTJK.js.map
|