@mastra/core 0.15.2 → 0.15.3-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/agent.types.d.ts +2 -0
- package/dist/agent/agent.types.d.ts.map +1 -1
- package/dist/agent/index.cjs +8 -8
- package/dist/agent/index.d.ts +0 -2
- package/dist/agent/index.d.ts.map +1 -1
- package/dist/agent/index.js +1 -1
- package/dist/agent/input-processor/index.cjs +6 -6
- package/dist/agent/input-processor/index.js +1 -1
- package/dist/ai-tracing/base.d.ts +3 -1
- package/dist/ai-tracing/base.d.ts.map +1 -1
- package/dist/ai-tracing/default.d.ts +1 -9
- package/dist/ai-tracing/default.d.ts.map +1 -1
- package/dist/ai-tracing/exporters/console.d.ts +10 -0
- package/dist/ai-tracing/exporters/console.d.ts.map +1 -0
- package/dist/ai-tracing/exporters/default.d.ts +97 -0
- package/dist/ai-tracing/exporters/default.d.ts.map +1 -0
- package/dist/ai-tracing/exporters/index.d.ts +6 -0
- package/dist/ai-tracing/exporters/index.d.ts.map +1 -0
- package/dist/ai-tracing/index.cjs +31 -27
- package/dist/ai-tracing/index.d.ts +1 -0
- package/dist/ai-tracing/index.d.ts.map +1 -1
- package/dist/ai-tracing/index.js +1 -1
- package/dist/ai-tracing/no-op.d.ts +8 -0
- package/dist/ai-tracing/no-op.d.ts.map +1 -1
- package/dist/ai-tracing/types.d.ts +33 -0
- package/dist/ai-tracing/types.d.ts.map +1 -1
- package/dist/{chunk-DPE6K23N.cjs → chunk-2KK7QQH2.cjs} +47 -8
- package/dist/chunk-2KK7QQH2.cjs.map +1 -0
- package/dist/{chunk-EQLCC3M7.cjs → chunk-4DKPMUAC.cjs} +646 -101
- package/dist/chunk-4DKPMUAC.cjs.map +1 -0
- package/dist/{chunk-FCFQE5BD.js → chunk-6JAIHLZY.js} +130 -99
- package/dist/chunk-6JAIHLZY.js.map +1 -0
- package/dist/{chunk-RMEG4MOG.cjs → chunk-6VROHRAR.cjs} +45 -30
- package/dist/chunk-6VROHRAR.cjs.map +1 -0
- package/dist/{chunk-DAJYN7HG.cjs → chunk-7U76TYYN.cjs} +4 -4
- package/dist/{chunk-DAJYN7HG.cjs.map → chunk-7U76TYYN.cjs.map} +1 -1
- package/dist/{chunk-VKJWTAHZ.js → chunk-E3LAPNKY.js} +3 -3
- package/dist/{chunk-VKJWTAHZ.js.map → chunk-E3LAPNKY.js.map} +1 -1
- package/dist/{chunk-QLRALF4I.js → chunk-EEGBKQYC.js} +6 -4
- package/dist/chunk-EEGBKQYC.js.map +1 -0
- package/dist/{chunk-FFGJPMKP.js → chunk-FLXWZUIG.js} +45 -30
- package/dist/chunk-FLXWZUIG.js.map +1 -0
- package/dist/{chunk-SNYSVGIU.cjs → chunk-I2YGYG3Y.cjs} +144 -113
- package/dist/chunk-I2YGYG3Y.cjs.map +1 -0
- package/dist/{chunk-HRPTZGDT.js → chunk-IAOFOXVA.js} +3 -3
- package/dist/{chunk-HRPTZGDT.js.map → chunk-IAOFOXVA.js.map} +1 -1
- package/dist/{chunk-UKQI74TN.cjs → chunk-K6UMYGK5.cjs} +17 -2
- package/dist/chunk-K6UMYGK5.cjs.map +1 -0
- package/dist/{chunk-XJFIB2FO.js → chunk-Q67FV4QB.js} +3 -3
- package/dist/{chunk-XJFIB2FO.js.map → chunk-Q67FV4QB.js.map} +1 -1
- package/dist/{chunk-7EXGDKNQ.cjs → chunk-QBNRMJAN.cjs} +4 -4
- package/dist/{chunk-7EXGDKNQ.cjs.map → chunk-QBNRMJAN.cjs.map} +1 -1
- package/dist/{chunk-G6WYC4SF.cjs → chunk-SFLYVXLG.cjs} +6 -6
- package/dist/{chunk-G6WYC4SF.cjs.map → chunk-SFLYVXLG.cjs.map} +1 -1
- package/dist/{chunk-7TH2KSEC.js → chunk-WOTBMZCN.js} +645 -101
- package/dist/chunk-WOTBMZCN.js.map +1 -0
- package/dist/{chunk-6NYFECSO.js → chunk-Y3WIANDM.js} +43 -4
- package/dist/chunk-Y3WIANDM.js.map +1 -0
- package/dist/{chunk-Y44DK4T5.js → chunk-YGW2WEJ5.js} +17 -2
- package/dist/chunk-YGW2WEJ5.js.map +1 -0
- package/dist/{chunk-ASRKKIW7.cjs → chunk-YPT3YX6U.cjs} +10 -8
- package/dist/chunk-YPT3YX6U.cjs.map +1 -0
- package/dist/index.cjs +42 -42
- package/dist/index.js +9 -9
- package/dist/llm/model/model.loop.d.ts +1 -1
- package/dist/llm/model/model.loop.d.ts.map +1 -1
- package/dist/loop/index.cjs +2 -2
- package/dist/loop/index.js +1 -1
- package/dist/loop/types.d.ts +1 -0
- package/dist/loop/types.d.ts.map +1 -1
- package/dist/mastra/index.cjs +2 -2
- package/dist/mastra/index.d.ts +8 -0
- package/dist/mastra/index.d.ts.map +1 -1
- package/dist/mastra/index.js +1 -1
- package/dist/memory/index.cjs +4 -4
- package/dist/memory/index.js +1 -1
- package/dist/network/index.cjs +2 -2
- package/dist/network/index.js +1 -1
- package/dist/network/vNext/index.cjs +15 -13
- package/dist/network/vNext/index.cjs.map +1 -1
- package/dist/network/vNext/index.d.ts.map +1 -1
- package/dist/network/vNext/index.js +4 -2
- package/dist/network/vNext/index.js.map +1 -1
- package/dist/processors/index.cjs +8 -8
- package/dist/processors/index.js +2 -2
- package/dist/relevance/index.cjs +4 -4
- package/dist/relevance/index.js +1 -1
- package/dist/scores/index.cjs +282 -88
- package/dist/scores/index.cjs.map +1 -1
- package/dist/scores/index.d.ts +1 -1
- package/dist/scores/index.js +279 -85
- package/dist/scores/index.js.map +1 -1
- package/dist/scores/run-experiment/index.d.ts +59 -0
- package/dist/scores/run-experiment/index.d.ts.map +1 -0
- package/dist/scores/run-experiment/scorerAccumulator.d.ts +12 -0
- package/dist/scores/run-experiment/scorerAccumulator.d.ts.map +1 -0
- package/dist/storage/base.d.ts +9 -0
- package/dist/storage/base.d.ts.map +1 -1
- package/dist/storage/domains/observability/base.d.ts +9 -0
- package/dist/storage/domains/observability/base.d.ts.map +1 -1
- package/dist/storage/domains/observability/index.d.ts +1 -0
- package/dist/storage/domains/observability/index.d.ts.map +1 -1
- package/dist/storage/domains/observability/inmemory.d.ts +5 -0
- package/dist/storage/domains/observability/inmemory.d.ts.map +1 -1
- package/dist/storage/index.cjs +24 -6
- package/dist/storage/index.cjs.map +1 -1
- package/dist/storage/index.js +20 -3
- package/dist/storage/index.js.map +1 -1
- package/dist/stream/index.cjs +3 -3
- package/dist/stream/index.js +1 -1
- package/dist/test-utils/llm-mock.cjs +2 -2
- package/dist/test-utils/llm-mock.js +1 -1
- package/dist/tools/tool-builder/builder.d.ts.map +1 -1
- package/dist/tools/types.d.ts +2 -2
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/utils.cjs +16 -16
- package/dist/utils.js +1 -1
- package/dist/workflows/default.d.ts +25 -6
- package/dist/workflows/default.d.ts.map +1 -1
- package/dist/workflows/evented/index.cjs +10 -10
- package/dist/workflows/evented/index.js +1 -1
- package/dist/workflows/evented/workflow.d.ts.map +1 -1
- package/dist/workflows/execution-engine.d.ts +1 -0
- package/dist/workflows/execution-engine.d.ts.map +1 -1
- package/dist/workflows/index.cjs +10 -10
- package/dist/workflows/index.js +1 -1
- package/dist/workflows/legacy/index.cjs +22 -22
- package/dist/workflows/legacy/index.js +1 -1
- package/dist/workflows/workflow.d.ts +7 -0
- package/dist/workflows/workflow.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-6NYFECSO.js.map +0 -1
- package/dist/chunk-7TH2KSEC.js.map +0 -1
- package/dist/chunk-ASRKKIW7.cjs.map +0 -1
- package/dist/chunk-DPE6K23N.cjs.map +0 -1
- package/dist/chunk-EQLCC3M7.cjs.map +0 -1
- package/dist/chunk-FCFQE5BD.js.map +0 -1
- package/dist/chunk-FFGJPMKP.js.map +0 -1
- package/dist/chunk-QLRALF4I.js.map +0 -1
- package/dist/chunk-RMEG4MOG.cjs.map +0 -1
- package/dist/chunk-SNYSVGIU.cjs.map +0 -1
- package/dist/chunk-UKQI74TN.cjs.map +0 -1
- package/dist/chunk-Y44DK4T5.js.map +0 -1
- package/dist/scores/run-experiment.d.ts +0 -35
- package/dist/scores/run-experiment.d.ts.map +0 -1
package/dist/scores/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
3
|
+
var chunkI2YGYG3Y_cjs = require('../chunk-I2YGYG3Y.cjs');
|
|
4
4
|
var chunkC73WLCY3_cjs = require('../chunk-C73WLCY3.cjs');
|
|
5
5
|
var zod = require('zod');
|
|
6
6
|
var crypto = require('crypto');
|
|
@@ -161,7 +161,7 @@ var MastraScorer = class _MastraScorer {
|
|
|
161
161
|
}
|
|
162
162
|
toMastraWorkflow() {
|
|
163
163
|
const workflowSteps = this.steps.map((scorerStep) => {
|
|
164
|
-
return
|
|
164
|
+
return chunkI2YGYG3Y_cjs.createStep({
|
|
165
165
|
id: scorerStep.name,
|
|
166
166
|
description: `Scorer step: ${scorerStep.name}`,
|
|
167
167
|
inputSchema: zod.z.any(),
|
|
@@ -194,7 +194,7 @@ var MastraScorer = class _MastraScorer {
|
|
|
194
194
|
}
|
|
195
195
|
});
|
|
196
196
|
});
|
|
197
|
-
const workflow =
|
|
197
|
+
const workflow = chunkI2YGYG3Y_cjs.createWorkflow({
|
|
198
198
|
id: `scorer-${this.config.name}`,
|
|
199
199
|
description: this.config.description,
|
|
200
200
|
inputSchema: zod.z.object({
|
|
@@ -249,7 +249,7 @@ var MastraScorer = class _MastraScorer {
|
|
|
249
249
|
}
|
|
250
250
|
});
|
|
251
251
|
}
|
|
252
|
-
const judge = new
|
|
252
|
+
const judge = new chunkI2YGYG3Y_cjs.Agent({ name: "judge", model, instructions });
|
|
253
253
|
if (scorerStep.name === "generateScore") {
|
|
254
254
|
let result;
|
|
255
255
|
if (model.specificationVersion === "v2") {
|
|
@@ -313,16 +313,132 @@ function createScorer(config) {
|
|
|
313
313
|
});
|
|
314
314
|
}
|
|
315
315
|
|
|
316
|
-
// src/scores/run-experiment.ts
|
|
317
|
-
var
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
316
|
+
// src/scores/run-experiment/scorerAccumulator.ts
|
|
317
|
+
var ScoreAccumulator = class {
|
|
318
|
+
flatScores = {};
|
|
319
|
+
workflowScores = {};
|
|
320
|
+
stepScores = {};
|
|
321
|
+
addScores(scorerResults) {
|
|
322
|
+
const isTargetWorkflowAndHasStepScores = "steps" in scorerResults;
|
|
323
|
+
if (isTargetWorkflowAndHasStepScores) {
|
|
324
|
+
this.addNestedScores(scorerResults);
|
|
325
|
+
} else {
|
|
326
|
+
this.addFlatScores(scorerResults);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
addFlatScores(scorerResults) {
|
|
330
|
+
for (const [scorerName, result] of Object.entries(scorerResults)) {
|
|
331
|
+
if (!this.flatScores[scorerName]) {
|
|
332
|
+
this.flatScores[scorerName] = [];
|
|
333
|
+
}
|
|
334
|
+
this.flatScores[scorerName].push(result.score);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
addNestedScores(scorerResults) {
|
|
338
|
+
if ("workflow" in scorerResults && scorerResults.workflow) {
|
|
339
|
+
for (const [scorerName, result] of Object.entries(scorerResults.workflow)) {
|
|
340
|
+
if (!this.workflowScores[scorerName]) {
|
|
341
|
+
this.workflowScores[scorerName] = [];
|
|
342
|
+
}
|
|
343
|
+
this.workflowScores[scorerName].push(result.score);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
if ("steps" in scorerResults && scorerResults.steps) {
|
|
347
|
+
for (const [stepId, stepResults] of Object.entries(scorerResults.steps)) {
|
|
348
|
+
if (!this.stepScores[stepId]) {
|
|
349
|
+
this.stepScores[stepId] = {};
|
|
350
|
+
}
|
|
351
|
+
for (const [scorerName, result] of Object.entries(stepResults)) {
|
|
352
|
+
if (!this.stepScores[stepId][scorerName]) {
|
|
353
|
+
this.stepScores[stepId][scorerName] = [];
|
|
354
|
+
}
|
|
355
|
+
this.stepScores[stepId][scorerName].push(result.score);
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
addStepScores(stepScorerResults) {
|
|
361
|
+
for (const [stepId, stepResults] of Object.entries(stepScorerResults)) {
|
|
362
|
+
if (!this.stepScores[stepId]) {
|
|
363
|
+
this.stepScores[stepId] = {};
|
|
364
|
+
}
|
|
365
|
+
for (const [scorerName, result] of Object.entries(stepResults)) {
|
|
366
|
+
if (!this.stepScores[stepId][scorerName]) {
|
|
367
|
+
this.stepScores[stepId][scorerName] = [];
|
|
368
|
+
}
|
|
369
|
+
this.stepScores[stepId][scorerName].push(result.score);
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
getAverageScores() {
|
|
374
|
+
const result = {};
|
|
375
|
+
for (const [scorerName, scoreArray] of Object.entries(this.flatScores)) {
|
|
376
|
+
result[scorerName] = this.getAverageScore(scoreArray);
|
|
377
|
+
}
|
|
378
|
+
if (Object.keys(this.workflowScores).length > 0) {
|
|
379
|
+
result.workflow = {};
|
|
380
|
+
for (const [scorerName, scoreArray] of Object.entries(this.workflowScores)) {
|
|
381
|
+
result.workflow[scorerName] = this.getAverageScore(scoreArray);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
if (Object.keys(this.stepScores).length > 0) {
|
|
385
|
+
result.steps = {};
|
|
386
|
+
for (const [stepId, stepScorers] of Object.entries(this.stepScores)) {
|
|
387
|
+
result.steps[stepId] = {};
|
|
388
|
+
for (const [scorerName, scoreArray] of Object.entries(stepScorers)) {
|
|
389
|
+
result.steps[stepId][scorerName] = this.getAverageScore(scoreArray);
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
return result;
|
|
394
|
+
}
|
|
395
|
+
getAverageScore(scoreArray) {
|
|
396
|
+
if (scoreArray.length > 0) {
|
|
397
|
+
return scoreArray.reduce((a, b) => a + b, 0) / scoreArray.length;
|
|
398
|
+
} else {
|
|
399
|
+
return 0;
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
};
|
|
403
|
+
|
|
404
|
+
// src/scores/run-experiment/index.ts
|
|
405
|
+
async function runExperiment(config) {
|
|
406
|
+
const { data, scorers, target, onItemComplete, concurrency = 1 } = config;
|
|
407
|
+
validateExperimentInputs(data, scorers, target);
|
|
324
408
|
let totalItems = 0;
|
|
325
|
-
const
|
|
409
|
+
const scoreAccumulator = new ScoreAccumulator();
|
|
410
|
+
const pMap = (await import('p-map')).default;
|
|
411
|
+
await pMap(
|
|
412
|
+
data,
|
|
413
|
+
async (item) => {
|
|
414
|
+
const targetResult = await executeTarget(target, item);
|
|
415
|
+
const scorerResults = await runScorers(scorers, targetResult, item);
|
|
416
|
+
scoreAccumulator.addScores(scorerResults);
|
|
417
|
+
if (onItemComplete) {
|
|
418
|
+
await onItemComplete({
|
|
419
|
+
item,
|
|
420
|
+
targetResult,
|
|
421
|
+
scorerResults
|
|
422
|
+
});
|
|
423
|
+
}
|
|
424
|
+
totalItems++;
|
|
425
|
+
},
|
|
426
|
+
{ concurrency }
|
|
427
|
+
);
|
|
428
|
+
return {
|
|
429
|
+
scores: scoreAccumulator.getAverageScores(),
|
|
430
|
+
summary: {
|
|
431
|
+
totalItems
|
|
432
|
+
}
|
|
433
|
+
};
|
|
434
|
+
}
|
|
435
|
+
function isWorkflow(target) {
|
|
436
|
+
return target instanceof chunkI2YGYG3Y_cjs.Workflow;
|
|
437
|
+
}
|
|
438
|
+
function isWorkflowScorerConfig(scorers) {
|
|
439
|
+
return typeof scorers === "object" && !Array.isArray(scorers) && ("workflow" in scorers || "steps" in scorers);
|
|
440
|
+
}
|
|
441
|
+
function validateExperimentInputs(data, scorers, target) {
|
|
326
442
|
if (data.length === 0) {
|
|
327
443
|
throw new chunkC73WLCY3_cjs.MastraError({
|
|
328
444
|
domain: "SCORER",
|
|
@@ -331,106 +447,184 @@ var runExperiment = async ({
|
|
|
331
447
|
text: "Failed to run experiment: Data array is empty"
|
|
332
448
|
});
|
|
333
449
|
}
|
|
334
|
-
|
|
450
|
+
for (let i = 0; i < data.length; i++) {
|
|
451
|
+
const item = data[i];
|
|
452
|
+
if (!item || typeof item !== "object" || !("input" in item)) {
|
|
453
|
+
throw new chunkC73WLCY3_cjs.MastraError({
|
|
454
|
+
domain: "SCORER",
|
|
455
|
+
id: "INVALID_DATA_ITEM",
|
|
456
|
+
category: "USER",
|
|
457
|
+
text: `Invalid data item at index ${i}: must have 'input' properties`
|
|
458
|
+
});
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
if (Array.isArray(scorers)) {
|
|
462
|
+
if (scorers.length === 0) {
|
|
463
|
+
throw new chunkC73WLCY3_cjs.MastraError({
|
|
464
|
+
domain: "SCORER",
|
|
465
|
+
id: "NO_SCORERS_PROVIDED",
|
|
466
|
+
category: "USER",
|
|
467
|
+
text: "At least one scorer must be provided"
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
} else if (isWorkflow(target) && isWorkflowScorerConfig(scorers)) {
|
|
471
|
+
const hasScorers = scorers.workflow && scorers.workflow.length > 0 || scorers.steps && Object.keys(scorers.steps).length > 0;
|
|
472
|
+
if (!hasScorers) {
|
|
473
|
+
throw new chunkC73WLCY3_cjs.MastraError({
|
|
474
|
+
domain: "SCORER",
|
|
475
|
+
id: "NO_SCORERS_PROVIDED",
|
|
476
|
+
category: "USER",
|
|
477
|
+
text: "At least one workflow or step scorer must be provided"
|
|
478
|
+
});
|
|
479
|
+
}
|
|
480
|
+
} else if (!isWorkflow(target) && !Array.isArray(scorers)) {
|
|
335
481
|
throw new chunkC73WLCY3_cjs.MastraError({
|
|
336
482
|
domain: "SCORER",
|
|
337
|
-
id: "
|
|
483
|
+
id: "INVALID_AGENT_SCORERS",
|
|
338
484
|
category: "USER",
|
|
339
|
-
text: "
|
|
485
|
+
text: "Agent scorers must be an array of scorers"
|
|
340
486
|
});
|
|
341
487
|
}
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
488
|
+
}
|
|
489
|
+
async function executeTarget(target, item) {
|
|
490
|
+
try {
|
|
491
|
+
if (isWorkflow(target)) {
|
|
492
|
+
return await executeWorkflow(target, item);
|
|
493
|
+
} else {
|
|
494
|
+
return await executeAgent(target, item);
|
|
495
|
+
}
|
|
496
|
+
} catch (error) {
|
|
497
|
+
throw new chunkC73WLCY3_cjs.MastraError(
|
|
498
|
+
{
|
|
499
|
+
domain: "SCORER",
|
|
500
|
+
id: "RUN_EXPERIMENT_TARGET_FAILED_TO_GENERATE_RESULT",
|
|
501
|
+
category: "USER",
|
|
502
|
+
text: "Failed to run experiment: Error generating result from target",
|
|
503
|
+
details: {
|
|
504
|
+
item: JSON.stringify(item)
|
|
505
|
+
}
|
|
506
|
+
},
|
|
507
|
+
error
|
|
508
|
+
);
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
async function executeWorkflow(target, item) {
|
|
512
|
+
const run = target.createRun({ disableScorers: true });
|
|
513
|
+
const workflowResult = await run.start({
|
|
514
|
+
inputData: item.input,
|
|
515
|
+
runtimeContext: item.runtimeContext
|
|
516
|
+
});
|
|
517
|
+
return {
|
|
518
|
+
scoringData: {
|
|
519
|
+
input: item.input,
|
|
520
|
+
output: workflowResult.status === "success" ? workflowResult.result : void 0,
|
|
521
|
+
stepResults: workflowResult.steps
|
|
522
|
+
}
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
async function executeAgent(agent, item) {
|
|
526
|
+
const model = await agent.getModel();
|
|
527
|
+
if (model.specificationVersion === "v2") {
|
|
528
|
+
return await agent.generateVNext(item.input, {
|
|
529
|
+
scorers: {},
|
|
530
|
+
returnScorerData: true,
|
|
531
|
+
runtimeContext: item.runtimeContext
|
|
532
|
+
});
|
|
533
|
+
} else {
|
|
534
|
+
return await agent.generate(item.input, {
|
|
535
|
+
scorers: {},
|
|
536
|
+
returnScorerData: true,
|
|
537
|
+
runtimeContext: item.runtimeContext
|
|
348
538
|
});
|
|
349
539
|
}
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
540
|
+
}
|
|
541
|
+
async function runScorers(scorers, targetResult, item) {
|
|
542
|
+
const scorerResults = {};
|
|
543
|
+
if (Array.isArray(scorers)) {
|
|
544
|
+
for (const scorer of scorers) {
|
|
355
545
|
try {
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
targetResult
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
} else {
|
|
364
|
-
targetResult = await target.generate(item.input, {
|
|
365
|
-
scorers: {},
|
|
366
|
-
returnScorerData: true,
|
|
367
|
-
runtimeContext: item.runtimeContext
|
|
368
|
-
});
|
|
369
|
-
}
|
|
546
|
+
const score = await scorer.run({
|
|
547
|
+
input: targetResult.scoringData?.input,
|
|
548
|
+
output: targetResult.scoringData?.output,
|
|
549
|
+
groundTruth: item.groundTruth,
|
|
550
|
+
runtimeContext: item.runtimeContext
|
|
551
|
+
});
|
|
552
|
+
scorerResults[scorer.name] = score;
|
|
370
553
|
} catch (error) {
|
|
371
554
|
throw new chunkC73WLCY3_cjs.MastraError(
|
|
372
555
|
{
|
|
373
556
|
domain: "SCORER",
|
|
374
|
-
id: "
|
|
557
|
+
id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_RESULT",
|
|
375
558
|
category: "USER",
|
|
376
|
-
text:
|
|
559
|
+
text: `Failed to run experiment: Error running scorer ${scorer.name}`,
|
|
377
560
|
details: {
|
|
561
|
+
scorerName: scorer.name,
|
|
378
562
|
item: JSON.stringify(item)
|
|
379
563
|
}
|
|
380
564
|
},
|
|
381
565
|
error
|
|
382
566
|
);
|
|
383
567
|
}
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
category: "USER",
|
|
400
|
-
text: `Failed to run experiment: Error running scorer ${scorer.name}`,
|
|
401
|
-
details: {
|
|
402
|
-
scorerName: scorer.name,
|
|
403
|
-
item: JSON.stringify(item)
|
|
404
|
-
}
|
|
405
|
-
},
|
|
406
|
-
error
|
|
407
|
-
);
|
|
408
|
-
}
|
|
568
|
+
}
|
|
569
|
+
} else {
|
|
570
|
+
if (scorers.workflow) {
|
|
571
|
+
const workflowScorerResults = {};
|
|
572
|
+
for (const scorer of scorers.workflow) {
|
|
573
|
+
const score = await scorer.run({
|
|
574
|
+
input: targetResult.scoringData.input,
|
|
575
|
+
output: targetResult.scoringData.output,
|
|
576
|
+
groundTruth: item.groundTruth,
|
|
577
|
+
runtimeContext: item.runtimeContext
|
|
578
|
+
});
|
|
579
|
+
workflowScorerResults[scorer.name] = score;
|
|
580
|
+
}
|
|
581
|
+
if (Object.keys(workflowScorerResults).length > 0) {
|
|
582
|
+
scorerResults.workflow = workflowScorerResults;
|
|
409
583
|
}
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
584
|
+
}
|
|
585
|
+
if (scorers.steps) {
|
|
586
|
+
const stepScorerResults = {};
|
|
587
|
+
for (const [stepId, stepScorers] of Object.entries(scorers.steps)) {
|
|
588
|
+
const stepResult = targetResult.scoringData.stepResults?.[stepId];
|
|
589
|
+
if (stepResult?.status === "success" && stepResult.payload && stepResult.output) {
|
|
590
|
+
const stepResults = {};
|
|
591
|
+
for (const scorer of stepScorers) {
|
|
592
|
+
try {
|
|
593
|
+
const score = await scorer.run({
|
|
594
|
+
input: stepResult.payload,
|
|
595
|
+
output: stepResult.output,
|
|
596
|
+
groundTruth: item.groundTruth,
|
|
597
|
+
runtimeContext: item.runtimeContext
|
|
598
|
+
});
|
|
599
|
+
stepResults[scorer.name] = score;
|
|
600
|
+
} catch (error) {
|
|
601
|
+
throw new chunkC73WLCY3_cjs.MastraError(
|
|
602
|
+
{
|
|
603
|
+
domain: "SCORER",
|
|
604
|
+
id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_STEP_RESULT",
|
|
605
|
+
category: "USER",
|
|
606
|
+
text: `Failed to run experiment: Error running scorer ${scorer.name} on step ${stepId}`,
|
|
607
|
+
details: {
|
|
608
|
+
scorerName: scorer.name,
|
|
609
|
+
stepId
|
|
610
|
+
}
|
|
611
|
+
},
|
|
612
|
+
error
|
|
613
|
+
);
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
if (Object.keys(stepResults).length > 0) {
|
|
617
|
+
stepScorerResults[stepId] = stepResults;
|
|
618
|
+
}
|
|
413
619
|
}
|
|
414
|
-
scoreAccumulators[scorerName].push(result.score);
|
|
415
620
|
}
|
|
416
|
-
if (
|
|
417
|
-
|
|
621
|
+
if (Object.keys(stepScorerResults).length > 0) {
|
|
622
|
+
scorerResults.steps = stepScorerResults;
|
|
418
623
|
}
|
|
419
|
-
totalItems++;
|
|
420
|
-
},
|
|
421
|
-
{ concurrency }
|
|
422
|
-
);
|
|
423
|
-
const averageScores = {};
|
|
424
|
-
for (const [scorerName, scores] of Object.entries(scoreAccumulators)) {
|
|
425
|
-
averageScores[scorerName] = scores.reduce((a, b) => a + b, 0) / scores.length;
|
|
426
|
-
}
|
|
427
|
-
return {
|
|
428
|
-
scores: averageScores,
|
|
429
|
-
summary: {
|
|
430
|
-
totalItems
|
|
431
624
|
}
|
|
432
|
-
}
|
|
433
|
-
|
|
625
|
+
}
|
|
626
|
+
return scorerResults;
|
|
627
|
+
}
|
|
434
628
|
|
|
435
629
|
exports.MastraScorer = MastraScorer;
|
|
436
630
|
exports.createScorer = createScorer;
|