@mastra/core 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +462 -0
- package/datasets.d.ts +1 -0
- package/dist/agent/agent.d.ts +1 -1
- package/dist/agent/index.cjs +13 -13
- package/dist/agent/index.js +2 -2
- package/dist/agent/message-list/adapters/AIV4Adapter.d.ts.map +1 -1
- package/dist/agent/message-list/adapters/AIV5Adapter.d.ts.map +1 -1
- package/dist/agent/message-list/conversion/output-converter.d.ts +2 -1
- package/dist/agent/message-list/conversion/output-converter.d.ts.map +1 -1
- package/dist/agent/message-list/index.cjs +18 -18
- package/dist/agent/message-list/index.js +1 -1
- package/dist/agent/workflows/prepare-stream/index.d.ts +1 -1
- package/dist/chunk-3JVFFAJX.cjs +1207 -0
- package/dist/chunk-3JVFFAJX.cjs.map +1 -0
- package/dist/{chunk-WL3AW3YA.js → chunk-3X3CZUXI.js} +4070 -3983
- package/dist/chunk-3X3CZUXI.js.map +1 -0
- package/dist/{chunk-2GWTJFVM.js → chunk-4EHGOATH.js} +46 -14
- package/dist/chunk-4EHGOATH.js.map +1 -0
- package/dist/{chunk-YNXIGRQE.cjs → chunk-4IJ4UDZX.cjs} +319 -83
- package/dist/chunk-4IJ4UDZX.cjs.map +1 -0
- package/dist/{chunk-CGPH7CMG.cjs → chunk-4KFEMXTV.cjs} +46 -14
- package/dist/chunk-4KFEMXTV.cjs.map +1 -0
- package/dist/{chunk-PHYJYZ32.js → chunk-4TQ4EBYX.js} +16 -8
- package/dist/chunk-4TQ4EBYX.js.map +1 -0
- package/dist/{chunk-SIZEIYNH.js → chunk-4XSAZPPS.js} +254 -18
- package/dist/chunk-4XSAZPPS.js.map +1 -0
- package/dist/{chunk-KUTU2YZF.js → chunk-5Q5Y34SS.js} +5 -5
- package/dist/{chunk-KUTU2YZF.js.map → chunk-5Q5Y34SS.js.map} +1 -1
- package/dist/{chunk-EH6SAGEO.cjs → chunk-64WGYTQK.cjs} +72 -53
- package/dist/{chunk-EH6SAGEO.cjs.map → chunk-64WGYTQK.cjs.map} +1 -1
- package/dist/{chunk-OOCEAC6U.cjs → chunk-65PHUUMF.cjs} +3 -3
- package/dist/{chunk-OOCEAC6U.cjs.map → chunk-65PHUUMF.cjs.map} +1 -1
- package/dist/{chunk-JNE2ABVB.js → chunk-7NKUSQEV.js} +1094 -10
- package/dist/chunk-7NKUSQEV.js.map +1 -0
- package/dist/{chunk-ZHFM7HCQ.js → chunk-AXHBJ4GX.js} +3 -3
- package/dist/{chunk-ZHFM7HCQ.js.map → chunk-AXHBJ4GX.js.map} +1 -1
- package/dist/{chunk-ILQXPZCD.js → chunk-AY6DBRS3.js} +37 -21
- package/dist/chunk-AY6DBRS3.js.map +1 -0
- package/dist/{chunk-TERSHTY5.cjs → chunk-BP7VYTOP.cjs} +1116 -21
- package/dist/chunk-BP7VYTOP.cjs.map +1 -0
- package/dist/{chunk-UE2G2LRP.cjs → chunk-CZ4NQANZ.cjs} +37 -21
- package/dist/chunk-CZ4NQANZ.cjs.map +1 -0
- package/dist/{chunk-NCC45KOB.cjs → chunk-DBSVT6AR.cjs} +7 -7
- package/dist/{chunk-NCC45KOB.cjs.map → chunk-DBSVT6AR.cjs.map} +1 -1
- package/dist/{chunk-BXLLXTT4.js → chunk-FLPEGTEK.js} +4 -4
- package/dist/{chunk-BXLLXTT4.js.map → chunk-FLPEGTEK.js.map} +1 -1
- package/dist/{chunk-ON2KVIUJ.cjs → chunk-HYRYTTMT.cjs} +7 -7
- package/dist/{chunk-ON2KVIUJ.cjs.map → chunk-HYRYTTMT.cjs.map} +1 -1
- package/dist/chunk-NJ7TL3LQ.js +1196 -0
- package/dist/chunk-NJ7TL3LQ.js.map +1 -0
- package/dist/{chunk-EUG4AON3.cjs → chunk-NKYWDNCI.cjs} +8 -7
- package/dist/{chunk-EUG4AON3.cjs.map → chunk-NKYWDNCI.cjs.map} +1 -1
- package/dist/{chunk-UHVG25VW.cjs → chunk-NZG2JAKS.cjs} +23 -15
- package/dist/chunk-NZG2JAKS.cjs.map +1 -0
- package/dist/{chunk-44SUGDBR.js → chunk-PS5ONCXY.js} +109 -5
- package/dist/chunk-PS5ONCXY.js.map +1 -0
- package/dist/{chunk-57QAF2ZQ.js → chunk-QTTWRCB5.js} +4 -4
- package/dist/{chunk-57QAF2ZQ.js.map → chunk-QTTWRCB5.js.map} +1 -1
- package/dist/{chunk-VM25PDSW.js → chunk-RZ4CIIZR.js} +4 -4
- package/dist/{chunk-VM25PDSW.js.map → chunk-RZ4CIIZR.js.map} +1 -1
- package/dist/{chunk-C3XU7ZDC.cjs → chunk-SU5APAM6.cjs} +123 -4
- package/dist/chunk-SU5APAM6.cjs.map +1 -0
- package/dist/{chunk-3MJCJLZS.js → chunk-U2HKJZCI.js} +24 -5
- package/dist/{chunk-3MJCJLZS.js.map → chunk-U2HKJZCI.js.map} +1 -1
- package/dist/{chunk-GCTAD6B7.cjs → chunk-VD5YA6RH.cjs} +12 -12
- package/dist/{chunk-GCTAD6B7.cjs.map → chunk-VD5YA6RH.cjs.map} +1 -1
- package/dist/{chunk-KAJNBNWP.cjs → chunk-YNNJLLFN.cjs} +4071 -3984
- package/dist/chunk-YNNJLLFN.cjs.map +1 -0
- package/dist/datasets/dataset.d.ts +153 -0
- package/dist/datasets/dataset.d.ts.map +1 -0
- package/dist/datasets/experiment/analytics/aggregate.d.ts +46 -0
- package/dist/datasets/experiment/analytics/aggregate.d.ts.map +1 -0
- package/dist/datasets/experiment/analytics/compare.d.ts +33 -0
- package/dist/datasets/experiment/analytics/compare.d.ts.map +1 -0
- package/dist/datasets/experiment/analytics/index.d.ts +9 -0
- package/dist/datasets/experiment/analytics/index.d.ts.map +1 -0
- package/dist/datasets/experiment/analytics/types.d.ts +103 -0
- package/dist/datasets/experiment/analytics/types.d.ts.map +1 -0
- package/dist/datasets/experiment/executor.d.ts +40 -0
- package/dist/datasets/experiment/executor.d.ts.map +1 -0
- package/dist/datasets/experiment/index.d.ts +31 -0
- package/dist/datasets/experiment/index.d.ts.map +1 -0
- package/dist/datasets/experiment/scorer.d.ts +21 -0
- package/dist/datasets/experiment/scorer.d.ts.map +1 -0
- package/dist/datasets/experiment/types.d.ts +140 -0
- package/dist/datasets/experiment/types.d.ts.map +1 -0
- package/dist/datasets/index.cjs +69 -0
- package/dist/datasets/index.cjs.map +1 -0
- package/dist/datasets/index.d.ts +6 -0
- package/dist/datasets/index.d.ts.map +1 -0
- package/dist/datasets/index.js +4 -0
- package/dist/datasets/index.js.map +1 -0
- package/dist/datasets/manager.d.ts +73 -0
- package/dist/datasets/manager.d.ts.map +1 -0
- package/dist/datasets/validation/errors.d.ts +44 -0
- package/dist/datasets/validation/errors.d.ts.map +1 -0
- package/dist/datasets/validation/index.d.ts +3 -0
- package/dist/datasets/validation/index.d.ts.map +1 -0
- package/dist/datasets/validation/validator.d.ts +24 -0
- package/dist/datasets/validation/validator.d.ts.map +1 -0
- package/dist/docs/SKILL.md +1 -3
- package/dist/docs/assets/SOURCE_MAP.json +436 -346
- package/dist/docs/references/docs-memory-observational-memory.md +86 -11
- package/dist/docs/references/docs-streaming-events.md +23 -0
- package/dist/docs/references/docs-workspace-filesystem.md +72 -1
- package/dist/docs/references/docs-workspace-overview.md +95 -12
- package/dist/docs/references/docs-workspace-sandbox.md +2 -0
- package/dist/docs/references/guides-agent-frameworks-ai-sdk.md +6 -2
- package/dist/docs/references/reference-ai-sdk-with-mastra.md +1 -1
- package/dist/docs/references/reference-memory-observational-memory.md +318 -9
- package/dist/docs/references/reference-streaming-workflows-stream.md +1 -0
- package/dist/docs/references/reference-workflows-workflow-methods-foreach.md +30 -0
- package/dist/docs/references/reference.md +3 -0
- package/dist/editor/index.d.ts +1 -1
- package/dist/editor/index.d.ts.map +1 -1
- package/dist/editor/types.d.ts +21 -3
- package/dist/editor/types.d.ts.map +1 -1
- package/dist/evals/index.cjs +20 -20
- package/dist/evals/index.js +3 -3
- package/dist/evals/scoreTraces/index.cjs +5 -5
- package/dist/evals/scoreTraces/index.js +2 -2
- package/dist/features/index.cjs +1 -1
- package/dist/features/index.cjs.map +1 -1
- package/dist/features/index.d.ts.map +1 -1
- package/dist/features/index.js +1 -1
- package/dist/features/index.js.map +1 -1
- package/dist/index.cjs +2 -7
- package/dist/index.d.ts +0 -9
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -2
- package/dist/llm/index.cjs +10 -10
- package/dist/llm/index.js +2 -2
- package/dist/llm/model/provider-types.generated.d.ts +1694 -1659
- package/dist/loop/index.cjs +12 -12
- package/dist/loop/index.js +1 -1
- package/dist/loop/network/index.d.ts +1 -1
- package/dist/loop/workflows/agentic-execution/index.d.ts +1 -1
- package/dist/loop/workflows/agentic-execution/llm-execution-step.d.ts.map +1 -1
- package/dist/loop/workflows/agentic-execution/llm-mapping-step.d.ts.map +1 -1
- package/dist/loop/workflows/agentic-execution/tool-call-step.d.ts.map +1 -1
- package/dist/loop/workflows/agentic-loop/index.d.ts +1 -1
- package/dist/loop/workflows/errors.d.ts +4 -0
- package/dist/loop/workflows/errors.d.ts.map +1 -0
- package/dist/mastra/index.cjs +2 -2
- package/dist/mastra/index.d.ts +2 -0
- package/dist/mastra/index.d.ts.map +1 -1
- package/dist/mastra/index.js +1 -1
- package/dist/memory/index.cjs +14 -14
- package/dist/memory/index.js +1 -1
- package/dist/memory/memory.d.ts +5 -0
- package/dist/memory/memory.d.ts.map +1 -1
- package/dist/memory/types.d.ts +56 -0
- package/dist/memory/types.d.ts.map +1 -1
- package/dist/processors/index.cjs +41 -41
- package/dist/processors/index.js +1 -1
- package/dist/processors/processors/skills.d.ts +1 -1
- package/dist/processors/processors/skills.d.ts.map +1 -1
- package/dist/processors/runner.d.ts.map +1 -1
- package/dist/processors/trailing-assistant-guard.d.ts +34 -0
- package/dist/processors/trailing-assistant-guard.d.ts.map +1 -0
- package/dist/provider-registry-6LZAGQET.cjs +40 -0
- package/dist/{provider-registry-C6XCYX44.cjs.map → provider-registry-6LZAGQET.cjs.map} +1 -1
- package/dist/provider-registry-QUNT7S55.js +3 -0
- package/dist/{provider-registry-NWU4YFQW.js.map → provider-registry-QUNT7S55.js.map} +1 -1
- package/dist/provider-registry.json +4068 -3981
- package/dist/relevance/index.cjs +3 -3
- package/dist/relevance/index.js +1 -1
- package/dist/storage/base.d.ts +4 -1
- package/dist/storage/base.d.ts.map +1 -1
- package/dist/storage/constants.cjs +87 -27
- package/dist/storage/constants.d.ts +21 -2
- package/dist/storage/constants.d.ts.map +1 -1
- package/dist/storage/constants.js +1 -1
- package/dist/storage/domains/agents/inmemory.d.ts.map +1 -1
- package/dist/storage/domains/datasets/base.d.ts +83 -0
- package/dist/storage/domains/datasets/base.d.ts.map +1 -0
- package/dist/storage/domains/datasets/index.d.ts +3 -0
- package/dist/storage/domains/datasets/index.d.ts.map +1 -0
- package/dist/storage/domains/datasets/inmemory.d.ts +40 -0
- package/dist/storage/domains/datasets/inmemory.d.ts.map +1 -0
- package/dist/storage/domains/experiments/base.d.ts +28 -0
- package/dist/storage/domains/experiments/base.d.ts.map +1 -0
- package/dist/storage/domains/experiments/index.d.ts +3 -0
- package/dist/storage/domains/experiments/index.d.ts.map +1 -0
- package/dist/storage/domains/experiments/inmemory.d.ts +28 -0
- package/dist/storage/domains/experiments/inmemory.d.ts.map +1 -0
- package/dist/storage/domains/index.d.ts +3 -0
- package/dist/storage/domains/index.d.ts.map +1 -1
- package/dist/storage/domains/inmemory-db.d.ts +9 -1
- package/dist/storage/domains/inmemory-db.d.ts.map +1 -1
- package/dist/storage/domains/mcp-clients/base.d.ts +47 -0
- package/dist/storage/domains/mcp-clients/base.d.ts.map +1 -0
- package/dist/storage/domains/mcp-clients/index.d.ts +3 -0
- package/dist/storage/domains/mcp-clients/index.d.ts.map +1 -0
- package/dist/storage/domains/mcp-clients/inmemory.d.ts +31 -0
- package/dist/storage/domains/mcp-clients/inmemory.d.ts.map +1 -0
- package/dist/storage/domains/operations/inmemory.d.ts.map +1 -1
- package/dist/storage/index.cjs +201 -117
- package/dist/storage/index.js +2 -2
- package/dist/storage/mock.d.ts.map +1 -1
- package/dist/storage/types.d.ts +338 -3
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/stream/base/output.d.ts.map +1 -1
- package/dist/stream/index.cjs +11 -11
- package/dist/stream/index.js +2 -2
- package/dist/stream/types.d.ts +15 -0
- package/dist/stream/types.d.ts.map +1 -1
- package/dist/test-utils/llm-mock.cjs +4 -4
- package/dist/test-utils/llm-mock.js +1 -1
- package/dist/tool-loop-agent/index.cjs +4 -4
- package/dist/tool-loop-agent/index.js +1 -1
- package/dist/tool-provider/index.cjs +4 -0
- package/dist/tool-provider/index.cjs.map +1 -0
- package/dist/tool-provider/index.d.ts +2 -0
- package/dist/tool-provider/index.d.ts.map +1 -0
- package/dist/tool-provider/index.js +3 -0
- package/dist/tool-provider/index.js.map +1 -0
- package/dist/tool-provider/types.d.ts +113 -0
- package/dist/tool-provider/types.d.ts.map +1 -0
- package/dist/utils.cjs +23 -23
- package/dist/utils.d.ts +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +1 -1
- package/dist/vector/index.cjs +7 -7
- package/dist/vector/index.js +1 -1
- package/dist/workflows/evented/index.cjs +10 -10
- package/dist/workflows/evented/index.js +1 -1
- package/dist/workflows/evented/workflow-event-processor/index.d.ts.map +1 -1
- package/dist/workflows/handlers/control-flow.d.ts.map +1 -1
- package/dist/workflows/index.cjs +25 -25
- package/dist/workflows/index.js +1 -1
- package/dist/workflows/workflow.d.ts +2 -2
- package/dist/workflows/workflow.d.ts.map +1 -1
- package/dist/workspace/filesystem/filesystem.d.ts +5 -15
- package/dist/workspace/filesystem/filesystem.d.ts.map +1 -1
- package/dist/workspace/filesystem/local-filesystem.d.ts +18 -2
- package/dist/workspace/filesystem/local-filesystem.d.ts.map +1 -1
- package/dist/workspace/filesystem/mastra-filesystem.d.ts +21 -1
- package/dist/workspace/filesystem/mastra-filesystem.d.ts.map +1 -1
- package/dist/workspace/index.cjs +41 -41
- package/dist/workspace/index.d.ts +1 -1
- package/dist/workspace/index.d.ts.map +1 -1
- package/dist/workspace/index.js +1 -1
- package/dist/workspace/lifecycle.d.ts +49 -53
- package/dist/workspace/lifecycle.d.ts.map +1 -1
- package/dist/workspace/sandbox/local-sandbox.d.ts +1 -1
- package/dist/workspace/sandbox/local-sandbox.d.ts.map +1 -1
- package/dist/workspace/sandbox/mastra-sandbox.d.ts +3 -2
- package/dist/workspace/sandbox/mastra-sandbox.d.ts.map +1 -1
- package/dist/workspace/sandbox/sandbox.d.ts +3 -4
- package/dist/workspace/sandbox/sandbox.d.ts.map +1 -1
- package/dist/workspace/workspace.d.ts +2 -15
- package/dist/workspace/workspace.d.ts.map +1 -1
- package/package.json +7 -7
- package/src/llm/model/provider-types.generated.d.ts +1694 -1659
- package/tool-provider.d.ts +1 -0
- package/dist/chunk-2GWTJFVM.js.map +0 -1
- package/dist/chunk-44SUGDBR.js.map +0 -1
- package/dist/chunk-C3XU7ZDC.cjs.map +0 -1
- package/dist/chunk-CGPH7CMG.cjs.map +0 -1
- package/dist/chunk-ILQXPZCD.js.map +0 -1
- package/dist/chunk-JNE2ABVB.js.map +0 -1
- package/dist/chunk-KAJNBNWP.cjs.map +0 -1
- package/dist/chunk-PHYJYZ32.js.map +0 -1
- package/dist/chunk-SIZEIYNH.js.map +0 -1
- package/dist/chunk-TERSHTY5.cjs.map +0 -1
- package/dist/chunk-UE2G2LRP.cjs.map +0 -1
- package/dist/chunk-UHVG25VW.cjs.map +0 -1
- package/dist/chunk-WL3AW3YA.js.map +0 -1
- package/dist/chunk-YNXIGRQE.cjs.map +0 -1
- package/dist/docs/references/reference-cli-mastra.md +0 -336
- package/dist/docs/references/reference-deployer-cloudflare.md +0 -56
- package/dist/provider-registry-C6XCYX44.cjs +0 -40
- package/dist/provider-registry-NWU4YFQW.js +0 -3
|
@@ -0,0 +1,1207 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var chunkNKYWDNCI_cjs = require('./chunk-NKYWDNCI.cjs');
|
|
4
|
+
var chunk4IJ4UDZX_cjs = require('./chunk-4IJ4UDZX.cjs');
|
|
5
|
+
var chunk4U7ZLI36_cjs = require('./chunk-4U7ZLI36.cjs');
|
|
6
|
+
var schemaCompat = require('@mastra/schema-compat');
|
|
7
|
+
var zodToJson = require('@mastra/schema-compat/zod-to-json');
|
|
8
|
+
|
|
9
|
+
// src/datasets/experiment/executor.ts
|
|
10
|
+
/**
 * Runs a scorer directly on a dataset item's input and wraps the outcome in
 * the `{ output, error, traceId }` result shape.
 *
 * Only finite-or-infinite, non-NaN numbers are accepted as a score; anything
 * else becomes `null`, and a warning is logged unless the scorer simply left
 * the score undefined. A non-string reason is reported as `null`.
 *
 * @param {object} scorer - Exposes `run(input)`; `scorer.id` is used in the warning text.
 * @param {object} item - Dataset item; only `item.input` is read.
 * @returns {Promise<object>} Resolves with the result object; anything thrown
 *   while running or inspecting the scorer is reported through `error`.
 */
async function executeScorer(scorer, item) {
  try {
    const outcome = await scorer.run(item.input);
    const rawScore = outcome.score;
    const isUsable = typeof rawScore === "number" && !Number.isNaN(rawScore);
    if (!isUsable && rawScore !== undefined) {
      console.warn(`Scorer ${scorer.id} returned invalid score: ${rawScore}`);
    }
    const rawReason = outcome.reason;
    return {
      output: {
        score: isUsable ? rawScore : null,
        reason: typeof rawReason === "string" ? rawReason : null
      },
      error: null,
      // Scorers don't produce traces
      traceId: null
    };
  } catch (failure) {
    const isError = failure instanceof Error;
    return {
      output: null,
      error: {
        message: isError ? failure.message : String(failure),
        stack: isError ? failure.stack : undefined
      },
      traceId: null
    };
  }
}
|
|
37
|
+
/**
 * Dispatches a dataset item to the executor matching `targetType` and always
 * resolves with an `{ output, error, traceId }` style result.
 *
 * Supported types are "agent", "workflow" and "scorer". "processor" and any
 * unknown type are reported as errors in the returned object rather than
 * thrown. When `options.signal` is provided, an already-aborted signal fails
 * immediately and a later abort is raced against the execution.
 *
 * @param {object} target - The agent, workflow or scorer to execute.
 * @param {string} targetType - Selects which executor is used.
 * @param {object} item - Dataset item forwarded to the executor.
 * @param {object} [options] - Optional settings; only `options.signal` is read.
 * @returns {Promise<object>} Execution result; failures are reported via `error`.
 */
async function executeTarget(target, targetType, item, options) {
  try {
    const abortSignal = options?.signal;
    if (abortSignal?.aborted) {
      throw abortSignal.reason ?? new DOMException("The operation was aborted.", "AbortError");
    }

    let pending;
    if (targetType === "agent") {
      // Only the agent executor receives the signal directly.
      pending = executeAgent(target, item, abortSignal);
    } else if (targetType === "workflow") {
      pending = executeWorkflow(target, item);
    } else if (targetType === "scorer") {
      pending = executeScorer(target, item);
    } else if (targetType === "processor") {
      throw new Error(`Target type '${targetType}' not yet supported.`);
    } else {
      throw new Error(`Unknown target type: ${targetType}`);
    }

    return await (abortSignal ? raceWithSignal(pending, abortSignal) : pending);
  } catch (failure) {
    const isError = failure instanceof Error;
    return {
      output: null,
      error: {
        message: isError ? failure.message : String(failure),
        stack: isError ? failure.stack : undefined
      },
      traceId: null
    };
  }
}
|
|
74
|
+
/**
 * Settles with `promise` unless `signal` aborts first, in which case the
 * returned promise rejects with the signal's reason (or an "AbortError"
 * DOMException when no reason is set).
 *
 * The abort listener is removed as soon as `promise` settles.
 *
 * @param {Promise<*>} promise - The work being raced.
 * @param {AbortSignal} signal - Abort source.
 * @returns {Promise<*>} Mirrors `promise`, or rejects on abort.
 */
function raceWithSignal(promise, signal) {
  const abortReason = () => signal.reason ?? new DOMException("The operation was aborted.", "AbortError");

  if (signal.aborted) {
    return Promise.reject(abortReason());
  }

  return new Promise((resolve, reject) => {
    const handleAbort = () => {
      reject(abortReason());
    };
    const detach = () => {
      signal.removeEventListener("abort", handleAbort);
    };

    signal.addEventListener("abort", handleAbort, { once: true });
    promise.then(
      (value) => {
        detach();
        resolve(value);
      },
      (reason) => {
        detach();
        reject(reason);
      }
    );
  });
}
|
|
95
|
+
/**
 * Generates a response from an agent for one dataset item.
 *
 * The agent's model decides the call path: models accepted by
 * `isSupportedLanguageModel` go through `agent.generate` (which also receives
 * the abort signal); all others go through `agent.generateLegacy`, if present.
 * Both calls pass an empty `scorers` map and `returnScorerData: true`.
 *
 * @param {object} agent - Exposes `getModel()`, `generate()` and optionally `generateLegacy()`.
 * @param {object} item - Dataset item; only `item.input` is read.
 * @param {AbortSignal} [signal] - Forwarded as `abortSignal` on the `generate` path only.
 * @returns {Promise<object>} `{ output, error: null, traceId, scorerInput, scorerOutput }`.
 * @throws {Error} When the chosen call yields `null`/`undefined` (e.g. no `generateLegacy`).
 */
async function executeAgent(agent, item, signal) {
  const model = await agent.getModel();

  let response;
  if (chunk4IJ4UDZX_cjs.isSupportedLanguageModel(model)) {
    response = await agent.generate(item.input, {
      scorers: {},
      returnScorerData: true,
      abortSignal: signal
    });
  } else {
    response = await agent.generateLegacy?.(item.input, {
      scorers: {},
      returnScorerData: true
    });
  }

  if (response == null) {
    throw new Error(`Agent "${agent.name}" does not support generateLegacy for this model type`);
  }

  const traceId = response.traceId ?? null;
  const scoringData = response.scoringData;
  return {
    output: response,
    error: null,
    traceId,
    scorerInput: scoringData?.input,
    scorerOutput: scoringData?.output
  };
}
|
|
118
|
+
/**
 * Runs a workflow once with the dataset item's input and maps the run's final
 * status onto an `{ output, error, traceId }` result.
 *
 * Only "success" yields an output. "failed", "tripwire", "suspended", "paused"
 * and any other status are reported through `error.message`. The run is
 * created with `disableScorers: true`.
 *
 * @param {object} workflow - Exposes `createRun(options)`.
 * @param {object} item - Dataset item; `item.input` becomes the run's `inputData`.
 * @returns {Promise<object>} Result object carrying the run's `traceId` (or `null`).
 */
async function executeWorkflow(workflow, item) {
  const run = await workflow.createRun({ disableScorers: true });
  const result = await run.start({ inputData: item.input });
  const traceId = result?.traceId ?? null;
  const failWith = (error) => ({ output: null, error, traceId });

  switch (result.status) {
    case "success":
      return { output: result.result, error: null, traceId };
    case "failed":
      return failWith({ message: result.error?.message ?? "Workflow failed", stack: result.error?.stack });
    case "tripwire":
      return failWith({ message: `Workflow tripwire: ${result.tripwire?.reason ?? "Unknown reason"}` });
    case "suspended":
      return failWith({ message: "Workflow suspended - not yet supported in dataset experiments" });
    case "paused":
      return failWith({ message: "Workflow paused - not yet supported in dataset experiments" });
    default:
      // Statuses not handled above are still surfaced to the caller.
      return failWith({ message: `Workflow ended with unexpected status: ${result.status}` });
  }
}
|
|
158
|
+
|
|
159
|
+
// src/datasets/experiment/scorer.ts
|
|
160
|
+
/**
 * Normalizes a mixed list of scorer ids and scorer instances into scorer
 * instances.
 *
 * String entries are looked up with `mastra.getScorerById`; ids that cannot be
 * resolved are dropped with a console warning. Non-string entries are passed
 * through unchanged, except `null`, which is dropped.
 *
 * @param {object} mastra - Exposes `getScorerById(id)`.
 * @param {Array<string|object>} [scorers] - Ids and/or scorer instances.
 * @returns {Array<object>} Resolved scorers in their original order.
 */
function resolveScorers(mastra, scorers) {
  if (!scorers || scorers.length === 0) return [];

  return scorers.flatMap((entry) => {
    if (typeof entry !== "string") {
      return entry === null ? [] : [entry];
    }
    const found = mastra.getScorerById(entry);
    if (found) {
      return [found];
    }
    console.warn(`Scorer not found: ${entry}`);
    return [];
  });
}
|
|
174
|
+
/**
 * Runs every scorer against one item's output concurrently and, when storage
 * is available, persists each non-null score.
 *
 * Persistence is best-effort: a failure while saving is logged with
 * `console.warn` and does not affect the returned score. A scorer whose
 * processing rejects is reported as an entry with `score: null` and the
 * stringified rejection reason in `error`.
 *
 * @param {Array<object>} scorers - Scorers exposing `id`, `name` and optionally `description`.
 * @param {object} item - Dataset item; `input` and `metadata` are stored with the score.
 * @param {*} output - Target output being scored.
 * @param {object|null|undefined} storage - Passed to `validateAndSaveScore`; falsy disables saving.
 * @param {string} runId - Stored as the score's `runId`.
 * @param {string} targetType - Upper-cased into the stored `entityType`.
 * @param {string} targetId - Stored as the entity's id and name.
 * @param {string} itemId - Stored as the score's `entityId`.
 * @param {*} [scorerInput] - Forwarded to `runScorerSafe`.
 * @param {*} [scorerOutput] - Forwarded to `runScorerSafe`.
 * @returns {Promise<Array<object>>} One result per scorer, in scorer order.
 */
async function runScorersForItem(scorers, item, output, storage, runId, targetType, targetId, itemId, scorerInput, scorerOutput) {
  if (scorers.length === 0) return [];

  const scoreAndPersist = async (scorer) => {
    const outcome = await runScorerSafe(scorer, item, output, scorerInput, scorerOutput);
    if (!storage || outcome.score === null) {
      return outcome;
    }
    try {
      await chunkNKYWDNCI_cjs.validateAndSaveScore(storage, {
        scorerId: scorer.id,
        score: outcome.score,
        reason: outcome.reason ?? undefined,
        input: item.input,
        output,
        additionalContext: item.metadata,
        entityType: targetType.toUpperCase(),
        entityId: itemId,
        source: "TEST",
        runId,
        scorer: {
          id: scorer.id,
          name: scorer.name,
          description: scorer.description ?? ""
        },
        entity: {
          id: targetId,
          name: targetId
        }
      });
    } catch (saveError) {
      console.warn(`Failed to save score for scorer ${scorer.id}:`, saveError);
    }
    return outcome;
  };

  const settled = await Promise.allSettled(scorers.map((scorer) => scoreAndPersist(scorer)));

  return settled.map((entry, index) => {
    if (entry.status === "fulfilled") {
      return entry.value;
    }
    return {
      scorerId: scorers[index].id,
      scorerName: scorers[index].name,
      score: null,
      reason: null,
      error: String(entry.reason)
    };
  });
}
|
|
213
|
+
/**
 * Runs a single scorer and converts the outcome, including any thrown error,
 * into a flat result record. This function never rejects.
 *
 * The scorer receives `scorerInput`/`scorerOutput` when provided, falling back
 * to `item.input` and `output`, plus the item's `groundTruth`.
 *
 * A score is only accepted when it is a number other than `NaN`; everything
 * else is reported as `null`. Rejecting `NaN` keeps this path consistent with
 * `executeScorer` and prevents a `NaN` from propagating into averages computed
 * over the collected scores.
 *
 * @param {object} scorer - Exposes `run(payload)`, `id` and `name`.
 * @param {object} item - Dataset item; `input` and `groundTruth` are read.
 * @param {*} output - Target output used when `scorerOutput` is nullish.
 * @param {*} [scorerInput] - Overrides `item.input` when not nullish.
 * @param {*} [scorerOutput] - Overrides `output` when not nullish.
 * @returns {Promise<object>} `{ scorerId, scorerName, score, reason, error }`.
 */
async function runScorerSafe(scorer, item, output, scorerInput, scorerOutput) {
  try {
    const scoreResult = await scorer.run({
      input: scorerInput ?? item.input,
      output: scorerOutput ?? output,
      groundTruth: item.groundTruth
    });
    const score = scoreResult.score;
    const reason = scoreResult.reason;
    // typeof NaN is "number", so it must be excluded explicitly.
    const hasValidScore = typeof score === "number" && !Number.isNaN(score);
    return {
      scorerId: scorer.id,
      scorerName: scorer.name,
      score: hasValidScore ? score : null,
      reason: typeof reason === "string" ? reason : null,
      error: null
    };
  } catch (error) {
    return {
      scorerId: scorer.id,
      scorerName: scorer.name,
      score: null,
      reason: null,
      error: error instanceof Error ? error.message : String(error)
    };
  }
}
|
|
239
|
+
|
|
240
|
+
// src/datasets/experiment/analytics/aggregate.ts
|
|
241
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function computeMean(values) {
  const count = values.length;
  return count === 0 ? 0 : values.reduce((running, next) => running + next, 0) / count;
}
|
|
248
|
+
/**
 * Summarizes one scorer's score records.
 *
 * Records whose `score` is `null` or `undefined` count as errors. Pass rate
 * and average are computed over the remaining scores only, while the error
 * rate is relative to all records.
 *
 * @param {Array<{score: (number|null|undefined)}>} scores - Score records.
 * @param {number} [passThreshold=0.5] - Minimum score (inclusive) counted as a pass.
 * @returns {{errorRate: number, errorCount: number, passRate: number,
 *   passCount: number, avgScore: number, scoreCount: number, totalItems: number}}
 *   All zeros when `scores` is empty.
 */
function computeScorerStats(scores, passThreshold = 0.5) {
  const totalItems = scores.length;
  if (totalItems === 0) {
    return {
      errorRate: 0,
      errorCount: 0,
      passRate: 0,
      passCount: 0,
      avgScore: 0,
      scoreCount: 0,
      totalItems: 0
    };
  }

  const usable = [];
  let passCount = 0;
  for (const entry of scores) {
    const value = entry.score;
    if (value === null || value === undefined) {
      continue;
    }
    usable.push(value);
    if (value >= passThreshold) {
      passCount += 1;
    }
  }

  const scoreCount = usable.length;
  // Every record that was skipped above is an error record.
  const errorCount = totalItems - scoreCount;
  return {
    errorRate: errorCount / totalItems,
    errorCount,
    passRate: scoreCount > 0 ? passCount / scoreCount : 0,
    passCount,
    avgScore: computeMean(usable),
    scoreCount,
    totalItems
  };
}
|
|
285
|
+
/**
 * Decides whether a score delta counts as a regression.
 *
 * For "higher-is-better" a regression is a drop of more than `threshold`;
 * for any other direction it is a rise of more than `threshold`.
 *
 * @param {number} delta - Score difference being judged.
 * @param {number} threshold - Tolerated change before a regression is flagged.
 * @param {string} [direction="higher-is-better"] - Which direction is an improvement.
 * @returns {boolean} True when the delta exceeds the tolerated change in the bad direction.
 */
function isRegression(delta, threshold, direction = "higher-is-better") {
  return direction === "higher-is-better" ? delta < -threshold : delta > threshold;
}
|
|
292
|
+
|
|
293
|
+
// src/datasets/experiment/analytics/compare.ts
|
|
294
|
+
// Pass threshold handed to computeScorerStats when comparing experiments.
var DEFAULT_PASS_THRESHOLD = 0.5;

// Regression threshold used for scorers that have no entry in the caller's
// `thresholds` config: zero tolerance, with higher scores treated as better.
var DEFAULT_THRESHOLD = {
  value: 0,
  direction: "higher-is-better"
};
|
|
299
|
+
/**
 * Compares two stored experiments scorer-by-scorer and item-by-item.
 *
 * Loads both experiments, their results and their scores from the Mastra
 * storage, computes per-scorer statistics for each side, and flags a
 * regression when the change in average score (B minus A) exceeds the
 * scorer's threshold in the unfavourable direction. Non-fatal observations
 * (dataset version mismatch, empty results, no overlapping items) are
 * collected in `warnings`.
 *
 * @param {object} mastra - Exposes `getStorage()`.
 * @param {object} config - `{ experimentIdA, experimentIdB, thresholds? }`, where
 *   `thresholds` maps scorer id to `{ value, direction? }`.
 * @returns {Promise<object>} `{ experimentA, experimentB, versionMismatch,
 *   hasRegression, scorers, items, warnings }`.
 * @throws {Error} When storage or a required store is missing, or an
 *   experiment id cannot be found.
 */
async function compareExperiments(mastra, config) {
  const { experimentIdA, experimentIdB, thresholds = {} } = config;
  const warnings = [];

  const storage = mastra.getStorage();
  if (!storage) {
    throw new Error("Storage not configured. Configure storage in Mastra instance.");
  }
  const experimentsStore = await storage.getStore("experiments");
  const scoresStore = await storage.getStore("scores");
  if (!experimentsStore) {
    throw new Error("ExperimentsStorage not configured.");
  }
  if (!scoresStore) {
    throw new Error("ScoresStorage not configured.");
  }

  const experimentIds = [experimentIdA, experimentIdB];
  const [experimentA, experimentB] = await Promise.all(
    experimentIds.map((id) => experimentsStore.getExperimentById({ id }))
  );
  if (!experimentA) {
    throw new Error(`Experiment not found: ${experimentIdA}`);
  }
  if (!experimentB) {
    throw new Error(`Experiment not found: ${experimentIdB}`);
  }

  const versionMismatch = experimentA.datasetVersion !== experimentB.datasetVersion;
  if (versionMismatch) {
    warnings.push(
      `Experiments have different dataset versions: ${experimentA.datasetVersion} vs ${experimentB.datasetVersion}`
    );
  }

  // `perPage: false` is passed to both listing calls; a fresh pagination
  // object is created per call, as the stores are not known to leave it alone.
  const [resultsA, resultsB] = await Promise.all(
    experimentIds.map(
      (experimentId) => experimentsStore.listExperimentResults({ experimentId, pagination: { page: 0, perPage: false } })
    )
  );
  const [scoresA, scoresB] = await Promise.all(
    experimentIds.map((runId) => scoresStore.listScoresByRunId({ runId, pagination: { page: 0, perPage: false } }))
  );

  const isEmptyA = resultsA.results.length === 0;
  const isEmptyB = resultsB.results.length === 0;
  if (isEmptyA && isEmptyB) {
    warnings.push("Both experiments have no results.");
    return buildEmptyResult(experimentA, experimentB, versionMismatch, warnings);
  }
  if (isEmptyA) {
    warnings.push("Experiment A has no results.");
  }
  if (isEmptyB) {
    warnings.push("Experiment B has no results.");
  }

  const itemIdsA = new Set(resultsA.results.map((entry) => entry.itemId));
  const itemIdsB = new Set(resultsB.results.map((entry) => entry.itemId));
  const hasOverlap = [...itemIdsA].some((id) => itemIdsB.has(id));
  if (!hasOverlap) {
    warnings.push("No overlapping items between experiments.");
  }

  const scoresMapA = groupScoresByScorerAndItem(scoresA.scores);
  const scoresMapB = groupScoresByScorerAndItem(scoresB.scores);
  const allScorerIds = new Set([...Object.keys(scoresMapA), ...Object.keys(scoresMapB)]);

  // Per-scorer comparison: stats for each side plus the regression verdict.
  const scorers = {};
  let hasRegression = false;
  for (const scorerId of allScorerIds) {
    const thresholdConfig = thresholds[scorerId] ?? DEFAULT_THRESHOLD;
    const threshold = thresholdConfig.value;
    const direction = thresholdConfig.direction ?? "higher-is-better";

    const statsA = computeScorerStats(Object.values(scoresMapA[scorerId] ?? {}), DEFAULT_PASS_THRESHOLD);
    const statsB = computeScorerStats(Object.values(scoresMapB[scorerId] ?? {}), DEFAULT_PASS_THRESHOLD);
    const delta = statsB.avgScore - statsA.avgScore;
    const regressed = isRegression(delta, threshold, direction);
    if (regressed) {
      hasRegression = true;
    }
    scorers[scorerId] = { statsA, statsB, delta, regressed, threshold };
  }

  // Per-item comparison: every item seen on either side, with a null score
  // wherever a scorer has no record for that item.
  const items = [...new Set([...itemIdsA, ...itemIdsB])].map((itemId) => {
    const itemScoresA = {};
    const itemScoresB = {};
    for (const scorerId of allScorerIds) {
      itemScoresA[scorerId] = scoresMapA[scorerId]?.[itemId]?.score ?? null;
      itemScoresB[scorerId] = scoresMapB[scorerId]?.[itemId]?.score ?? null;
    }
    return {
      itemId,
      inBothExperiments: itemIdsA.has(itemId) && itemIdsB.has(itemId),
      scoresA: itemScoresA,
      scoresB: itemScoresB
    };
  });

  return {
    experimentA: {
      id: experimentA.id,
      datasetVersion: experimentA.datasetVersion
    },
    experimentB: {
      id: experimentB.id,
      datasetVersion: experimentB.datasetVersion
    },
    versionMismatch,
    hasRegression,
    scorers,
    items,
    warnings
  };
}
|
|
417
|
+
/**
 * Indexes score records first by scorer id and then by item id
 * (`score.entityId`). When several records share the same scorer and item,
 * the last one in `scores` wins.
 *
 * Both levels of the returned index are null-prototype objects. Scorer and
 * item ids are dynamic, so with ordinary objects an id such as "constructor"
 * or "__proto__" would resolve to an inherited member instead of a fresh
 * bucket, and the record would be written onto a built-in. Null-prototype
 * maps have no inherited members; `Object.keys`, `Object.values` and bracket
 * access work on them as usual.
 *
 * @param {Array<object>} scores - Records exposing `scorerId` and `entityId`.
 * @returns {Object<string, Object<string, object>>} `index[scorerId][itemId]` is the score record.
 */
function groupScoresByScorerAndItem(scores) {
  const index = Object.create(null);
  for (const score of scores) {
    const scorerId = score.scorerId;
    const itemId = score.entityId;
    let byItem = index[scorerId];
    if (byItem === undefined) {
      byItem = Object.create(null);
      index[scorerId] = byItem;
    }
    byItem[itemId] = score;
  }
  return index;
}
|
|
429
|
+
/**
 * Build a comparison result that carries no scorer or item data.
 *
 * Only the `id` and `datasetVersion` of each experiment are copied into the
 * result; `hasRegression` is always `false` and `scorers` / `items` are empty.
 *
 * @param experimentA - First experiment record (read: `id`, `datasetVersion`).
 * @param experimentB - Second experiment record (read: `id`, `datasetVersion`).
 * @param versionMismatch - Passed through unchanged.
 * @param warnings - Passed through unchanged (same reference).
 * @returns The empty comparison result object.
 */
function buildEmptyResult(experimentA, experimentB, versionMismatch, warnings) {
  const summarize = (experiment) => ({
    id: experiment.id,
    datasetVersion: experiment.datasetVersion
  });
  const summaryA = summarize(experimentA);
  const summaryB = summarize(experimentB);
  return {
    experimentA: summaryA,
    experimentB: summaryB,
    versionMismatch,
    hasRegression: false,
    scorers: {},
    items: [],
    warnings
  };
}
|
|
446
|
+
|
|
447
|
+
// src/datasets/experiment/index.ts
|
|
448
|
+
/**
 * Run every item of a data source through a task or a registered target,
 * score each output, and return an aggregate summary.
 *
 * Data source: `config.data` (an array, or a function returning one) is used
 * when set; otherwise the items of the stored dataset `config.datasetId` are
 * loaded at `config.version`, falling back to the dataset record's `version`.
 *
 * Execution: `config.task` is used when set; otherwise the target resolved from
 * `config.targetType` + `config.targetId`.
 *
 * Persistence: when an "experiments" store is available, the experiment record
 * is created (unless `config.experimentId` was supplied), marked "running",
 * receives one result row per item, throttled progress counters, and a final
 * status update.
 *
 * Failure handling: anything thrown out of the item loop (an aborted `signal`,
 * a failure to load `p-map`, an error escaping scoring, ...) ends the run with
 * status "failed"; the thrown error itself is not surfaced to the caller.
 *
 * @param mastra - Object exposing `getStorage()`; also handed to `config.task`
 *   and to the target/scorer resolvers.
 * @param config - Options read here: `datasetId`, `data`, `version`, `task`,
 *   `targetType`, `targetId`, `scorers`, `maxConcurrency` (default 5), `signal`,
 *   `itemTimeout`, `maxRetries` (default 0), `experimentId`, `name`,
 *   `description`, `metadata`.
 * @returns Summary with `experimentId`, `status`, the item counters,
 *   `completedWithErrors`, `startedAt`, `completedAt` and per-item `results`.
 * @throws {Error} When no data source or task is configured, the datasets
 *   store / dataset / dataset items / target cannot be found.
 */
async function runExperiment(mastra, config) {
  const {
    datasetId,
    targetType,
    targetId,
    scorers: scorerInput,
    version,
    maxConcurrency = 5,
    signal,
    itemTimeout,
    maxRetries = 0,
    experimentId: providedExperimentId,
    name,
    description,
    metadata
  } = config;
  const startedAt = new Date();
  const experimentId = providedExperimentId ?? crypto.randomUUID();
  const storage = mastra.getStorage();
  const datasetsStore = await storage?.getStore("datasets");
  const experimentsStore = await storage?.getStore("experiments");

  // ---- Resolve the items to run ------------------------------------------
  if (!config.data && !datasetId) {
    throw new Error("No data source: provide datasetId or data");
  }
  let items;
  let datasetVersion = null;
  if (config.data) {
    const rawData = typeof config.data === "function" ? await config.data() : config.data;
    // Inline items are not tied to a dataset version; missing IDs are generated.
    items = rawData.map((dataItem) => ({
      id: dataItem.id ?? crypto.randomUUID(),
      datasetVersion: null,
      input: dataItem.input,
      groundTruth: dataItem.groundTruth,
      metadata: dataItem.metadata
    }));
  } else {
    if (!datasetsStore) {
      throw new Error("DatasetsStorage not configured. Configure storage in Mastra instance.");
    }
    const dataset = await datasetsStore.getDatasetById({ id: datasetId });
    if (!dataset) {
      throw new Error(`Dataset not found: ${datasetId}`);
    }
    datasetVersion = version ?? dataset.version;
    const storedItems = await datasetsStore.getItemsByVersion({
      datasetId,
      version: datasetVersion
    });
    if (storedItems.length === 0) {
      throw new Error(`No items in dataset ${datasetId} at version ${datasetVersion}`);
    }
    items = storedItems.map((stored) => ({
      id: stored.id,
      datasetVersion: stored.datasetVersion,
      input: stored.input,
      groundTruth: stored.groundTruth,
      metadata: stored.metadata
    }));
  }

  // ---- Resolve how a single item is executed -----------------------------
  let execFn;
  if (config.task) {
    const taskFn = config.task;
    // Inline tasks never throw out of execFn: failures become an `error` payload.
    execFn = async (item, itemSignal) => {
      try {
        const output = await taskFn({
          input: item.input,
          mastra,
          groundTruth: item.groundTruth,
          metadata: item.metadata,
          signal: itemSignal
        });
        return { output, error: null, traceId: null };
      } catch (err) {
        const isError = err instanceof Error;
        return {
          output: null,
          error: {
            message: isError ? err.message : String(err),
            stack: isError ? err.stack : void 0
          },
          traceId: null
        };
      }
    };
  } else if (targetType && targetId) {
    const target = resolveTarget(mastra, targetType, targetId);
    if (!target) {
      throw new Error(`Target not found: ${targetType}/${targetId}`);
    }
    execFn = (item, itemSignal) => executeTarget(target, targetType, item, { signal: itemSignal });
  } else {
    throw new Error("No task: provide targetType+targetId or task");
  }

  const scorers = resolveScorers(mastra, scorerInput);

  // Inline tasks have no registered target; these placeholders are recorded instead.
  const recordedTargetType = targetType ?? "agent";
  const recordedTargetId = targetId ?? "inline";

  // ---- Register the run ---------------------------------------------------
  if (experimentsStore) {
    // A caller-supplied experimentId means the record already exists.
    if (!providedExperimentId) {
      await experimentsStore.createExperiment({
        id: experimentId,
        name,
        description,
        metadata,
        datasetId: datasetId ?? null,
        datasetVersion,
        targetType: recordedTargetType,
        targetId: recordedTargetId,
        totalItems: items.length
      });
    }
    await experimentsStore.updateExperiment({
      id: experimentId,
      status: "running",
      startedAt
    });
  }

  // ---- Shared run state ---------------------------------------------------
  const PROGRESS_UPDATE_INTERVAL = 2000;
  const results = new Array(items.length);
  let succeededCount = 0;
  let failedCount = 0;
  let lastProgressUpdate = 0;

  const throwIfAborted = () => {
    if (signal?.aborted) {
      throw new DOMException("Aborted", "AbortError");
    }
  };

  // Retry only real failures: stop when the budget is used up or when the
  // error message mentions "abort" (case-insensitive).
  const shouldRetry = (execResult, attemptsSoFar) =>
    execResult.error &&
    attemptsSoFar < maxRetries &&
    !execResult.error.message.toLowerCase().includes("abort");

  // Persist the final status/counters and build the summary returned to the caller.
  const finalize = async (completedAt, status, completedWithErrors, collectResults) => {
    const skippedCount = items.length - succeededCount - failedCount;
    if (experimentsStore) {
      await experimentsStore.updateExperiment({
        id: experimentId,
        status,
        succeededCount,
        failedCount,
        skippedCount,
        completedAt
      });
    }
    return {
      experimentId,
      status,
      totalItems: items.length,
      succeededCount,
      failedCount,
      skippedCount,
      completedWithErrors,
      startedAt,
      completedAt,
      results: collectResults()
    };
  };

  // Execute, score and persist one item; writes its outcome into results[idx].
  const processItem = async ({ item, idx }) => {
    throwIfAborted();
    const itemStartedAt = new Date();

    // The timeout signal is created once per item, so it spans every attempt
    // (including the backoff waits between retries).
    const timeoutSignal = itemTimeout ? AbortSignal.timeout(itemTimeout) : null;
    let itemSignal = signal;
    if (timeoutSignal) {
      itemSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
    }

    let retryCount = 0;
    let execResult = await execFn(item, itemSignal);
    while (shouldRetry(execResult, retryCount)) {
      retryCount += 1;
      // Exponential backoff: 1s, 2s, 4s, ... capped at 30s, plus up to 20% jitter.
      const backoffMs = Math.min(1000 * 2 ** (retryCount - 1), 30000);
      const jitterMs = backoffMs * 0.2 * Math.random();
      await new Promise((resolve) => setTimeout(resolve, backoffMs + jitterMs));
      throwIfAborted();
      execResult = await execFn(item, itemSignal);
    }
    const itemCompletedAt = new Date();

    if (execResult.error) {
      failedCount += 1;
    } else {
      succeededCount += 1;
    }

    const itemResult = {
      itemId: item.id,
      itemVersion: item.datasetVersion ?? 0,
      input: item.input,
      output: execResult.output,
      groundTruth: item.groundTruth ?? null,
      error: execResult.error,
      startedAt: itemStartedAt,
      completedAt: itemCompletedAt,
      retryCount
    };

    const itemScores = await runScorersForItem(
      scorers,
      item,
      execResult.output,
      storage ?? null,
      experimentId,
      recordedTargetType,
      recordedTargetId,
      item.id,
      execResult.scorerInput,
      execResult.scorerOutput
    );

    if (experimentsStore) {
      // Persistence is best-effort: a failed write is logged, the run goes on.
      try {
        await experimentsStore.addExperimentResult({
          experimentId,
          itemId: item.id,
          itemDatasetVersion: item.datasetVersion,
          input: item.input,
          output: execResult.output,
          groundTruth: item.groundTruth ?? null,
          error: execResult.error,
          startedAt: itemStartedAt,
          completedAt: itemCompletedAt,
          retryCount,
          traceId: execResult.traceId
        });
      } catch (persistError) {
        console.warn(`Failed to persist result for item ${item.id}:`, persistError);
      }

      // Throttle counter updates to one per PROGRESS_UPDATE_INTERVAL milliseconds.
      const now = Date.now();
      if (now - lastProgressUpdate >= PROGRESS_UPDATE_INTERVAL) {
        lastProgressUpdate = now;
        try {
          await experimentsStore.updateExperiment({
            id: experimentId,
            succeededCount,
            failedCount
          });
        } catch {
          // Progress updates are advisory; the final update carries the real totals.
        }
      }
    }

    results[idx] = { ...itemResult, scores: itemScores };
  };

  // ---- Run all items with bounded concurrency ----------------------------
  try {
    const pMap = (await import('p-map')).default;
    await pMap(
      items.map((item, idx) => ({ item, idx })),
      processItem,
      { concurrency: maxConcurrency }
    );
  } catch {
    // Items that never finished leave holes in `results`; drop them.
    return finalize(new Date(), "failed", false, () => results.filter(Boolean));
  }

  const completedAt = new Date();
  // The run only counts as "failed" when every single item failed.
  const status = failedCount === items.length ? "failed" : "completed";
  const completedWithErrors = status === "completed" && failedCount > 0;
  return finalize(completedAt, status, completedWithErrors, () => results);
}
|
|
716
|
+
/**
 * Look up the entity an experiment should execute.
 *
 * - "agent": `mastra.getAgentById(targetId)`; only if that throws,
 *   `mastra.getAgent(targetId)`.
 * - "workflow": `mastra.getWorkflowById(targetId)`; only if that throws,
 *   `mastra.getWorkflow(targetId)`.
 * - "scorer": `mastra.getScorerById(targetId)`, with a nullish result turned
 *   into `null`.
 * - anything else (including "processor"): always `null`.
 *
 * @param mastra - Object exposing the lookup methods listed above.
 * @param targetType - Kind of target to resolve.
 * @param targetId - Identifier passed to the lookup methods.
 * @returns Whatever the first non-throwing lookup returned, or `null` when
 *   every lookup threw or the type is not supported.
 */
function resolveTarget(mastra, targetType, targetId) {
  // Try each lookup in order; a lookup that throws hands over to the next one.
  const firstThatDoesNotThrow = (...lookups) => {
    for (const lookup of lookups) {
      try {
        return lookup();
      } catch {
        // Lookup failed: fall through to the next candidate.
      }
    }
    return null;
  };

  if (targetType === "agent") {
    return firstThatDoesNotThrow(
      () => mastra.getAgentById(targetId),
      () => mastra.getAgent(targetId)
    );
  }
  if (targetType === "workflow") {
    return firstThatDoesNotThrow(
      () => mastra.getWorkflowById(targetId),
      () => mastra.getWorkflow(targetId)
    );
  }
  if (targetType === "scorer") {
    return firstThatDoesNotThrow(() => mastra.getScorerById(targetId) ?? null);
  }
  // "processor" and unknown types cannot be resolved here.
  return null;
}
|
|
750
|
+
/**
 * Handle for a single stored dataset, identified by `id`.
 *
 * Every method resolves the "datasets" and/or "experiments" store lazily from
 * `mastra.getStorage()` on first use and keeps the resolved store on the
 * instance for later calls.
 */
var Dataset = class {
  id;
  #mastra;
  #datasetsStore;
  #experimentsStore;
  constructor(id, mastra) {
    this.id = id;
    this.#mastra = mastra;
  }
  // ---------------------------------------------------------------------------
  // Internal helpers
  // ---------------------------------------------------------------------------
  /** Build a user-facing storage error with the given id and text. */
  #storageError(id, text) {
    return new chunk4U7ZLI36_cjs.MastraError({
      id,
      text,
      domain: "STORAGE",
      category: "USER"
    });
  }
  /** Return the Mastra storage, throwing when none is configured. */
  #requireStorage() {
    const storage = this.#mastra.getStorage();
    if (storage) return storage;
    throw this.#storageError(
      "DATASETS_STORAGE_NOT_CONFIGURED",
      "Storage not configured. Configure storage in Mastra instance."
    );
  }
  /** Error raised when this dataset's record is missing from storage. */
  #datasetNotFound() {
    return this.#storageError("DATASET_NOT_FOUND", `Dataset not found: ${this.id}`);
  }
  /** Pagination argument with the defaults page 0 / 20 per page. */
  #pagination(args) {
    return { page: args?.page ?? 0, perPage: args?.perPage ?? 20 };
  }
  // ---------------------------------------------------------------------------
  // Lazy storage resolution
  // ---------------------------------------------------------------------------
  async #getDatasetsStore() {
    if (!this.#datasetsStore) {
      const resolved = await this.#requireStorage().getStore("datasets");
      if (!resolved) {
        throw this.#storageError(
          "DATASETS_STORE_NOT_AVAILABLE",
          "Datasets store not available. Ensure your storage adapter provides a datasets domain."
        );
      }
      this.#datasetsStore = resolved;
    }
    return this.#datasetsStore;
  }
  async #getExperimentsStore() {
    if (!this.#experimentsStore) {
      const resolved = await this.#requireStorage().getStore("experiments");
      if (!resolved) {
        throw this.#storageError(
          "EXPERIMENTS_STORE_NOT_AVAILABLE",
          "Experiments store not available. Ensure your storage adapter provides an experiments domain."
        );
      }
      this.#experimentsStore = resolved;
    }
    return this.#experimentsStore;
  }
  // ---------------------------------------------------------------------------
  // Dataset metadata
  // ---------------------------------------------------------------------------
  /**
   * Fetch this dataset's record from storage.
   * Throws a DATASET_NOT_FOUND error when no record exists.
   */
  async getDetails() {
    const datasets = await this.#getDatasetsStore();
    const record = await datasets.getDatasetById({ id: this.id });
    if (record) return record;
    throw this.#datasetNotFound();
  }
  /**
   * Update the dataset record.
   * `inputSchema` / `groundTruthSchema` values that `schemaCompat.isZodType`
   * recognises are converted with `zodToJson.zodToJsonSchema` before the
   * update is sent; all other fields of `input` are forwarded unchanged.
   */
  async update(input) {
    const datasets = await this.#getDatasetsStore();
    const { inputSchema, groundTruthSchema, ...rest } = input;
    const toJsonSchema = (schema) =>
      schema !== void 0 && schemaCompat.isZodType(schema) ? zodToJson.zodToJsonSchema(schema) : schema;
    const convertedInputSchema = toJsonSchema(inputSchema);
    const convertedGroundTruthSchema = toJsonSchema(groundTruthSchema);
    return datasets.updateDataset({
      id: this.id,
      ...rest,
      inputSchema: convertedInputSchema,
      groundTruthSchema: convertedGroundTruthSchema
    });
  }
  // ---------------------------------------------------------------------------
  // Item CRUD
  // ---------------------------------------------------------------------------
  /**
   * Add one item (`input`, `groundTruth`, `metadata`) to this dataset.
   */
  async addItem(input) {
    const datasets = await this.#getDatasetsStore();
    const { input: itemInput, groundTruth, metadata } = input;
    return datasets.addItem({
      datasetId: this.id,
      input: itemInput,
      groundTruth,
      metadata
    });
  }
  /**
   * Add several items at once; `input.items` is forwarded to the store as-is.
   */
  async addItems(input) {
    const datasets = await this.#getDatasetsStore();
    return datasets.batchInsertItems({ datasetId: this.id, items: input.items });
  }
  /**
   * Fetch one item by `args.itemId`, optionally at dataset version `args.version`.
   */
  async getItem(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.getItemById({ id: args.itemId, datasetVersion: args.version });
  }
  /**
   * List the dataset's items.
   * With a truthy `args.version` the items of that version are returned and
   * `search` / `page` / `perPage` are not used; otherwise the store's paginated
   * listing is returned (defaults: page 0, 20 per page).
   */
  async listItems(args) {
    const datasets = await this.#getDatasetsStore();
    if (!args?.version) {
      return datasets.listItems({
        datasetId: this.id,
        search: args?.search,
        pagination: this.#pagination(args)
      });
    }
    return datasets.getItemsByVersion({ datasetId: this.id, version: args.version });
  }
  /**
   * Update the item `input.itemId` with new `input`, `groundTruth` and `metadata`.
   */
  async updateItem(input) {
    const datasets = await this.#getDatasetsStore();
    const { itemId, input: itemInput, groundTruth, metadata } = input;
    return datasets.updateItem({
      id: itemId,
      datasetId: this.id,
      input: itemInput,
      groundTruth,
      metadata
    });
  }
  /**
   * Delete the item `args.itemId` from this dataset.
   */
  async deleteItem(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.deleteItem({ id: args.itemId, datasetId: this.id });
  }
  /**
   * Delete all items listed in `args.itemIds` from this dataset.
   */
  async deleteItems(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.batchDeleteItems({ datasetId: this.id, itemIds: args.itemIds });
  }
  // ---------------------------------------------------------------------------
  // Versioning
  // ---------------------------------------------------------------------------
  /**
   * List this dataset's versions (defaults: page 0, 20 per page).
   */
  async listVersions(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.listDatasetVersions({
      datasetId: this.id,
      pagination: this.#pagination(args)
    });
  }
  /**
   * Get the stored history of the item `args.itemId`.
   */
  async getItemHistory(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.getItemHistory(args.itemId);
  }
  // ---------------------------------------------------------------------------
  // Experiments
  // ---------------------------------------------------------------------------
  /**
   * Run an experiment on this dataset and resolve with its summary once done.
   * `config` is spread after `datasetId`, so its keys take precedence.
   */
  async startExperiment(config) {
    const runConfig = { datasetId: this.id, ...config };
    return runExperiment(this.#mastra, runConfig);
  }
  /**
   * Create the experiment record, start the run in the background and return
   * immediately with `{ experimentId, status: "pending" }`.
   * The record is created with the dataset's current `version` and
   * `totalItems: 0`. A rejection of the background run is discarded here.
   */
  async startExperimentAsync(config) {
    const experiments = await this.#getExperimentsStore();
    const datasets = await this.#getDatasetsStore();
    const dataset = await datasets.getDatasetById({ id: this.id });
    if (!dataset) {
      throw this.#datasetNotFound();
    }
    const { id: experimentId } = await experiments.createExperiment({
      datasetId: this.id,
      datasetVersion: dataset.version,
      targetType: config.targetType ?? "agent",
      targetId: config.targetId ?? "inline",
      totalItems: 0,
      name: config.name,
      description: config.description,
      metadata: config.metadata
    });
    // Fire-and-forget: intentionally not awaited, rejection swallowed.
    void runExperiment(this.#mastra, {
      datasetId: this.id,
      experimentId,
      ...config
    }).catch(() => {
    });
    return { experimentId, status: "pending" };
  }
  /**
   * List the experiments recorded for this dataset (defaults: page 0, 20 per page).
   */
  async listExperiments(args) {
    const experiments = await this.#getExperimentsStore();
    return experiments.listExperiments({
      datasetId: this.id,
      pagination: this.#pagination(args)
    });
  }
  /**
   * Fetch the experiment `args.experimentId`.
   */
  async getExperiment(args) {
    const experiments = await this.#getExperimentsStore();
    return experiments.getExperimentById({ id: args.experimentId });
  }
  /**
   * List the per-item results of experiment `args.experimentId`
   * (defaults: page 0, 20 per page).
   */
  async listExperimentResults(args) {
    const experiments = await this.#getExperimentsStore();
    return experiments.listExperimentResults({
      experimentId: args.experimentId,
      pagination: this.#pagination(args)
    });
  }
  /**
   * Delete the experiment `args.experimentId`.
   */
  async deleteExperiment(args) {
    const experiments = await this.#getExperimentsStore();
    return experiments.deleteExperiment({ id: args.experimentId });
  }
};
|
|
1019
|
+
/**
 * Entry point for dataset management: create, fetch, list and delete datasets,
 * plus experiment operations that are not bound to a single dataset.
 *
 * The "datasets" and "experiments" stores are resolved lazily from
 * `mastra.getStorage()` on first use and kept on the instance afterwards.
 */
var DatasetsManager = class {
  #mastra;
  #datasetsStore;
  #experimentsStore;
  constructor(mastra) {
    this.#mastra = mastra;
  }
  // ---------------------------------------------------------------------------
  // Internal helpers
  // ---------------------------------------------------------------------------
  /** Build a user-facing storage error with the given id and text. */
  #storageError(id, text) {
    return new chunk4U7ZLI36_cjs.MastraError({
      id,
      text,
      domain: "STORAGE",
      category: "USER"
    });
  }
  /** Return the Mastra storage, throwing when none is configured. */
  #requireStorage() {
    const storage = this.#mastra.getStorage();
    if (storage) return storage;
    throw this.#storageError(
      "DATASETS_STORAGE_NOT_CONFIGURED",
      "Storage not configured. Configure storage in Mastra instance."
    );
  }
  // ---------------------------------------------------------------------------
  // Lazy storage resolution
  // ---------------------------------------------------------------------------
  async #getDatasetsStore() {
    if (!this.#datasetsStore) {
      const resolved = await this.#requireStorage().getStore("datasets");
      if (!resolved) {
        throw this.#storageError(
          "DATASETS_STORE_NOT_AVAILABLE",
          "Datasets store not available. Ensure your storage adapter provides a datasets domain."
        );
      }
      this.#datasetsStore = resolved;
    }
    return this.#datasetsStore;
  }
  async #getExperimentsStore() {
    if (!this.#experimentsStore) {
      const resolved = await this.#requireStorage().getStore("experiments");
      if (!resolved) {
        throw this.#storageError(
          "EXPERIMENTS_STORE_NOT_AVAILABLE",
          "Experiments store not available. Ensure your storage adapter provides an experiments domain."
        );
      }
      this.#experimentsStore = resolved;
    }
    return this.#experimentsStore;
  }
  // ---------------------------------------------------------------------------
  // Dataset CRUD
  // ---------------------------------------------------------------------------
  /**
   * Create a dataset and return a `Dataset` handle for it.
   * `inputSchema` / `groundTruthSchema` values that `schemaCompat.isZodType`
   * recognises are converted with `zodToJson.zodToJsonSchema` first; all other
   * fields of `input` are forwarded unchanged.
   */
  async create(input) {
    const datasets = await this.#getDatasetsStore();
    const { inputSchema, groundTruthSchema, ...rest } = input;
    const toJsonSchema = (schema) =>
      schema !== void 0 && schemaCompat.isZodType(schema) ? zodToJson.zodToJsonSchema(schema) : schema;
    const convertedInputSchema = toJsonSchema(inputSchema);
    const convertedGroundTruthSchema = toJsonSchema(groundTruthSchema);
    const created = await datasets.createDataset({
      ...rest,
      inputSchema: convertedInputSchema,
      groundTruthSchema: convertedGroundTruthSchema
    });
    return new Dataset(created.id, this.#mastra);
  }
  /**
   * Return a `Dataset` handle for `args.id`.
   * Throws a DATASET_NOT_FOUND error when storage has no such record.
   */
  async get(args) {
    const datasets = await this.#getDatasetsStore();
    const record = await datasets.getDatasetById({ id: args.id });
    if (!record) {
      throw this.#storageError("DATASET_NOT_FOUND", "Dataset not found");
    }
    return new Dataset(args.id, this.#mastra);
  }
  /**
   * List datasets (defaults: page 0, 20 per page).
   */
  async list(args) {
    const datasets = await this.#getDatasetsStore();
    const pagination = { page: args?.page ?? 0, perPage: args?.perPage ?? 20 };
    return datasets.listDatasets({ pagination });
  }
  /**
   * Delete the dataset `args.id`.
   */
  async delete(args) {
    const datasets = await this.#getDatasetsStore();
    return datasets.deleteDataset({ id: args.id });
  }
  // ---------------------------------------------------------------------------
  // Cross-dataset experiment operations
  // ---------------------------------------------------------------------------
  /**
   * Fetch the experiment `args.experimentId`.
   */
  async getExperiment(args) {
    const experiments = await this.#getExperimentsStore();
    return experiments.getExperimentById({ id: args.experimentId });
  }
  /**
   * Compare a baseline experiment against one other experiment.
   *
   * At least two IDs are required in `args.experimentIds`. The baseline is
   * `args.baselineId`, or the first ID when none is given. It is compared
   * against the first ID that differs from the baseline (or the second ID if
   * none differs); any further IDs are not used.
   *
   * The pairwise comparison comes from the module-level `compareExperiments`
   * function; each compared item is then enriched with the `input`,
   * `groundTruth` and `output` found in the stored experiment results.
   *
   * @returns `{ baselineId, items }`, where every item carries a `results`
   *   object keyed by experiment ID (`null` when that experiment has no result
   *   for the item).
   */
  async compareExperiments(args) {
    const { experimentIds, baselineId } = args;
    if (experimentIds.length < 2) {
      throw this.#storageError(
        "COMPARE_INVALID_INPUT",
        "compareExperiments requires at least 2 experiment IDs."
      );
    }
    const resolvedBaseline = baselineId ?? experimentIds[0];
    const otherExperimentId = experimentIds.find((candidate) => candidate !== resolvedBaseline) ?? experimentIds[1];
    const comparison = await compareExperiments(this.#mastra, {
      experimentIdA: resolvedBaseline,
      experimentIdB: otherExperimentId
    });
    const experiments = await this.#getExperimentsStore();
    // perPage: false is passed so the store is asked for results without a page size.
    const fetchAllResults = (experimentId) => experiments.listExperimentResults({
      experimentId,
      pagination: { page: 0, perPage: false }
    });
    const indexByItemId = (listing) => new Map(listing.results.map((row) => [row.itemId, row]));
    const [baselineListing, otherListing] = await Promise.all([
      fetchAllResults(resolvedBaseline),
      fetchAllResults(otherExperimentId)
    ]);
    const baselineByItem = indexByItemId(baselineListing);
    const otherByItem = indexByItemId(otherListing);
    const items = comparison.items.map(({ itemId, scoresA, scoresB }) => {
      const baselineRow = baselineByItem.get(itemId);
      const otherRow = otherByItem.get(itemId);
      return {
        itemId,
        // Prefer the baseline's copy of the item data, then the other run's.
        input: baselineRow?.input ?? otherRow?.input ?? null,
        groundTruth: baselineRow?.groundTruth ?? otherRow?.groundTruth ?? null,
        results: {
          [resolvedBaseline]: baselineRow ? { output: baselineRow.output, scores: scoresA } : null,
          [otherExperimentId]: otherRow ? { output: otherRow.output, scores: scoresB } : null
        }
      };
    });
    return { baselineId: resolvedBaseline, items };
  }
};
|
|
1195
|
+
|
|
1196
|
+
exports.Dataset = Dataset;
|
|
1197
|
+
exports.DatasetsManager = DatasetsManager;
|
|
1198
|
+
exports.compareExperiments = compareExperiments;
|
|
1199
|
+
exports.computeMean = computeMean;
|
|
1200
|
+
exports.computeScorerStats = computeScorerStats;
|
|
1201
|
+
exports.executeTarget = executeTarget;
|
|
1202
|
+
exports.isRegression = isRegression;
|
|
1203
|
+
exports.resolveScorers = resolveScorers;
|
|
1204
|
+
exports.runExperiment = runExperiment;
|
|
1205
|
+
exports.runScorersForItem = runScorersForItem;
|
|
1206
|
+
//# sourceMappingURL=chunk-3JVFFAJX.cjs.map
|
|
1207
|
+
//# sourceMappingURL=chunk-3JVFFAJX.cjs.map
|