@mastra/core 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/CHANGELOG.md +462 -0
  2. package/datasets.d.ts +1 -0
  3. package/dist/agent/agent.d.ts +1 -1
  4. package/dist/agent/index.cjs +13 -13
  5. package/dist/agent/index.js +2 -2
  6. package/dist/agent/message-list/adapters/AIV4Adapter.d.ts.map +1 -1
  7. package/dist/agent/message-list/adapters/AIV5Adapter.d.ts.map +1 -1
  8. package/dist/agent/message-list/conversion/output-converter.d.ts +2 -1
  9. package/dist/agent/message-list/conversion/output-converter.d.ts.map +1 -1
  10. package/dist/agent/message-list/index.cjs +18 -18
  11. package/dist/agent/message-list/index.js +1 -1
  12. package/dist/agent/workflows/prepare-stream/index.d.ts +1 -1
  13. package/dist/chunk-3JVFFAJX.cjs +1207 -0
  14. package/dist/chunk-3JVFFAJX.cjs.map +1 -0
  15. package/dist/{chunk-WL3AW3YA.js → chunk-3X3CZUXI.js} +4070 -3983
  16. package/dist/chunk-3X3CZUXI.js.map +1 -0
  17. package/dist/{chunk-2GWTJFVM.js → chunk-4EHGOATH.js} +46 -14
  18. package/dist/chunk-4EHGOATH.js.map +1 -0
  19. package/dist/{chunk-YNXIGRQE.cjs → chunk-4IJ4UDZX.cjs} +319 -83
  20. package/dist/chunk-4IJ4UDZX.cjs.map +1 -0
  21. package/dist/{chunk-CGPH7CMG.cjs → chunk-4KFEMXTV.cjs} +46 -14
  22. package/dist/chunk-4KFEMXTV.cjs.map +1 -0
  23. package/dist/{chunk-PHYJYZ32.js → chunk-4TQ4EBYX.js} +16 -8
  24. package/dist/chunk-4TQ4EBYX.js.map +1 -0
  25. package/dist/{chunk-SIZEIYNH.js → chunk-4XSAZPPS.js} +254 -18
  26. package/dist/chunk-4XSAZPPS.js.map +1 -0
  27. package/dist/{chunk-KUTU2YZF.js → chunk-5Q5Y34SS.js} +5 -5
  28. package/dist/{chunk-KUTU2YZF.js.map → chunk-5Q5Y34SS.js.map} +1 -1
  29. package/dist/{chunk-EH6SAGEO.cjs → chunk-64WGYTQK.cjs} +72 -53
  30. package/dist/{chunk-EH6SAGEO.cjs.map → chunk-64WGYTQK.cjs.map} +1 -1
  31. package/dist/{chunk-OOCEAC6U.cjs → chunk-65PHUUMF.cjs} +3 -3
  32. package/dist/{chunk-OOCEAC6U.cjs.map → chunk-65PHUUMF.cjs.map} +1 -1
  33. package/dist/{chunk-JNE2ABVB.js → chunk-7NKUSQEV.js} +1094 -10
  34. package/dist/chunk-7NKUSQEV.js.map +1 -0
  35. package/dist/{chunk-ZHFM7HCQ.js → chunk-AXHBJ4GX.js} +3 -3
  36. package/dist/{chunk-ZHFM7HCQ.js.map → chunk-AXHBJ4GX.js.map} +1 -1
  37. package/dist/{chunk-ILQXPZCD.js → chunk-AY6DBRS3.js} +37 -21
  38. package/dist/chunk-AY6DBRS3.js.map +1 -0
  39. package/dist/{chunk-TERSHTY5.cjs → chunk-BP7VYTOP.cjs} +1116 -21
  40. package/dist/chunk-BP7VYTOP.cjs.map +1 -0
  41. package/dist/{chunk-UE2G2LRP.cjs → chunk-CZ4NQANZ.cjs} +37 -21
  42. package/dist/chunk-CZ4NQANZ.cjs.map +1 -0
  43. package/dist/{chunk-NCC45KOB.cjs → chunk-DBSVT6AR.cjs} +7 -7
  44. package/dist/{chunk-NCC45KOB.cjs.map → chunk-DBSVT6AR.cjs.map} +1 -1
  45. package/dist/{chunk-BXLLXTT4.js → chunk-FLPEGTEK.js} +4 -4
  46. package/dist/{chunk-BXLLXTT4.js.map → chunk-FLPEGTEK.js.map} +1 -1
  47. package/dist/{chunk-ON2KVIUJ.cjs → chunk-HYRYTTMT.cjs} +7 -7
  48. package/dist/{chunk-ON2KVIUJ.cjs.map → chunk-HYRYTTMT.cjs.map} +1 -1
  49. package/dist/chunk-NJ7TL3LQ.js +1196 -0
  50. package/dist/chunk-NJ7TL3LQ.js.map +1 -0
  51. package/dist/{chunk-EUG4AON3.cjs → chunk-NKYWDNCI.cjs} +8 -7
  52. package/dist/{chunk-EUG4AON3.cjs.map → chunk-NKYWDNCI.cjs.map} +1 -1
  53. package/dist/{chunk-UHVG25VW.cjs → chunk-NZG2JAKS.cjs} +23 -15
  54. package/dist/chunk-NZG2JAKS.cjs.map +1 -0
  55. package/dist/{chunk-44SUGDBR.js → chunk-PS5ONCXY.js} +109 -5
  56. package/dist/chunk-PS5ONCXY.js.map +1 -0
  57. package/dist/{chunk-57QAF2ZQ.js → chunk-QTTWRCB5.js} +4 -4
  58. package/dist/{chunk-57QAF2ZQ.js.map → chunk-QTTWRCB5.js.map} +1 -1
  59. package/dist/{chunk-VM25PDSW.js → chunk-RZ4CIIZR.js} +4 -4
  60. package/dist/{chunk-VM25PDSW.js.map → chunk-RZ4CIIZR.js.map} +1 -1
  61. package/dist/{chunk-C3XU7ZDC.cjs → chunk-SU5APAM6.cjs} +123 -4
  62. package/dist/chunk-SU5APAM6.cjs.map +1 -0
  63. package/dist/{chunk-3MJCJLZS.js → chunk-U2HKJZCI.js} +24 -5
  64. package/dist/{chunk-3MJCJLZS.js.map → chunk-U2HKJZCI.js.map} +1 -1
  65. package/dist/{chunk-GCTAD6B7.cjs → chunk-VD5YA6RH.cjs} +12 -12
  66. package/dist/{chunk-GCTAD6B7.cjs.map → chunk-VD5YA6RH.cjs.map} +1 -1
  67. package/dist/{chunk-KAJNBNWP.cjs → chunk-YNNJLLFN.cjs} +4071 -3984
  68. package/dist/chunk-YNNJLLFN.cjs.map +1 -0
  69. package/dist/datasets/dataset.d.ts +153 -0
  70. package/dist/datasets/dataset.d.ts.map +1 -0
  71. package/dist/datasets/experiment/analytics/aggregate.d.ts +46 -0
  72. package/dist/datasets/experiment/analytics/aggregate.d.ts.map +1 -0
  73. package/dist/datasets/experiment/analytics/compare.d.ts +33 -0
  74. package/dist/datasets/experiment/analytics/compare.d.ts.map +1 -0
  75. package/dist/datasets/experiment/analytics/index.d.ts +9 -0
  76. package/dist/datasets/experiment/analytics/index.d.ts.map +1 -0
  77. package/dist/datasets/experiment/analytics/types.d.ts +103 -0
  78. package/dist/datasets/experiment/analytics/types.d.ts.map +1 -0
  79. package/dist/datasets/experiment/executor.d.ts +40 -0
  80. package/dist/datasets/experiment/executor.d.ts.map +1 -0
  81. package/dist/datasets/experiment/index.d.ts +31 -0
  82. package/dist/datasets/experiment/index.d.ts.map +1 -0
  83. package/dist/datasets/experiment/scorer.d.ts +21 -0
  84. package/dist/datasets/experiment/scorer.d.ts.map +1 -0
  85. package/dist/datasets/experiment/types.d.ts +140 -0
  86. package/dist/datasets/experiment/types.d.ts.map +1 -0
  87. package/dist/datasets/index.cjs +69 -0
  88. package/dist/datasets/index.cjs.map +1 -0
  89. package/dist/datasets/index.d.ts +6 -0
  90. package/dist/datasets/index.d.ts.map +1 -0
  91. package/dist/datasets/index.js +4 -0
  92. package/dist/datasets/index.js.map +1 -0
  93. package/dist/datasets/manager.d.ts +73 -0
  94. package/dist/datasets/manager.d.ts.map +1 -0
  95. package/dist/datasets/validation/errors.d.ts +44 -0
  96. package/dist/datasets/validation/errors.d.ts.map +1 -0
  97. package/dist/datasets/validation/index.d.ts +3 -0
  98. package/dist/datasets/validation/index.d.ts.map +1 -0
  99. package/dist/datasets/validation/validator.d.ts +24 -0
  100. package/dist/datasets/validation/validator.d.ts.map +1 -0
  101. package/dist/docs/SKILL.md +1 -3
  102. package/dist/docs/assets/SOURCE_MAP.json +436 -346
  103. package/dist/docs/references/docs-memory-observational-memory.md +86 -11
  104. package/dist/docs/references/docs-streaming-events.md +23 -0
  105. package/dist/docs/references/docs-workspace-filesystem.md +72 -1
  106. package/dist/docs/references/docs-workspace-overview.md +95 -12
  107. package/dist/docs/references/docs-workspace-sandbox.md +2 -0
  108. package/dist/docs/references/guides-agent-frameworks-ai-sdk.md +6 -2
  109. package/dist/docs/references/reference-ai-sdk-with-mastra.md +1 -1
  110. package/dist/docs/references/reference-memory-observational-memory.md +318 -9
  111. package/dist/docs/references/reference-streaming-workflows-stream.md +1 -0
  112. package/dist/docs/references/reference-workflows-workflow-methods-foreach.md +30 -0
  113. package/dist/docs/references/reference.md +3 -0
  114. package/dist/editor/index.d.ts +1 -1
  115. package/dist/editor/index.d.ts.map +1 -1
  116. package/dist/editor/types.d.ts +21 -3
  117. package/dist/editor/types.d.ts.map +1 -1
  118. package/dist/evals/index.cjs +20 -20
  119. package/dist/evals/index.js +3 -3
  120. package/dist/evals/scoreTraces/index.cjs +5 -5
  121. package/dist/evals/scoreTraces/index.js +2 -2
  122. package/dist/features/index.cjs +1 -1
  123. package/dist/features/index.cjs.map +1 -1
  124. package/dist/features/index.d.ts.map +1 -1
  125. package/dist/features/index.js +1 -1
  126. package/dist/features/index.js.map +1 -1
  127. package/dist/index.cjs +2 -7
  128. package/dist/index.d.ts +0 -9
  129. package/dist/index.d.ts.map +1 -1
  130. package/dist/index.js +1 -2
  131. package/dist/llm/index.cjs +10 -10
  132. package/dist/llm/index.js +2 -2
  133. package/dist/llm/model/provider-types.generated.d.ts +1694 -1659
  134. package/dist/loop/index.cjs +12 -12
  135. package/dist/loop/index.js +1 -1
  136. package/dist/loop/network/index.d.ts +1 -1
  137. package/dist/loop/workflows/agentic-execution/index.d.ts +1 -1
  138. package/dist/loop/workflows/agentic-execution/llm-execution-step.d.ts.map +1 -1
  139. package/dist/loop/workflows/agentic-execution/llm-mapping-step.d.ts.map +1 -1
  140. package/dist/loop/workflows/agentic-execution/tool-call-step.d.ts.map +1 -1
  141. package/dist/loop/workflows/agentic-loop/index.d.ts +1 -1
  142. package/dist/loop/workflows/errors.d.ts +4 -0
  143. package/dist/loop/workflows/errors.d.ts.map +1 -0
  144. package/dist/mastra/index.cjs +2 -2
  145. package/dist/mastra/index.d.ts +2 -0
  146. package/dist/mastra/index.d.ts.map +1 -1
  147. package/dist/mastra/index.js +1 -1
  148. package/dist/memory/index.cjs +14 -14
  149. package/dist/memory/index.js +1 -1
  150. package/dist/memory/memory.d.ts +5 -0
  151. package/dist/memory/memory.d.ts.map +1 -1
  152. package/dist/memory/types.d.ts +56 -0
  153. package/dist/memory/types.d.ts.map +1 -1
  154. package/dist/processors/index.cjs +41 -41
  155. package/dist/processors/index.js +1 -1
  156. package/dist/processors/processors/skills.d.ts +1 -1
  157. package/dist/processors/processors/skills.d.ts.map +1 -1
  158. package/dist/processors/runner.d.ts.map +1 -1
  159. package/dist/processors/trailing-assistant-guard.d.ts +34 -0
  160. package/dist/processors/trailing-assistant-guard.d.ts.map +1 -0
  161. package/dist/provider-registry-6LZAGQET.cjs +40 -0
  162. package/dist/{provider-registry-C6XCYX44.cjs.map → provider-registry-6LZAGQET.cjs.map} +1 -1
  163. package/dist/provider-registry-QUNT7S55.js +3 -0
  164. package/dist/{provider-registry-NWU4YFQW.js.map → provider-registry-QUNT7S55.js.map} +1 -1
  165. package/dist/provider-registry.json +4068 -3981
  166. package/dist/relevance/index.cjs +3 -3
  167. package/dist/relevance/index.js +1 -1
  168. package/dist/storage/base.d.ts +4 -1
  169. package/dist/storage/base.d.ts.map +1 -1
  170. package/dist/storage/constants.cjs +87 -27
  171. package/dist/storage/constants.d.ts +21 -2
  172. package/dist/storage/constants.d.ts.map +1 -1
  173. package/dist/storage/constants.js +1 -1
  174. package/dist/storage/domains/agents/inmemory.d.ts.map +1 -1
  175. package/dist/storage/domains/datasets/base.d.ts +83 -0
  176. package/dist/storage/domains/datasets/base.d.ts.map +1 -0
  177. package/dist/storage/domains/datasets/index.d.ts +3 -0
  178. package/dist/storage/domains/datasets/index.d.ts.map +1 -0
  179. package/dist/storage/domains/datasets/inmemory.d.ts +40 -0
  180. package/dist/storage/domains/datasets/inmemory.d.ts.map +1 -0
  181. package/dist/storage/domains/experiments/base.d.ts +28 -0
  182. package/dist/storage/domains/experiments/base.d.ts.map +1 -0
  183. package/dist/storage/domains/experiments/index.d.ts +3 -0
  184. package/dist/storage/domains/experiments/index.d.ts.map +1 -0
  185. package/dist/storage/domains/experiments/inmemory.d.ts +28 -0
  186. package/dist/storage/domains/experiments/inmemory.d.ts.map +1 -0
  187. package/dist/storage/domains/index.d.ts +3 -0
  188. package/dist/storage/domains/index.d.ts.map +1 -1
  189. package/dist/storage/domains/inmemory-db.d.ts +9 -1
  190. package/dist/storage/domains/inmemory-db.d.ts.map +1 -1
  191. package/dist/storage/domains/mcp-clients/base.d.ts +47 -0
  192. package/dist/storage/domains/mcp-clients/base.d.ts.map +1 -0
  193. package/dist/storage/domains/mcp-clients/index.d.ts +3 -0
  194. package/dist/storage/domains/mcp-clients/index.d.ts.map +1 -0
  195. package/dist/storage/domains/mcp-clients/inmemory.d.ts +31 -0
  196. package/dist/storage/domains/mcp-clients/inmemory.d.ts.map +1 -0
  197. package/dist/storage/domains/operations/inmemory.d.ts.map +1 -1
  198. package/dist/storage/index.cjs +201 -117
  199. package/dist/storage/index.js +2 -2
  200. package/dist/storage/mock.d.ts.map +1 -1
  201. package/dist/storage/types.d.ts +338 -3
  202. package/dist/storage/types.d.ts.map +1 -1
  203. package/dist/stream/base/output.d.ts.map +1 -1
  204. package/dist/stream/index.cjs +11 -11
  205. package/dist/stream/index.js +2 -2
  206. package/dist/stream/types.d.ts +15 -0
  207. package/dist/stream/types.d.ts.map +1 -1
  208. package/dist/test-utils/llm-mock.cjs +4 -4
  209. package/dist/test-utils/llm-mock.js +1 -1
  210. package/dist/tool-loop-agent/index.cjs +4 -4
  211. package/dist/tool-loop-agent/index.js +1 -1
  212. package/dist/tool-provider/index.cjs +4 -0
  213. package/dist/tool-provider/index.cjs.map +1 -0
  214. package/dist/tool-provider/index.d.ts +2 -0
  215. package/dist/tool-provider/index.d.ts.map +1 -0
  216. package/dist/tool-provider/index.js +3 -0
  217. package/dist/tool-provider/index.js.map +1 -0
  218. package/dist/tool-provider/types.d.ts +113 -0
  219. package/dist/tool-provider/types.d.ts.map +1 -0
  220. package/dist/utils.cjs +23 -23
  221. package/dist/utils.d.ts +1 -1
  222. package/dist/utils.d.ts.map +1 -1
  223. package/dist/utils.js +1 -1
  224. package/dist/vector/index.cjs +7 -7
  225. package/dist/vector/index.js +1 -1
  226. package/dist/workflows/evented/index.cjs +10 -10
  227. package/dist/workflows/evented/index.js +1 -1
  228. package/dist/workflows/evented/workflow-event-processor/index.d.ts.map +1 -1
  229. package/dist/workflows/handlers/control-flow.d.ts.map +1 -1
  230. package/dist/workflows/index.cjs +25 -25
  231. package/dist/workflows/index.js +1 -1
  232. package/dist/workflows/workflow.d.ts +2 -2
  233. package/dist/workflows/workflow.d.ts.map +1 -1
  234. package/dist/workspace/filesystem/filesystem.d.ts +5 -15
  235. package/dist/workspace/filesystem/filesystem.d.ts.map +1 -1
  236. package/dist/workspace/filesystem/local-filesystem.d.ts +18 -2
  237. package/dist/workspace/filesystem/local-filesystem.d.ts.map +1 -1
  238. package/dist/workspace/filesystem/mastra-filesystem.d.ts +21 -1
  239. package/dist/workspace/filesystem/mastra-filesystem.d.ts.map +1 -1
  240. package/dist/workspace/index.cjs +41 -41
  241. package/dist/workspace/index.d.ts +1 -1
  242. package/dist/workspace/index.d.ts.map +1 -1
  243. package/dist/workspace/index.js +1 -1
  244. package/dist/workspace/lifecycle.d.ts +49 -53
  245. package/dist/workspace/lifecycle.d.ts.map +1 -1
  246. package/dist/workspace/sandbox/local-sandbox.d.ts +1 -1
  247. package/dist/workspace/sandbox/local-sandbox.d.ts.map +1 -1
  248. package/dist/workspace/sandbox/mastra-sandbox.d.ts +3 -2
  249. package/dist/workspace/sandbox/mastra-sandbox.d.ts.map +1 -1
  250. package/dist/workspace/sandbox/sandbox.d.ts +3 -4
  251. package/dist/workspace/sandbox/sandbox.d.ts.map +1 -1
  252. package/dist/workspace/workspace.d.ts +2 -15
  253. package/dist/workspace/workspace.d.ts.map +1 -1
  254. package/package.json +7 -7
  255. package/src/llm/model/provider-types.generated.d.ts +1694 -1659
  256. package/tool-provider.d.ts +1 -0
  257. package/dist/chunk-2GWTJFVM.js.map +0 -1
  258. package/dist/chunk-44SUGDBR.js.map +0 -1
  259. package/dist/chunk-C3XU7ZDC.cjs.map +0 -1
  260. package/dist/chunk-CGPH7CMG.cjs.map +0 -1
  261. package/dist/chunk-ILQXPZCD.js.map +0 -1
  262. package/dist/chunk-JNE2ABVB.js.map +0 -1
  263. package/dist/chunk-KAJNBNWP.cjs.map +0 -1
  264. package/dist/chunk-PHYJYZ32.js.map +0 -1
  265. package/dist/chunk-SIZEIYNH.js.map +0 -1
  266. package/dist/chunk-TERSHTY5.cjs.map +0 -1
  267. package/dist/chunk-UE2G2LRP.cjs.map +0 -1
  268. package/dist/chunk-UHVG25VW.cjs.map +0 -1
  269. package/dist/chunk-WL3AW3YA.js.map +0 -1
  270. package/dist/chunk-YNXIGRQE.cjs.map +0 -1
  271. package/dist/docs/references/reference-cli-mastra.md +0 -336
  272. package/dist/docs/references/reference-deployer-cloudflare.md +0 -56
  273. package/dist/provider-registry-C6XCYX44.cjs +0 -40
  274. package/dist/provider-registry-NWU4YFQW.js +0 -3
@@ -0,0 +1,153 @@
1
+ import type { Mastra } from '../mastra/index.js';
2
+ import type { DatasetRecord, DatasetItem, DatasetItemRow, DatasetVersion } from '../storage/types.js';
3
+ import type { StartExperimentConfig, ExperimentSummary } from './experiment/types.js';
4
+ /**
5
+ * Public API for interacting with a single dataset.
6
+ *
7
+ * Provides methods for item CRUD, versioning, and experiment management.
8
+ * Obtained via `DatasetsManager.get()` or `DatasetsManager.create()`.
9
+ */
10
+ export declare class Dataset {
11
+ #private;
12
+ readonly id: string;
13
+ constructor(id: string, mastra: Mastra);
14
+ /**
15
+ * Get the full dataset record from storage.
16
+ */
17
+ getDetails(): Promise<DatasetRecord>;
18
+ /**
19
+ * Update dataset metadata and/or schemas.
20
+ * Zod schemas are automatically converted to JSON Schema.
21
+ */
22
+ update(input: {
23
+ name?: string;
24
+ description?: string;
25
+ metadata?: Record<string, unknown>;
26
+ inputSchema?: unknown;
27
+ groundTruthSchema?: unknown;
28
+ }): Promise<DatasetRecord>;
29
+ /**
30
+ * Add a single item to the dataset.
31
+ */
32
+ addItem(input: {
33
+ input: unknown;
34
+ groundTruth?: unknown;
35
+ metadata?: Record<string, unknown>;
36
+ }): Promise<DatasetItem>;
37
+ /**
38
+ * Add multiple items to the dataset in bulk.
39
+ */
40
+ addItems(input: {
41
+ items: Array<{
42
+ input: unknown;
43
+ groundTruth?: unknown;
44
+ metadata?: Record<string, unknown>;
45
+ }>;
46
+ }): Promise<DatasetItem[]>;
47
+ /**
48
+ * Get a single item by ID, optionally at a specific version.
49
+ */
50
+ getItem(args: {
51
+ itemId: string;
52
+ version?: number;
53
+ }): Promise<DatasetItem | null>;
54
+ /**
55
+ * List items in the dataset, optionally at a specific version.
56
+ */
57
+ listItems(args?: {
58
+ version?: number;
59
+ page?: number;
60
+ perPage?: number;
61
+ search?: string;
62
+ }): Promise<DatasetItem[] | {
63
+ items: DatasetItem[];
64
+ pagination: {
65
+ total: number;
66
+ page: number;
67
+ perPage: number | false;
68
+ hasMore: boolean;
69
+ };
70
+ }>;
71
+ /**
72
+ * Update an existing item in the dataset.
73
+ */
74
+ updateItem(input: {
75
+ itemId: string;
76
+ input?: unknown;
77
+ groundTruth?: unknown;
78
+ metadata?: Record<string, unknown>;
79
+ }): Promise<DatasetItem>;
80
+ /**
81
+ * Delete a single item from the dataset.
82
+ */
83
+ deleteItem(args: {
84
+ itemId: string;
85
+ }): Promise<void>;
86
+ /**
87
+ * Delete multiple items from the dataset in bulk.
88
+ */
89
+ deleteItems(args: {
90
+ itemIds: string[];
91
+ }): Promise<void>;
92
+ /**
93
+ * List all versions of this dataset.
94
+ */
95
+ listVersions(args?: {
96
+ page?: number;
97
+ perPage?: number;
98
+ }): Promise<{
99
+ versions: DatasetVersion[];
100
+ pagination: {
101
+ total: number;
102
+ page: number;
103
+ perPage: number | false;
104
+ hasMore: boolean;
105
+ };
106
+ }>;
107
+ /**
108
+ * Get full SCD-2 history of a specific item across all dataset versions.
109
+ */
110
+ getItemHistory(args: {
111
+ itemId: string;
112
+ }): Promise<DatasetItemRow[]>;
113
+ /**
114
+ * Run an experiment on this dataset and wait for completion.
115
+ */
116
+ startExperiment<I = unknown, O = unknown, E = unknown>(config: StartExperimentConfig<I, O, E>): Promise<ExperimentSummary>;
117
+ /**
118
+ * Start an experiment asynchronously (fire-and-forget).
119
+ * Returns immediately with the experiment ID and pending status.
120
+ */
121
+ startExperimentAsync<I = unknown, O = unknown, E = unknown>(config: StartExperimentConfig<I, O, E>): Promise<{
122
+ experimentId: string;
123
+ status: 'pending';
124
+ }>;
125
+ /**
126
+ * List all experiments (runs) for this dataset.
127
+ */
128
+ listExperiments(args?: {
129
+ page?: number;
130
+ perPage?: number;
131
+ }): Promise<import("../storage/types.js").ListExperimentsOutput>;
132
+ /**
133
+ * Get a specific experiment (run) by ID.
134
+ */
135
+ getExperiment(args: {
136
+ experimentId: string;
137
+ }): Promise<import("../storage/types.js").Experiment | null>;
138
+ /**
139
+ * List results for a specific experiment.
140
+ */
141
+ listExperimentResults(args: {
142
+ experimentId: string;
143
+ page?: number;
144
+ perPage?: number;
145
+ }): Promise<import("../storage/types.js").ListExperimentResultsOutput>;
146
+ /**
147
+ * Delete an experiment (run) by ID.
148
+ */
149
+ deleteExperiment(args: {
150
+ experimentId: string;
151
+ }): Promise<void>;
152
+ }
153
+ //# sourceMappingURL=dataset.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset.d.ts","sourceRoot":"","sources":["../../src/datasets/dataset.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAEtG,OAAO,KAAK,EAAoB,qBAAqB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAExG;;;;;GAKG;AACH,qBAAa,OAAO;;IAClB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;gBAKR,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAmEtC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,aAAa,CAAC;IAc1C;;;OAGG;IACG,MAAM,CAAC,KAAK,EAAE;QAClB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC,aAAa,CAAC;IAwB1B;;OAEG;IACG,OAAO,CAAC,KAAK,EAAE;QACnB,KAAK,EAAE,OAAO,CAAC;QACf,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACpC,GAAG,OAAO,CAAC,WAAW,CAAC;IAUxB;;OAEG;IACG,QAAQ,CAAC,KAAK,EAAE;QACpB,KAAK,EAAE,KAAK,CAAC;YACX,KAAK,EAAE,OAAO,CAAC;YACf,WAAW,CAAC,EAAE,OAAO,CAAC;YACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC,CAAC;KACJ,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IAQ1B;;OAEG;IACG,OAAO,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IAKtF;;OAEG;IACG,SAAS,CAAC,IAAI,CAAC,EAAE;QACrB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CACP,WAAW,EAAE,GACb;QAAE,KAAK,EAAE,WAAW,EAAE,CAAC;QAAC,UAAU,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,GAAG,KAAK,CAAC;YAAC,OAAO,EAAE,OAAO,CAAA;SAAE,CAAA;KAAE,CACnH;IAYD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE;QACtB,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACpC,GAAG,OAAO,CAAC,WAAW,CAAC;IAWxB;;OAEG;IACG,UAAU,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAKzD;;OAEG;IACG,WAAW,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAS7D;;OAEG;IACG,YAAY,CAAC,IAAI,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC;QACtE,QAAQ,EAAE,cAAc,EAAE,CAAC;QAC3B,UAAU,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,GAAG,KAAK,CAAC;YAAC,OAAO,EAAE,OAAO,CAAA;SAAE,CAAC;KACxF,CAAC;IAQF;;OAEG;IACG,cAAc,CAAC,IAAI,EAAE;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IASzE;;OAEG;IACG,eAAe,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EACzD,MAAM,EAAE,qBAAqB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GACrC,OAAO,CAAC,iBAAiB,CAAC;IAI7B;;;OAGG;IACG,oBAAoB,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EAC9D,MAAM,EAAE,qBAAqB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GACrC,OAAO,CAAC;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,SAAS,CAAA;KAAE,CAAC;IAqCvD;;OAEG;IACG,eAAe,CAAC,IAAI,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE;IAQhE;;OAEG;IACG,aAAa,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAA;KAAE;IAKlD;;OAEG;IACG,qBAAqB,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE;IAQ3F;;OAEG;IACG,gBAAgB,CAAC,IAAI,EAAE;QAAE,YAAY,EAAE,MAAM,CAAA;KAAE;CAItD"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Experiment Analytics Aggregation Helpers
3
+ *
4
+ * Pure functions for computing statistics from raw score data.
5
+ * Used by compareExperiments to build ScorerStats and detect regressions.
6
+ */
7
+ import type { ScoreRowData } from '../../../evals/types.js';
8
+ import type { ScorerStats } from './types.js';
9
+ /**
10
+ * Compute the arithmetic mean of an array of numbers.
11
+ *
12
+ * @param values - Array of numbers to average
13
+ * @returns Mean value, or 0 if array is empty
14
+ */
15
+ export declare function computeMean(values: number[]): number;
16
+ /**
17
+ * Compute aggregate statistics for a set of scores.
18
+ *
19
+ * Metrics:
20
+ * - errorRate: proportion of items with null scores (errors)
21
+ * - passRate: proportion of scored items meeting threshold
22
+ * - avgScore: mean of non-null scores
23
+ *
24
+ * @param scores - Score records from storage
25
+ * @param passThreshold - Absolute threshold for pass (score >= threshold)
26
+ * @returns ScorerStats with all computed metrics
27
+ */
28
+ export declare function computeScorerStats(scores: ScoreRowData[], passThreshold?: number): ScorerStats;
29
+ /**
30
+ * Determine if a score delta represents a regression.
31
+ *
32
+ * @param delta - Score difference (experiment B - experiment A)
33
+ * @param threshold - Absolute threshold for regression detection
34
+ * @param direction - Score direction ('higher-is-better' or 'lower-is-better')
35
+ * @returns True if delta represents a regression
36
+ *
37
+ * @example
38
+ * // Higher is better (default): negative delta is bad
39
+ * isRegression(-0.1, 0.05, 'higher-is-better') // true (dropped more than 0.05)
40
+ * isRegression(-0.01, 0.05, 'higher-is-better') // false (within tolerance)
41
+ *
42
+ * // Lower is better: positive delta is bad
43
+ * isRegression(0.1, 0.05, 'lower-is-better') // true (increased more than 0.05)
44
+ */
45
+ export declare function isRegression(delta: number, threshold: number, direction?: 'higher-is-better' | 'lower-is-better'): boolean;
46
+ //# sourceMappingURL=aggregate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aggregate.d.ts","sourceRoot":"","sources":["../../../../src/datasets/experiment/analytics/aggregate.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAE3C;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAMpD;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,aAAa,GAAE,MAAY,GAAG,WAAW,CA8CnG;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EACjB,SAAS,GAAE,kBAAkB,GAAG,iBAAsC,GACrE,OAAO,CAUT"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Experiment Comparison
3
+ *
4
+ * Compare two experiments to detect score regressions.
5
+ * Returns per-scorer deltas and per-item score diffs.
6
+ */
7
+ import type { Mastra } from '../../../mastra/index.js';
8
+ import type { CompareExperimentsConfig, ComparisonResult } from './types.js';
9
+ /**
10
+ * Compare two experiments to detect score regressions.
11
+ *
12
+ * @param mastra - Mastra instance for storage access
13
+ * @param config - Comparison configuration
14
+ * @returns ComparisonResult with per-scorer and per-item comparisons
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * const result = await compareExperiments(mastra, {
19
+ * experimentIdA: 'baseline-experiment-id',
20
+ * experimentIdB: 'candidate-experiment-id',
21
+ * thresholds: {
22
+ * 'accuracy': { value: 0.05, direction: 'higher-is-better' },
23
+ * 'latency': { value: 100, direction: 'lower-is-better' },
24
+ * },
25
+ * });
26
+ *
27
+ * if (result.hasRegression) {
28
+ * console.log('Quality regression detected!');
29
+ * }
30
+ * ```
31
+ */
32
+ export declare function compareExperiments(mastra: Mastra, config: CompareExperimentsConfig): Promise<ComparisonResult>;
33
+ //# sourceMappingURL=compare.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"compare.d.ts","sourceRoot":"","sources":["../../../../src/datasets/experiment/analytics/compare.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,KAAK,EACV,wBAAwB,EACxB,gBAAgB,EAIjB,MAAM,SAAS,CAAC;AAejB;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,wBAAwB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAiKpH"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Experiment Analytics
3
+ *
4
+ * Compare experiments and compute aggregate statistics for regression detection.
5
+ */
6
+ export * from './types.js';
7
+ export * from './aggregate.js';
8
+ export * from './compare.js';
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/datasets/experiment/analytics/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,SAAS,CAAC;AACxB,cAAc,aAAa,CAAC;AAC5B,cAAc,WAAW,CAAC"}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Experiment Analytics Types
3
+ *
4
+ * Types for comparing experiments and computing aggregate statistics.
5
+ * Supports regression detection for CI/CD quality gates.
6
+ */
7
+ /**
8
+ * Aggregate statistics for a single scorer across an experiment.
9
+ */
10
+ export interface ScorerStats {
11
+ /** Items with null score / total items */
12
+ errorRate: number;
13
+ /** Count of items with null score */
14
+ errorCount: number;
15
+ /** Items >= threshold / items with scores */
16
+ passRate: number;
17
+ /** Count of items that passed threshold */
18
+ passCount: number;
19
+ /** Mean of non-null scores */
20
+ avgScore: number;
21
+ /** Count of items with non-null scores */
22
+ scoreCount: number;
23
+ /** Total items evaluated by this scorer */
24
+ totalItems: number;
25
+ }
26
+ /**
27
+ * Per-scorer comparison between two experiments.
28
+ */
29
+ export interface ScorerComparison {
30
+ /** Stats from experiment A (baseline) */
31
+ statsA: ScorerStats;
32
+ /** Stats from experiment B (candidate) */
33
+ statsB: ScorerStats;
34
+ /** avgScore difference: statsB.avgScore - statsA.avgScore */
35
+ delta: number;
36
+ /** Whether this scorer regressed (delta below threshold) */
37
+ regressed: boolean;
38
+ /** Threshold used for regression detection */
39
+ threshold: number;
40
+ }
41
+ /**
42
+ * Per-item comparison showing score differences.
43
+ */
44
+ export interface ItemComparison {
45
+ /** Dataset item ID */
46
+ itemId: string;
47
+ /** Whether item exists in both experiments */
48
+ inBothExperiments: boolean;
49
+ /** Scores from experiment A by scorer ID (null if no score) */
50
+ scoresA: Record<string, number | null>;
51
+ /** Scores from experiment B by scorer ID (null if no score) */
52
+ scoresB: Record<string, number | null>;
53
+ }
54
+ /**
55
+ * Top-level comparison result.
56
+ */
57
+ export interface ComparisonResult {
58
+ /** Experiment A metadata */
59
+ experimentA: {
60
+ id: string;
61
+ datasetVersion: number | null;
62
+ };
63
+ /** Experiment B metadata */
64
+ experimentB: {
65
+ id: string;
66
+ datasetVersion: number | null;
67
+ };
68
+ /** True if experiments used different dataset versions */
69
+ versionMismatch: boolean;
70
+ /** True if any scorer regressed (for CI quick check) */
71
+ hasRegression: boolean;
72
+ /** Per-scorer comparison results */
73
+ scorers: Record<string, ScorerComparison>;
74
+ /** Per-item comparison details */
75
+ items: ItemComparison[];
76
+ /** Warning messages (e.g., version mismatch, no overlap) */
77
+ warnings: string[];
78
+ }
79
+ /**
80
+ * Threshold configuration for a single scorer.
81
+ */
82
+ export interface ScorerThreshold {
83
+ /** Absolute threshold value for regression detection */
84
+ value: number;
85
+ /** Score direction: 'higher-is-better' (default) or 'lower-is-better' */
86
+ direction?: 'higher-is-better' | 'lower-is-better';
87
+ }
88
+ /**
89
+ * Configuration for compareExperiments function.
90
+ */
91
+ export interface CompareExperimentsConfig {
92
+ /** ID of experiment A (baseline) */
93
+ experimentIdA: string;
94
+ /** ID of experiment B (candidate) */
95
+ experimentIdB: string;
96
+ /**
97
+ * Per-scorer thresholds for regression detection.
98
+ * Key is scorer ID, value is threshold config.
99
+ * Default when not specified: { value: 0, direction: 'higher-is-better' }
100
+ */
101
+ thresholds?: Record<string, ScorerThreshold>;
102
+ }
103
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/datasets/experiment/analytics/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,2CAA2C;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,CAAC;IACnB,2CAA2C;IAC3C,UAAU,EAAE,MAAM,CAAC;CACpB;AAMD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,MAAM,EAAE,WAAW,CAAC;IACpB,0CAA0C;IAC1C,MAAM,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,KAAK,EAAE,MAAM,CAAC;IACd,4DAA4D;IAC5D,SAAS,EAAE,OAAO,CAAC;IACnB,8CAA8C;IAC9C,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,iBAAiB,EAAE,OAAO,CAAC;IAC3B,+DAA+D;IAC/D,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC;IACvC,+DAA+D;IAC/D,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC;CACxC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,4BAA4B;IAC5B,WAAW,EAAE;QACX,EAAE,EAAE,MAAM,CAAC;QACX,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;KAC/B,CAAC;IACF,4BAA4B;IAC5B,WAAW,EAAE;QACX,EAAE,EAAE,MAAM,CAAC;QACX,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;KAC/B,CAAC;IACF,0DAA0D;IAC1D,eAAe,EAAE,OAAO,CAAC;IACzB,wDAAwD;IACxD,aAAa,EAAE,OAAO,CAAC;IACvB,oCAAoC;IACpC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAC1C,kCAAkC;IAClC,KAAK,EAAE,cAAc,EAAE,CAAC;IACxB,4DAA4D;IAC5D,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAMD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,wDAAwD;IACxD,KAAK,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,SAAS,CAAC,EAAE,kBAAkB,GAAG,iBAAiB,CAAC;CACpD;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,oCAAoC;IACpC,aAAa,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;CAC9C"}
@@ -0,0 +1,40 @@
1
+ import type { Agent } from '../../agent/index.js';
2
+ import type { MastraScorer } from '../../evals/base.js';
3
+ import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '../../evals/types.js';
4
+ import type { TargetType } from '../../storage/types.js';
5
+ import type { Workflow } from '../../workflows/index.js';
6
+ /**
7
+ * Target types supported for dataset execution.
8
+ * Agent and Workflow are Phase 2; scorer and processor are Phase 4.
9
+ */
10
+ export type Target = Agent | Workflow | MastraScorer<any, any, any, any>;
11
+ /**
12
+ * Result from executing a target against a dataset item.
13
+ */
14
+ export interface ExecutionResult {
15
+ /** Output from the target (null if failed) */
16
+ output: unknown;
17
+ /** Structured error if execution failed */
18
+ error: {
19
+ message: string;
20
+ stack?: string;
21
+ code?: string;
22
+ } | null;
23
+ /** Trace ID from agent/workflow execution (null for scorers or errors) */
24
+ traceId: string | null;
25
+ /** Structured input for scorers (extracted from agent scoring data) */
26
+ scorerInput?: ScorerRunInputForAgent;
27
+ /** Structured output for scorers (extracted from agent scoring data) */
28
+ scorerOutput?: ScorerRunOutputForAgent;
29
+ }
30
+ /**
31
+ * Execute a dataset item against a target (agent, workflow, scorer, processor).
32
+ * Phase 2: agent/workflow. Phase 4: scorer. Processor deferred.
33
+ */
34
+ export declare function executeTarget(target: Target, targetType: TargetType, item: {
35
+ input: unknown;
36
+ groundTruth?: unknown;
37
+ }, options?: {
38
+ signal?: AbortSignal;
39
+ }): Promise<ExecutionResult>;
40
+ //# sourceMappingURL=executor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../../src/datasets/experiment/executor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAEhD;;;GAGG;AACH,MAAM,MAAM,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,8CAA8C;IAC9C,MAAM,EAAE,OAAO,CAAC;IAChB,2CAA2C;IAC3C,KAAK,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACjE,0EAA0E;IAC1E,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,uEAAuE;IACvE,WAAW,CAAC,EAAE,sBAAsB,CAAC;IACrC,wEAAwE;IACxE,YAAY,CAAC,EAAE,uBAAuB,CAAC;CACxC;AA2CD;;;GAGG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,UAAU,EACtB,IAAI,EAAE;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,WAAW,CAAC,EAAE,OAAO,CAAA;CAAE,EAC/C,OAAO,CAAC,EAAE;IAAE,MAAM,CAAC,EAAE,WAAW,CAAA;CAAE,GACjC,OAAO,CAAC,eAAe,CAAC,CA2C1B"}
@@ -0,0 +1,31 @@
1
+ import type { Mastra } from '../../mastra/index.js';
2
+ import type { ExperimentConfig, ExperimentSummary } from './types.js';
3
+ export type { DataItem, ExperimentConfig, ExperimentSummary, ItemWithScores, ItemResult, ScorerResult, StartExperimentConfig, } from './types.js';
4
+ export { executeTarget, type Target, type ExecutionResult } from './executor.js';
5
+ export { resolveScorers, runScorersForItem } from './scorer.js';
6
+ export * from './analytics/index.js';
7
+ /**
8
+ * Run a dataset experiment against a target with optional scoring.
9
+ *
10
+ * Executes all items in the dataset concurrently (up to maxConcurrency) against
11
+ * the specified target (agent or workflow). Optionally applies scorers to each
12
+ * result and persists both results and scores to storage.
13
+ *
14
+ * @param mastra - Mastra instance for storage and target resolution
15
+ * @param config - Experiment configuration
16
+ * @returns ExperimentSummary with results and scores
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * const summary = await runExperiment(mastra, {
21
+ * datasetId: 'my-dataset',
22
+ * targetType: 'agent',
23
+ * targetId: 'my-agent',
24
+ * scorers: [accuracyScorer, latencyScorer],
25
+ * maxConcurrency: 10,
26
+ * });
27
+ * console.log(`${summary.succeededCount}/${summary.totalItems} succeeded`);
28
+ * ```
29
+ */
30
+ export declare function runExperiment(mastra: Mastra, config: ExperimentConfig): Promise<ExperimentSummary>;
31
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/datasets/experiment/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAa3C,OAAO,KAAK,EAAE,gBAAgB,EAAE,iBAAiB,EAA8B,MAAM,SAAS,CAAC;AAG/F,YAAY,EACV,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,YAAY,EACZ,qBAAqB,GACtB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,aAAa,EAAE,KAAK,MAAM,EAAE,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAG7D,cAAc,aAAa,CAAC;AAE5B;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAsB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CA0UxG"}
@@ -0,0 +1,21 @@
1
+ import type { MastraScorer } from '../../evals/base.js';
2
+ import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '../../evals/types.js';
3
+ import type { Mastra } from '../../mastra/index.js';
4
+ import type { MastraCompositeStore } from '../../storage/base.js';
5
+ import type { TargetType } from '../../storage/types.js';
6
+ import type { ScorerResult } from './types.js';
7
+ /**
8
+ * Resolve scorers from mixed array of instances and string IDs.
9
+ * String IDs are looked up from Mastra's scorer registry.
10
+ */
11
+ export declare function resolveScorers(mastra: Mastra, scorers?: (MastraScorer<any, any, any, any> | string)[]): MastraScorer<any, any, any, any>[];
12
+ /**
13
+ * Run all scorers for a single item result.
14
+ * Errors are isolated per scorer - one failing scorer doesn't affect others.
15
+ */
16
+ export declare function runScorersForItem(scorers: MastraScorer<any, any, any, any>[], item: {
17
+ input: unknown;
18
+ groundTruth?: unknown;
19
+ metadata?: Record<string, unknown>;
20
+ }, output: unknown, storage: MastraCompositeStore | null, runId: string, targetType: TargetType, targetId: string, itemId: string, scorerInput?: ScorerRunInputForAgent, scorerOutput?: ScorerRunOutputForAgent): Promise<ScorerResult[]>;
21
+ //# sourceMappingURL=scorer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scorer.d.ts","sourceRoot":"","sources":["../../../src/datasets/experiment/scorer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAE5C;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,EAAE,GACtD,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAgBpC;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,EAC3C,IAAI,EAAE;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,WAAW,CAAC,EAAE,OAAO,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,EACnF,MAAM,EAAE,OAAO,EACf,OAAO,EAAE,oBAAoB,GAAG,IAAI,EACpC,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,WAAW,CAAC,EAAE,sBAAsB,EACpC,YAAY,CAAC,EAAE,uBAAuB,GACrC,OAAO,CAAC,YAAY,EAAE,CAAC,CA8CzB"}
@@ -0,0 +1,140 @@
1
+ import type { MastraScorer } from '../../evals/base.js';
2
+ import type { Mastra } from '../../mastra/index.js';
3
+ import type { TargetType, ExperimentStatus } from '../../storage/types.js';
4
+ /**
5
+ * A single data item for inline experiment data.
6
+ * Internal — not publicly exported from @mastra/core.
7
+ */
8
+ export interface DataItem<I = unknown, E = unknown> {
9
+ /** Unique ID (auto-generated if omitted) */
10
+ id?: string;
11
+ /** Input data passed to task */
12
+ input: I;
13
+ /** Ground truth for scoring */
14
+ groundTruth?: E;
15
+ /** Additional metadata */
16
+ metadata?: Record<string, unknown>;
17
+ }
18
+ /**
19
+ * Internal configuration for running a dataset experiment.
20
+ * Not publicly exported — users interact via Dataset.startExperiment().
21
+ * All new fields are optional — existing internal callers are unaffected.
22
+ */
23
+ export interface ExperimentConfig<I = unknown, O = unknown, E = unknown> {
24
+ /** ID of dataset in storage (injected by Dataset) */
25
+ datasetId?: string;
26
+ /** Override data source — inline array or async factory (bypasses storage load) */
27
+ data?: DataItem<I, E>[] | (() => Promise<DataItem<I, E>[]>);
28
+ /** Registry-based target type (existing) */
29
+ targetType?: TargetType;
30
+ /** Registry-based target ID (existing) */
31
+ targetId?: string;
32
+ /** Inline task function (sync or async) */
33
+ task?: (args: {
34
+ input: I;
35
+ mastra: Mastra;
36
+ groundTruth?: E;
37
+ metadata?: Record<string, unknown>;
38
+ signal?: AbortSignal;
39
+ }) => O | Promise<O>;
40
+ /** Scorers — MastraScorer instances or string IDs */
41
+ scorers?: (MastraScorer<any, any, any, any> | string)[];
42
+ /** Pin to specific dataset version (default: latest). Only applies when datasetId is used. */
43
+ version?: number;
44
+ /** Maximum concurrent executions (default: 5) */
45
+ maxConcurrency?: number;
46
+ /** AbortSignal for cancellation */
47
+ signal?: AbortSignal;
48
+ /** Per-item execution timeout in milliseconds */
49
+ itemTimeout?: number;
50
+ /** Maximum retries per item on failure (default: 0 = no retries). Abort errors are never retried. */
51
+ maxRetries?: number;
52
+ /** Pre-created experiment ID (for async trigger — skips experiment creation). */
53
+ experimentId?: string;
54
+ /** Experiment name (used for display / grouping) */
55
+ name?: string;
56
+ /** Experiment description */
57
+ description?: string;
58
+ /** Arbitrary metadata for the experiment */
59
+ metadata?: Record<string, unknown>;
60
+ }
61
+ /**
62
+ * Configuration for starting an experiment on a dataset.
63
+ * The dataset is always the data source — no datasetId/data needed.
64
+ */
65
+ export type StartExperimentConfig<I = unknown, O = unknown, E = unknown> = Omit<ExperimentConfig<I, O, E>, 'datasetId' | 'data' | 'experimentId'>;
66
+ /**
67
+ * Result of executing a single dataset item.
68
+ */
69
+ export interface ItemResult {
70
+ /** ID of the dataset item */
71
+ itemId: string;
72
+ /** Dataset version of the item when executed */
73
+ itemVersion: number;
74
+ /** Input data that was passed to the target */
75
+ input: unknown;
76
+ /** Output from the target (null if failed) */
77
+ output: unknown | null;
78
+ /** Expected output from the dataset item */
79
+ groundTruth: unknown | null;
80
+ /** Structured error if execution failed */
81
+ error: {
82
+ message: string;
83
+ stack?: string;
84
+ code?: string;
85
+ } | null;
86
+ /** When execution started */
87
+ startedAt: Date;
88
+ /** When execution completed */
89
+ completedAt: Date;
90
+ /** Number of retry attempts */
91
+ retryCount: number;
92
+ }
93
+ /**
94
+ * Result from a single scorer for an item.
95
+ */
96
+ export interface ScorerResult {
97
+ /** ID of the scorer */
98
+ scorerId: string;
99
+ /** Display name of the scorer */
100
+ scorerName: string;
101
+ /** Computed score (null if scorer failed) */
102
+ score: number | null;
103
+ /** Reason/explanation for the score */
104
+ reason: string | null;
105
+ /** Error message if scorer failed */
106
+ error: string | null;
107
+ }
108
+ /**
109
+ * Item result with all scorer results attached.
110
+ */
111
+ export interface ItemWithScores extends ItemResult {
112
+ /** Results from all scorers for this item */
113
+ scores: ScorerResult[];
114
+ }
115
+ /**
116
+ * Summary of an entire dataset experiment.
117
+ */
118
+ export interface ExperimentSummary {
119
+ /** Unique ID of this experiment */
120
+ experimentId: string;
121
+ /** Final status of the experiment */
122
+ status: ExperimentStatus;
123
+ /** Total number of items in the dataset */
124
+ totalItems: number;
125
+ /** Number of items that succeeded */
126
+ succeededCount: number;
127
+ /** Number of items that failed */
128
+ failedCount: number;
129
+ /** Number of items skipped (e.g. due to abort) */
130
+ skippedCount: number;
131
+ /** True if run completed but some items failed */
132
+ completedWithErrors: boolean;
133
+ /** When the experiment started */
134
+ startedAt: Date;
135
+ /** When the experiment completed */
136
+ completedAt: Date;
137
+ /** All item results with their scores */
138
+ results: ItemWithScores[];
139
+ }
140
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/datasets/experiment/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,KAAK,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAExE;;;GAGG;AACH,MAAM,WAAW,QAAQ,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO;IAChD,4CAA4C;IAC5C,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,gCAAgC;IAChC,KAAK,EAAE,CAAC,CAAC;IACT,+BAA+B;IAC/B,WAAW,CAAC,EAAE,CAAC,CAAC;IAChB,0BAA0B;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;;;GAIG;AACH,MAAM,WAAW,gBAAgB,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO;IAGrE,qDAAqD;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mFAAmF;IACnF,IAAI,CAAC,EAAE,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAI5D,4CAA4C;IAC5C,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2CAA2C;IAC3C,IAAI,CAAC,EAAE,CAAC,IAAI,EAAE;QACZ,KAAK,EAAE,CAAC,CAAC;QACT,MAAM,EAAE,MAAM,CAAC;QACf,WAAW,CAAC,EAAE,CAAC,CAAC;QAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,EAAE,WAAW,CAAC;KACtB,KAAK,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAIrB,qDAAqD;IACrD,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC;IAIxD,8FAA8F;IAC9F,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mCAAmC;IACnC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,iDAAiD;IACjD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qGAAqG;IACrG,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iFAAiF;IACjF,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6BAA6B;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,MAAM,qBAAqB,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,GAAG,OAAO,IAAI,IAAI,CAC7E,gBAAgB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,EACzB,WAAW,GAAG,MAAM,GAAG,cAAc,CACtC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,6BAA6B;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,KAAK,EAAE,OAAO,CAAC;IACf,8CAA8C;IAC9C,MAAM,EAAE,OAAO,GAAG,IAAI,CAAC;IACvB,4CAA4C;IAC5C,WAAW,EAAE,OAAO,GAAG,IAAI,CAAC;IAC5B,2CAA2C;IAC3C,KAAK,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACjE,6BAA6B;IAC7B,SAAS,EAAE,IAAI,CAAC;IAChB,+BAA+B;IAC/B,WAAW,EAAE,IAAI,CAAC;IAClB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,uBAAuB;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,6CAA6C;IAC7C,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,uCAAuC;IACvC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,qCAAqC;IACrC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,cAAe,SAAQ,UAAU;IAChD,6CAA6C;IAC7C,MAAM,EAAE,YAAY,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,mCAAmC;IACnC,YAAY,EAAE,MAAM,CAAC;IACrB,qCAAqC;IACrC,MAAM,EAAE,gBAAgB,CAAC;IACzB,2CAA2C;IAC3C,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,cAAc,EAAE,MAAM,CAAC;IACvB,kCAAkC;IAClC,WAAW,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,YAAY,EAAE,MAAM,CAAC;IACrB,kDAAkD;IAClD,mBAAmB,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,SAAS,EAAE,IAAI,CAAC;IAChB,oCAAoC;IACpC,WAAW,EAAE,IAAI,CAAC;IAClB,yCAAyC;IACzC,OAAO,EAAE,cAAc,EAAE,CAAC;CAC3B"}