@mastra/core 1.18.0-alpha.0 → 1.18.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/agent/index.cjs +8 -8
- package/dist/agent/index.js +1 -1
- package/dist/{chunk-3ECYXW43.cjs → chunk-2U4M4SGY.cjs} +16 -16
- package/dist/{chunk-3ECYXW43.cjs.map → chunk-2U4M4SGY.cjs.map} +1 -1
- package/dist/{chunk-JEEYINIB.js → chunk-3NMSFQPY.js} +4 -4
- package/dist/{chunk-JEEYINIB.js.map → chunk-3NMSFQPY.js.map} +1 -1
- package/dist/{chunk-7CDFI4GW.cjs → chunk-3O73Q6M2.cjs} +76 -76
- package/dist/{chunk-7CDFI4GW.cjs.map → chunk-3O73Q6M2.cjs.map} +1 -1
- package/dist/{chunk-ZOPIN73X.js → chunk-3U4ZNZIF.js} +4 -4
- package/dist/{chunk-ZOPIN73X.js.map → chunk-3U4ZNZIF.js.map} +1 -1
- package/dist/{chunk-QVQ3PGG2.cjs → chunk-7IDACSBM.cjs} +206 -2
- package/dist/chunk-7IDACSBM.cjs.map +1 -0
- package/dist/{chunk-YFYB3Q5L.js → chunk-A2Z26HFR.js} +9 -9
- package/dist/{chunk-YFYB3Q5L.js.map → chunk-A2Z26HFR.js.map} +1 -1
- package/dist/{chunk-AIYWGXK7.js → chunk-BYJDWJCX.js} +3 -3
- package/dist/{chunk-AIYWGXK7.js.map → chunk-BYJDWJCX.js.map} +1 -1
- package/dist/{chunk-F7YLMJSQ.js → chunk-D4DCPE5Y.js} +4 -4
- package/dist/{chunk-F7YLMJSQ.js.map → chunk-D4DCPE5Y.js.map} +1 -1
- package/dist/{chunk-CYQ2TNSK.js → chunk-EYM6DWKD.js} +18 -3
- package/dist/{chunk-CYQ2TNSK.js.map → chunk-EYM6DWKD.js.map} +1 -1
- package/dist/{chunk-3LH5QX5F.cjs → chunk-G62XEVSI.cjs} +3 -3
- package/dist/{chunk-3LH5QX5F.cjs.map → chunk-G62XEVSI.cjs.map} +1 -1
- package/dist/{chunk-BHVF4Z5D.cjs → chunk-GFBH7Q4W.cjs} +13 -13
- package/dist/{chunk-BHVF4Z5D.cjs.map → chunk-GFBH7Q4W.cjs.map} +1 -1
- package/dist/{chunk-3DDJ33AU.js → chunk-HEHK7LXS.js} +4 -4
- package/dist/{chunk-3DDJ33AU.js.map → chunk-HEHK7LXS.js.map} +1 -1
- package/dist/{chunk-TG4U3K3I.cjs → chunk-JFPVUKPB.cjs} +50 -50
- package/dist/{chunk-TG4U3K3I.cjs.map → chunk-JFPVUKPB.cjs.map} +1 -1
- package/dist/{chunk-AIEKKCJ7.js → chunk-K22Q6XPG.js} +3 -3
- package/dist/{chunk-AIEKKCJ7.js.map → chunk-K22Q6XPG.js.map} +1 -1
- package/dist/{chunk-V54LY5HR.cjs → chunk-KL2IRBNK.cjs} +6107 -5697
- package/dist/chunk-KL2IRBNK.cjs.map +1 -0
- package/dist/{chunk-DV7FIKOO.cjs → chunk-MSDZQ4DW.cjs} +10 -10
- package/dist/{chunk-DV7FIKOO.cjs.map → chunk-MSDZQ4DW.cjs.map} +1 -1
- package/dist/{chunk-3H34KWBU.cjs → chunk-PK3X5KIF.cjs} +24 -9
- package/dist/{chunk-3H34KWBU.cjs.map → chunk-PK3X5KIF.cjs.map} +1 -1
- package/dist/{chunk-SLG7AUBK.cjs → chunk-PQWEJELD.cjs} +6 -6
- package/dist/{chunk-SLG7AUBK.cjs.map → chunk-PQWEJELD.cjs.map} +1 -1
- package/dist/{chunk-P2C4NFMY.js → chunk-RB2QRGRV.js} +4 -4
- package/dist/{chunk-P2C4NFMY.js.map → chunk-RB2QRGRV.js.map} +1 -1
- package/dist/{chunk-BIB2LCIR.js → chunk-TKKBZUWD.js} +6106 -5696
- package/dist/chunk-TKKBZUWD.js.map +1 -0
- package/dist/{chunk-AEIPJW4F.cjs → chunk-UEZU2TA2.cjs} +185 -185
- package/dist/{chunk-AEIPJW4F.cjs.map → chunk-UEZU2TA2.cjs.map} +1 -1
- package/dist/{chunk-DL4A7URO.cjs → chunk-UPCNNWXW.cjs} +236 -20
- package/dist/chunk-UPCNNWXW.cjs.map +1 -0
- package/dist/{chunk-KMVGT2JI.js → chunk-UQ2HAEHL.js} +233 -17
- package/dist/chunk-UQ2HAEHL.js.map +1 -0
- package/dist/{chunk-AGEYVARR.js → chunk-VMKNS3YO.js} +204 -3
- package/dist/chunk-VMKNS3YO.js.map +1 -0
- package/dist/{chunk-YXNO6AHV.js → chunk-WYFCQAHY.js} +7 -7
- package/dist/{chunk-YXNO6AHV.js.map → chunk-WYFCQAHY.js.map} +1 -1
- package/dist/{chunk-A5JHKGFY.cjs → chunk-Z3VUDCXI.cjs} +7 -7
- package/dist/{chunk-A5JHKGFY.cjs.map → chunk-Z3VUDCXI.cjs.map} +1 -1
- package/dist/datasets/index.cjs +17 -17
- package/dist/datasets/index.js +2 -2
- package/dist/docs/SKILL.md +2 -1
- package/dist/docs/assets/SOURCE_MAP.json +349 -334
- package/dist/docs/references/reference-evals-run-evals.md +78 -3
- package/dist/docs/references/reference-evals-scorer-utils.md +184 -0
- package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
- package/dist/docs/references/reference.md +1 -0
- package/dist/evals/base.d.ts +8 -2
- package/dist/evals/base.d.ts.map +1 -1
- package/dist/evals/index.cjs +32 -20
- package/dist/evals/index.js +3 -3
- package/dist/evals/run/index.d.ts +28 -1
- package/dist/evals/run/index.d.ts.map +1 -1
- package/dist/evals/run/scorerAccumulator.d.ts +4 -1
- package/dist/evals/run/scorerAccumulator.d.ts.map +1 -1
- package/dist/evals/scoreTraces/index.cjs +5 -5
- package/dist/evals/scoreTraces/index.js +2 -2
- package/dist/evals/types.d.ts +305 -10
- package/dist/evals/types.d.ts.map +1 -1
- package/dist/harness/index.cjs +7 -7
- package/dist/harness/index.js +5 -5
- package/dist/index.cjs +2 -2
- package/dist/index.js +1 -1
- package/dist/llm/index.cjs +16 -16
- package/dist/llm/index.js +5 -5
- package/dist/llm/model/provider-types.generated.d.ts +2444 -2243
- package/dist/loop/index.cjs +14 -14
- package/dist/loop/index.js +1 -1
- package/dist/loop/network/index.d.ts +3 -3
- package/dist/loop/types.d.ts +1 -1
- package/dist/mastra/index.cjs +2 -2
- package/dist/mastra/index.js +1 -1
- package/dist/memory/index.cjs +14 -14
- package/dist/memory/index.js +1 -1
- package/dist/models-dev-3U6ICCNW.js +3 -0
- package/dist/{models-dev-JEMMVJFN.js.map → models-dev-3U6ICCNW.js.map} +1 -1
- package/dist/models-dev-BJHT4GXA.cjs +12 -0
- package/dist/{models-dev-MU7QGI4O.cjs.map → models-dev-BJHT4GXA.cjs.map} +1 -1
- package/dist/netlify-DE3IXMT2.js +3 -0
- package/dist/{netlify-BROEKRZF.js.map → netlify-DE3IXMT2.js.map} +1 -1
- package/dist/netlify-UARJYMCW.cjs +12 -0
- package/dist/{netlify-GOEIG6IC.cjs.map → netlify-UARJYMCW.cjs.map} +1 -1
- package/dist/processor-provider/index.cjs +10 -10
- package/dist/processor-provider/index.js +1 -1
- package/dist/processors/index.cjs +44 -44
- package/dist/processors/index.js +1 -1
- package/dist/provider-registry-65CPGTUU.cjs +40 -0
- package/dist/{provider-registry-CKFMYEBK.cjs.map → provider-registry-65CPGTUU.cjs.map} +1 -1
- package/dist/provider-registry-EQAT7FXK.js +3 -0
- package/dist/{provider-registry-P7EMCVJI.js.map → provider-registry-EQAT7FXK.js.map} +1 -1
- package/dist/provider-registry.json +6102 -5692
- package/dist/relevance/index.cjs +3 -3
- package/dist/relevance/index.js +1 -1
- package/dist/storage/constants.cjs +56 -56
- package/dist/storage/constants.d.ts +1 -1
- package/dist/storage/constants.js +1 -1
- package/dist/storage/domains/experiments/base.d.ts +2 -1
- package/dist/storage/domains/experiments/base.d.ts.map +1 -1
- package/dist/storage/domains/experiments/inmemory.d.ts +2 -1
- package/dist/storage/domains/experiments/inmemory.d.ts.map +1 -1
- package/dist/storage/domains/observability/tracing.d.ts +45 -45
- package/dist/storage/index.cjs +160 -160
- package/dist/storage/index.js +2 -2
- package/dist/storage/types.d.ts +12 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/stream/MastraAgentNetworkStream.d.ts +1 -1
- package/dist/stream/MastraWorkflowStream.d.ts +1 -1
- package/dist/stream/index.cjs +8 -8
- package/dist/stream/index.js +1 -1
- package/dist/tool-loop-agent/index.cjs +4 -4
- package/dist/tool-loop-agent/index.js +1 -1
- package/dist/vector/index.cjs +7 -7
- package/dist/vector/index.js +1 -1
- package/dist/workflows/evented/index.cjs +10 -10
- package/dist/workflows/evented/index.js +1 -1
- package/dist/workflows/index.cjs +24 -24
- package/dist/workflows/index.js +1 -1
- package/package.json +5 -5
- package/src/llm/model/provider-types.generated.d.ts +2444 -2243
- package/dist/chunk-AGEYVARR.js.map +0 -1
- package/dist/chunk-BIB2LCIR.js.map +0 -1
- package/dist/chunk-DL4A7URO.cjs.map +0 -1
- package/dist/chunk-KMVGT2JI.js.map +0 -1
- package/dist/chunk-QVQ3PGG2.cjs.map +0 -1
- package/dist/chunk-V54LY5HR.cjs.map +0 -1
- package/dist/models-dev-JEMMVJFN.js +0 -3
- package/dist/models-dev-MU7QGI4O.cjs +0 -12
- package/dist/netlify-BROEKRZF.js +0 -3
- package/dist/netlify-GOEIG6IC.cjs +0 -12
- package/dist/provider-registry-CKFMYEBK.cjs +0 -40
- package/dist/provider-registry-P7EMCVJI.js +0 -3
package/dist/evals/base.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/evals/base.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,QAAQ,CAAC;AAK3B,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAEnE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAExC,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAG9C,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/evals/base.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,QAAQ,CAAC;AAK3B,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAEnE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAExC,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAG9C,OAAO,KAAK,EACV,qBAAqB,EACrB,sBAAsB,EACtB,uBAAuB,EACvB,UAAU,EACV,qBAAqB,EACtB,MAAM,SAAS,CAAC;AAEjB,UAAU,oBAAoB;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,GAAG,CAAC;IAChB,cAAc,EAAE,OAAO,CAAC;CACzB;AAGD,KAAK,mBAAmB,GAAG;IACzB,KAAK,EAAE;QACL,KAAK,EAAE,sBAAsB,CAAC;QAC9B,MAAM,EAAE,uBAAuB,CAAC;KACjC,CAAC;IACF,UAAU,EAAE;QACV,KAAK,EAAE,sBAAsB,CAAC;QAC9B,MAAM,EAAE,UAAU,CAAC;KACpB,CAAC;CACH,CAAC;AAKF,UAAU,YAAY,CAAC,GAAG,SAAS,MAAM,EAAE,MAAM,GAAG,GAAG,EAAE,UAAU,GAAG,GAAG;IACvE,EAAE,EAAE,GAAG,CAAC;IACR,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE;QACN,KAAK,EAAE,iBAAiB,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IAEF,IAAI,CAAC,EACD,MAAM,mBAAmB,GACzB;QACE,KAAK,EAAE,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC3B,MAAM,EAAE,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;KACjC,CAAC;CACP;AAGD,UAAU,SAAS,CAAC,MAAM,GAAG,GAAG,EAAE,OAAO,GAAG,GAAG,CAAE,SAAQ,OAAO,CAAC,oBAAoB,CAAC;IACpF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,WAAW,CAAC,EAAE,GAAG,CAAC;IAClB,yGAAyG;IACzG,kBAAkB,CAAC,EAAE,qBAAqB,CAAC;IAC3C,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CACtC;AAGD,UAAU,YAAY,CACpB,OAAO,EACP,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACxC,SAAS,SAAS,MAAM,GAAG,MAAM,EACjC,MAAM,GAAG,GAAG,EACZ,UAAU,GAAG,GAAG;IAEhB,WAAW,EAAE,MAAM,CAAC;IACpB;;;;;;OAMG;IACH,YAAY,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IACpC,KAAK,CAAC,EAAE;QACN,KAAK,EAAE,iBAAiB,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IAGF,YAAY,EAAE,CAAC,OAAO,EAAE,mBAAmB,CAAC,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACvH;AAGD,KAAK,aAAa,CAAC,CAAC,SAAS,MAAM,IAAI,GAAG,CAAC,YAAY,CAAC;AAGxD,KAAK,OAAO,CAAC,CAAC,IAAI,CAAC,SAAS,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;AAGrD,KAAK,WAAW,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IAAI;IAC/E,GAAG,EAAE,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACnC,OAAO,EAAE,YAAY,CAAC;CACvB,CAAC;AAGF,KAAK,kBAAkB,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC,SAAS,MAAM,EAAE,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAG9G,KAAK,qBAAqB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IAAI,WAAW,CACpG,YAAY,EACZ,MAAM,EACN,UAAU,CACX,GAAG;IACF,KAAK,EAAE,YAAY,SAAS,MAAM,CAAC,yBAAyB,EAAE,MAAM,MAAM,CAAC,GAAG,MAAM,GAAG,KAAK,CAAC;CAC9F,CAAC;AAEF,KAAK,eAAe,CAAC,mBAAmB,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IAAI,OAAO,CACjG,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,GAAG;IAC9B,KAAK,EAAE,mBAAmB,SAAS,MAAM,CAAC,yBAAyB,EAAE,MAAM,MAAM,CAAC,GAAG,MAAM,GAAG,KAAK,CAAC;IACpG,MAAM,CAAC,EAAE,mBAAmB,SAAS,MAAM,CAAC,0BAA0B,EAAE,MAAM,OAAO,CAAC,GAAG,OAAO,GAAG,SAAS,CAAC;IAG7G,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAG9B,oBAAoB,CAAC,EAAE,mBAAmB,SAAS,MAAM,CAAC,sBAAsB,EAAE,MAAM,WAAW,CAAC,GAChG,WAAW,GACX,SAAS,CAAC;IACd,iBAAiB,CAAC,EAAE,mBAAmB,SAAS,MAAM,CAAC,mBAAmB,EAAE,MAAM,QAAQ,CAAC,GAAG,QAAQ,GAAG,SAAS,CAAC;CACpH,GAAG;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CACtB,CAAC;AAGF,KAAK,mBAAmB,CACtB,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACxC,SAAS,SAAS,MAAM,EACxB,MAAM,EACN,UAAU,IACR,SAAS,SAAS,gBAAgB,GAClC,qBAAqB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,GACvD,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;AAGlD,KAAK,YAAY,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,IACnF,CAAC,CAAC,OAAO,EAAE,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,OAAO,CAAC,GACrE,CAAC,CAAC,OAAO,EAAE,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAEnF,KAAK,0BAA0B,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IACxF,CAAC,CAAC,OAAO,EAAE,qBAAqB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,GAAG,CAAC,GAC3E,CAAC,CAAC,OAAO,EAAE,qBAAqB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;AAEzF,KAAK,yBAAyB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IACvF,CAAC,CAAC,OAAO,EAAE,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,MAAM,CAAC,GACpE,CAAC,CAAC,OAAO,EAAE,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;AAGlF,UAAU,yBAAyB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU;IAC9F,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE;QACN,KAAK,EAAE,iBAAiB,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IAEF,YAAY,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACpG;AAGD,UAAU,0BAA0B,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU;IAC/F,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE;QACN,KAAK,EAAE,iBAAiB,CAAC;QACzB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IAEF,YAAY,EAAE,CAAC,OAAO,EAAE,qBAAqB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC9G;AAGD,KAAK,iBAAiB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,IAC5F,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,GAC3D,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,GAAG;IAC3E,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,CAAC;CACzC,CAAC,CAAC;AAEP,KAAK,cAAc,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,IACzF,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,CAAC,GAC3D,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,GAAG;IACxE,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,CAAC;CACzC,CAAC,CAAC;AAGP,KAAK,oBAAoB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IAClF,yBAAyB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,GAC3D,yBAAyB,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;AAGhE,KAAK,qBAAqB,CAAC,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,UAAU,IACnF,0BAA0B,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,GAC5D,0BAA0B,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;AAEjE,cAAM,YAAY,CAChB,GAAG,SAAS,MAAM,GAAG,MAAM,EAC3B,MAAM,GAAG,GAAG,EACZ,UAAU,GAAG,GAAG,EAChB,mBAAmB,SAAS,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,EAAE;;IAY3C,MAAM,EAAE,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC;IACpD,OAAO,CAAC,KAAK;IACb,OAAO,CAAC,qBAAqB;IAT/B;;;OAGG;IACI,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC;gBAGzB,MAAM,EAAE,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,EAC5C,KAAK,GAAE,KAAK,CAAC,oBAAoB,CAAM,EACvC,qBAAqB,GAAE,GAAG,CAChC,MAAM,EACJ,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,GAC/C,0BAA0B,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,GACnD,yBAAyB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,CACzC,EACb,MAAM,CAAC,EAAE,MAAM;IAajB;;;;OAIG;IACH,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAItC;;;OAGG;IACH,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS;IAIlD;;;OAGG;IACH,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI;IAIxD,IAAI,IAAI;;;kBAEP;IAED,IAAI,EAAE,IAAI,GAAG,CAEZ;IAED,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,WAAW,IAAI,MAAM,CAExB;IAED,IAAI,KAAK;eA9OA,iBAAiB;sBACV,MAAM;kBA+OrB;IAED,UAAU,CAAC,iBAAiB,EAC1B,OAAO,EAAE,iBAAiB,CAAC,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,EAAE,UAAU,CAAC,GACrF,YAAY,CACb,GAAG,EACH,MAAM,EACN,UAAU,EACV,kBAAkB,CAAC,mBAAmB,EAAE,YAAY,EAAE,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAClF;IA6BD,OAAO,CAAC,cAAc,EACpB,OAAO,EAAE,cAAc,CAAC,mBAAmB,EAAE,cAAc,EAAE,MAAM,EAAE,UAAU,CAAC,GAC/E,YAAY,CACb,GAAG,EACH,MAAM,EACN,UAAU,EACV,kBAAkB,CAAC,mBAAmB,EAAE,SAAS,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC,CAC5E;IAuBD,aAAa,CAAC,YAAY,SAAS,MAAM,GAAG,MAAM,EAChD,OAAO,EAAE,oBAAoB,CAAC,mBAAmB,EAAE,MAAM,EAAE,UAAU,CAAC,GACrE,YAAY,CACb,GAAG,EACH,MAAM,EACN,UAAU,EACV,kBAAkB,CAAC,mBAAmB,EAAE,eAAe,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,CAChF;IAuBD,cAAc,CAAC,aAAa,GAAG,MAAM,EACnC,OAAO,EAAE,qBAAqB,CAAC,mBAAmB,EAAE,MAAM,EAAE,UAAU,CAAC,GACtE,YAAY,CACb,GAAG,EACH,MAAM,EACN,UAAU,EACV,kBAAkB,CAAC,mBAAmB,EAAE,gBAAgB,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAClF;IAuBD,OAAO,KAAK,gBAAgB,GAE3B;IAEK,GAAG,CAAC,KAAK,EAAE,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,GAAG,eAAe,CAAC,mBAAmB,EAAE,MAAM,EAAE,UAAU,CAAC;IAoDzG,OAAO,CAAC,cAAc;IAkBtB,QAAQ,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAQtF,OAAO,CAAC,gBAAgB;IA+ExB,OAAO,CAAC,mBAAmB;YAab,mBAAmB;YAInB,iBAAiB;IA8F/B,OAAO,CAAC,uBAAuB;CAuBhC;AAGD,wBAAgB,YAAY,CAAC,GAAG,SAAS,MAAM,EAAE,KAAK,SAAS,MAAM,mBAAmB,EACtF,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,MAAM,CAAC,GAAG;IAClD,IAAI,EAAE,KAAK,CAAC;CACb,GACA,YAAY,CAAC,GAAG,EAAE,mBAAmB,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,EAAE,mBAAmB,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;AAGpG,wBAAgB,YAAY,CAAC,GAAG,SAAS,MAAM,EAAE,YAAY,SAAS,CAAC,CAAC,UAAU,EAAE,aAAa,SAAS,CAAC,CAAC,UAAU,EACpH,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG;IACvF,IAAI,EAAE;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,MAAM,EAAE,aAAa,CAAA;KAAE,CAAC;CACtD,GACA,YAAY,CAAC,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC,CAAC;AAGxE,wBAAgB,YAAY,CAAC,MAAM,GAAG,GAAG,EAAE,UAAU,GAAG,GAAG,EAAE,GAAG,SAAS,MAAM,GAAG,MAAM,EACtF,MAAM,EAAE,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,GAC5C,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC,CAAC;AAa7C,MAAM,MAAM,iBAAiB,GAAG;IAC9B,MAAM,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACzC,QAAQ,CAAC,EAAE,qBAAqB,CAAC;CAClC,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;AAG9D,YAAY,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;AAEtD,OAAO,EAAE,YAAY,EAAE,CAAC"}
|
package/dist/evals/index.cjs
CHANGED
|
@@ -1,78 +1,90 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
4
|
-
var
|
|
5
|
-
var
|
|
3
|
+
var chunkUPCNNWXW_cjs = require('../chunk-UPCNNWXW.cjs');
|
|
4
|
+
var chunkGFBH7Q4W_cjs = require('../chunk-GFBH7Q4W.cjs');
|
|
5
|
+
var chunk7IDACSBM_cjs = require('../chunk-7IDACSBM.cjs');
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
Object.defineProperty(exports, "runEvals", {
|
|
10
10
|
enumerable: true,
|
|
11
|
-
get: function () { return
|
|
11
|
+
get: function () { return chunkUPCNNWXW_cjs.runEvals; }
|
|
12
12
|
});
|
|
13
13
|
Object.defineProperty(exports, "MastraScorer", {
|
|
14
14
|
enumerable: true,
|
|
15
|
-
get: function () { return
|
|
15
|
+
get: function () { return chunkGFBH7Q4W_cjs.MastraScorer; }
|
|
16
16
|
});
|
|
17
17
|
Object.defineProperty(exports, "createScorer", {
|
|
18
18
|
enumerable: true,
|
|
19
|
-
get: function () { return
|
|
19
|
+
get: function () { return chunkGFBH7Q4W_cjs.createScorer; }
|
|
20
|
+
});
|
|
21
|
+
Object.defineProperty(exports, "extractTrajectory", {
|
|
22
|
+
enumerable: true,
|
|
23
|
+
get: function () { return chunk7IDACSBM_cjs.extractTrajectory; }
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(exports, "extractTrajectoryFromTrace", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
get: function () { return chunk7IDACSBM_cjs.extractTrajectoryFromTrace; }
|
|
28
|
+
});
|
|
29
|
+
Object.defineProperty(exports, "extractWorkflowTrajectory", {
|
|
30
|
+
enumerable: true,
|
|
31
|
+
get: function () { return chunk7IDACSBM_cjs.extractWorkflowTrajectory; }
|
|
20
32
|
});
|
|
21
33
|
Object.defineProperty(exports, "listScoresResponseSchema", {
|
|
22
34
|
enumerable: true,
|
|
23
|
-
get: function () { return
|
|
35
|
+
get: function () { return chunk7IDACSBM_cjs.listScoresResponseSchema; }
|
|
24
36
|
});
|
|
25
37
|
Object.defineProperty(exports, "saveScorePayloadSchema", {
|
|
26
38
|
enumerable: true,
|
|
27
|
-
get: function () { return
|
|
39
|
+
get: function () { return chunk7IDACSBM_cjs.saveScorePayloadSchema; }
|
|
28
40
|
});
|
|
29
41
|
Object.defineProperty(exports, "scoreResultSchema", {
|
|
30
42
|
enumerable: true,
|
|
31
|
-
get: function () { return
|
|
43
|
+
get: function () { return chunk7IDACSBM_cjs.scoreResultSchema; }
|
|
32
44
|
});
|
|
33
45
|
Object.defineProperty(exports, "scoreRowDataSchema", {
|
|
34
46
|
enumerable: true,
|
|
35
|
-
get: function () { return
|
|
47
|
+
get: function () { return chunk7IDACSBM_cjs.scoreRowDataSchema; }
|
|
36
48
|
});
|
|
37
49
|
Object.defineProperty(exports, "scoringEntityTypeSchema", {
|
|
38
50
|
enumerable: true,
|
|
39
|
-
get: function () { return
|
|
51
|
+
get: function () { return chunk7IDACSBM_cjs.scoringEntityTypeSchema; }
|
|
40
52
|
});
|
|
41
53
|
Object.defineProperty(exports, "scoringExtractStepResultSchema", {
|
|
42
54
|
enumerable: true,
|
|
43
|
-
get: function () { return
|
|
55
|
+
get: function () { return chunk7IDACSBM_cjs.scoringExtractStepResultSchema; }
|
|
44
56
|
});
|
|
45
57
|
Object.defineProperty(exports, "scoringHookInputSchema", {
|
|
46
58
|
enumerable: true,
|
|
47
|
-
get: function () { return
|
|
59
|
+
get: function () { return chunk7IDACSBM_cjs.scoringHookInputSchema; }
|
|
48
60
|
});
|
|
49
61
|
Object.defineProperty(exports, "scoringInputSchema", {
|
|
50
62
|
enumerable: true,
|
|
51
|
-
get: function () { return
|
|
63
|
+
get: function () { return chunk7IDACSBM_cjs.scoringInputSchema; }
|
|
52
64
|
});
|
|
53
65
|
Object.defineProperty(exports, "scoringInputWithExtractStepResultAndAnalyzeStepResultSchema", {
|
|
54
66
|
enumerable: true,
|
|
55
|
-
get: function () { return
|
|
67
|
+
get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultAndAnalyzeStepResultSchema; }
|
|
56
68
|
});
|
|
57
69
|
Object.defineProperty(exports, "scoringInputWithExtractStepResultAndScoreAndReasonSchema", {
|
|
58
70
|
enumerable: true,
|
|
59
|
-
get: function () { return
|
|
71
|
+
get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultAndScoreAndReasonSchema; }
|
|
60
72
|
});
|
|
61
73
|
Object.defineProperty(exports, "scoringInputWithExtractStepResultSchema", {
|
|
62
74
|
enumerable: true,
|
|
63
|
-
get: function () { return
|
|
75
|
+
get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultSchema; }
|
|
64
76
|
});
|
|
65
77
|
Object.defineProperty(exports, "scoringPromptsSchema", {
|
|
66
78
|
enumerable: true,
|
|
67
|
-
get: function () { return
|
|
79
|
+
get: function () { return chunk7IDACSBM_cjs.scoringPromptsSchema; }
|
|
68
80
|
});
|
|
69
81
|
Object.defineProperty(exports, "scoringSourceSchema", {
|
|
70
82
|
enumerable: true,
|
|
71
|
-
get: function () { return
|
|
83
|
+
get: function () { return chunk7IDACSBM_cjs.scoringSourceSchema; }
|
|
72
84
|
});
|
|
73
85
|
Object.defineProperty(exports, "scoringValueSchema", {
|
|
74
86
|
enumerable: true,
|
|
75
|
-
get: function () { return
|
|
87
|
+
get: function () { return chunk7IDACSBM_cjs.scoringValueSchema; }
|
|
76
88
|
});
|
|
77
89
|
//# sourceMappingURL=index.cjs.map
|
|
78
90
|
//# sourceMappingURL=index.cjs.map
|
package/dist/evals/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export { runEvals } from '../chunk-
|
|
2
|
-
export { MastraScorer, createScorer } from '../chunk-
|
|
3
|
-
export { listScoresResponseSchema, saveScorePayloadSchema, scoreResultSchema, scoreRowDataSchema, scoringEntityTypeSchema, scoringExtractStepResultSchema, scoringHookInputSchema, scoringInputSchema, scoringInputWithExtractStepResultAndAnalyzeStepResultSchema, scoringInputWithExtractStepResultAndScoreAndReasonSchema, scoringInputWithExtractStepResultSchema, scoringPromptsSchema, scoringSourceSchema, scoringValueSchema } from '../chunk-
|
|
1
|
+
export { runEvals } from '../chunk-UQ2HAEHL.js';
|
|
2
|
+
export { MastraScorer, createScorer } from '../chunk-3U4ZNZIF.js';
|
|
3
|
+
export { extractTrajectory, extractTrajectoryFromTrace, extractWorkflowTrajectory, listScoresResponseSchema, saveScorePayloadSchema, scoreResultSchema, scoreRowDataSchema, scoringEntityTypeSchema, scoringExtractStepResultSchema, scoringHookInputSchema, scoringInputSchema, scoringInputWithExtractStepResultAndAnalyzeStepResultSchema, scoringInputWithExtractStepResultAndScoreAndReasonSchema, scoringInputWithExtractStepResultSchema, scoringPromptsSchema, scoringSourceSchema, scoringValueSchema } from '../chunk-VMKNS3YO.js';
|
|
4
4
|
//# sourceMappingURL=index.js.map
|
|
5
5
|
//# sourceMappingURL=index.js.map
|
|
@@ -11,12 +11,23 @@ type WorkflowRunOptions = WorkflowRunStartOptions & {
|
|
|
11
11
|
type RunEvalsDataItem<TTarget = unknown> = {
|
|
12
12
|
input: TTarget extends Workflow<any, any> ? any : TTarget extends Agent ? string | string[] | CoreMessage[] | AiMessageType[] | UIMessageWithMetadata[] : unknown;
|
|
13
13
|
groundTruth?: any;
|
|
14
|
+
expectedTrajectory?: any;
|
|
14
15
|
requestContext?: RequestContext;
|
|
15
16
|
startOptions?: WorkflowRunOptions;
|
|
16
17
|
} & Partial<ObservabilityContext>;
|
|
17
|
-
type WorkflowScorerConfig = {
|
|
18
|
+
export type WorkflowScorerConfig = {
|
|
19
|
+
/** Scorers that evaluate the overall workflow input/output */
|
|
18
20
|
workflow?: MastraScorer<any, any, any, any>[];
|
|
21
|
+
/** Scorers that evaluate individual workflow steps by step ID */
|
|
19
22
|
steps?: Record<string, MastraScorer<any, any, any, any>[]>;
|
|
23
|
+
/** Scorers that evaluate the workflow's step execution trajectory */
|
|
24
|
+
trajectory?: MastraScorer<any, any, any, any>[];
|
|
25
|
+
};
|
|
26
|
+
export type AgentScorerConfig = {
|
|
27
|
+
/** Scorers that evaluate the full agent input/output */
|
|
28
|
+
agent?: MastraScorer<any, any, any, any>[];
|
|
29
|
+
/** Scorers that evaluate the agent's tool call trajectory */
|
|
30
|
+
trajectory?: MastraScorer<any, any, any, any>[];
|
|
20
31
|
};
|
|
21
32
|
type RunEvalsResult = {
|
|
22
33
|
scores: Record<string, any>;
|
|
@@ -59,6 +70,22 @@ export declare function runEvals<TWorkflow extends AnyWorkflow>(config: {
|
|
|
59
70
|
scorerResults: {
|
|
60
71
|
workflow?: Record<string, any>;
|
|
61
72
|
steps?: Record<string, Record<string, any>>;
|
|
73
|
+
trajectory?: Record<string, any>;
|
|
74
|
+
};
|
|
75
|
+
}) => void | Promise<void>;
|
|
76
|
+
concurrency?: number;
|
|
77
|
+
}): Promise<RunEvalsResult>;
|
|
78
|
+
export declare function runEvals<TAgent extends Agent>(config: {
|
|
79
|
+
data: RunEvalsDataItem<TAgent>[];
|
|
80
|
+
scorers: AgentScorerConfig;
|
|
81
|
+
target: TAgent;
|
|
82
|
+
targetOptions?: Omit<AgentExecutionOptions<any>, 'scorers' | 'returnScorerData' | 'requestContext'>;
|
|
83
|
+
onItemComplete?: (params: {
|
|
84
|
+
item: RunEvalsDataItem<TAgent>;
|
|
85
|
+
targetResult: Awaited<ReturnType<Agent['generate']>>;
|
|
86
|
+
scorerResults: {
|
|
87
|
+
agent?: Record<string, any>;
|
|
88
|
+
trajectory?: Record<string, any>;
|
|
62
89
|
};
|
|
63
90
|
}) => void | Promise<void>;
|
|
64
91
|
concurrency?: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/evals/run/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,KAAK,EAAE,qBAAqB,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAItG,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/evals/run/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,KAAK,EAAE,qBAAqB,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAItG,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAEhE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAE5D,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,uBAAuB,EAAc,MAAM,iBAAiB,CAAC;AACxG,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAI5C,KAAK,kBAAkB,GAAG,uBAAuB,GAAG;IAClD,YAAY,CAAC,EAAE,GAAG,CAAC;CACpB,CAAC;AAEF,KAAK,gBAAgB,CAAC,OAAO,GAAG,OAAO,IAAI;IACzC,KAAK,EAAE,OAAO,SAAS,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,GACrC,GAAG,GACH,OAAO,SAAS,KAAK,GACnB,MAAM,GAAG,MAAM,EAAE,GAAG,WAAW,EAAE,GAAG,aAAa,EAAE,GAAG,qBAAqB,EAAE,GAC7E,OAAO,CAAC;IACd,WAAW,CAAC,EAAE,GAAG,CAAC;IAClB,kBAAkB,CAAC,EAAE,GAAG,CAAC;IACzB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,YAAY,CAAC,EAAE,kBAAkB,CAAC;CACnC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAElC,MAAM,MAAM,oBAAoB,GAAG;IACjC,8DAA8D;IAC9D,QAAQ,CAAC,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC9C,iEAAiE;IACjE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAC3D,qEAAqE;IACrE,UAAU,CAAC,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CACjD,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,wDAAwD;IACxD,KAAK,CAAC,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC3C,6DAA6D;IAC7D,UAAU,CAAC,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CACjD,CAAC;AAEF,KAAK,cAAc,GAAG;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,OAAO,EAAE;QACP,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;CACH,CAAC;AAGF,wBAAgB,QAAQ,CAAC,MAAM,SAAS,KAAK,EAAE,MAAM,EAAE;IACrD,IAAI,EAAE,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC;IACjC,OAAO,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC5C,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,gBAAgB,CAAC,CAAC;IACpG,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE;QACxB,IAAI,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC;QAC/B,YAAY,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACrD,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACpC,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;AAG5B,wBAAgB,QAAQ,CAAC,SAAS,SAAS,WAAW,EAAE,MAAM,EAAE;IAC9D,IAAI,EAAE,gBAAgB,CAAC,SAAS,CAAC,EAAE,CAAC;IACpC,OAAO,EAAE,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC5C,MAAM,EAAE,SAAS,CAAC;IAClB,aAAa,CAAC,EAAE,kBAAkB,CAAC;IACnC,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE;QACxB,IAAI,EAAE,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAClC,YAAY,EAAE,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QACjD,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACpC,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;AAG5B,wBAAgB,QAAQ,CAAC,SAAS,SAAS,WAAW,EAAE,MAAM,EAAE;IAC9D,IAAI,EAAE,gBAAgB,CAAC,SAAS,CAAC,EAAE,CAAC;IACpC,OAAO,EAAE,oBAAoB,CAAC;IAC9B,MAAM,EAAE,SAAS,CAAC;IAClB,aAAa,CAAC,EAAE,kBAAkB,CAAC;IACnC,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE;QACxB,IAAI,EAAE,gBAAgB,CAAC,SAAS,CAAC,CAAC;QAClC,YAAY,EAAE,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QACjD,aAAa,EAAE;YACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC/B,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC;YAC5C,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SAClC,CAAC;KACH,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;AAG5B,wBAAgB,QAAQ,CAAC,MAAM,SAAS,KAAK,EAAE,MAAM,EAAE;IACrD,IAAI,EAAE,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,SAAS,GAAG,kBAAkB,GAAG,gBAAgB,CAAC,CAAC;IACpG,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE;QACxB,IAAI,EAAE,gBAAgB,CAAC,MAAM,CAAC,CAAC;QAC/B,YAAY,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACrD,aAAa,EAAE;YACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC5B,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;SAClC,CAAC;KACH,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC"}
|
|
@@ -2,9 +2,12 @@ export declare class ScoreAccumulator {
|
|
|
2
2
|
private flatScores;
|
|
3
3
|
private workflowScores;
|
|
4
4
|
private stepScores;
|
|
5
|
+
private agentScores;
|
|
6
|
+
private trajectoryScores;
|
|
5
7
|
addScores(scorerResults: Record<string, any>): void;
|
|
6
8
|
private addFlatScores;
|
|
7
|
-
private
|
|
9
|
+
private addWorkflowScores;
|
|
10
|
+
private addAgentScores;
|
|
8
11
|
addStepScores(stepScorerResults: Record<string, Record<string, any>>): void;
|
|
9
12
|
getAverageScores(): Record<string, any>;
|
|
10
13
|
private getAverageScore;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scorerAccumulator.d.ts","sourceRoot":"","sources":["../../../src/evals/run/scorerAccumulator.ts"],"names":[],"mappings":"AAAA,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAgC;IAClD,OAAO,CAAC,cAAc,CAAgC;IACtD,OAAO,CAAC,UAAU,CAAgD;
|
|
1
|
+
{"version":3,"file":"scorerAccumulator.d.ts","sourceRoot":"","sources":["../../../src/evals/run/scorerAccumulator.ts"],"names":[],"mappings":"AAAA,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAAgC;IAClD,OAAO,CAAC,cAAc,CAAgC;IACtD,OAAO,CAAC,UAAU,CAAgD;IAClE,OAAO,CAAC,WAAW,CAAgC;IACnD,OAAO,CAAC,gBAAgB,CAAgC;IAExD,SAAS,CAAC,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiB5C,OAAO,CAAC,aAAa;IASrB,OAAO,CAAC,iBAAiB;IAmCzB,OAAO,CAAC,cAAc;IAoBtB,aAAa,CAAC,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAcpE,gBAAgB,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IA4CvC,OAAO,CAAC,eAAe;CAOxB"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var
|
|
4
|
-
var
|
|
3
|
+
var chunkJFPVUKPB_cjs = require('../../chunk-JFPVUKPB.cjs');
|
|
4
|
+
var chunk7IDACSBM_cjs = require('../../chunk-7IDACSBM.cjs');
|
|
5
5
|
var chunkG5HKDGNT_cjs = require('../../chunk-G5HKDGNT.cjs');
|
|
6
6
|
var chunk4U7ZLI36_cjs = require('../../chunk-4U7ZLI36.cjs');
|
|
7
7
|
var pMap = require('p-map');
|
|
@@ -235,7 +235,7 @@ function transformTraceToScorerInputAndOutput(trace) {
|
|
|
235
235
|
}
|
|
236
236
|
|
|
237
237
|
// src/evals/scoreTraces/scoreTracesWorkflow.ts
|
|
238
|
-
var getTraceStep =
|
|
238
|
+
var getTraceStep = chunkJFPVUKPB_cjs.createStep({
|
|
239
239
|
id: "__process-trace-scoring",
|
|
240
240
|
inputSchema: v4.z.object({
|
|
241
241
|
targets: v4.z.array(
|
|
@@ -383,7 +383,7 @@ async function validateAndSaveScore({ storage, scorerResult }) {
|
|
|
383
383
|
text: "Scores storage domain is not available"
|
|
384
384
|
});
|
|
385
385
|
}
|
|
386
|
-
const payloadToSave =
|
|
386
|
+
const payloadToSave = chunk7IDACSBM_cjs.saveScorePayloadSchema.parse(scorerResult);
|
|
387
387
|
const result = await scoresStore.saveScore(payloadToSave);
|
|
388
388
|
return result.score;
|
|
389
389
|
}
|
|
@@ -446,7 +446,7 @@ async function attachScoreToSpan({
|
|
|
446
446
|
} catch {
|
|
447
447
|
}
|
|
448
448
|
}
|
|
449
|
-
var scoreTracesWorkflow =
|
|
449
|
+
var scoreTracesWorkflow = chunkJFPVUKPB_cjs.createWorkflow({
|
|
450
450
|
id: "__batch-scoring-traces",
|
|
451
451
|
inputSchema: v4.z.object({
|
|
452
452
|
targets: v4.z.array(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createStep, createWorkflow } from '../../chunk-
|
|
2
|
-
import { saveScorePayloadSchema } from '../../chunk-
|
|
1
|
+
import { createStep, createWorkflow } from '../../chunk-3NMSFQPY.js';
|
|
2
|
+
import { saveScorePayloadSchema } from '../../chunk-VMKNS3YO.js';
|
|
3
3
|
import { resolveObservabilityContext } from '../../chunk-CT4YYQI3.js';
|
|
4
4
|
import { MastraError } from '../../chunk-FJEVLHJT.js';
|
|
5
5
|
import pMap from 'p-map';
|
package/dist/evals/types.d.ts
CHANGED
|
@@ -2,6 +2,8 @@ import type { CoreMessage, CoreSystemMessage } from '../_types/@internal_ai-sdk-
|
|
|
2
2
|
import { z } from 'zod/v4';
|
|
3
3
|
import type { MastraDBMessage } from '../agent/index.js';
|
|
4
4
|
import type { ObservabilityContext } from '../observability/index.js';
|
|
5
|
+
import type { SpanRecord } from '../storage/domains/observability/tracing.js';
|
|
6
|
+
import type { StepResult } from '../workflows/types.js';
|
|
5
7
|
export type ScoringSamplingConfig = {
|
|
6
8
|
type: 'none';
|
|
7
9
|
} | {
|
|
@@ -156,25 +158,25 @@ export declare const saveScorePayloadSchema: z.ZodObject<{
|
|
|
156
158
|
TEST: "TEST";
|
|
157
159
|
}>;
|
|
158
160
|
output: z.ZodUnknown;
|
|
159
|
-
|
|
161
|
+
traceId: z.ZodOptional<z.ZodString>;
|
|
162
|
+
spanId: z.ZodOptional<z.ZodString>;
|
|
160
163
|
requestContext: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
161
|
-
scorer: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
162
|
-
entity: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
163
164
|
entityType: z.ZodOptional<z.ZodEnum<{
|
|
164
165
|
[x: string]: string;
|
|
165
166
|
}>>;
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
spanId: z.ZodOptional<z.ZodString>;
|
|
167
|
+
entityId: z.ZodString;
|
|
168
|
+
additionalContext: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
169
169
|
extractStepResult: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
170
|
-
extractPrompt: z.ZodOptional<z.ZodString>;
|
|
171
170
|
analyzeStepResult: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
171
|
+
scorer: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
172
|
+
entity: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
173
|
+
preprocessStepResult: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
174
|
+
scorerId: z.ZodString;
|
|
175
|
+
extractPrompt: z.ZodOptional<z.ZodString>;
|
|
172
176
|
analyzePrompt: z.ZodOptional<z.ZodString>;
|
|
173
177
|
reason: z.ZodOptional<z.ZodString>;
|
|
174
178
|
reasonPrompt: z.ZodOptional<z.ZodString>;
|
|
175
|
-
|
|
176
|
-
scorerId: z.ZodString;
|
|
177
|
-
entityId: z.ZodString;
|
|
179
|
+
structuredOutput: z.ZodOptional<z.ZodBoolean>;
|
|
178
180
|
preprocessPrompt: z.ZodOptional<z.ZodString>;
|
|
179
181
|
generateScorePrompt: z.ZodOptional<z.ZodString>;
|
|
180
182
|
generateReasonPrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -249,4 +251,297 @@ export type ScorerRunInputForAgent = {
|
|
|
249
251
|
taggedSystemMessages: Record<string, CoreSystemMessage[]>;
|
|
250
252
|
};
|
|
251
253
|
export type ScorerRunOutputForAgent = MastraDBMessage[];
|
|
254
|
+
/**
|
|
255
|
+
* Base properties shared by all trajectory step types.
|
|
256
|
+
*/
|
|
257
|
+
export type TrajectoryStepBase = {
|
|
258
|
+
/** Name of the tool called, model used, or step executed */
|
|
259
|
+
name: string;
|
|
260
|
+
/** Duration of this step in milliseconds */
|
|
261
|
+
durationMs?: number;
|
|
262
|
+
/** Additional metadata about this step */
|
|
263
|
+
metadata?: Record<string, unknown>;
|
|
264
|
+
/** Nested child steps (e.g., tool calls inside a workflow step, or steps inside an agent run) */
|
|
265
|
+
children?: TrajectoryStep[];
|
|
266
|
+
};
|
|
267
|
+
export type ToolCallStep = TrajectoryStepBase & {
|
|
268
|
+
stepType: 'tool_call';
|
|
269
|
+
/** Arguments passed to the tool */
|
|
270
|
+
toolArgs?: Record<string, unknown>;
|
|
271
|
+
/** Result returned by the tool */
|
|
272
|
+
toolResult?: Record<string, unknown>;
|
|
273
|
+
/** Whether the tool call succeeded */
|
|
274
|
+
success?: boolean;
|
|
275
|
+
};
|
|
276
|
+
export type McpToolCallStep = TrajectoryStepBase & {
|
|
277
|
+
stepType: 'mcp_tool_call';
|
|
278
|
+
/** Arguments passed to the MCP tool */
|
|
279
|
+
toolArgs?: Record<string, unknown>;
|
|
280
|
+
/** Result returned by the MCP tool */
|
|
281
|
+
toolResult?: Record<string, unknown>;
|
|
282
|
+
/** The MCP server that handled this tool call */
|
|
283
|
+
mcpServer?: string;
|
|
284
|
+
/** Whether the tool call succeeded */
|
|
285
|
+
success?: boolean;
|
|
286
|
+
};
|
|
287
|
+
export type ModelGenerationStep = TrajectoryStepBase & {
|
|
288
|
+
stepType: 'model_generation';
|
|
289
|
+
/** The model ID used for generation */
|
|
290
|
+
modelId?: string;
|
|
291
|
+
/** Number of prompt tokens consumed */
|
|
292
|
+
promptTokens?: number;
|
|
293
|
+
/** Number of completion tokens generated */
|
|
294
|
+
completionTokens?: number;
|
|
295
|
+
/** Reason the generation finished (e.g., 'stop', 'tool-calls') */
|
|
296
|
+
finishReason?: string;
|
|
297
|
+
};
|
|
298
|
+
export type AgentRunStep = TrajectoryStepBase & {
|
|
299
|
+
stepType: 'agent_run';
|
|
300
|
+
/** The ID of the agent that was run */
|
|
301
|
+
agentId?: string;
|
|
302
|
+
};
|
|
303
|
+
export type WorkflowStepStep = TrajectoryStepBase & {
|
|
304
|
+
stepType: 'workflow_step';
|
|
305
|
+
/** The step ID within the workflow */
|
|
306
|
+
stepId?: string;
|
|
307
|
+
/** Status of the step (e.g., 'success', 'failed', 'suspended') */
|
|
308
|
+
status?: string;
|
|
309
|
+
/** Output data from the step */
|
|
310
|
+
output?: Record<string, unknown>;
|
|
311
|
+
};
|
|
312
|
+
export type WorkflowRunStep = TrajectoryStepBase & {
|
|
313
|
+
stepType: 'workflow_run';
|
|
314
|
+
/** The ID of the workflow that was run */
|
|
315
|
+
workflowId?: string;
|
|
316
|
+
/** Status of the workflow run */
|
|
317
|
+
status?: string;
|
|
318
|
+
};
|
|
319
|
+
export type WorkflowConditionalStep = TrajectoryStepBase & {
|
|
320
|
+
stepType: 'workflow_conditional';
|
|
321
|
+
/** Number of conditions evaluated */
|
|
322
|
+
conditionCount?: number;
|
|
323
|
+
/** Steps selected by the conditional */
|
|
324
|
+
selectedSteps?: string[];
|
|
325
|
+
};
|
|
326
|
+
export type WorkflowParallelStep = TrajectoryStepBase & {
|
|
327
|
+
stepType: 'workflow_parallel';
|
|
328
|
+
/** Number of parallel branches */
|
|
329
|
+
branchCount?: number;
|
|
330
|
+
/** Steps that ran in parallel */
|
|
331
|
+
parallelSteps?: string[];
|
|
332
|
+
};
|
|
333
|
+
export type WorkflowLoopStep = TrajectoryStepBase & {
|
|
334
|
+
stepType: 'workflow_loop';
|
|
335
|
+
/** Type of loop (e.g., 'dowhile', 'dountil') */
|
|
336
|
+
loopType?: string;
|
|
337
|
+
/** Total number of iterations executed */
|
|
338
|
+
totalIterations?: number;
|
|
339
|
+
};
|
|
340
|
+
export type WorkflowSleepStep = TrajectoryStepBase & {
|
|
341
|
+
stepType: 'workflow_sleep';
|
|
342
|
+
/** Sleep duration in milliseconds */
|
|
343
|
+
sleepDurationMs?: number;
|
|
344
|
+
/** Type of sleep */
|
|
345
|
+
sleepType?: string;
|
|
346
|
+
};
|
|
347
|
+
export type WorkflowWaitEventStep = TrajectoryStepBase & {
|
|
348
|
+
stepType: 'workflow_wait_event';
|
|
349
|
+
/** Name of the event being waited on */
|
|
350
|
+
eventName?: string;
|
|
351
|
+
/** Whether the event was received */
|
|
352
|
+
eventReceived?: boolean;
|
|
353
|
+
};
|
|
354
|
+
export type ProcessorRunStep = TrajectoryStepBase & {
|
|
355
|
+
stepType: 'processor_run';
|
|
356
|
+
/** The ID of the processor that was run */
|
|
357
|
+
processorId?: string;
|
|
358
|
+
};
|
|
359
|
+
/**
|
|
360
|
+
* A single step in an agent's or workflow's trajectory.
|
|
361
|
+
* Discriminated union on `stepType` — each variant carries properties specific
|
|
362
|
+
* to that kind of action.
|
|
363
|
+
*/
|
|
364
|
+
export type TrajectoryStep = ToolCallStep | McpToolCallStep | ModelGenerationStep | AgentRunStep | WorkflowStepStep | WorkflowRunStep | WorkflowConditionalStep | WorkflowParallelStep | WorkflowLoopStep | WorkflowSleepStep | WorkflowWaitEventStep | ProcessorRunStep;
|
|
365
|
+
/**
|
|
366
|
+
* The type of action taken in a trajectory step.
|
|
367
|
+
* Derived from the discriminated union for convenience.
|
|
368
|
+
*/
|
|
369
|
+
export type TrajectoryStepType = TrajectoryStep['stepType'];
|
|
370
|
+
/**
|
|
371
|
+
* A complete trajectory: the ordered sequence of steps an agent or workflow took
|
|
372
|
+
* to go from input to output.
|
|
373
|
+
*/
|
|
374
|
+
export type Trajectory = {
|
|
375
|
+
/** Ordered list of steps taken */
|
|
376
|
+
steps: TrajectoryStep[];
|
|
377
|
+
/** Total duration of the full trajectory in milliseconds */
|
|
378
|
+
totalDurationMs?: number;
|
|
379
|
+
/** The raw agent output messages, preserved for scorers that need text context */
|
|
380
|
+
rawOutput?: ScorerRunOutputForAgent;
|
|
381
|
+
/** The raw workflow result, preserved for scorers that need workflow-specific data */
|
|
382
|
+
rawWorkflowResult?: {
|
|
383
|
+
stepResults: Record<string, StepResult<any, any, any, any>>;
|
|
384
|
+
stepExecutionPath?: string[];
|
|
385
|
+
};
|
|
386
|
+
};
|
|
387
|
+
/**
|
|
388
|
+
* Configuration for trajectory comparison behavior.
|
|
389
|
+
*/
|
|
390
|
+
export type TrajectoryComparisonOptions = {
|
|
391
|
+
/**
|
|
392
|
+
* How to compare step ordering.
|
|
393
|
+
* - 'strict': exact match (same steps, same order, no extras)
|
|
394
|
+
* - 'relaxed': subsequence match (extra steps OK, order matters)
|
|
395
|
+
* - 'unordered': just check presence (don't care about order)
|
|
396
|
+
* @default 'relaxed'
|
|
397
|
+
*/
|
|
398
|
+
ordering?: 'strict' | 'relaxed' | 'unordered';
|
|
399
|
+
/**
|
|
400
|
+
* Whether to require exact match of the trajectory (same steps in same order, no extra steps).
|
|
401
|
+
* When false, allows additional steps as long as expected steps appear in order.
|
|
402
|
+
* @default false
|
|
403
|
+
* @deprecated Use `ordering: 'strict'` instead
|
|
404
|
+
*/
|
|
405
|
+
strictOrder?: boolean;
|
|
406
|
+
/**
|
|
407
|
+
* Whether to compare step-specific data (e.g., toolArgs/toolResult for tool_call steps).
|
|
408
|
+
* @default false
|
|
409
|
+
*/
|
|
410
|
+
compareStepData?: boolean;
|
|
411
|
+
/**
|
|
412
|
+
* Whether to allow repeated steps in the trajectory.
|
|
413
|
+
* When false, repeated steps (loops) are penalized.
|
|
414
|
+
* @default true
|
|
415
|
+
*/
|
|
416
|
+
allowRepeatedSteps?: boolean;
|
|
417
|
+
};
|
|
418
|
+
/**
|
|
419
|
+
* A lightweight step matcher for trajectory expectations.
|
|
420
|
+
* Simpler than `TrajectoryStep` — just specify `name` and optionally `stepType` and `data`.
|
|
421
|
+
*
|
|
422
|
+
* @example
|
|
423
|
+
* ```ts
|
|
424
|
+
* // Match any step named 'search'
|
|
425
|
+
* { name: 'search' }
|
|
426
|
+
*
|
|
427
|
+
* // Match a tool_call named 'search' with specific args
|
|
428
|
+
* { name: 'search', stepType: 'tool_call', data: { query: 'weather' } }
|
|
429
|
+
*
|
|
430
|
+
* // Match an agent run with nested expectations for its children
|
|
431
|
+
* {
|
|
432
|
+
* name: 'researchAgent',
|
|
433
|
+
* stepType: 'agent_run',
|
|
434
|
+
* children: {
|
|
435
|
+
* ordering: 'unordered',
|
|
436
|
+
* steps: [
|
|
437
|
+
* { name: 'search', stepType: 'tool_call' },
|
|
438
|
+
* { name: 'summarize', stepType: 'tool_call' },
|
|
439
|
+
* ],
|
|
440
|
+
* },
|
|
441
|
+
* }
|
|
442
|
+
* ```
|
|
443
|
+
*/
|
|
444
|
+
export type ExpectedStep = {
|
|
445
|
+
/** Step name to match (tool name, agent ID, workflow step name, etc.) */
|
|
446
|
+
name: string;
|
|
447
|
+
/** Step type to match. If omitted, matches any step type with the given name */
|
|
448
|
+
stepType?: TrajectoryStepType;
|
|
449
|
+
/** Expected step-specific data (toolArgs for tool_call, output for workflow_step, etc.) */
|
|
450
|
+
data?: Record<string, unknown>;
|
|
451
|
+
/**
|
|
452
|
+
* Nested trajectory expectation for this step's children.
|
|
453
|
+
* Overrides the parent config for evaluating this step's children.
|
|
454
|
+
* e.g., require strict ordering for the parent agent but unordered for a sub-agent.
|
|
455
|
+
*/
|
|
456
|
+
children?: TrajectoryExpectation;
|
|
457
|
+
};
|
|
458
|
+
/**
|
|
459
|
+
* Full trajectory expectation config for the unified trajectory scorer.
|
|
460
|
+
* Can be set as constructor defaults (agent-level) or per dataset item (prompt-specific).
|
|
461
|
+
* Per-item values override constructor defaults.
|
|
462
|
+
*/
|
|
463
|
+
export type TrajectoryExpectation = {
|
|
464
|
+
/** Expected steps for accuracy checking */
|
|
465
|
+
steps?: ExpectedStep[];
|
|
466
|
+
/**
|
|
467
|
+
* How to compare step ordering.
|
|
468
|
+
* - 'strict': exact match (same steps, same order, no extras)
|
|
469
|
+
* - 'relaxed': subsequence match (extra steps OK, order matters)
|
|
470
|
+
* - 'unordered': just check presence (don't care about order)
|
|
471
|
+
* @default 'relaxed'
|
|
472
|
+
*/
|
|
473
|
+
ordering?: 'strict' | 'relaxed' | 'unordered';
|
|
474
|
+
/** Whether to compare step-specific data (toolArgs/toolResult, output, etc.) */
|
|
475
|
+
compareStepData?: boolean;
|
|
476
|
+
/** Whether to allow repeated steps in accuracy evaluation. @default true */
|
|
477
|
+
allowRepeatedSteps?: boolean;
|
|
478
|
+
/** Maximum number of steps allowed */
|
|
479
|
+
maxSteps?: number;
|
|
480
|
+
/** Maximum total tokens across all model_generation steps */
|
|
481
|
+
maxTotalTokens?: number;
|
|
482
|
+
/** Maximum total duration in milliseconds */
|
|
483
|
+
maxTotalDurationMs?: number;
|
|
484
|
+
/** Whether to penalize redundant calls (same tool + same args consecutively). @default true */
|
|
485
|
+
noRedundantCalls?: boolean;
|
|
486
|
+
/** Tool names that should never appear in the trajectory */
|
|
487
|
+
blacklistedTools?: string[];
|
|
488
|
+
/** Tool name sequences that should never appear (contiguous subsequences) */
|
|
489
|
+
blacklistedSequences?: string[][];
|
|
490
|
+
/** Maximum acceptable retries per tool before penalizing. @default 2 */
|
|
491
|
+
maxRetriesPerTool?: number;
|
|
492
|
+
};
|
|
493
|
+
/**
|
|
494
|
+
* Extracts a Trajectory from agent output messages by walking through
|
|
495
|
+
* tool invocations.
|
|
496
|
+
*
|
|
497
|
+
* This is called automatically by `runEvals` when using `AgentScorerConfig.trajectory`
|
|
498
|
+
* scorers — trajectory scorers receive a pre-extracted `Trajectory` as their `output`
|
|
499
|
+
* instead of raw `MastraDBMessage[]`.
|
|
500
|
+
*
|
|
501
|
+
* @param output - The raw agent output messages
|
|
502
|
+
* @returns A Trajectory with ToolCallStep entries extracted from tool invocations
|
|
503
|
+
*/
|
|
504
|
+
export declare function extractTrajectory(output: ScorerRunOutputForAgent): Trajectory;
|
|
505
|
+
/**
|
|
506
|
+
* Extracts a Trajectory from workflow step results.
|
|
507
|
+
*
|
|
508
|
+
* Converts the `stepResults` record (and optional `stepExecutionPath` ordering)
|
|
509
|
+
* into a flat list of `WorkflowStepStep` entries. Each step captures its status,
|
|
510
|
+
* output, and timing.
|
|
511
|
+
*
|
|
512
|
+
* This is called automatically by `runEvals` when using `WorkflowScorerConfig.trajectory`
|
|
513
|
+
* scorers.
|
|
514
|
+
*
|
|
515
|
+
* @param stepResults - The workflow step results record
|
|
516
|
+
* @param stepExecutionPath - Optional ordered list of step IDs for execution ordering
|
|
517
|
+
* @returns A Trajectory with WorkflowStepStep entries
|
|
518
|
+
*/
|
|
519
|
+
export declare function extractWorkflowTrajectory(stepResults: Record<string, StepResult<any, any, any, any>>, stepExecutionPath?: string[]): Trajectory;
|
|
520
|
+
/**
|
|
521
|
+
* Extracts a hierarchical Trajectory from trace spans (as returned by the
|
|
522
|
+
* observability store's `getTrace()`).
|
|
523
|
+
*
|
|
524
|
+
* Builds a parent-child tree from `parentSpanId` references, then recursively
|
|
525
|
+
* converts each span to the appropriate `TrajectoryStep` discriminated union
|
|
526
|
+
* type with nested `children`.
|
|
527
|
+
*
|
|
528
|
+
* Noise spans (`generic`, `model_step`, `model_chunk`, `workflow_conditional_eval`)
|
|
529
|
+
* are automatically skipped.
|
|
530
|
+
*
|
|
531
|
+
* This is used by `runEvals` when storage is available to produce richer,
|
|
532
|
+
* hierarchical trajectories that include nested agent runs, tool calls, and
|
|
533
|
+
* model generations inside workflow or agent steps.
|
|
534
|
+
*
|
|
535
|
+
* @param spans - Flat array of span records from `getTrace().spans`
|
|
536
|
+
* @param rootSpanId - Optional span ID to use as root. If omitted, spans with
|
|
537
|
+
* no parent are used as roots.
|
|
538
|
+
* @returns A Trajectory with hierarchical TrajectoryStep entries
|
|
539
|
+
*
|
|
540
|
+
* @example
|
|
541
|
+
* ```ts
|
|
542
|
+
* const trace = await observabilityStore.getTrace({ traceId });
|
|
543
|
+
* const trajectory = extractTrajectoryFromTrace(trace.spans, workflowSpanId);
|
|
544
|
+
* ```
|
|
545
|
+
*/
|
|
546
|
+
export declare function extractTrajectoryFromTrace(spans: SpanRecord[], rootSpanId?: string): Trajectory;
|
|
252
547
|
//# sourceMappingURL=types.d.ts.map
|