npm - @mastra/core - Versions diffs - 1.18.0-alpha.2 → 1.18.0-alpha.3 - Mend

@mastra/core 1.18.0-alpha.2 → 1.18.0-alpha.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (173)

package/CHANGELOG.md +87 -0
package/dist/agent/agent-legacy.d.ts +3 -3
package/dist/agent/agent-legacy.d.ts.map +1 -1
package/dist/agent/agent.d.ts +12 -12
package/dist/agent/agent.d.ts.map +1 -1
package/dist/agent/index.cjs +8 -8
package/dist/agent/index.js +1 -1
package/dist/agent/types.d.ts +12 -12
package/dist/agent/types.d.ts.map +1 -1
package/dist/agent/workflows/prepare-stream/index.d.ts.map +1 -1
package/dist/agent/workflows/prepare-stream/prepare-memory-step.d.ts +2 -1
package/dist/agent/workflows/prepare-stream/prepare-memory-step.d.ts.map +1 -1
package/dist/{chunk-VF3CL3JV.js → chunk-32NEFSTI.js} +153 -44
package/dist/chunk-32NEFSTI.js.map +1 -0
package/dist/{chunk-XRZNAZRP.cjs → chunk-3UM5VCQ5.cjs} +3 -3
package/dist/{chunk-XRZNAZRP.cjs.map → chunk-3UM5VCQ5.cjs.map} +1 -1
package/dist/{chunk-VAVK4KJI.cjs → chunk-4K2NKQGN.cjs} +316 -207
package/dist/chunk-4K2NKQGN.cjs.map +1 -0
package/dist/{chunk-WPDB2VAM.cjs → chunk-5IZ346TV.cjs} +7 -7
package/dist/{chunk-WPDB2VAM.cjs.map → chunk-5IZ346TV.cjs.map} +1 -1
package/dist/{chunk-R2V6BCYX.cjs → chunk-64565EZF.cjs} +9 -9
package/dist/{chunk-R2V6BCYX.cjs.map → chunk-64565EZF.cjs.map} +1 -1
package/dist/{chunk-6WVZQBYQ.cjs → chunk-6LHF5UDX.cjs} +9 -9
package/dist/{chunk-6WVZQBYQ.cjs.map → chunk-6LHF5UDX.cjs.map} +1 -1
package/dist/{chunk-L53NRDFU.js → chunk-7USX5P3I.js} +3 -3
package/dist/{chunk-L53NRDFU.js.map → chunk-7USX5P3I.js.map} +1 -1
package/dist/{chunk-FWDG2WUV.js → chunk-AHFHYQS6.js} +3 -3
package/dist/{chunk-FWDG2WUV.js.map → chunk-AHFHYQS6.js.map} +1 -1
package/dist/{chunk-G5HKDGNT.cjs → chunk-B6D4D2CY.cjs} +2 -2
package/dist/{chunk-G5HKDGNT.cjs.map → chunk-B6D4D2CY.cjs.map} +1 -1
package/dist/{chunk-URN2SKVG.js → chunk-BFRUEBG3.js} +4 -4
package/dist/{chunk-URN2SKVG.js.map → chunk-BFRUEBG3.js.map} +1 -1
package/dist/{chunk-CT4YYQI3.js → chunk-CX5I6VS4.js} +2 -2
package/dist/{chunk-CT4YYQI3.js.map → chunk-CX5I6VS4.js.map} +1 -1
package/dist/{chunk-JEWTRDRI.js → chunk-D5VCCQ4L.js} +5 -5
package/dist/{chunk-JEWTRDRI.js.map → chunk-D5VCCQ4L.js.map} +1 -1
package/dist/{chunk-WVTVBWOQ.js → chunk-FB6O7FNK.js} +42 -61
package/dist/chunk-FB6O7FNK.js.map +1 -0
package/dist/{chunk-LUONHWL6.cjs → chunk-GVHBM6VU.cjs} +6 -6
package/dist/{chunk-LUONHWL6.cjs.map → chunk-GVHBM6VU.cjs.map} +1 -1
package/dist/{chunk-ZOJZL62I.js → chunk-HDPQ3LEE.js} +4 -4
package/dist/{chunk-ZOJZL62I.js.map → chunk-HDPQ3LEE.js.map} +1 -1
package/dist/{chunk-IOCCTQYZ.js → chunk-IR4WRS3N.js} +38 -14
package/dist/chunk-IR4WRS3N.js.map +1 -0
package/dist/{chunk-E6XNM5ES.js → chunk-MP2VGF73.js} +3 -3
package/dist/{chunk-E6XNM5ES.js.map → chunk-MP2VGF73.js.map} +1 -1
package/dist/{chunk-DQYOFNZ7.cjs → chunk-OKKJRRSS.cjs} +39 -15
package/dist/chunk-OKKJRRSS.cjs.map +1 -0
package/dist/{chunk-H5O522FY.cjs → chunk-OVWOD5GI.cjs} +53 -72
package/dist/chunk-OVWOD5GI.cjs.map +1 -0
package/dist/{chunk-MBJ6IAKF.cjs → chunk-OXS5X7PW.cjs} +13 -13
package/dist/{chunk-MBJ6IAKF.cjs.map → chunk-OXS5X7PW.cjs.map} +1 -1
package/dist/{chunk-M5CZCWWW.js → chunk-OYAHOEQP.js} +4 -4
package/dist/{chunk-M5CZCWWW.js.map → chunk-OYAHOEQP.js.map} +1 -1
package/dist/{chunk-5AVTFQEP.cjs → chunk-PYNUWPGW.cjs} +19 -19
package/dist/{chunk-5AVTFQEP.cjs.map → chunk-PYNUWPGW.cjs.map} +1 -1
package/dist/{chunk-7IDACSBM.cjs → chunk-QXPUTX3W.cjs} +32 -32
package/dist/chunk-QXPUTX3W.cjs.map +1 -0
package/dist/{chunk-VMKNS3YO.js → chunk-SLHDZJ4B.js} +3 -3
package/dist/chunk-SLHDZJ4B.js.map +1 -0
package/dist/{chunk-YEKQKMCU.cjs → chunk-T5XU5TAV.cjs} +82 -82
package/dist/{chunk-YEKQKMCU.cjs.map → chunk-T5XU5TAV.cjs.map} +1 -1
package/dist/{chunk-YN3FI4AD.js → chunk-UORUVXKY.js} +5 -5
package/dist/{chunk-YN3FI4AD.js.map → chunk-UORUVXKY.js.map} +1 -1
package/dist/{chunk-T2CILERS.cjs → chunk-VVR3SFKH.cjs} +68 -68
package/dist/{chunk-T2CILERS.cjs.map → chunk-VVR3SFKH.cjs.map} +1 -1
package/dist/{chunk-EYM6DWKD.js → chunk-W2QYKEBN.js} +5 -5
package/dist/{chunk-EYM6DWKD.js.map → chunk-W2QYKEBN.js.map} +1 -1
package/dist/{chunk-PK3X5KIF.cjs → chunk-X36INADV.cjs} +23 -23
package/dist/{chunk-PK3X5KIF.cjs.map → chunk-X36INADV.cjs.map} +1 -1
package/dist/{chunk-45M6KUQZ.js → chunk-YBVRBNRY.js} +7 -7
package/dist/{chunk-45M6KUQZ.js.map → chunk-YBVRBNRY.js.map} +1 -1
package/dist/{chunk-QR2Z4WQ7.js → chunk-ZFH5KWHO.js} +3 -3
package/dist/{chunk-QR2Z4WQ7.js.map → chunk-ZFH5KWHO.js.map} +1 -1
package/dist/{chunk-4KGTZPYF.cjs → chunk-ZFZ6HQFT.cjs} +185 -185
package/dist/{chunk-4KGTZPYF.cjs.map → chunk-ZFZ6HQFT.cjs.map} +1 -1
package/dist/datasets/index.cjs +17 -17
package/dist/datasets/index.js +2 -2
package/dist/docs/SKILL.md +1 -1
package/dist/docs/assets/SOURCE_MAP.json +469 -469
package/dist/docs/references/docs-memory-observational-memory.md +49 -4
package/dist/docs/references/docs-server-mastra-client.md +17 -0
package/dist/docs/references/reference-client-js-agents.md +11 -6
package/dist/docs/references/reference-evals-scorer-utils.md +9 -5
package/dist/docs/references/reference-evals-trajectory-accuracy.md +29 -15
package/dist/docs/references/reference-memory-observational-memory.md +32 -6
package/dist/evals/index.cjs +23 -23
package/dist/evals/index.js +3 -3
package/dist/evals/scoreTraces/index.cjs +7 -7
package/dist/evals/scoreTraces/index.js +3 -3
package/dist/evals/types.d.ts +58 -28
package/dist/evals/types.d.ts.map +1 -1
package/dist/harness/harness.d.ts.map +1 -1
package/dist/harness/index.cjs +28 -8
package/dist/harness/index.cjs.map +1 -1
package/dist/harness/index.js +26 -6
package/dist/harness/index.js.map +1 -1
package/dist/index.cjs +2 -2
package/dist/index.js +1 -1
package/dist/llm/index.cjs +16 -16
package/dist/llm/index.js +5 -5
package/dist/llm/model/provider-types.generated.d.ts +14 -2
package/dist/loop/index.cjs +14 -14
package/dist/loop/index.js +1 -1
package/dist/loop/workflows/agentic-execution/llm-execution-step.d.ts.map +1 -1
package/dist/mastra/index.cjs +2 -2
package/dist/mastra/index.d.ts +11 -0
package/dist/mastra/index.d.ts.map +1 -1
package/dist/mastra/index.js +1 -1
package/dist/memory/index.cjs +14 -14
package/dist/memory/index.js +1 -1
package/dist/memory/memory.d.ts.map +1 -1
package/dist/memory/types.d.ts +16 -2
package/dist/memory/types.d.ts.map +1 -1
package/dist/models-dev-MIC2CW43.cjs +12 -0
package/dist/{models-dev-GLFQVCT4.cjs.map → models-dev-MIC2CW43.cjs.map} +1 -1
package/dist/models-dev-VDZIOMAC.js +3 -0
package/dist/{models-dev-WSLKK6FO.js.map → models-dev-VDZIOMAC.js.map} +1 -1
package/dist/netlify-2CGMKEKT.js +3 -0
package/dist/{netlify-SNTN56QS.js.map → netlify-2CGMKEKT.js.map} +1 -1
package/dist/netlify-KZ4HQR5N.cjs +12 -0
package/dist/{netlify-VFRYJYBK.cjs.map → netlify-KZ4HQR5N.cjs.map} +1 -1
package/dist/observability/index.cjs +18 -18
package/dist/observability/index.js +1 -1
package/dist/observability/types/tracing.d.ts +3 -0
package/dist/observability/types/tracing.d.ts.map +1 -1
package/dist/processor-provider/index.cjs +10 -10
package/dist/processor-provider/index.js +1 -1
package/dist/processors/index.cjs +44 -44
package/dist/processors/index.js +1 -1
package/dist/processors/tool-result-reminder.d.ts +2 -0
package/dist/processors/tool-result-reminder.d.ts.map +1 -1
package/dist/provider-registry-HGWTTWCL.cjs +40 -0
package/dist/{provider-registry-L6KDLMFM.cjs.map → provider-registry-HGWTTWCL.cjs.map} +1 -1
package/dist/provider-registry-QRWUBN5N.js +3 -0
package/dist/{provider-registry-LEXW2E53.js.map → provider-registry-QRWUBN5N.js.map} +1 -1
package/dist/provider-registry.json +34 -10
package/dist/relevance/index.cjs +3 -3
package/dist/relevance/index.js +1 -1
package/dist/storage/constants.cjs +56 -56
package/dist/storage/constants.js +1 -1
package/dist/storage/index.cjs +268 -268
package/dist/storage/index.js +3 -3
package/dist/stream/index.cjs +8 -8
package/dist/stream/index.js +1 -1
package/dist/test-utils/llm-mock.cjs +4 -4
package/dist/test-utils/llm-mock.js +1 -1
package/dist/tool-loop-agent/index.cjs +4 -4
package/dist/tool-loop-agent/index.js +1 -1
package/dist/utils.cjs +23 -23
package/dist/utils.js +1 -1
package/dist/vector/index.cjs +7 -7
package/dist/vector/index.js +1 -1
package/dist/workflows/evented/index.cjs +10 -10
package/dist/workflows/evented/index.js +1 -1
package/dist/workflows/index.cjs +24 -24
package/dist/workflows/index.js +1 -1
package/package.json +4 -3
package/src/llm/model/provider-types.generated.d.ts +14 -2
package/dist/chunk-7IDACSBM.cjs.map +0 -1
package/dist/chunk-DQYOFNZ7.cjs.map +0 -1
package/dist/chunk-H5O522FY.cjs.map +0 -1
package/dist/chunk-IOCCTQYZ.js.map +0 -1
package/dist/chunk-VAVK4KJI.cjs.map +0 -1
package/dist/chunk-VF3CL3JV.js.map +0 -1
package/dist/chunk-VMKNS3YO.js.map +0 -1
package/dist/chunk-WVTVBWOQ.js.map +0 -1
package/dist/models-dev-GLFQVCT4.cjs +0 -12
package/dist/models-dev-WSLKK6FO.js +0 -3
package/dist/netlify-SNTN56QS.js +0 -3
package/dist/netlify-VFRYJYBK.cjs +0 -12
package/dist/provider-registry-L6KDLMFM.cjs +0 -40
package/dist/provider-registry-LEXW2E53.js +0 -3

package/dist/docs/references/docs-memory-observational-memory.md CHANGED Viewed

@@ -95,27 +95,72 @@ The result is a three-tier system:
 Normal OM compresses messages into observations, which is great for staying on task — but the original wording is gone. Retrieval mode fixes this by keeping each observation group linked to the raw messages that produced it. When the agent needs exact wording, tool output, or chronology that the summary compressed away, it can call a `recall` tool to page through the source messages.
+#### Browsing only
+Set `retrieval: true` to enable the recall tool for browsing raw messages. No vector store needed. By default, the recall tool can browse across all threads for the current resource.
 ```typescript
 const memory = new Memory({
   options: {
     observationalMemory: {
       model: 'google/gemini-2.5-flash',
-      scope: 'thread',
       retrieval: true,
     },
   },
 })
 ```
+#### With semantic search
+Set `retrieval: { vector: true }` to also enable semantic search. This reuses the vector store and embedder already configured on your Memory instance:
+```typescript
+const memory = new Memory({
+  storage,
+  vector: myVectorStore,
+  embedder: myEmbedder,
+  options: {
+    observationalMemory: {
+      model: 'google/gemini-2.5-flash',
+      retrieval: { vector: true },
+    },
+  },
+})
+```
+When vector search is configured, new observation groups are automatically indexed at buffer time and during synchronous observation (fire-and-forget, non-blocking). Semantic search returns observation-group matches with their raw source message ID ranges, so the recall tool can show the summarized memory alongside where it came from.
+#### Restricting to the current thread
+By default, the recall tool's scope is `'resource'` — the agent can list threads, browse other threads, and search across all conversations. Set `scope: 'thread'` inside the `retrieval` object to restrict the agent to the current thread only:
+```typescript
+const memory = new Memory({
+  options: {
+    observationalMemory: {
+      model: 'google/gemini-2.5-flash',
+      retrieval: { vector: true, scope: 'thread' },
+    },
+  },
+})
+```
+#### What retrieval enables
 With retrieval mode enabled, OM:
 - Stores a `range` (e.g. `startId:endId`) on each observation group pointing to the messages it was derived from
 - Keeps range metadata visible in the agent's context so the agent knows which observations map to which messages
-- Registers a `recall` tool the agent can call to page through the raw messages behind any range
-Retrieval mode is only active for thread-scoped OM. Setting `retrieval: true` with `scope: 'resource'` has no effect — OM keeps resource-scoped behavior but skips retrieval-mode context and does not register the `recall` tool.
+- Registers a `recall` tool the agent can call to:
+  - Page through the raw messages behind any observation group range
+  - Search by semantic similarity (`mode: "search"` with a `query` string) — requires `vector: true`
+  - List all threads (`mode: "threads"`), browse other threads (`threadId`), and search across all threads (default `scope: 'resource'`)
+  - When `scope: 'thread'`: restrict browsing and search to the current thread only
-See the [recall tool reference](https://mastra.ai/reference/memory/observational-memory) for the full API (detail levels, part indexing, pagination, and token limiting).
+See the [recall tool reference](https://mastra.ai/reference/memory/observational-memory) for the full API (detail levels, part indexing, pagination, cross-thread browsing, and token limiting).
 ## Models

package/dist/docs/references/docs-server-mastra-client.md CHANGED Viewed

@@ -133,6 +133,23 @@ export const mastraClient = new MastraClient({
 > **Info:** Visit [MastraClient](https://mastra.ai/reference/client-js/mastra-client) for more configuration options.
+## Credentials and session cookies
+**Authenticate Mastra API calls with session cookies** when your UI and Mastra API are not on the same origin—different host, subdomain, or port (for example Mastra Studio on one port and a custom server on another). Add **`credentials: 'include'`** to `MastraClient` so each request carries the cookies the user already has after sign-in. Skip this and you will often get **`401`** responses from Mastra even though login succeeded in the browser.
+```typescript
+import { MastraClient } from '@mastra/client-js'
+export const mastraClient = new MastraClient({
+  baseUrl: process.env.MASTRA_API_URL || 'http://localhost:4111',
+  credentials: 'include',
+})
+```
+**Allow credentialed cross-origin requests on your server**—see [CORS: requests with credentials](https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/CORS#requests_with_credentials). You need a concrete `Access-Control-Allow-Origin` (not `*`) and `Access-Control-Allow-Credentials: true`, or the browser will block the call before it reaches Mastra.
+**Using `@mastra/react`?** Wrap your app with `MastraReactProvider`, set `baseUrl` and `apiPrefix` to match your server, and rely on the default `credentials: 'include'`. Change `credentials` only when you deliberately want `same-origin` or `omit` behavior.
 ## Adding request cancelling
 `MastraClient` supports request cancellation using the standard `AbortSignal` API (available in browsers and Node.js). Useful for canceling in-flight requests, such as when users abort an operation or to clean up stale network calls.

package/dist/docs/references/reference-client-js-agents.md CHANGED Viewed

@@ -308,7 +308,7 @@ response.processDataStream({
 ## Stored agents
-Stored agents are agent configurations stored in a database that can be created, updated, and deleted at runtime. They reference primitives (tools, workflows, other agents, memory, scorers) by key, which are resolved from the Mastra registry when the agent is instantiated.
+Stored agents are agent configurations stored in a database that can be created, updated, and deleted at runtime. They reference primitives (tools, workflows, other agents, scorers) by key, which are resolved from the Mastra registry when the agent is instantiated. Memory is configured inline as a `SerializedMemoryConfig` object with options such as `lastMessages` and `semanticRecall`.
 ### `listStoredAgents()`
@@ -361,10 +361,15 @@ const agent = await mastraClient.createStoredAgent({
     provider: 'openai',
     name: 'gpt-5.4',
   },
-  tools: ['calculator', 'weather'],
-  workflows: ['data-processing'],
-  agents: ['subagent-1'],
-  memory: 'my-memory',
+  tools: { calculator: {}, weather: {} },
+  workflows: { 'data-processing': {} },
+  agents: { 'subagent-1': {} },
+  memory: {
+    options: {
+      lastMessages: 20,
+      semanticRecall: false,
+    },
+  },
   scorers: {
     'quality-scorer': {
       sampling: { type: 'ratio', rate: 0.1 },
@@ -415,7 +420,7 @@ const updated = await storedAgent.update({
 ```typescript
 // Update just the tools
 await storedAgent.update({
-  tools: ['new-tool-1', 'new-tool-2'],
+  tools: { 'new-tool-1': {}, 'new-tool-2': {} },
 })
 // Update metadata

package/dist/docs/references/reference-evals-scorer-utils.md CHANGED Viewed

@@ -367,10 +367,11 @@ The `expected` parameter accepts either a `Trajectory` (actual trajectory) or `{
 import { compareTrajectories } from '@mastra/evals/scorers/utils'
 // Using ExpectedStep[] (recommended for expectations)
+// Data fields (e.g. toolArgs) are auto-compared when present on expected steps
 const result = compareTrajectories(
   actualTrajectory,
   { steps: [{ name: 'search' }, { name: 'summarize', stepType: 'tool_call' }] },
-  { compareStepData: false, allowRepeatedSteps: true },
+  { allowRepeatedSteps: true },
 )
 // result.score — 0.0 to 1.0
 // result.missingSteps — step names not found
@@ -412,7 +413,9 @@ const result = checkTrajectoryEfficiency(trajectory, {
 })
 // result.score — 1.0 if within all budgets, lower with penalties
 // result.redundantCalls — duplicate tool+args combos
-// result.overBudget — which budgets were exceeded
+// result.overStepBudget — true if maxSteps exceeded
+// result.overTokenBudget — true if maxTotalTokens exceeded
+// result.overDurationBudget — true if maxTotalDurationMs exceeded
 ```
 **Returns:** `TrajectoryEfficiencyResult`
@@ -428,8 +431,9 @@ const result = checkTrajectoryBlacklist(trajectory, {
   blacklistedTools: ['deleteAll', 'admin-override'],
   blacklistedSequences: [['escalate', 'admin-override']],
 })
-// result.passed — true if no violations
-// result.violations — list of violations with type and details
+// result.score — 1.0 if no violations, 0.0 if any found
+// result.violatedTools — blacklisted tools that were called
+// result.violatedSequences — blacklisted sequences that were detected
 ```
 **Returns:** `TrajectoryBlacklistResult`
@@ -442,7 +446,7 @@ Detects tool failure patterns including retries, fallbacks, and argument correct
 import { analyzeToolFailures } from '@mastra/evals/scorers/utils'
 const result = analyzeToolFailures(trajectory, {
-  maxRetriesPerTool: 3,
+  maxRetriesPerTool: 2,
 })
 // result.score — 1.0 if no failure patterns, lower if patterns detected
 // result.patterns — detected patterns (retry, fallback, arg_correction)

package/dist/docs/references/reference-evals-trajectory-accuracy.md CHANGED Viewed

@@ -103,13 +103,15 @@ All step types share the base properties `name`, `durationMs`, `metadata`, and `
 ## Expected steps
-When defining expected trajectories, use `ExpectedStep` instead of the full `TrajectoryStep` discriminated union. `ExpectedStep` is a simpler type designed for expectations:
+When defining expected trajectories, use `ExpectedStep` instead of the full `TrajectoryStep` discriminated union. `ExpectedStep` is a discriminated union that mirrors `TrajectoryStep` — when you specify a `stepType`, you get autocomplete for that variant's fields (e.g., `toolArgs` for `tool_call`, `modelId` for `model_generation`). All variant-specific fields are optional, so you only assert against what you care about.
+Omit `stepType` entirely to match any step by name only.
 **name** (`string`): Step name to match (tool name, agent ID, workflow step name, etc.).
-**stepType** (`TrajectoryStepType`): Step type to match. If omitted, matches any step type with the given name.
+**stepType** (`TrajectoryStepType`): Step type discriminant. When set, enables autocomplete for that variant's fields. If omitted, matches any step type with the given name.
-**data** (`Record<string, unknown>`): Expected step data. Compared against the actual step's type-specific data (toolArgs for tool\_call, output for workflow\_step, etc.).
+**(variant fields)** (`varies`): Type-specific fields from the corresponding TrajectoryStep variant. For example, \`toolArgs\` and \`toolResult\` for \`tool\_call\`, \`modelId\` for \`model\_generation\`, \`output\` for \`workflow\_step\`. All optional — only specified fields are compared.
 **children** (`TrajectoryExpectation`): Nested expectation config for this step's children. Overrides the parent config for evaluating children of this step.
@@ -120,11 +122,14 @@ const steps: ExpectedStep[] = [
   // Match by name only (any step type)
   { name: 'search' },
-  // Match by name and step type
+  // Match by name and step type (autocomplete for tool_call fields)
   { name: 'search', stepType: 'tool_call' },
-  // Match with expected data
-  { name: 'search', stepType: 'tool_call', data: { input: { query: 'weather' } } },
+  // Match with specific toolArgs (auto-compared when present)
+  { name: 'search', stepType: 'tool_call', toolArgs: { query: 'weather' } },
+  // Match a model generation step by model ID
+  { name: 'gpt-4o', stepType: 'model_generation', modelId: 'gpt-4o' },
 ]
 ```
@@ -182,7 +187,7 @@ The `createTrajectoryAccuracyScorerCode()` function from `@mastra/evals/scorers/
 ### Parameters
-**expectedTrajectory** (`TrajectoryExpectation`): Static expected trajectory to compare against. When provided, all dataset items use this trajectory. When omitted, the scorer reads expectedTrajectory from each dataset item at runtime.
+**expectedTrajectory** (`Trajectory | ExpectedStep[]`): Static expected trajectory to compare against. Accepts a full Trajectory or an array of ExpectedStep matchers. When omitted, the scorer reads expectedTrajectory from each dataset item at runtime.
 **comparisonOptions** (`TrajectoryComparisonOptions`): Controls how the comparison is performed.
@@ -368,8 +373,8 @@ const scorer = createTrajectoryAccuracyScorerCode({
       },
     ],
   },
-  comparisonOptions: { compareStepData: true },
 })
+// Data fields like toolArgs are auto-compared when present on expected steps
 ```
 ## LLM-based trajectory accuracy scorer
@@ -380,7 +385,7 @@ The `createTrajectoryAccuracyScorerLLM()` function from `@mastra/evals/scorers/p
 **model** (`MastraModelConfig`): The LLM model to use for evaluating trajectory quality.
-**expectedTrajectory** (`TrajectoryExpectation`): Optional static expected trajectory to compare against. When omitted, the LLM evaluates the trajectory based on the task requirements alone. Can also come from dataset items at runtime.
+**expectedTrajectory** (`Trajectory | ExpectedStep[]`): Optional static expected trajectory to compare against. Accepts a full Trajectory or an array of ExpectedStep matchers. When omitted, the LLM evaluates the trajectory based on the task requirements alone. Can also come from dataset items at runtime.
 ### Features
@@ -461,7 +466,7 @@ The `createTrajectoryScorerCode()` function from `@mastra/evals/scorers/prebuilt
 **defaults** (`TrajectoryExpectation`): Default expectations applied to all dataset items. Per-item expectedTrajectory values override these defaults.
-**weights** (`object`): Weights for combining dimension scores into the final score.
+**weights** (`TrajectoryScoreWeights`): Custom weights for combining dimension scores. Weights are normalized to sum to 1.0.
 ### Scoring behavior
@@ -472,7 +477,7 @@ The unified scorer evaluates four dimensions:
 3. **Blacklist** — Checks for forbidden tools or sequences. Any violation immediately results in a score of **0.0** regardless of other dimensions.
 4. **Tool failures** — Detects retry patterns, fallback patterns, and argument correction patterns.
-The final score is a weighted average of accuracy, efficiency, and tool failures. Blacklist violations override everything to 0.
+The final score is a weighted combination of the active dimensions, with the weights renormalized across whichever dimensions are active. Default weights are accuracy 0.4, efficiency 0.3, tool failures 0.2, blacklist 0.1, but you can customize them via the `weights` option. Blacklist violations override everything to 0. When nested evaluations are present, the score is 70% top-level and 30% nested average.
 ### Unified scorer results
@@ -481,11 +486,13 @@ The final score is a weighted average of accuracy, efficiency, and tool failures
   runId: string,
   preprocessStepResult: {
     accuracy?: TrajectoryComparisonResult,
-    efficiency: TrajectoryEfficiencyResult,
-    blacklist: TrajectoryBlacklistResult,
-    toolFailures: ToolFailureAnalysisResult,
+    efficiency?: TrajectoryEfficiencyResult,
+    blacklist?: TrajectoryBlacklistResult,
+    toolFailures?: ToolFailureAnalysisResult,
+    nested?: NestedEvaluationResult[],
   },
-  score: number
+  score: number,
+  reason: string
 }
 ```
@@ -542,6 +549,13 @@ const scorer = createTrajectoryScorerCode({
     noRedundantCalls: true,
     maxRetriesPerTool: 2,
   },
+  // Customize how dimensions contribute to the final score
+  weights: {
+    accuracy: 0.5, // prioritize step accuracy
+    efficiency: 0.3,
+    toolFailures: 0.1,
+    blacklist: 0.1,
+  },
 })
 ```

package/dist/docs/references/reference-memory-observational-memory.md CHANGED Viewed

@@ -38,7 +38,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
 **shareTokenBudget** (`boolean`): Share the token budget between messages and observations. When enabled, the total budget is \`observation.messageTokens + reflection.observationTokens\`. Messages can use more space when observations are small, and vice versa. This maximizes context usage through flexible allocation. \`shareTokenBudget\` is not yet compatible with async buffering. You must set \`observation: { bufferTokens: false }\` when using this option (this is a temporary limitation). (Default: `false`)
-**retrieval** (`boolean`): \*\*Experimental.\*\* Enable retrieval-mode observation groups as durable pointers to raw message history. Retrieval mode is only active when \`scope\` is \`'thread'\`. If you set \`retrieval: true\` with \`scope: 'resource'\`, OM keeps resource-scoped memory behavior but skips retrieval-mode context and does not register the \`recall\` tool. (Default: `false`)
+**retrieval** (`boolean | { vector?: boolean; scope?: 'thread' | 'resource' }`): \*\*Experimental.\*\* Enable retrieval-mode observation groups as durable pointers to raw message history. \`true\` enables cross-thread browsing by default. \`{ vector: true }\` also enables semantic search using Memory's vector store and embedder. \`{ scope: 'thread' }\` restricts the recall tool to the current thread only. Default scope is \`'resource'\`. (Default: `false`)
 **observation** (`ObservationalMemoryObservationConfig`): Configuration for the observation step. Controls when the Observer agent runs and how it behaves.
@@ -578,21 +578,31 @@ The standalone `ObservationalMemory` class accepts all the same options as the `
 ## Recall tool
-When `retrieval: true` is set with `scope: 'thread'`, OM registers a `recall` tool that the agent can call to page through the raw messages behind an observation group's `_range`. The tool is automatically added to the agent's tool list — no manual registration is needed.
+When `retrieval` is set (any truthy value), a `recall` tool is registered so the agent can page through raw messages behind observation group ranges. By default (scope `'resource'`), the tool supports listing threads (`mode: "threads"`), browsing other threads (`threadId`), and cross-thread search. With `retrieval: { vector: true }`, semantic search is available (`mode: "search"`). Set `scope: 'thread'` to restrict the tool to the current thread only. The tool is automatically added to the agent's tool list — no manual registration is needed.
 ### Parameters
-**cursor** (`string`): A message ID to anchor the recall query. Extract the start or end ID from an observation group range (e.g. from \`\_range: \\\`startId:endId\\\`\_\`, use either \`startId\` or \`endId\`). If a range string is passed directly, the tool returns a hint explaining how to extract the correct ID.
+**mode** (`'messages' | 'threads' | 'search'`): What to retrieve. \`"messages"\` (default) pages through message history. \`"threads"\` lists all threads for the current user. \`"search"\` finds messages by semantic similarity across all threads (requires vector store and embedder). (Default: `'messages'`)
-**page** (`number`): Pagination offset from the cursor. Positive values page forward (messages after the cursor), negative values page backward (messages before the cursor). \`0\` is treated as \`1\`. (Default: `1`)
+**query** (`string`): Search query for \`mode: "search"\`. Finds messages semantically similar to this text across all threads for the current user.
-**limit** (`number`): Maximum number of messages per page. (Default: `20`)
+**cursor** (`string`): A message ID to anchor the recall query. Required for \`mode: "messages"\` when browsing the current thread. Extract the start or end ID from an observation group range (e.g. from \`\_range: \\\`startId:endId\\\`\_\`, use either \`startId\` or \`endId\`). If a range string is passed directly, the tool returns a hint explaining how to extract the correct ID. Can be omitted when \`threadId\` is provided to start reading from the beginning of that thread.
+**threadId** (`string`): Browse a different thread by its ID. Use \`mode: "threads"\` first to discover thread IDs. When provided without a \`cursor\`, reading starts from the beginning of the thread.
+**page** (`number`): Pagination offset. For messages: positive values page forward from cursor, negative values page backward. For threads: page number (0-indexed). \`0\` is treated as \`1\` for messages. (Default: `1`)
+**limit** (`number`): Maximum number of items to return per page. (Default: `20`)
 **detail** (`'low' | 'high'`): Controls how much content is shown per message part. \`'low'\` shows truncated text and tool names with positional indices (\`\[p0]\`, \`\[p1]\`). \`'high'\` shows full content including tool arguments and results, clamped to one part per call with continuation hints. (Default: `'low'`)
 **partIndex** (`number`): Fetch a single message part at full detail by its positional index. Use this when a low-detail recall shows an interesting part at \`\[p1]\` — call again with \`partIndex: 1\` to see the full content without loading every part.
-### Returns
+**before** (`string`): For \`mode: "threads"\` only. Filter to threads created before this date. Accepts ISO 8601 format (e.g. \`"2026-03-15"\`, \`"2026-03-10T00:00:00Z"\`).
+**after** (`string`): For \`mode: "threads"\` only. Filter to threads created after this date. Accepts ISO 8601 format (e.g. \`"2026-03-01"\`, \`"2026-03-10T00:00:00Z"\`).
+### Returns (messages mode)
 **messages** (`string`): Formatted message content. Format depends on the \`detail\` level.
@@ -612,6 +622,22 @@ When `retrieval: true` is set with `scope: 'thread'`, OM registers a `recall` to
 **tokenOffset** (`number`): Approximate number of tokens that were trimmed when \`truncated\` is true.
+### Returns (threads mode)
+**threads** (`string`): Formatted thread listing. Each thread shows its title, ID, and dates. The current thread is marked with \`← current\`.
+**count** (`number`): Number of threads returned.
+**page** (`number`): The page number returned.
+**hasMore** (`boolean`): Whether more threads exist on the next page.
+### Returns (search mode)
+**results** (`string`): Formatted search results grouped by thread. Each result shows the thread title, thread ID, relevance score, message preview, and a cursor ID for browsing into that thread.
+**count** (`number`): Number of matching messages found.
 ### ModelByInputTokens
 `ModelByInputTokens` selects a model based on the input token count. It chooses the model for the smallest threshold that covers the actual input size.

package/dist/evals/index.cjs CHANGED Viewed

@@ -1,90 +1,90 @@
 'use strict';
-var chunk5AVTFQEP_cjs = require('../chunk-5AVTFQEP.cjs');
-var chunkVAVK4KJI_cjs = require('../chunk-VAVK4KJI.cjs');
-var chunk7IDACSBM_cjs = require('../chunk-7IDACSBM.cjs');
+var chunkPYNUWPGW_cjs = require('../chunk-PYNUWPGW.cjs');
+var chunk4K2NKQGN_cjs = require('../chunk-4K2NKQGN.cjs');
+var chunkQXPUTX3W_cjs = require('../chunk-QXPUTX3W.cjs');
 Object.defineProperty(exports, "runEvals", {
   enumerable: true,
-  get: function () { return chunk5AVTFQEP_cjs.runEvals; }
+  get: function () { return chunkPYNUWPGW_cjs.runEvals; }
 });
 Object.defineProperty(exports, "MastraScorer", {
   enumerable: true,
-  get: function () { return chunkVAVK4KJI_cjs.MastraScorer; }
+  get: function () { return chunk4K2NKQGN_cjs.MastraScorer; }
 });
 Object.defineProperty(exports, "createScorer", {
   enumerable: true,
-  get: function () { return chunkVAVK4KJI_cjs.createScorer; }
+  get: function () { return chunk4K2NKQGN_cjs.createScorer; }
 });
 Object.defineProperty(exports, "extractTrajectory", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.extractTrajectory; }
+  get: function () { return chunkQXPUTX3W_cjs.extractTrajectory; }
 });
 Object.defineProperty(exports, "extractTrajectoryFromTrace", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.extractTrajectoryFromTrace; }
+  get: function () { return chunkQXPUTX3W_cjs.extractTrajectoryFromTrace; }
 });
 Object.defineProperty(exports, "extractWorkflowTrajectory", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.extractWorkflowTrajectory; }
+  get: function () { return chunkQXPUTX3W_cjs.extractWorkflowTrajectory; }
 });
 Object.defineProperty(exports, "listScoresResponseSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.listScoresResponseSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.listScoresResponseSchema; }
 });
 Object.defineProperty(exports, "saveScorePayloadSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.saveScorePayloadSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.saveScorePayloadSchema; }
 });
 Object.defineProperty(exports, "scoreResultSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoreResultSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoreResultSchema; }
 });
 Object.defineProperty(exports, "scoreRowDataSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoreRowDataSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoreRowDataSchema; }
 });
 Object.defineProperty(exports, "scoringEntityTypeSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringEntityTypeSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringEntityTypeSchema; }
 });
 Object.defineProperty(exports, "scoringExtractStepResultSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringExtractStepResultSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringExtractStepResultSchema; }
 });
 Object.defineProperty(exports, "scoringHookInputSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringHookInputSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringHookInputSchema; }
 });
 Object.defineProperty(exports, "scoringInputSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringInputSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringInputSchema; }
 });
 Object.defineProperty(exports, "scoringInputWithExtractStepResultAndAnalyzeStepResultSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultAndAnalyzeStepResultSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringInputWithExtractStepResultAndAnalyzeStepResultSchema; }
 });
 Object.defineProperty(exports, "scoringInputWithExtractStepResultAndScoreAndReasonSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultAndScoreAndReasonSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringInputWithExtractStepResultAndScoreAndReasonSchema; }
 });
 Object.defineProperty(exports, "scoringInputWithExtractStepResultSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringInputWithExtractStepResultSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringInputWithExtractStepResultSchema; }
 });
 Object.defineProperty(exports, "scoringPromptsSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringPromptsSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringPromptsSchema; }
 });
 Object.defineProperty(exports, "scoringSourceSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringSourceSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringSourceSchema; }
 });
 Object.defineProperty(exports, "scoringValueSchema", {
   enumerable: true,
-  get: function () { return chunk7IDACSBM_cjs.scoringValueSchema; }
+  get: function () { return chunkQXPUTX3W_cjs.scoringValueSchema; }
 });
 //# sourceMappingURL=index.cjs.map
 //# sourceMappingURL=index.cjs.map

package/dist/evals/index.js CHANGED Viewed

@@ -1,5 +1,5 @@
-export { runEvals } from '../chunk-JEWTRDRI.js';
-export { MastraScorer, createScorer } from '../chunk-VF3CL3JV.js';
-export { extractTrajectory, extractTrajectoryFromTrace, extractWorkflowTrajectory, listScoresResponseSchema, saveScorePayloadSchema, scoreResultSchema, scoreRowDataSchema, scoringEntityTypeSchema, scoringExtractStepResultSchema, scoringHookInputSchema, scoringInputSchema, scoringInputWithExtractStepResultAndAnalyzeStepResultSchema, scoringInputWithExtractStepResultAndScoreAndReasonSchema, scoringInputWithExtractStepResultSchema, scoringPromptsSchema, scoringSourceSchema, scoringValueSchema } from '../chunk-VMKNS3YO.js';
+export { runEvals } from '../chunk-D5VCCQ4L.js';
+export { MastraScorer, createScorer } from '../chunk-32NEFSTI.js';
+export { extractTrajectory, extractTrajectoryFromTrace, extractWorkflowTrajectory, listScoresResponseSchema, saveScorePayloadSchema, scoreResultSchema, scoreRowDataSchema, scoringEntityTypeSchema, scoringExtractStepResultSchema, scoringHookInputSchema, scoringInputSchema, scoringInputWithExtractStepResultAndAnalyzeStepResultSchema, scoringInputWithExtractStepResultAndScoreAndReasonSchema, scoringInputWithExtractStepResultSchema, scoringPromptsSchema, scoringSourceSchema, scoringValueSchema } from '../chunk-SLHDZJ4B.js';
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map

package/dist/evals/scoreTraces/index.cjs CHANGED Viewed

@@ -1,8 +1,8 @@
 'use strict';
-var chunkT2CILERS_cjs = require('../../chunk-T2CILERS.cjs');
-var chunk7IDACSBM_cjs = require('../../chunk-7IDACSBM.cjs');
-var chunkG5HKDGNT_cjs = require('../../chunk-G5HKDGNT.cjs');
+var chunkVVR3SFKH_cjs = require('../../chunk-VVR3SFKH.cjs');
+var chunkQXPUTX3W_cjs = require('../../chunk-QXPUTX3W.cjs');
+var chunkB6D4D2CY_cjs = require('../../chunk-B6D4D2CY.cjs');
 var chunk4U7ZLI36_cjs = require('../../chunk-4U7ZLI36.cjs');
 var pMap = require('p-map');
 var v4 = require('zod/v4');
@@ -235,7 +235,7 @@ function transformTraceToScorerInputAndOutput(trace) {
 }
 // src/evals/scoreTraces/scoreTracesWorkflow.ts
-var getTraceStep = chunkT2CILERS_cjs.createStep({
+var getTraceStep = chunkVVR3SFKH_cjs.createStep({
   id: "__process-trace-scoring",
   inputSchema: v4.z.object({
     targets: v4.z.array(
@@ -248,7 +248,7 @@ var getTraceStep = chunkT2CILERS_cjs.createStep({
   }),
   outputSchema: v4.z.any(),
   execute: async ({ inputData, mastra, ...rest }) => {
-    const observabilityContext = chunkG5HKDGNT_cjs.resolveObservabilityContext(rest);
+    const observabilityContext = chunkB6D4D2CY_cjs.resolveObservabilityContext(rest);
     const logger = mastra.getLogger();
     if (!logger) {
       console.warn(
@@ -383,7 +383,7 @@ async function validateAndSaveScore({ storage, scorerResult }) {
       text: "Scores storage domain is not available"
     });
   }
-  const payloadToSave = chunk7IDACSBM_cjs.saveScorePayloadSchema.parse(scorerResult);
+  const payloadToSave = chunkQXPUTX3W_cjs.saveScorePayloadSchema.parse(scorerResult);
   const result = await scoresStore.saveScore(payloadToSave);
   return result.score;
 }
@@ -446,7 +446,7 @@ async function attachScoreToSpan({
   } catch {
   }
 }
-var scoreTracesWorkflow = chunkT2CILERS_cjs.createWorkflow({
+var scoreTracesWorkflow = chunkVVR3SFKH_cjs.createWorkflow({
   id: "__batch-scoring-traces",
   inputSchema: v4.z.object({
     targets: v4.z.array(

package/dist/evals/scoreTraces/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { createStep, createWorkflow } from '../../chunk-YN3FI4AD.js';
-import { saveScorePayloadSchema } from '../../chunk-VMKNS3YO.js';
-import { resolveObservabilityContext } from '../../chunk-CT4YYQI3.js';
+import { createStep, createWorkflow } from '../../chunk-UORUVXKY.js';
+import { saveScorePayloadSchema } from '../../chunk-SLHDZJ4B.js';
+import { resolveObservabilityContext } from '../../chunk-CX5I6VS4.js';
 import { MastraError } from '../../chunk-FJEVLHJT.js';
 import pMap from 'p-map';
 import { z } from 'zod/v4';