npm - @mastra/memory - Versions diffs - 1.17.6-alpha.1 → 1.18.0-alpha.3 - Mend

@mastra/memory 1.17.6-alpha.1 → 1.18.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/dist/docs/SKILL.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: mastra-memory
 description: Documentation for @mastra/memory. Use when working with @mastra/memory APIs, configuration, or implementation.
 metadata:
   package: "@mastra/memory"
-  version: "1.17.6-alpha.1"
+  version: "1.18.0-alpha.3"
 ---
 ## When to use

package/dist/docs/assets/SOURCE_MAP.json CHANGED Viewed

@@ -1,119 +1,119 @@
 {
-  "version": "1.17.6-alpha.1",
+  "version": "1.18.0-alpha.3",
   "package": "@mastra/memory",
   "exports": {
     "ModelByInputTokens": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 745
     },
     "OBSERVER_SYSTEM_PROMPT": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js"
+      "implementation": "dist/chunk-XVVCS6R6.js"
     },
     "ObservationalMemory": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 6674
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 6690
     },
     "ObservationalMemoryProcessor": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 9219
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 9264
     },
     "TokenCounter": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 6144
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 6160
     },
     "buildObserverPrompt": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3643
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3659
     },
     "buildObserverSystemPrompt": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 2951
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 2967
     },
     "combineObservationGroupRanges": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 837
     },
     "deriveObservationGroupProvenance": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 871
     },
     "extractCurrentTask": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3757
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3773
     },
     "formatMessagesForObserver": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3369
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3385
     },
     "getObservationsAsOf": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 9425
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 9476
     },
     "hasCurrentTaskSection": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3745
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3761
     },
     "injectAnchorIds": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 2499
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 2515
     },
     "optimizeObservationsForContext": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3768
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3784
     },
     "parseAnchorId": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 2472
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 2488
     },
     "parseObservationGroups": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 806
     },
     "parseObserverOutput": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 3653
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 3669
     },
     "reconcileObservationGroupsFromReflection": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 895
     },
     "renderObservationGroupsForReflection": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 851
     },
     "stripEphemeralAnchorIds": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
-      "line": 2529
+      "implementation": "dist/chunk-XVVCS6R6.js",
+      "line": 2545
     },
     "stripObservationGroups": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 828
     },
     "wrapInObservationGroup": {
       "types": "dist/processors/index.d.ts",
-      "implementation": "dist/chunk-QZGJY67D.js",
+      "implementation": "dist/chunk-XVVCS6R6.js",
       "line": 799
     },
     "OBSERVATIONAL_MEMORY_DEFAULTS": {
@@ -149,7 +149,7 @@
     "processors": {
       "index": "dist/processors/index.js",
       "chunks": [
-        "chunk-QZGJY67D.js",
+        "chunk-XVVCS6R6.js",
         "chunk-LSJJAJAF.js"
       ]
     }

package/dist/docs/references/docs-memory-observational-memory.md CHANGED Viewed

@@ -77,6 +77,48 @@ The observer also sees these markers when it processes the thread, so the observ
 See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
+## Early activation
+OM can activate buffered observations before the token threshold is reached. This is useful when a prompt cache is likely to expire, or when the agent changes model providers.
+Top-level early activation settings apply to observations by default:
+```typescript
+const memory = new Memory({
+  options: {
+    observationalMemory: {
+      model: 'google/gemini-2.5-flash',
+      activateAfterIdle: '5m',
+      activateOnProviderChange: true,
+    },
+  },
+})
+```
+Use nested `observation` and `reflection` settings for per-phase control. Reflection early activation is opt-in, so top-level settings affect only observations.
+```typescript
+const memory = new Memory({
+  options: {
+    observationalMemory: {
+      model: 'google/gemini-2.5-flash',
+      activateAfterIdle: '5m',
+      observation: {
+        activateAfterIdle: false,
+      },
+      reflection: {
+        activateAfterIdle: '10m',
+        activateOnProviderChange: true,
+      },
+    },
+  },
+})
+```
+In this example, the top-level idle setting is disabled for observations, while reflections opt into idle and provider-change activation.
+See [the API reference](https://mastra.ai/reference/memory/observational-memory) for the full configuration shape.
 ## Benefits
 - **Prompt caching**: OM's context is stable and observations append over time rather than being dynamically retrieved each turn. This keeps the prompt prefix cacheable, which reduces costs.
@@ -368,17 +410,19 @@ Reflection works similarly — the Reflector runs in the background when observa
 ### Settings
-| Setting                        | Default | What it controls                                                                                                                                                                                                                                                                                                                       |
-| ------------------------------ | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `observation.bufferTokens`     | `0.2`   | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`).                                                                                                                                                     |
-| `observation.bufferActivation` | `0.8`   | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history.                                                                                                                                                  |
-| `observation.blockAfter`       | `1.2`   | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up.                                                                                                                                                                   |
-| `activateAfterIdle`            | none    | Forces buffered observations and buffered reflections to activate after a period of inactivity, even if their token thresholds have not been reached yet. Accepts milliseconds or duration strings like `300_000`, `"5m"`, or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
-| `activateOnProviderChange`     | `false` | Forces buffered observations and reflections to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse.                                                                                   |
-| `reflection.bufferActivation`  | `0.5`   | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold.                                                                                                                                                                                                   |
-| `reflection.blockAfter`        | `1.2`   | Safety threshold for reflection, same logic as observation.                                                                                                                                                                                                                                                                            |
+| Setting                               | Default | What it controls                                                                                                                                                                                                                                                                                                              |
+| ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `observation.bufferTokens`            | `0.2`   | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`).                                                                                                                                            |
+| `observation.bufferActivation`        | `0.8`   | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history.                                                                                                                                         |
+| `observation.blockAfter`              | `1.2`   | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up.                                                                                                                                                          |
+| `activateAfterIdle`                   | none    | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
+| `activateOnProviderChange`            | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse.                                                                                          |
+| `reflection.bufferActivation`         | `0.5`   | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold.                                                                                                                                                                                          |
+| `reflection.activateAfterIdle`        | none    | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`.                                                                                                                                                                                                                      |
+| `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`.                                                                                                                                                                                                    |
+| `reflection.blockAfter`               | `1.2`   | Safety threshold for reflection, same logic as observation.                                                                                                                                                                                                                                                                   |
-If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations or reflections first and send a smaller compressed context window.
+If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
 ```typescript
 const memory = new Memory({
@@ -392,9 +436,9 @@ const memory = new Memory({
 })
 ```
-With a 5-minute prompt cache TTL, this activates buffered context after 5 minutes of inactivity so the next uncached prompt uses observations and reflections instead of a larger raw message window. If you prefer, `300_000` works the same way.
+With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
-Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered context to activate before the new provider runs. That avoids sending a large raw window to a provider that cannot reuse the previous prompt cache.
+Changing models or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
 ### Disabling

package/dist/docs/references/docs-memory-semantic-recall.md CHANGED Viewed

@@ -121,26 +121,88 @@ Each vector store page below includes installation instructions, configuration p
 ## Recall configuration
-The three main parameters that control semantic recall behavior are:
+The following options control semantic recall behavior:
-1. **topK**: How many semantically similar messages to retrieve
-2. **messageRange**: How much surrounding context to include with each match
-3. **scope**: Whether to search within the current thread or across all threads owned by a resource (the default is resource scope).
+1. **topK**: The number of similar messages to retrieve
+2. **messageRange**: The surrounding messages to include with each match
+3. **scope**: Whether to search the current thread or all threads for a resource
+4. **filter**: Metadata criteria that restrict search results
 ```typescript
 const agent = new Agent({
   memory: new Memory({
     options: {
       semanticRecall: {
-        topK: 3, // Retrieve 3 most similar messages
+        topK: 3, // Retrieve 3 similar messages
         messageRange: 2, // Include 2 messages before and after each match
-        scope: 'resource', // Search across all threads for this user (default setting if omitted)
+        scope: 'resource', // Search all threads for this resource
+        filter: { projectId: { $eq: 'project-a' } },
       },
     },
   }),
 })
 ```
+> **Note:** `scope: 'resource'` is supported by the LibSQL, PostgreSQL, and Upstash storage adapters.
+### Metadata filtering
+The `filter` option restricts semantic recall results to messages with matching thread metadata.
+```typescript
+const agent = new Agent({
+  memory: new Memory({
+    options: {
+      semanticRecall: {
+        scope: 'resource',
+        filter: {
+          projectId: { $eq: 'project-a' },
+          category: { $in: ['work', 'personal'] },
+        },
+      },
+    },
+  }),
+})
+```
+Filters match metadata stored on message embeddings when messages are saved. If thread metadata changes later, existing embeddings keep their previous metadata until those messages are saved or indexed again.
+Supported filter operators:
+- `$and`: Logical AND
+- `$eq`: Equal to
+- `$gt`: Greater than
+- `$gte`: Greater than or equal
+- `$in`: In array
+- `$lt`: Less than
+- `$lte`: Less than or equal
+- `$ne`: Not equal to
+- `$nin`: Not in array
+- `$or`: Logical OR
+The following example demonstrates metadata filters for common use cases:
+```typescript
+// Filter by project
+const options = {
+  semanticRecall: { filter: { projectId: { $eq: 'my-project' } } },
+}
+// Filter by multiple categories
+const options = {
+  semanticRecall: { filter: { category: { $in: ['work', 'research'] } } },
+}
+// Filter by project and priority
+const options = {
+  semanticRecall: {
+    filter: {
+      $and: [{ projectId: { $eq: 'project-a' } }, { priority: { $gte: 3 } }],
+    },
+  },
+}
+```
 ## Embedder configuration
 Semantic recall relies on an [embedding model](https://mastra.ai/reference/memory/memory-class) to convert messages into embeddings. Mastra supports embedding models through the model router using `provider/model` strings, or you can use any [embedding model](https://sdk.vercel.ai/docs/ai-sdk-core/embeddings) compatible with the AI SDK.

package/dist/docs/references/reference-memory-observational-memory.md CHANGED Viewed

@@ -36,7 +36,9 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
 **scope** (`'resource' | 'thread'`): Memory scope for observations. \`'thread'\` keeps observations per-thread. \`'resource'\` (experimental) shares observations across all threads for a resource, enabling cross-conversation memory. (Default: `'thread'`)
-**activateAfterIdle** (`number | string`): Time before buffered observations or buffered reflections are forced to activate after inactivity, even if their token thresholds have not been reached yet. Accepts milliseconds or duration strings like \`300\_000\`, \`"5m"\`, or \`"1hr"\`. When the gap between the current time and the last assistant message part timestamp exceeds this value, buffered observational memory activates before the next prompt. Useful for aligning with prompt cache TTLs.
+**activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity, even before \`observation.messageTokens\` is reached. Accepts a numeric millisecond value such as \`300\_000\`, duration strings like \`"5m"\` or \`"1hr"\`, or \`false\` to disable inherited observation idle activation. Reflections do not inherit this setting. Use \`reflection.activateAfterIdle\` to opt reflections into idle activation.
+**activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. Reflections do not inherit this setting. Use \`reflection.activateOnProviderChange\` to opt reflections into provider-change activation. (Default: `false`)
 **shareTokenBudget** (`boolean`): Share the token budget between messages and observations. When enabled, the total budget is \`observation.messageTokens + reflection.observationTokens\`. Messages can use more space when observations are small, and vice versa. This maximizes context usage through flexible allocation. \`shareTokenBudget\` is not yet compatible with async buffering. You must set \`observation: { bufferTokens: false }\` when using this option (this is a temporary limitation). (Default: `false`)
@@ -66,6 +68,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
 **observation.bufferActivation** (`number`): Controls how much of the message window to retain after activation. Accepts a ratio (0-1) or an absolute token count (≥ 1000). For example, \`0.8\` means: activate enough buffers to remove 80% of \`messageTokens\` and leave 20% as active message history. An absolute token count like \`4000\` targets a goal of keeping \~4k message tokens remaining after activation. Higher values remove more message history per activation when using a ratio. Higher values keep more message history when using a token count.
+**observation.activateAfterIdle** (`number | string | false`): Time before buffered observations are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. If unset, the top-level \`activateAfterIdle\` value is used for observations. Set it to \`false\` to disable the top-level idle setting for observations.
+**observation.activateOnProviderChange** (`boolean`): Force buffered observations to activate when the actor provider or model changes. If unset, the top-level \`activateOnProviderChange\` value is used for observations.
 **observation.blockAfter** (`number`): Token threshold above which synchronous (blocking) observation is forced. Between \`messageTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous observation runs as a last resort, while buffered activation still preserves a minimum remaining context (min(1000, retention floor)). Accepts a multiplier (1 < value < 2, multiplied by \`messageTokens\`) or an absolute token count (≥ 2, must be greater than \`messageTokens\`). Only relevant when \`bufferTokens\` is set. Defaults to \`1.2\` when async buffering is enabled.
 **observation.previousObserverTokens** (`number | false`): Optional token budget for the observer's previous-observations context. When set to a number, the observations passed to the Observer agent are tail-truncated to fit within this budget while keeping the newest observations and preserving highlighted 🔴 items when possible. When a buffered reflection is pending, the already-reflected observation lines are automatically replaced with the reflection summary before truncation. Set to \`0\` to omit previous observations entirely, or \`false\` to disable truncation explicitly.
@@ -86,6 +92,10 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
 **reflection.bufferActivation** (`number`): Ratio (0-1) controlling when async reflection buffering starts. When observation tokens reach \`observationTokens \* bufferActivation\`, reflection runs in the background. On activation at the full threshold, the buffered reflection replaces the observations it covers, preserving any new observations appended after that range.
+**reflection.activateAfterIdle** (`number | string | false`): Time before buffered reflections are forced to activate after inactivity. Accepts milliseconds, a duration string, or \`false\`. Reflections do not inherit top-level \`activateAfterIdle\`; set this explicitly to opt reflections into idle activation.
+**reflection.activateOnProviderChange** (`boolean`): Force buffered reflections to activate when the actor provider or model changes. Reflections do not inherit top-level \`activateOnProviderChange\`; set this explicitly to opt reflections into provider-change activation.
 **reflection.blockAfter** (`number`): Token threshold above which synchronous (blocking) reflection is forced. Between \`observationTokens\` and \`blockAfter\`, only async buffering/activation is used. Above \`blockAfter\`, a synchronous reflection runs as a last resort. Accepts a multiplier (1 < value < 2, multiplied by \`observationTokens\`) or an absolute token count (≥ 2, must be greater than \`observationTokens\`). Only relevant when \`bufferActivation\` is set. Defaults to \`1.2\` when async reflection is enabled.
 ### Token estimate metadata cache

package/dist/index.cjs CHANGED Viewed

@@ -1,6 +1,6 @@
 'use strict';
-var chunkWNLFJKTX_cjs = require('./chunk-WNLFJKTX.cjs');
+var chunkET2TVAT3_cjs = require('./chunk-ET2TVAT3.cjs');
 var v3 = require('zod/v3');
 var zod = require('zod');
 var z4 = require('zod/v4');
@@ -16110,7 +16110,7 @@ function formatTimestamp(date) {
 }
 function truncateByTokens(text4, maxTokens, hint) {
   if (tokenx.estimateTokenCount(text4) <= maxTokens) return { text: text4, wasTruncated: false };
-  const truncated = chunkWNLFJKTX_cjs.truncateStringByTokens(text4, maxTokens);
+  const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, maxTokens);
   const suffix = hint ? ` [${hint} for more]` : "";
   return { text: truncated + suffix, wasTruncated: true };
 }
@@ -16162,11 +16162,11 @@ ${JSON.stringify(inv.args, null, 2)}`;
             });
           }
           if (inv.state === "result") {
-            const { value: resultValue } = chunkWNLFJKTX_cjs.resolveToolResultValue(
+            const { value: resultValue } = chunkET2TVAT3_cjs.resolveToolResultValue(
               part,
               inv.result
             );
-            const resultStr = chunkWNLFJKTX_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
+            const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(resultValue, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
             const fullText = `[Tool Result: ${inv.toolName}]
 ${resultStr}`;
             parts.push(makePart(msg, i, "tool-result", fullText, detail, inv.toolName));
@@ -16193,7 +16193,7 @@ ${typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs, null, 2)}`;
         const toolName = part.toolName;
         if (toolName) {
           const rawResult = part.output ?? part.result;
-          const resultStr = chunkWNLFJKTX_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
+          const resultStr = chunkET2TVAT3_cjs.formatToolResultForObserver(rawResult, { maxTokens: HIGH_DETAIL_TOOL_RESULT_TOKENS });
           const fullText = `[Tool Result: ${toolName}]
 ${resultStr}`;
           parts.push(makePart(msg, i, "tool-result", fullText, detail, toolName));
@@ -16272,7 +16272,7 @@ function renderFormattedParts(parts, timestamps, options) {
   const text4 = buildRenderedText(parts, timestamps);
   let totalTokens = tokenx.estimateTokenCount(text4);
   if (totalTokens > options.maxTokens) {
-    const truncated = chunkWNLFJKTX_cjs.truncateStringByTokens(text4, options.maxTokens);
+    const truncated = chunkET2TVAT3_cjs.truncateStringByTokens(text4, options.maxTokens);
     return { text: truncated, truncated: true, tokenOffset: totalTokens - options.maxTokens };
   }
   const truncatedIndices = parts.map((p, i) => ({ part: p, index: i })).filter(({ part }) => part.text !== part.fullText).sort((a, b) => expandPriority(a.part) - expandPriority(b.part));
@@ -16305,7 +16305,7 @@ function renderFormattedParts(parts, timestamps, options) {
   if (expandedTokens <= options.maxTokens) {
     return { text: expanded, truncated: false, tokenOffset: 0 };
   }
-  const hardTruncated = chunkWNLFJKTX_cjs.truncateStringByTokens(expanded, options.maxTokens);
+  const hardTruncated = chunkET2TVAT3_cjs.truncateStringByTokens(expanded, options.maxTokens);
   return { text: hardTruncated, truncated: true, tokenOffset: expandedTokens - options.maxTokens };
 }
 async function recallPart({
@@ -16356,7 +16356,7 @@ async function recallPart({
 `;
           const fallbackText = `${fallbackNote}${firstNextPart.text}`;
-          const truncatedText2 = chunkWNLFJKTX_cjs.truncateStringByTokens(fallbackText, maxTokens);
+          const truncatedText2 = chunkET2TVAT3_cjs.truncateStringByTokens(fallbackText, maxTokens);
           const wasTruncated2 = truncatedText2 !== fallbackText;
           return {
             text: truncatedText2,
@@ -16371,7 +16371,7 @@ async function recallPart({
     }
     throw new Error(`Part index ${partIndex} not found in message ${cursor}. Available indices: ${availableIndices}`);
   }
-  const truncatedText = chunkWNLFJKTX_cjs.truncateStringByTokens(target.text, maxTokens);
+  const truncatedText = chunkET2TVAT3_cjs.truncateStringByTokens(target.text, maxTokens);
   const wasTruncated = truncatedText !== target.text;
   return {
     text: truncatedText,
@@ -17351,16 +17351,15 @@ var Memory = class extends memory.MastraMemory {
                 `Tried to query vector index ${indexName} but this Memory instance doesn't have an attached vector db.`
               );
             }
+            const scopeFilter = resourceScope ? { resource_id: resourceId } : { thread_id: threadId };
+            const userFilter = typeof config.semanticRecall === "object" ? config.semanticRecall.filter : void 0;
+            const combinedFilter = userFilter ? { $and: [scopeFilter, userFilter] } : scopeFilter;
             vectorResults.push(
               ...await this.vector.query({
                 indexName,
                 queryVector: embedding,
                 topK: vectorConfig.topK,
-                filter: resourceScope ? {
-                  resource_id: resourceId
-                } : {
-                  thread_id: threadId
-                }
+                filter: combinedFilter
               })
             );
           })
@@ -17403,9 +17402,12 @@ var Memory = class extends memory.MastraMemory {
       throw error;
     }
   }
-  async getThreadById({ threadId }) {
+  async getThreadById({
+    threadId,
+    resourceId
+  }) {
     const memoryStore = await this.getMemoryStore();
-    return memoryStore.getThreadById({ threadId });
+    return memoryStore.getThreadById({ threadId, resourceId });
   }
   async listThreads(args) {
     const memoryStore = await this.getMemoryStore();
@@ -17762,6 +17764,31 @@ ${workingMemory}`;
       });
       let totalTokens = 0;
       if (this.vector && config.semanticRecall) {
+        const messagesByThread = /* @__PURE__ */ new Map();
+        updatedMessages.forEach((message) => {
+          if (message.threadId) {
+            if (!messagesByThread.has(message.threadId)) {
+              messagesByThread.set(message.threadId, []);
+            }
+            messagesByThread.get(message.threadId).push(message);
+          }
+        });
+        const threadMetadataMap = /* @__PURE__ */ new Map();
+        await Promise.all(
+          Array.from(messagesByThread.keys()).map(async (threadId) => {
+            try {
+              const thread = await memoryStore.getThreadById({ threadId });
+              if (thread?.metadata) {
+                threadMetadataMap.set(threadId, thread.metadata);
+              }
+            } catch (error) {
+              const message = error instanceof Error ? error.message : String(error);
+              throw new Error(
+                `Could not fetch metadata for thread ${threadId} while saving semantic recall embeddings: ${message}`
+              );
+            }
+          })
+        );
         const embeddingData = [];
         let dimension;
         await Promise.all(
@@ -17779,9 +17806,11 @@ ${workingMemory}`;
             if (result2.usage?.tokens) {
               totalTokens += result2.usage.tokens;
             }
+            const threadMetadata = message.threadId ? threadMetadataMap.get(message.threadId) || {} : {};
             embeddingData.push({
               embeddings: result2.embeddings,
               metadata: result2.chunks.map(() => ({
+                ...threadMetadata,
                 message_id: message.id,
                 thread_id: message.threadId,
                 resource_id: message.resourceId
@@ -18094,7 +18123,7 @@ ${workingMemory}`;
         "Observational memory requires @mastra/core support for request-response-id-rotation. Please bump @mastra/core to a newer version."
       );
     }
-    const { ObservationalMemory: OMClass } = await import('./observational-memory-BJF72NKJ.cjs');
+    const { ObservationalMemory: OMClass } = await import('./observational-memory-UJUAQKJB.cjs');
     const onIndexObservations = this.hasRetrievalSearch(omConfig.retrieval) ? async (observation) => {
       await this.indexObservation(observation);
     } : void 0;
@@ -19020,7 +19049,7 @@ Notes:
     if (!effectiveConfig) return null;
     const engine = await this.omEngine;
     if (!engine) return null;
-    const { ObservationalMemoryProcessor } = await import('./observational-memory-BJF72NKJ.cjs');
+    const { ObservationalMemoryProcessor } = await import('./observational-memory-UJUAQKJB.cjs');
     return new ObservationalMemoryProcessor(engine, this, {
       temporalMarkers: effectiveConfig.temporalMarkers
     });
@@ -19029,11 +19058,11 @@ Notes:
 Object.defineProperty(exports, "ModelByInputTokens", {
   enumerable: true,
-  get: function () { return chunkWNLFJKTX_cjs.ModelByInputTokens; }
+  get: function () { return chunkET2TVAT3_cjs.ModelByInputTokens; }
 });
 Object.defineProperty(exports, "getObservationsAsOf", {
   enumerable: true,
-  get: function () { return chunkWNLFJKTX_cjs.getObservationsAsOf; }
+  get: function () { return chunkET2TVAT3_cjs.getObservationsAsOf; }
 });
 Object.defineProperty(exports, "MessageHistory", {
   enumerable: true,