@mastra/memory 0.2.7-alpha.3 → 0.2.7-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +9 -9
- package/CHANGELOG.md +16 -0
- package/dist/_tsup-dts-rollup.d.cts +4 -0
- package/dist/_tsup-dts-rollup.d.ts +4 -0
- package/dist/index.cjs +69 -28
- package/dist/index.js +64 -28
- package/package.json +4 -4
- package/src/index.ts +88 -32
package/.turbo/turbo-build.log
CHANGED

@@ -1,29 +1,29 @@
 
-> @mastra/memory@0.2.7-alpha.
+> @mastra/memory@0.2.7-alpha.5 build /home/runner/work/mastra/mastra/packages/memory
 > pnpm run check && tsup src/index.ts src/processors/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
 
 
-> @mastra/memory@0.2.7-alpha.
+> @mastra/memory@0.2.7-alpha.5 check /home/runner/work/mastra/mastra/packages/memory
 > tsc --noEmit
 
 CLI Building entry: src/index.ts, src/processors/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in
+TSC ⚡️ Build success in 9207ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in
+DTS ⚡️ Build success in 11942ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-
-CJS dist/processors/index.cjs 5.54 KB
-CJS ⚡️ Build success in 411ms
+ESM dist/index.js 14.82 KB
 ESM dist/processors/index.js 5.33 KB
-ESM
-
+ESM ⚡️ Build success in 771ms
+CJS dist/index.cjs 15.09 KB
+CJS dist/processors/index.cjs 5.54 KB
+CJS ⚡️ Build success in 771ms
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,21 @@
 # @mastra/memory
 
+## 0.2.7-alpha.5
+
+### Patch Changes
+
+- 93875ed: Improved the performance of Memory semantic recall by 2 to 3 times when using pg by making tweaks to @mastra/memory @mastra/core and @mastra/pg
+- Updated dependencies [93875ed]
+  - @mastra/core@0.8.0-alpha.5
+
+## 0.2.7-alpha.4
+
+### Patch Changes
+
+- Updated dependencies [d7e08e8]
+  - @mastra/core@0.8.0-alpha.4
+  - @mastra/rag@0.1.15-alpha.4
+
 ## 0.2.7-alpha.3
 
 ### Patch Changes
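The headline change in 0.2.7-alpha.5 is faster semantic recall. For orientation, a minimal sketch of the options this diff actually reads follows; only semanticRecall and its messageRange default of { before: 2, after: 2 } appear in the code below, so treat the exact shape as an assumption rather than documented API.

// A minimal sketch, assuming this config shape from the reads in the diff
// (config?.semanticRecall?.messageRange); the real MemoryConfig type from
// @mastra/core/memory may be stricter or broader than this.
export const memoryConfig = {
  semanticRecall: {
    // recall 2 messages before and after each vector-search hit (the diff's default)
    messageRange: { before: 2, after: 2 },
  },
};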
package/dist/_tsup-dts-rollup.d.cts
CHANGED

@@ -51,6 +51,10 @@ export declare class Memory extends MastraMemory {
         metadata: Record<string, unknown>;
     }): Promise<StorageThreadType>;
     deleteThread(threadId: string): Promise<void>;
+    private chunkText;
+    private hasher;
+    private embeddingCache;
+    private firstEmbed;
     private embedMessageContent;
     saveMessages({ messages, memoryConfig, }: {
         messages: MessageType[];

package/dist/_tsup-dts-rollup.d.ts
CHANGED

@@ -51,6 +51,10 @@ export declare class Memory extends MastraMemory {
         metadata: Record<string, unknown>;
     }): Promise<StorageThreadType>;
     deleteThread(threadId: string): Promise<void>;
+    private chunkText;
+    private hasher;
+    private embeddingCache;
+    private firstEmbed;
     private embedMessageContent;
     saveMessages({ messages, memoryConfig, }: {
         messages: MessageType[];
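The four new private members above (chunkText, hasher, embeddingCache, firstEmbed) replace the MDocument chunker from @mastra/rag. A self-contained sketch of the chunking step, mirroring the chunkText implementation that appears in the dist output below:

import { Tiktoken } from 'js-tiktoken/lite';
import o200k_base from 'js-tiktoken/ranks/o200k_base';

const encoder = new Tiktoken(o200k_base);

// Mirrors the new private chunkText: encode with the o200k_base ranks, slice
// the token stream into fixed-size windows, decode each window back to text.
export function chunkText(text: string, size = 4096): string[] {
  const tokens = encoder.encode(text);
  const chunks: string[] = [];
  let currentChunk: number[] = [];
  for (const token of tokens) {
    currentChunk.push(token);
    if (currentChunk.length >= size) {
      chunks.push(encoder.decode(currentChunk)); // flush a full window
      currentChunk = [];
    }
  }
  if (currentChunk.length > 0) {
    chunks.push(encoder.decode(currentChunk)); // remainder becomes the last chunk
  }
  return chunks;
}

Chunking on token count rather than characters keeps every chunk within the embedder's context window regardless of language or formatting.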
package/dist/index.cjs
CHANGED

@@ -2,10 +2,17 @@
 
 var core = require('@mastra/core');
 var memory = require('@mastra/core/memory');
-var rag = require('@mastra/rag');
 var ai = require('ai');
+var lite = require('js-tiktoken/lite');
+var o200k_base = require('js-tiktoken/ranks/o200k_base');
+var xxhash = require('xxhash-wasm');
 var zod = require('zod');
 
+function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
+
+var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
+var xxhash__default = /*#__PURE__*/_interopDefault(xxhash);
+
 // src/index.ts
 var updateWorkingMemoryTool = {
   description: "Update the working memory with new information",

@@ -35,6 +42,7 @@ var updateWorkingMemoryTool = {
 };
 
 // src/index.ts
+var encoder = new lite.Tiktoken(o200k_base__default.default);
 var Memory = class extends memory.MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });

@@ -79,8 +87,8 @@ var Memory = class extends memory.MastraMemory {
       messageRange: config?.semanticRecall?.messageRange ?? { before: 2, after: 2 }
     };
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const {
-      const {
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
       await Promise.all(
         embeddings.map(async (embedding) => {
           vectorResults.push(

@@ -180,22 +188,49 @@ var Memory = class extends memory.MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
+  chunkText(text, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks = [];
+    let currentChunk = [];
+    for (const token of tokens) {
+      currentChunk.push(token);
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+    return chunks;
+  }
+  hasher = xxhash__default.default();
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  embeddingCache = /* @__PURE__ */ new Map();
+  firstEmbed;
   async embedMessageContent(content) {
-    const
-    const
-
-
-
-
-
-
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      await this.firstEmbed;
+    }
+    const promise = ai.embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3
     });
-
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+    const result = {
       embeddings,
-      chunks
+      chunks,
+      dimension: embeddings[0]?.length
     };
+    this.embeddingCache.set(key, result);
+    return result;
   }
   async saveMessages({
     messages,

@@ -204,23 +239,29 @@ var Memory = class extends memory.MastraMemory {
     await this.saveWorkingMemory(messages);
     this.mutateMessagesToHideWorkingMemory(messages);
     const config = this.getMergedThreadConfig(memoryConfig);
+    const result = this.storage.__saveMessages({ messages });
     if (this.vector && config.semanticRecall) {
-
-
-
-
-
-      indexName
-
-
-
-
-
-
-
-
+      let indexName;
+      await Promise.all(
+        messages.map(async (message) => {
+          if (typeof message.content !== `string` || message.content === "") return;
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then((result2) => result2.indexName);
+          }
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId
+            }))
+          });
+        })
+      );
     }
-    return
+    return result;
   }
   mutateMessagesToHideWorkingMemory(messages) {
     const workingMemoryRegex = /<working_memory>([^]*?)<\/working_memory>/g;
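Two details of this CJS output are worth noting: the xxhash-wasm factory returns a Promise (it instantiates a WebAssembly module), so hasher holds that pending promise and each lookup awaits it; and the cache is keyed by the 32-bit hash of the content, so full message bodies are not retained in memory for the life of the process. A reduced sketch of the pattern:

import xxhash from 'xxhash-wasm';

// xxhash-wasm's default export resolves to { h32, h64, ... }; keep the Promise
// and await it per call, as the diff does.
const hasher = xxhash();

export async function cachedCompute<T>(
  cache: Map<number, T>,
  content: string,
  compute: (content: string) => Promise<T>,
): Promise<T> {
  const key = (await hasher).h32(content); // 4-byte key instead of the whole string
  const cached = cache.get(key);
  if (cached) return cached;
  const result = await compute(content);
  cache.set(key, result);
  return result;
}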
package/dist/index.js
CHANGED

@@ -1,7 +1,9 @@
 import { deepMerge } from '@mastra/core';
 import { MastraMemory } from '@mastra/core/memory';
-import { MDocument } from '@mastra/rag';
 import { embedMany } from 'ai';
+import { Tiktoken } from 'js-tiktoken/lite';
+import o200k_base from 'js-tiktoken/ranks/o200k_base';
+import xxhash from 'xxhash-wasm';
 import { z } from 'zod';
 
 // src/index.ts

@@ -33,6 +35,7 @@ var updateWorkingMemoryTool = {
 };
 
 // src/index.ts
+var encoder = new Tiktoken(o200k_base);
 var Memory = class extends MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });

@@ -77,8 +80,8 @@ var Memory = class extends MastraMemory {
      messageRange: config?.semanticRecall?.messageRange ?? { before: 2, after: 2 }
     };
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const {
-      const {
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
       await Promise.all(
         embeddings.map(async (embedding) => {
           vectorResults.push(

@@ -178,22 +181,49 @@ var Memory = class extends MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
+  chunkText(text, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks = [];
+    let currentChunk = [];
+    for (const token of tokens) {
+      currentChunk.push(token);
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+    return chunks;
+  }
+  hasher = xxhash();
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  embeddingCache = /* @__PURE__ */ new Map();
+  firstEmbed;
   async embedMessageContent(content) {
-    const
-    const
-
-
-
-
-
-
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      await this.firstEmbed;
+    }
+    const promise = embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3
     });
-
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+    const result = {
       embeddings,
-      chunks
+      chunks,
+      dimension: embeddings[0]?.length
     };
+    this.embeddingCache.set(key, result);
+    return result;
   }
   async saveMessages({
     messages,

@@ -202,23 +232,29 @@ var Memory = class extends MastraMemory {
     await this.saveWorkingMemory(messages);
     this.mutateMessagesToHideWorkingMemory(messages);
     const config = this.getMergedThreadConfig(memoryConfig);
+    const result = this.storage.__saveMessages({ messages });
     if (this.vector && config.semanticRecall) {
-
-
-
-
-
-      indexName
-
-
-
-
-
-
-
-
+      let indexName;
+      await Promise.all(
+        messages.map(async (message) => {
+          if (typeof message.content !== `string` || message.content === "") return;
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then((result2) => result2.indexName);
+          }
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId
+            }))
+          });
+        })
+      );
     }
-    return
+    return result;
  }
  mutateMessagesToHideWorkingMemory(messages) {
    const workingMemoryRegex = /<working_memory>([^]*?)<\/working_memory>/g;
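The ESM output carries the same change. One pattern worth isolating is the firstEmbed gate: per the source comments, multiple initial fastembed calls can fail while the embedding model is still downloading, so the first embedMany promise is stored and concurrent callers await it before issuing their own. In isolation, with run as a hypothetical stand-in for the embedMany call:

// First-call gate: the first invocation runs immediately and is remembered;
// later callers wait for it to settle (e.g. the model download) before starting.
let firstRun: Promise<unknown> | undefined;

export async function gatedRun<T>(run: () => Promise<T>): Promise<T> {
  if (firstRun instanceof Promise) {
    await firstRun; // a first call is in flight or finished; let it complete
  }
  const promise = run();
  if (!firstRun) firstRun = promise;
  return promise;
}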
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/memory",
-  "version": "0.2.7-alpha.3",
+  "version": "0.2.7-alpha.5",
   "description": "",
   "type": "module",
   "main": "./dist/index.js",

@@ -33,15 +33,15 @@
   "license": "ISC",
   "dependencies": {
     "@upstash/redis": "^1.34.5",
-    "js-tiktoken": "^1.0.19",
     "ai": "^4.2.2",
+    "js-tiktoken": "^1.0.19",
     "pg": "^8.13.3",
     "pg-pool": "^3.7.1",
     "postgres": "^3.4.5",
     "redis": "^4.7.0",
+    "xxhash-wasm": "^1.1.0",
     "zod": "^3.24.2",
-    "@mastra/core": "^0.8.0-alpha.3",
-    "@mastra/rag": "^0.1.15-alpha.3"
+    "@mastra/core": "^0.8.0-alpha.5"
   },
   "devDependencies": {
     "@ai-sdk/openai": "^1.3.3",
package/src/index.ts
CHANGED

@@ -3,10 +3,14 @@ import type { AiMessageType, CoreMessage, CoreTool } from '@mastra/core';
 import { MastraMemory } from '@mastra/core/memory';
 import type { MessageType, MemoryConfig, SharedMemoryConfig, StorageThreadType } from '@mastra/core/memory';
 import type { StorageGetMessagesArg } from '@mastra/core/storage';
-import { MDocument } from '@mastra/rag';
 import { embedMany } from 'ai';
+import { Tiktoken } from 'js-tiktoken/lite';
+import o200k_base from 'js-tiktoken/ranks/o200k_base';
+import xxhash from 'xxhash-wasm';
 import { updateWorkingMemoryTool } from './tools/working-memory';
 
+const encoder = new Tiktoken(o200k_base);
+
 /**
  * Concrete implementation of MastraMemory that adds support for thread configuration
  * and message injection.

@@ -73,8 +77,8 @@ export class Memory extends MastraMemory {
     };
 
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const {
-      const {
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString!);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
 
       await Promise.all(
         embeddings.map(async embedding => {

@@ -139,7 +143,6 @@ export class Memory extends MastraMemory {
     uiMessages: AiMessageType[];
   }> {
     if (resourceId) await this.validateThreadIsOwnedByResource(threadId, resourceId);
-
     const threadConfig = this.getMergedThreadConfig(config || {});
 
     if (!threadConfig.lastMessages && !threadConfig.semanticRecall) {

@@ -223,25 +226,71 @@ export class Memory extends MastraMemory {
     // }
   }
 
-  private
-  const
+  private chunkText(text: string, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks: string[] = [];
+    let currentChunk: number[] = [];
 
-  const
-
-
-
+    for (const token of tokens) {
+      currentChunk.push(token);
+
+      // If current chunk reaches size limit, add it to chunks and start a new one
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+
+    // Add any remaining tokens as the final chunk
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+
+    return chunks;
+  }
+
+  private hasher = xxhash();
+
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  private embeddingCache = new Map<
+    number,
+    {
+      chunks: string[];
+      embeddings: Awaited<ReturnType<typeof embedMany>>['embeddings'];
+      dimension: number | undefined;
+    }
+  >();
+  private firstEmbed: Promise<any> | undefined;
+  private async embedMessageContent(content: string) {
+    // use fast xxhash for lower memory usage. if we cache by content string we will store all messages in memory for the life of the process
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+
+    // for fastembed multiple initial calls to embed will fail if the model hasn't been downloaded yet.
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      // so wait for the first one
+      await this.firstEmbed;
+    }
 
-    const
-    values: chunks
+    const promise = embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3,
     });
 
-
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+
+    const result = {
       embeddings,
       chunks,
+      dimension: embeddings[0]?.length,
     };
+    this.embeddingCache.set(key, result);
+    return result;
   }
 
   async saveMessages({

@@ -259,27 +308,34 @@ export class Memory extends MastraMemory {
 
     const config = this.getMergedThreadConfig(memoryConfig);
 
+    const result = this.storage.__saveMessages({ messages });
+
     if (this.vector && config.semanticRecall) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      let indexName: Promise<string>;
+      await Promise.all(
+        messages.map(async message => {
+          if (typeof message.content !== `string` || message.content === '') return;
+
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then(result => result.indexName);
+          }
+
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId,
+            })),
+          });
+        }),
+      );
     }
 
-    return
+    return result;
   }
 
   protected mutateMessagesToHideWorkingMemory(messages: MessageType[]) {
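A last pattern from saveMessages: index creation is memoized as a promise, so whichever message first learns the embedding dimension kicks off createEmbeddingIndex exactly once, and every concurrent upsert awaits that same promise. A reduced sketch, with both function parameters as hypothetical stand-ins for the Memory and vector-store methods used in the diff:

// Memoize the promise itself, not its result: the synchronous check-and-assign
// ensures only the first mapper creates the index, and everyone awaits it.
export async function upsertAllOnce(
  dimensions: (number | undefined)[],
  createEmbeddingIndex: (dimension?: number) => Promise<{ indexName: string }>,
  upsert: (indexName: string) => Promise<void>,
): Promise<void> {
  let indexName: Promise<string> | undefined;
  await Promise.all(
    dimensions.map(async dimension => {
      if (indexName === undefined) {
        indexName = createEmbeddingIndex(dimension).then(r => r.indexName);
      }
      await upsert(await indexName);
    }),
  );
}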