@mastra/memory 0.2.7-alpha.4 → 0.2.7-alpha.5

@@ -1,29 +1,29 @@
 
-> @mastra/memory@0.2.7-alpha.4 build /home/runner/work/mastra/mastra/packages/memory
+> @mastra/memory@0.2.7-alpha.5 build /home/runner/work/mastra/mastra/packages/memory
 > pnpm run check && tsup src/index.ts src/processors/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
 
 
-> @mastra/memory@0.2.7-alpha.4 check /home/runner/work/mastra/mastra/packages/memory
+> @mastra/memory@0.2.7-alpha.5 check /home/runner/work/mastra/mastra/packages/memory
 > tsc --noEmit
 
 CLI Building entry: src/index.ts, src/processors/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in 10543ms
+TSC ⚡️ Build success in 9207ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in 6385ms
+DTS ⚡️ Build success in 11942ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-ESM dist/index.js 13.50 KB
+ESM dist/index.js 14.82 KB
 ESM dist/processors/index.js 5.33 KB
-ESM ⚡️ Build success in 470ms
-CJS dist/index.cjs 13.53 KB
+ESM ⚡️ Build success in 771ms
+CJS dist/index.cjs 15.09 KB
 CJS dist/processors/index.cjs 5.54 KB
-CJS ⚡️ Build success in 471ms
+CJS ⚡️ Build success in 771ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
 # @mastra/memory
 
+## 0.2.7-alpha.5
+
+### Patch Changes
+
+- 93875ed: Improved the performance of Memory semantic recall by 2 to 3 times when using pg by making tweaks to @mastra/memory @mastra/core and @mastra/pg
+- Updated dependencies [93875ed]
+  - @mastra/core@0.8.0-alpha.5
+
 ## 0.2.7-alpha.4
 
 ### Patch Changes
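
The "2 to 3 times" claim above maps onto three changes visible in the file diffs below: @mastra/rag document chunking is replaced with direct js-tiktoken chunking, embeddings are cached per content hash, and messages are embedded and upserted in parallel. A minimal sketch of the caching half, assuming nothing beyond the two new dependencies (`embed` is a hypothetical stand-in for any embedding call, not this package's API):

    import xxhash from 'xxhash-wasm';

    // Key the cache by a 32-bit hash of the content rather than the content
    // string itself, so message bodies are not retained for the process lifetime.
    const hasherReady = xxhash();
    const cache = new Map<number, number[][]>();

    async function embedCached(content: string, embed: (c: string) => Promise<number[][]>) {
      const { h32 } = await hasherReady;
      const key = h32(content);
      const hit = cache.get(key);
      if (hit) return hit; // identical content: skip the expensive embedding call
      const embeddings = await embed(content);
      cache.set(key, embeddings);
      return embeddings;
    }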
package/dist/_tsup-dts-rollup.d.cts CHANGED
@@ -51,6 +51,10 @@ export declare class Memory extends MastraMemory {
         metadata: Record<string, unknown>;
     }): Promise<StorageThreadType>;
     deleteThread(threadId: string): Promise<void>;
+    private chunkText;
+    private hasher;
+    private embeddingCache;
+    private firstEmbed;
     private embedMessageContent;
     saveMessages({ messages, memoryConfig, }: {
         messages: MessageType[];
package/dist/_tsup-dts-rollup.d.ts CHANGED
@@ -51,6 +51,10 @@ export declare class Memory extends MastraMemory {
         metadata: Record<string, unknown>;
     }): Promise<StorageThreadType>;
     deleteThread(threadId: string): Promise<void>;
+    private chunkText;
+    private hasher;
+    private embeddingCache;
+    private firstEmbed;
     private embedMessageContent;
     saveMessages({ messages, memoryConfig, }: {
         messages: MessageType[];
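
The four new private members above back a token-based chunker built directly on js-tiktoken, replacing the removed @mastra/rag dependency. For reference, the js-tiktoken lite API that `chunkText` relies on, as a standalone sketch (not package code; same encode/decode idea as the implementation in the dist diffs below):

    import { Tiktoken } from 'js-tiktoken/lite';
    import o200k_base from 'js-tiktoken/ranks/o200k_base';

    const encoder = new Tiktoken(o200k_base);

    // encode() yields token ids; decoding fixed-size slices gives chunks that
    // split on token boundaries instead of characters.
    function chunkBySlices(text: string, size = 4096): string[] {
      const tokens = encoder.encode(text);
      const chunks: string[] = [];
      for (let i = 0; i < tokens.length; i += size) {
        chunks.push(encoder.decode(tokens.slice(i, i + size)));
      }
      return chunks;
    }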
package/dist/index.cjs CHANGED
@@ -2,10 +2,17 @@
 
 var core = require('@mastra/core');
 var memory = require('@mastra/core/memory');
-var rag = require('@mastra/rag');
 var ai = require('ai');
+var lite = require('js-tiktoken/lite');
+var o200k_base = require('js-tiktoken/ranks/o200k_base');
+var xxhash = require('xxhash-wasm');
 var zod = require('zod');
 
+function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
+
+var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
+var xxhash__default = /*#__PURE__*/_interopDefault(xxhash);
+
 // src/index.ts
 var updateWorkingMemoryTool = {
   description: "Update the working memory with new information",
@@ -35,6 +42,7 @@ var updateWorkingMemoryTool = {
 };
 
 // src/index.ts
+var encoder = new lite.Tiktoken(o200k_base__default.default);
 var Memory = class extends memory.MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });
@@ -79,8 +87,8 @@ var Memory = class extends memory.MastraMemory {
       messageRange: config?.semanticRecall?.messageRange ?? { before: 2, after: 2 }
     };
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const { indexName } = await this.createEmbeddingIndex();
-      const { embeddings } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
       await Promise.all(
         embeddings.map(async (embedding) => {
           vectorResults.push(
@@ -180,22 +188,49 @@ var Memory = class extends memory.MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
+  chunkText(text, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks = [];
+    let currentChunk = [];
+    for (const token of tokens) {
+      currentChunk.push(token);
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+    return chunks;
+  }
+  hasher = xxhash__default.default();
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  embeddingCache = /* @__PURE__ */ new Map();
+  firstEmbed;
   async embedMessageContent(content) {
-    const doc = rag.MDocument.fromText(content);
-    const chunks = await doc.chunk({
-      strategy: "token",
-      size: 4096,
-      overlap: 20
-    });
-    const { embeddings } = await ai.embedMany({
-      values: chunks.map((chunk) => chunk.text),
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      await this.firstEmbed;
+    }
+    const promise = ai.embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3
     });
-    return {
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+    const result = {
       embeddings,
-      chunks
+      chunks,
+      dimension: embeddings[0]?.length
     };
+    this.embeddingCache.set(key, result);
+    return result;
   }
   async saveMessages({
     messages,
@@ -204,23 +239,29 @@ var Memory = class extends memory.MastraMemory {
     await this.saveWorkingMemory(messages);
     this.mutateMessagesToHideWorkingMemory(messages);
     const config = this.getMergedThreadConfig(memoryConfig);
+    const result = this.storage.__saveMessages({ messages });
     if (this.vector && config.semanticRecall) {
-      const { indexName } = await this.createEmbeddingIndex();
-      for (const message of messages) {
-        if (typeof message.content !== `string` || message.content === "") continue;
-        const { embeddings, chunks } = await this.embedMessageContent(message.content);
-        await this.vector.upsert({
-          indexName,
-          vectors: embeddings,
-          metadata: chunks.map(() => ({
-            message_id: message.id,
-            thread_id: message.threadId,
-            resource_id: message.resourceId
-          }))
-        });
-      }
+      let indexName;
+      await Promise.all(
+        messages.map(async (message) => {
+          if (typeof message.content !== `string` || message.content === "") return;
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then((result2) => result2.indexName);
+          }
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId
+            }))
+          });
+        })
+      );
     }
-    return this.storage.__saveMessages({ messages });
+    return result;
   }
   mutateMessagesToHideWorkingMemory(messages) {
     const workingMemoryRegex = /<working_memory>([^]*?)<\/working_memory>/g;
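
One detail worth noting in `embedMessageContent` above: the `firstEmbed` gate serializes only the very first embedding call when the provider is fastembed, since concurrent first calls can fail while the local model is still downloading; everything after that runs in parallel. The pattern in isolation (an illustrative sketch; `doEmbed` is a hypothetical stand-in, not this package's API):

    let firstEmbed: Promise<unknown> | undefined;

    async function gatedEmbed<T>(doEmbed: () => Promise<T>, serializeFirst: boolean): Promise<T> {
      // If the first call is still in flight, wait for it before starting another.
      if (serializeFirst && firstEmbed instanceof Promise) {
        await firstEmbed;
      }
      const promise = doEmbed();
      // Remember only the very first call; later calls run concurrently.
      if (serializeFirst && !firstEmbed) firstEmbed = promise;
      return promise;
    }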
package/dist/index.js CHANGED
@@ -1,7 +1,9 @@
 import { deepMerge } from '@mastra/core';
 import { MastraMemory } from '@mastra/core/memory';
-import { MDocument } from '@mastra/rag';
 import { embedMany } from 'ai';
+import { Tiktoken } from 'js-tiktoken/lite';
+import o200k_base from 'js-tiktoken/ranks/o200k_base';
+import xxhash from 'xxhash-wasm';
 import { z } from 'zod';
 
 // src/index.ts
@@ -33,6 +35,7 @@ var updateWorkingMemoryTool = {
 };
 
 // src/index.ts
+var encoder = new Tiktoken(o200k_base);
 var Memory = class extends MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });
@@ -77,8 +80,8 @@ var Memory = class extends MastraMemory {
       messageRange: config?.semanticRecall?.messageRange ?? { before: 2, after: 2 }
     };
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const { indexName } = await this.createEmbeddingIndex();
-      const { embeddings } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
       await Promise.all(
         embeddings.map(async (embedding) => {
           vectorResults.push(
@@ -178,22 +181,49 @@ var Memory = class extends MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
+  chunkText(text, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks = [];
+    let currentChunk = [];
+    for (const token of tokens) {
+      currentChunk.push(token);
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+    return chunks;
+  }
+  hasher = xxhash();
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  embeddingCache = /* @__PURE__ */ new Map();
+  firstEmbed;
   async embedMessageContent(content) {
-    const doc = MDocument.fromText(content);
-    const chunks = await doc.chunk({
-      strategy: "token",
-      size: 4096,
-      overlap: 20
-    });
-    const { embeddings } = await embedMany({
-      values: chunks.map((chunk) => chunk.text),
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      await this.firstEmbed;
+    }
+    const promise = embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3
     });
-    return {
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+    const result = {
       embeddings,
-      chunks
+      chunks,
+      dimension: embeddings[0]?.length
    };
+    this.embeddingCache.set(key, result);
+    return result;
   }
   async saveMessages({
     messages,
@@ -202,23 +232,29 @@ var Memory = class extends MastraMemory {
     await this.saveWorkingMemory(messages);
     this.mutateMessagesToHideWorkingMemory(messages);
     const config = this.getMergedThreadConfig(memoryConfig);
+    const result = this.storage.__saveMessages({ messages });
     if (this.vector && config.semanticRecall) {
-      const { indexName } = await this.createEmbeddingIndex();
-      for (const message of messages) {
-        if (typeof message.content !== `string` || message.content === "") continue;
-        const { embeddings, chunks } = await this.embedMessageContent(message.content);
-        await this.vector.upsert({
-          indexName,
-          vectors: embeddings,
-          metadata: chunks.map(() => ({
-            message_id: message.id,
-            thread_id: message.threadId,
-            resource_id: message.resourceId
-          }))
-        });
-      }
+      let indexName;
+      await Promise.all(
+        messages.map(async (message) => {
+          if (typeof message.content !== `string` || message.content === "") return;
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then((result2) => result2.indexName);
+          }
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId
+            }))
+          });
+        })
+      );
     }
-    return this.storage.__saveMessages({ messages });
+    return result;
   }
   mutateMessagesToHideWorkingMemory(messages) {
     const workingMemoryRegex = /<working_memory>([^]*?)<\/working_memory>/g;
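
`saveMessages` also changed shape in both bundles: storage persistence starts immediately, and the vector index is created at most once, lazily, by whichever message's embedding resolves first (its dimension sizes the index). The shared-promise idiom on its own, with illustrative names rather than the package's API:

    let indexName: Promise<string> | undefined;

    async function upsertAll(
      dimensions: number[],
      createIndex: (dimension: number) => Promise<{ indexName: string }>,
    ) {
      await Promise.all(
        dimensions.map(async dimension => {
          // The first caller kicks off index creation; every other caller awaits
          // the same promise, so the index is created exactly once even under
          // concurrent callers.
          if (typeof indexName === 'undefined') {
            indexName = createIndex(dimension).then(r => r.indexName);
          }
          const name = await indexName;
          // ...upsert vectors into `name` here
        }),
      );
    }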
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/memory",
-  "version": "0.2.7-alpha.4",
+  "version": "0.2.7-alpha.5",
   "description": "",
   "type": "module",
   "main": "./dist/index.js",
@@ -33,15 +33,15 @@
   "license": "ISC",
   "dependencies": {
     "@upstash/redis": "^1.34.5",
-    "js-tiktoken": "^1.0.19",
     "ai": "^4.2.2",
+    "js-tiktoken": "^1.0.19",
     "pg": "^8.13.3",
     "pg-pool": "^3.7.1",
     "postgres": "^3.4.5",
     "redis": "^4.7.0",
+    "xxhash-wasm": "^1.1.0",
     "zod": "^3.24.2",
-    "@mastra/rag": "^0.1.15-alpha.4",
-    "@mastra/core": "^0.8.0-alpha.4"
+    "@mastra/core": "^0.8.0-alpha.5"
   },
   "devDependencies": {
     "@ai-sdk/openai": "^1.3.3",
package/src/index.ts CHANGED
@@ -3,10 +3,14 @@ import type { AiMessageType, CoreMessage, CoreTool } from '@mastra/core';
 import { MastraMemory } from '@mastra/core/memory';
 import type { MessageType, MemoryConfig, SharedMemoryConfig, StorageThreadType } from '@mastra/core/memory';
 import type { StorageGetMessagesArg } from '@mastra/core/storage';
-import { MDocument } from '@mastra/rag';
 import { embedMany } from 'ai';
+import { Tiktoken } from 'js-tiktoken/lite';
+import o200k_base from 'js-tiktoken/ranks/o200k_base';
+import xxhash from 'xxhash-wasm';
 import { updateWorkingMemoryTool } from './tools/working-memory';
 
+const encoder = new Tiktoken(o200k_base);
+
 /**
  * Concrete implementation of MastraMemory that adds support for thread configuration
  * and message injection.
@@ -73,8 +77,8 @@ export class Memory extends MastraMemory {
     };
 
     if (config?.semanticRecall && selectBy?.vectorSearchString && this.vector && !!selectBy.vectorSearchString) {
-      const { indexName } = await this.createEmbeddingIndex();
-      const { embeddings } = await this.embedMessageContent(selectBy.vectorSearchString);
+      const { embeddings, dimension } = await this.embedMessageContent(selectBy.vectorSearchString!);
+      const { indexName } = await this.createEmbeddingIndex(dimension);
 
       await Promise.all(
         embeddings.map(async embedding => {
@@ -139,7 +143,6 @@ export class Memory extends MastraMemory {
     uiMessages: AiMessageType[];
   }> {
     if (resourceId) await this.validateThreadIsOwnedByResource(threadId, resourceId);
-
     const threadConfig = this.getMergedThreadConfig(config || {});
 
     if (!threadConfig.lastMessages && !threadConfig.semanticRecall) {
@@ -223,25 +226,71 @@ export class Memory extends MastraMemory {
     // }
   }
 
-  private async embedMessageContent(content: string) {
-    const doc = MDocument.fromText(content);
+  private chunkText(text: string, size = 4096) {
+    const tokens = encoder.encode(text);
+    const chunks: string[] = [];
+    let currentChunk: number[] = [];
 
-    const chunks = await doc.chunk({
-      strategy: 'token',
-      size: 4096,
-      overlap: 20,
-    });
+    for (const token of tokens) {
+      currentChunk.push(token);
+
+      // If current chunk reaches size limit, add it to chunks and start a new one
+      if (currentChunk.length >= size) {
+        chunks.push(encoder.decode(currentChunk));
+        currentChunk = [];
+      }
+    }
+
+    // Add any remaining tokens as the final chunk
+    if (currentChunk.length > 0) {
+      chunks.push(encoder.decode(currentChunk));
+    }
+
+    return chunks;
+  }
+
+  private hasher = xxhash();
+
+  // embedding is computationally expensive so cache content -> embeddings/chunks
+  private embeddingCache = new Map<
+    number,
+    {
+      chunks: string[];
+      embeddings: Awaited<ReturnType<typeof embedMany>>['embeddings'];
+      dimension: number | undefined;
+    }
+  >();
+  private firstEmbed: Promise<any> | undefined;
+  private async embedMessageContent(content: string) {
+    // use fast xxhash for lower memory usage. if we cache by content string we will store all messages in memory for the life of the process
+    const key = (await this.hasher).h32(content);
+    const cached = this.embeddingCache.get(key);
+    if (cached) return cached;
+    const chunks = this.chunkText(content);
+
+    // for fastembed multiple initial calls to embed will fail if the model hasn't been downloaded yet.
+    const isFastEmbed = this.embedder.provider === `fastembed`;
+    if (isFastEmbed && this.firstEmbed instanceof Promise) {
+      // so wait for the first one
+      await this.firstEmbed;
+    }
 
-    const { embeddings } = await embedMany({
-      values: chunks.map(chunk => chunk.text),
+    const promise = embedMany({
+      values: chunks,
       model: this.embedder,
       maxRetries: 3,
     });
 
-    return {
+    if (isFastEmbed && !this.firstEmbed) this.firstEmbed = promise;
+    const { embeddings } = await promise;
+
+    const result = {
       embeddings,
       chunks,
+      dimension: embeddings[0]?.length,
     };
+    this.embeddingCache.set(key, result);
+    return result;
   }
 
   async saveMessages({
@@ -259,27 +308,34 @@ export class Memory extends MastraMemory {
 
     const config = this.getMergedThreadConfig(memoryConfig);
 
+    const result = this.storage.__saveMessages({ messages });
+
     if (this.vector && config.semanticRecall) {
-      const { indexName } = await this.createEmbeddingIndex();
-
-      for (const message of messages) {
-        if (typeof message.content !== `string` || message.content === '') continue;
-
-        const { embeddings, chunks } = await this.embedMessageContent(message.content);
-
-        await this.vector.upsert({
-          indexName,
-          vectors: embeddings,
-          metadata: chunks.map(() => ({
-            message_id: message.id,
-            thread_id: message.threadId,
-            resource_id: message.resourceId,
-          })),
-        });
-      }
+      let indexName: Promise<string>;
+      await Promise.all(
+        messages.map(async message => {
+          if (typeof message.content !== `string` || message.content === '') return;
+
+          const { embeddings, chunks, dimension } = await this.embedMessageContent(message.content);
+
+          if (typeof indexName === `undefined`) {
+            indexName = this.createEmbeddingIndex(dimension).then(result => result.indexName);
+          }
+
+          await this.vector.upsert({
+            indexName: await indexName,
+            vectors: embeddings,
+            metadata: chunks.map(() => ({
+              message_id: message.id,
+              thread_id: message.threadId,
+              resource_id: message.resourceId,
+            })),
+          });
+        }),
+      );
     }
 
-    return this.storage.__saveMessages({ messages });
+    return result;
   }
 
   protected mutateMessagesToHideWorkingMemory(messages: MessageType[]) {
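
Taken together, the source change gives semantic recall callers per-content embedding caching, dimension-aware index creation, and parallel upserts with no API change. A rough usage sketch follows; it assumes semanticRecall settings live under `options` as in Mastra's docs, and the field names mirror what this diff reads from the merged config (`semanticRecall.topK`, `messageRange`, an ai-sdk `embedder`). Check `SharedMemoryConfig` in @mastra/core/memory for the authoritative shape:

    import { Memory } from '@mastra/memory';
    import { openai } from '@ai-sdk/openai'; // listed in devDependencies above

    const memory = new Memory({
      // any ai-sdk embedding model; the diff passes it to embedMany() with maxRetries: 3
      embedder: openai.embedding('text-embedding-3-small'),
      options: {
        semanticRecall: {
          topK: 2,
          messageRange: { before: 2, after: 2 }, // the default visible in the diff
        },
      },
    });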