@mastra/memory 0.2.11-alpha.1 → 0.2.11-alpha.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,29 @@
1
1
 
2
- > @mastra/memory@0.2.11-alpha.1 build /home/runner/work/mastra/mastra/packages/memory
2
+ > @mastra/memory@0.2.11-alpha.2 build /home/runner/work/mastra/mastra/packages/memory
3
3
  > pnpm run check && tsup src/index.ts src/processors/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
4
4
 
5
5
 
6
- > @mastra/memory@0.2.11-alpha.1 check /home/runner/work/mastra/mastra/packages/memory
6
+ > @mastra/memory@0.2.11-alpha.2 check /home/runner/work/mastra/mastra/packages/memory
7
7
  > tsc --noEmit
8
8
 
9
9
  CLI Building entry: src/index.ts, src/processors/index.ts
10
10
  CLI Using tsconfig: tsconfig.json
11
11
  CLI tsup v8.4.0
12
12
  TSC Build start
13
- TSC ⚡️ Build success in 9818ms
13
+ TSC ⚡️ Build success in 10363ms
14
14
  DTS Build start
15
15
  CLI Target: es2022
16
16
  Analysis will use the bundled TypeScript version 5.8.2
17
17
  Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.ts
18
18
  Analysis will use the bundled TypeScript version 5.8.2
19
19
  Writing package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.cts
20
- DTS ⚡️ Build success in 11869ms
20
+ DTS ⚡️ Build success in 12416ms
21
21
  CLI Cleaning output folder
22
22
  ESM Build start
23
23
  CJS Build start
24
+ CJS dist/index.cjs 17.58 KB
25
+ CJS dist/processors/index.cjs 5.54 KB
26
+ CJS ⚡️ Build success in 946ms
24
27
  ESM dist/index.js 17.39 KB
25
28
  ESM dist/processors/index.js 5.33 KB
26
- ESM ⚡️ Build success in 1099ms
27
- CJS dist/index.cjs 17.66 KB
28
- CJS dist/processors/index.cjs 5.54 KB
29
- CJS ⚡️ Build success in 1103ms
29
+ ESM ⚡️ Build success in 950ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @mastra/memory
2
2
 
3
+ ## 0.2.11-alpha.2
4
+
5
+ ### Patch Changes
6
+
7
+ - 5c6825c: [MASTRA-2782] removed tiktoken from memory chunktext
8
+
3
9
  ## 0.2.11-alpha.1
4
10
 
5
11
  ### Patch Changes
package/dist/index.cjs CHANGED
@@ -3,14 +3,11 @@
3
3
  var core = require('@mastra/core');
4
4
  var memory = require('@mastra/core/memory');
5
5
  var ai = require('ai');
6
- var lite = require('js-tiktoken/lite');
7
- var o200k_base = require('js-tiktoken/ranks/o200k_base');
8
6
  var xxhash = require('xxhash-wasm');
9
7
  var zod = require('zod');
10
8
 
11
9
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
12
10
 
13
- var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
14
11
  var xxhash__default = /*#__PURE__*/_interopDefault(xxhash);
15
12
 
16
13
  // src/index.ts
@@ -84,7 +81,7 @@ function reorderToolCallsAndResults(messages) {
84
81
  }
85
82
 
86
83
  // src/index.ts
87
- var encoder = new lite.Tiktoken(o200k_base__default.default);
84
+ var CHARS_PER_TOKEN = 4;
88
85
  var Memory = class extends memory.MastraMemory {
89
86
  constructor(config = {}) {
90
87
  super({ name: "Memory", ...config });
@@ -237,19 +234,22 @@ var Memory = class extends memory.MastraMemory {
237
234
  async deleteThread(threadId) {
238
235
  await this.storage.__deleteThread({ threadId });
239
236
  }
240
- chunkText(text, size = 4096) {
241
- const tokens = encoder.encode(text);
237
+ chunkText(text, tokenSize = 4096) {
238
+ const charSize = tokenSize * CHARS_PER_TOKEN;
242
239
  const chunks = [];
243
- let currentChunk = [];
244
- for (const token of tokens) {
245
- currentChunk.push(token);
246
- if (currentChunk.length >= size) {
247
- chunks.push(encoder.decode(currentChunk));
248
- currentChunk = [];
240
+ let currentChunk = "";
241
+ const words = text.split(/\s+/);
242
+ for (const word of words) {
243
+ const wordWithSpace = currentChunk ? " " + word : word;
244
+ if (currentChunk.length + wordWithSpace.length > charSize) {
245
+ chunks.push(currentChunk);
246
+ currentChunk = word;
247
+ } else {
248
+ currentChunk += wordWithSpace;
249
249
  }
250
250
  }
251
- if (currentChunk.length > 0) {
252
- chunks.push(encoder.decode(currentChunk));
251
+ if (currentChunk) {
252
+ chunks.push(currentChunk);
253
253
  }
254
254
  return chunks;
255
255
  }
package/dist/index.js CHANGED
@@ -1,8 +1,6 @@
1
1
  import { deepMerge } from '@mastra/core';
2
2
  import { MastraMemory } from '@mastra/core/memory';
3
3
  import { embedMany } from 'ai';
4
- import { Tiktoken } from 'js-tiktoken/lite';
5
- import o200k_base from 'js-tiktoken/ranks/o200k_base';
6
4
  import xxhash from 'xxhash-wasm';
7
5
  import { z } from 'zod';
8
6
 
@@ -77,7 +75,7 @@ function reorderToolCallsAndResults(messages) {
77
75
  }
78
76
 
79
77
  // src/index.ts
80
- var encoder = new Tiktoken(o200k_base);
78
+ var CHARS_PER_TOKEN = 4;
81
79
  var Memory = class extends MastraMemory {
82
80
  constructor(config = {}) {
83
81
  super({ name: "Memory", ...config });
@@ -230,19 +228,22 @@ var Memory = class extends MastraMemory {
230
228
  async deleteThread(threadId) {
231
229
  await this.storage.__deleteThread({ threadId });
232
230
  }
233
- chunkText(text, size = 4096) {
234
- const tokens = encoder.encode(text);
231
+ chunkText(text, tokenSize = 4096) {
232
+ const charSize = tokenSize * CHARS_PER_TOKEN;
235
233
  const chunks = [];
236
- let currentChunk = [];
237
- for (const token of tokens) {
238
- currentChunk.push(token);
239
- if (currentChunk.length >= size) {
240
- chunks.push(encoder.decode(currentChunk));
241
- currentChunk = [];
234
+ let currentChunk = "";
235
+ const words = text.split(/\s+/);
236
+ for (const word of words) {
237
+ const wordWithSpace = currentChunk ? " " + word : word;
238
+ if (currentChunk.length + wordWithSpace.length > charSize) {
239
+ chunks.push(currentChunk);
240
+ currentChunk = word;
241
+ } else {
242
+ currentChunk += wordWithSpace;
242
243
  }
243
244
  }
244
- if (currentChunk.length > 0) {
245
- chunks.push(encoder.decode(currentChunk));
245
+ if (currentChunk) {
246
+ chunks.push(currentChunk);
246
247
  }
247
248
  return chunks;
248
249
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/memory",
3
- "version": "0.2.11-alpha.1",
3
+ "version": "0.2.11-alpha.2",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
package/src/index.ts CHANGED
@@ -4,13 +4,13 @@ import { MastraMemory } from '@mastra/core/memory';
4
4
  import type { MessageType, MemoryConfig, SharedMemoryConfig, StorageThreadType } from '@mastra/core/memory';
5
5
  import type { StorageGetMessagesArg } from '@mastra/core/storage';
6
6
  import { embedMany } from 'ai';
7
- import { Tiktoken } from 'js-tiktoken/lite';
8
- import o200k_base from 'js-tiktoken/ranks/o200k_base';
7
+
9
8
  import xxhash from 'xxhash-wasm';
10
9
  import { updateWorkingMemoryTool } from './tools/working-memory';
11
10
  import { reorderToolCallsAndResults } from './utils';
12
11
 
13
- const encoder = new Tiktoken(o200k_base);
12
+ // Average characters per token based on OpenAI's tokenization
13
+ const CHARS_PER_TOKEN = 4;
14
14
 
15
15
  /**
16
16
  * Concrete implementation of MastraMemory that adds support for thread configuration
@@ -233,24 +233,31 @@ export class Memory extends MastraMemory {
233
233
  await this.storage.__deleteThread({ threadId });
234
234
  }
235
235
 
236
- private chunkText(text: string, size = 4096) {
237
- const tokens = encoder.encode(text);
236
+ private chunkText(text: string, tokenSize = 4096) {
237
+ // Convert token size to character size with some buffer
238
+ const charSize = tokenSize * CHARS_PER_TOKEN;
238
239
  const chunks: string[] = [];
239
- let currentChunk: number[] = [];
240
+ let currentChunk = '';
241
+
242
+ // Split text into words to avoid breaking words
243
+ const words = text.split(/\s+/);
240
244
 
241
- for (const token of tokens) {
242
- currentChunk.push(token);
245
+ for (const word of words) {
246
+ // Add space before word unless it's the first word in the chunk
247
+ const wordWithSpace = currentChunk ? ' ' + word : word;
243
248
 
244
- // If current chunk reaches size limit, add it to chunks and start a new one
245
- if (currentChunk.length >= size) {
246
- chunks.push(encoder.decode(currentChunk));
247
- currentChunk = [];
249
+ // If adding this word would exceed the chunk size, start a new chunk
250
+ if (currentChunk.length + wordWithSpace.length > charSize) {
251
+ chunks.push(currentChunk);
252
+ currentChunk = word;
253
+ } else {
254
+ currentChunk += wordWithSpace;
248
255
  }
249
256
  }
250
257
 
251
- // Add any remaining tokens as the final chunk
252
- if (currentChunk.length > 0) {
253
- chunks.push(encoder.decode(currentChunk));
258
+ // Add the final chunk if not empty
259
+ if (currentChunk) {
260
+ chunks.push(currentChunk);
254
261
  }
255
262
 
256
263
  return chunks;