npm - @mastra/memory - Versions diffs - 0.2.11-alpha.1 → 0.2.11-alpha.2 - Mend

@mastra/memory 0.2.11-alpha.1 → 0.2.11-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,29 +1,29 @@
-> @mastra/memory@0.2.11-alpha.1 build /home/runner/work/mastra/mastra/packages/memory
+> @mastra/memory@0.2.11-alpha.2 build /home/runner/work/mastra/mastra/packages/memory
 > pnpm run check && tsup src/index.ts src/processors/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
-> @mastra/memory@0.2.11-alpha.1 check /home/runner/work/mastra/mastra/packages/memory
+> @mastra/memory@0.2.11-alpha.2 check /home/runner/work/mastra/mastra/packages/memory
 > tsc --noEmit
 [34mCLI[39m Building entry: src/index.ts, src/processors/index.ts
 [34mCLI[39m Using tsconfig: tsconfig.json
 [34mCLI[39m tsup v8.4.0
 [34mTSC[39m Build start
-[32mTSC[39m ⚡️ Build success in 9818ms
+[32mTSC[39m ⚡️ Build success in 10363ms
 [34mDTS[39m Build start
 [34mCLI[39m Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 [36mWriting package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.ts[39m
 Analysis will use the bundled TypeScript version 5.8.2
 [36mWriting package typings: /home/runner/work/mastra/mastra/packages/memory/dist/_tsup-dts-rollup.d.cts[39m
-[32mDTS[39m ⚡️ Build success in 11869ms
+[32mDTS[39m ⚡️ Build success in 12416ms
 [34mCLI[39m Cleaning output folder
 [34mESM[39m Build start
 [34mCJS[39m Build start
+[32mCJS[39m [1mdist/index.cjs            [22m[32m17.58 KB[39m
+[32mCJS[39m [1mdist/processors/index.cjs [22m[32m5.54 KB[39m
+[32mCJS[39m ⚡️ Build success in 946ms
 [32mESM[39m [1mdist/index.js            [22m[32m17.39 KB[39m
 [32mESM[39m [1mdist/processors/index.js [22m[32m5.33 KB[39m
-[32mESM[39m ⚡️ Build success in 1099ms
-[32mCJS[39m [1mdist/index.cjs            [22m[32m17.66 KB[39m
-[32mCJS[39m [1mdist/processors/index.cjs [22m[32m5.54 KB[39m
-[32mCJS[39m ⚡️ Build success in 1103ms
+[32mESM[39m ⚡️ Build success in 950ms

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # @mastra/memory
+## 0.2.11-alpha.2
+### Patch Changes
+- 5c6825c: [MASTRA-2782] removed tiktoken from memory chunktext
 ## 0.2.11-alpha.1
 ### Patch Changes

package/dist/index.cjs CHANGED Viewed

@@ -3,14 +3,11 @@
 var core = require('@mastra/core');
 var memory = require('@mastra/core/memory');
 var ai = require('ai');
-var lite = require('js-tiktoken/lite');
-var o200k_base = require('js-tiktoken/ranks/o200k_base');
 var xxhash = require('xxhash-wasm');
 var zod = require('zod');
 function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
-var o200k_base__default = /*#__PURE__*/_interopDefault(o200k_base);
 var xxhash__default = /*#__PURE__*/_interopDefault(xxhash);
 // src/index.ts
@@ -84,7 +81,7 @@ function reorderToolCallsAndResults(messages) {
 }
 // src/index.ts
-var encoder = new lite.Tiktoken(o200k_base__default.default);
+var CHARS_PER_TOKEN = 4;
 var Memory = class extends memory.MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });
@@ -237,19 +234,22 @@ var Memory = class extends memory.MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
-  chunkText(text, size = 4096) {
-    const tokens = encoder.encode(text);
+  chunkText(text, tokenSize = 4096) {
+    const charSize = tokenSize * CHARS_PER_TOKEN;
     const chunks = [];
-    let currentChunk = [];
-    for (const token of tokens) {
-      currentChunk.push(token);
-      if (currentChunk.length >= size) {
-        chunks.push(encoder.decode(currentChunk));
-        currentChunk = [];
+    let currentChunk = "";
+    const words = text.split(/\s+/);
+    for (const word of words) {
+      const wordWithSpace = currentChunk ? " " + word : word;
+      if (currentChunk.length + wordWithSpace.length > charSize) {
+        chunks.push(currentChunk);
+        currentChunk = word;
+      } else {
+        currentChunk += wordWithSpace;
       }
     }
-    if (currentChunk.length > 0) {
-      chunks.push(encoder.decode(currentChunk));
+    if (currentChunk) {
+      chunks.push(currentChunk);
     }
     return chunks;
   }

package/dist/index.js CHANGED Viewed

@@ -1,8 +1,6 @@
 import { deepMerge } from '@mastra/core';
 import { MastraMemory } from '@mastra/core/memory';
 import { embedMany } from 'ai';
-import { Tiktoken } from 'js-tiktoken/lite';
-import o200k_base from 'js-tiktoken/ranks/o200k_base';
 import xxhash from 'xxhash-wasm';
 import { z } from 'zod';
@@ -77,7 +75,7 @@ function reorderToolCallsAndResults(messages) {
 }
 // src/index.ts
-var encoder = new Tiktoken(o200k_base);
+var CHARS_PER_TOKEN = 4;
 var Memory = class extends MastraMemory {
   constructor(config = {}) {
     super({ name: "Memory", ...config });
@@ -230,19 +228,22 @@ var Memory = class extends MastraMemory {
   async deleteThread(threadId) {
     await this.storage.__deleteThread({ threadId });
   }
-  chunkText(text, size = 4096) {
-    const tokens = encoder.encode(text);
+  chunkText(text, tokenSize = 4096) {
+    const charSize = tokenSize * CHARS_PER_TOKEN;
     const chunks = [];
-    let currentChunk = [];
-    for (const token of tokens) {
-      currentChunk.push(token);
-      if (currentChunk.length >= size) {
-        chunks.push(encoder.decode(currentChunk));
-        currentChunk = [];
+    let currentChunk = "";
+    const words = text.split(/\s+/);
+    for (const word of words) {
+      const wordWithSpace = currentChunk ? " " + word : word;
+      if (currentChunk.length + wordWithSpace.length > charSize) {
+        chunks.push(currentChunk);
+        currentChunk = word;
+      } else {
+        currentChunk += wordWithSpace;
       }
     }
-    if (currentChunk.length > 0) {
-      chunks.push(encoder.decode(currentChunk));
+    if (currentChunk) {
+      chunks.push(currentChunk);
     }
     return chunks;
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/memory",
-  "version": "0.2.11-alpha.1",
+  "version": "0.2.11-alpha.2",
   "description": "",
   "type": "module",
   "main": "./dist/index.js",

package/src/index.ts CHANGED Viewed

@@ -4,13 +4,13 @@ import { MastraMemory } from '@mastra/core/memory';
 import type { MessageType, MemoryConfig, SharedMemoryConfig, StorageThreadType } from '@mastra/core/memory';
 import type { StorageGetMessagesArg } from '@mastra/core/storage';
 import { embedMany } from 'ai';
-import { Tiktoken } from 'js-tiktoken/lite';
-import o200k_base from 'js-tiktoken/ranks/o200k_base';
 import xxhash from 'xxhash-wasm';
 import { updateWorkingMemoryTool } from './tools/working-memory';
 import { reorderToolCallsAndResults } from './utils';
-const encoder = new Tiktoken(o200k_base);
+// Average characters per token based on OpenAI's tokenization
+const CHARS_PER_TOKEN = 4;
 /**
  * Concrete implementation of MastraMemory that adds support for thread configuration
@@ -233,24 +233,31 @@ export class Memory extends MastraMemory {
     await this.storage.__deleteThread({ threadId });
   }
-  private chunkText(text: string, size = 4096) {
-    const tokens = encoder.encode(text);
+  private chunkText(text: string, tokenSize = 4096) {
+    // Convert token size to character size with some buffer
+    const charSize = tokenSize * CHARS_PER_TOKEN;
     const chunks: string[] = [];
-    let currentChunk: number[] = [];
+    let currentChunk = '';
+    // Split text into words to avoid breaking words
+    const words = text.split(/\s+/);
-    for (const token of tokens) {
-      currentChunk.push(token);
+    for (const word of words) {
+      // Add space before word unless it's the first word in the chunk
+      const wordWithSpace = currentChunk ? ' ' + word : word;
-      // If current chunk reaches size limit, add it to chunks and start a new one
-      if (currentChunk.length >= size) {
-        chunks.push(encoder.decode(currentChunk));
-        currentChunk = [];
+      // If adding this word would exceed the chunk size, start a new chunk
+      if (currentChunk.length + wordWithSpace.length > charSize) {
+        chunks.push(currentChunk);
+        currentChunk = word;
+      } else {
+        currentChunk += wordWithSpace;
       }
     }
-    // Add any remaining tokens as the final chunk
-    if (currentChunk.length > 0) {
-      chunks.push(encoder.decode(currentChunk));
+    // Add the final chunk if not empty
+    if (currentChunk) {
+      chunks.push(currentChunk);
     }
     return chunks;