npm - @mastra/rag - Versions diffs - 0.1.10-alpha.0 → 0.1.10-alpha.1 - Mend

@mastra/rag 0.1.10-alpha.0 → 0.1.10-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/.turbo/turbo-build.log +7 -7
package/CHANGELOG.md +13 -0
package/dist/_tsup-dts-rollup.d.cts +7 -2
package/dist/_tsup-dts-rollup.d.ts +7 -2
package/dist/index.cjs +61 -31
package/dist/index.d.cts +1 -0
package/dist/index.d.ts +1 -0
package/dist/index.js +61 -32
package/package.json +2 -2
package/src/document/document.test.ts +134 -23
package/src/document/transformers/character.ts +6 -3
package/src/tools/graph-rag.ts +40 -19
package/src/tools/vector-query.test.ts +26 -12
package/src/tools/vector-query.ts +29 -14
package/src/utils/default-settings.ts +22 -5

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,23 +1,23 @@
-> @mastra/rag@0.1.10-alpha.0 build /home/runner/work/mastra/mastra/packages/rag
+> @mastra/rag@0.1.10-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
 [34mCLI[39m Building entry: src/index.ts
 [34mCLI[39m Using tsconfig: tsconfig.json
 [34mCLI[39m tsup v8.4.0
 [34mTSC[39m Build start
-[32mTSC[39m ⚡️ Build success in 26351ms
+[32mTSC[39m ⚡️ Build success in 25133ms
 [34mDTS[39m Build start
 [34mCLI[39m Target: es2022
 Analysis will use the bundled TypeScript version 5.7.3
 [36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts[39m
 Analysis will use the bundled TypeScript version 5.7.3
 [36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts[39m
-[32mDTS[39m ⚡️ Build success in 37004ms
+[32mDTS[39m ⚡️ Build success in 36170ms
 [34mCLI[39m Cleaning output folder
 [34mESM[39m Build start
 [34mCJS[39m Build start
-[32mESM[39m [1mdist/index.js [22m[32m90.58 KB[39m
-[32mESM[39m ⚡️ Build success in 2197ms
-[32mCJS[39m [1mdist/index.cjs [22m[32m91.27 KB[39m
-[32mCJS[39m ⚡️ Build success in 2196ms
+[32mCJS[39m [1mdist/index.cjs [22m[32m92.97 KB[39m
+[32mCJS[39m ⚡️ Build success in 1897ms
+[32mESM[39m [1mdist/index.js [22m[32m92.25 KB[39m
+[32mESM[39m ⚡️ Build success in 1898ms

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # @mastra/rag
+## 0.1.10-alpha.1
+### Patch Changes
+- ff1a76c: Update Vector Query and GraphRag tool schema
+- b195f6e: Update character chunking strategy
+- Updated dependencies [f4854ee]
+- Updated dependencies [afaf73f]
+- Updated dependencies [44631b1]
+- Updated dependencies [6e559a0]
+- Updated dependencies [5f43505]
+  - @mastra/core@0.6.1-alpha.1
 ## 0.1.10-alpha.0
 ### Patch Changes

package/dist/_tsup-dts-rollup.d.cts CHANGED Viewed

@@ -143,7 +143,7 @@ declare type ExtractParams = {
 export { ExtractParams }
 export { ExtractParams as ExtractParams_alias_1 }
-declare const filterDescription = "JSON-formatted criteria to refine search results.\n- Must be valid JSON format\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Multiple filters can be combined";
+declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
 export { filterDescription }
 export { filterDescription as filterDescription_alias_1 }
 export { filterDescription as filterDescription_alias_2 }
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
 export { QDRANT_PROMPT }
 export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
+declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
+export { queryTextDescription }
+export { queryTextDescription as queryTextDescription_alias_1 }
+export { queryTextDescription as queryTextDescription_alias_2 }
 declare type QuestionAnswerExtractArgs = {
     llm?: LLM;
     questions?: number;
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
     }): TokenTransformer;
 }
-declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
+declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
 export { topKDescription }
 export { topKDescription as topKDescription_alias_1 }
 export { topKDescription as topKDescription_alias_2 }

package/dist/_tsup-dts-rollup.d.ts CHANGED Viewed

@@ -143,7 +143,7 @@ declare type ExtractParams = {
 export { ExtractParams }
 export { ExtractParams as ExtractParams_alias_1 }
-declare const filterDescription = "JSON-formatted criteria to refine search results.\n- Must be valid JSON format\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Multiple filters can be combined";
+declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
 export { filterDescription }
 export { filterDescription as filterDescription_alias_1 }
 export { filterDescription as filterDescription_alias_2 }
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
 export { QDRANT_PROMPT }
 export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
+declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
+export { queryTextDescription }
+export { queryTextDescription as queryTextDescription_alias_1 }
+export { queryTextDescription as queryTextDescription_alias_2 }
 declare type QuestionAnswerExtractArgs = {
     llm?: LLM;
     questions?: number;
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
     }): TokenTransformer;
 }
-declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
+declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
 export { topKDescription }
 export { topKDescription as topKDescription_alias_1 }
 export { topKDescription as topKDescription_alias_2 }

package/dist/index.cjs CHANGED Viewed

@@ -232,13 +232,14 @@ var CharacterTransformer = class extends TextTransformer {
     let currentPosition = 0;
     while (currentPosition < text.length) {
       let chunkEnd = currentPosition;
-      let currentChunk = "";
       while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
         chunkEnd++;
       }
-      currentChunk = text.slice(currentPosition, chunkEnd);
+      const currentChunk = text.slice(currentPosition, chunkEnd);
+      const chunkLength = this.lengthFunction(currentChunk);
       chunks.push(currentChunk);
-      currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
+      if (chunkEnd >= text.length) break;
+      currentPosition += Math.max(1, chunkLength - this.overlap);
     }
     return chunks;
   }
@@ -1825,19 +1826,35 @@ var vectorQuerySearch = async ({
 // src/utils/default-settings.ts
 var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
 var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
+var queryTextDescription = `The text query to search for in the vector database.
+- ALWAYS provide a non-empty query string
+- Must contain the user's question or search terms
+- Example: "market data" or "financial reports"
+- If the user's query is about a specific topic, use that topic as the queryText
+- Cannot be an empty string
+- Do not include quotes, just the text itself
+- Required for all searches`;
 var topKDescription = `Controls how many matching documents to return.
-- Must be a valid number
+- ALWAYS provide a value
+- If no value is provided, use the default (10)
+- Must be a valid and positive number
+- Cannot be NaN
 - Uses provided value if specified
-- Default: 10 results
-- Higher values provide more context
-- Lower values focus on best matches
+- Default: 10 results (use this if unsure)
+- Higher values (like 20) provide more context
+- Lower values (like 3) focus on best matches
 - Based on query requirements`;
 var filterDescription = `JSON-formatted criteria to refine search results.
-- Must be valid JSON format
+- ALWAYS provide a filter value
+- If no filter is provided, use the default ("{}")
+- MUST be a valid, complete JSON object with proper quotes and brackets
 - Uses provided filter if specified
 - Default: "{}" (no filtering)
+- Example for no filtering: "filter": "{}"
 - Example: '{"category": "health"}'
 - Based on query intent
+- Do NOT use single quotes or unquoted properties
+- IMPORTANT: Always ensure JSON is properly closed with matching brackets
 - Multiple filters can be combined`;
 // src/tools/graph-rag.ts
@@ -1859,32 +1876,40 @@ var createGraphRAGTool = ({
   const toolDescription = description || defaultGraphRagDescription();
   const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
   let isInitialized = false;
+  const baseSchema = {
+    queryText: zod.z.string().describe(queryTextDescription),
+    topK: zod.z.any().describe(topKDescription)
+  };
+  const inputSchema = enableFilter ? zod.z.object({
+    ...baseSchema,
+    filter: zod.z.string().describe(filterDescription)
+  }).passthrough() : zod.z.object(baseSchema).passthrough();
   return tools.createTool({
     id: toolId,
-    inputSchema: zod.z.object({
-      queryText: zod.z.string().describe("The text query to search for in the vector database"),
-      topK: zod.z.number().describe(topKDescription),
-      filter: zod.z.string().describe(filterDescription)
-    }),
+    inputSchema,
     outputSchema: zod.z.object({
       relevantContext: zod.z.any()
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       if (vectorStore) {
         let queryFilter = {};
         if (enableFilter) {
-          queryFilter = filter ? (() => {
+          queryFilter = (() => {
             try {
-              return JSON.parse(filter);
-            } catch {
-              return filter;
+              return typeof filter === "string" ? JSON.parse(filter) : filter;
+            } catch (error) {
+              if (mastra.logger) {
+                mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
+              }
+              return {};
             }
-          })() : filter;
+          })();
         }
         if (mastra.logger) {
-          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
+          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
         }
         const { results, queryEmbedding } = await vectorQuerySearch({
           indexName,
@@ -1892,7 +1917,7 @@ var createGraphRAGTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK,
+          topK: topKValue,
           includeVectors: true
         });
         if (!isInitialized) {
@@ -1908,7 +1933,7 @@ var createGraphRAGTool = ({
         }
         const rerankedResults = graphRag.query({
           query: queryEmbedding,
-          topK,
+          topK: topKValue,
           randomWalkSteps: graphOptions.randomWalkSteps,
           restartProb: graphOptions.restartProb
         });
@@ -1935,13 +1960,13 @@ var createVectorQueryTool = ({
   const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
   const toolDescription = description || defaultVectorQueryDescription();
   const baseSchema = {
-    queryText: zod.z.string().describe("The text query to search for in the vector database"),
-    topK: zod.z.coerce.number().describe(topKDescription)
+    queryText: zod.z.string().describe(queryTextDescription),
+    topK: zod.z.any().describe(topKDescription)
   };
   const inputSchema = enableFilter ? zod.z.object({
     ...baseSchema,
-    filter: zod.z.coerce.string().describe(filterDescription)
-  }).strict() : zod.z.object(baseSchema).strict();
+    filter: zod.z.string().describe(filterDescription)
+  }).passthrough() : zod.z.object(baseSchema).passthrough();
   return tools.createTool({
     id: toolId,
     inputSchema,
@@ -1950,20 +1975,24 @@ var createVectorQueryTool = ({
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       if (vectorStore) {
         let queryFilter = {};
         if (enableFilter && filter) {
           queryFilter = (() => {
             try {
-              return JSON.parse(filter);
-            } catch {
-              return filter;
+              return typeof filter === "string" ? JSON.parse(filter) : filter;
+            } catch (error) {
+              if (mastra.logger) {
+                mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
+              }
+              return {};
             }
           })();
         }
         if (mastra.logger) {
-          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
+          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
         }
         const { results } = await vectorQuerySearch({
           indexName,
@@ -1971,12 +2000,12 @@ var createVectorQueryTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK
+          topK: topKValue
         });
         if (reranker) {
           const rerankedResults = await rerank(results, queryText, reranker.model, {
             ...reranker.options,
-            topK: reranker.options?.topK || topK
+            topK: reranker.options?.topK || topKValue
           });
           const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
           return { relevantContext: relevantChunks2 };
@@ -2728,5 +2757,6 @@ exports.createVectorQueryTool = createVectorQueryTool;
 exports.defaultGraphRagDescription = defaultGraphRagDescription;
 exports.defaultVectorQueryDescription = defaultVectorQueryDescription;
 exports.filterDescription = filterDescription;
+exports.queryTextDescription = queryTextDescription;
 exports.rerank = rerank;
 exports.topKDescription = topKDescription;

package/dist/index.d.cts CHANGED Viewed

@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.cjs';
 export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.cjs';
 export { defaultVectorQueryDescription } from './_tsup-dts-rollup.cjs';
 export { defaultGraphRagDescription } from './_tsup-dts-rollup.cjs';
+export { queryTextDescription } from './_tsup-dts-rollup.cjs';
 export { topKDescription } from './_tsup-dts-rollup.cjs';
 export { filterDescription } from './_tsup-dts-rollup.cjs';

package/dist/index.d.ts CHANGED Viewed

@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.js';
 export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.js';
 export { defaultVectorQueryDescription } from './_tsup-dts-rollup.js';
 export { defaultGraphRagDescription } from './_tsup-dts-rollup.js';
+export { queryTextDescription } from './_tsup-dts-rollup.js';
 export { topKDescription } from './_tsup-dts-rollup.js';
 export { filterDescription } from './_tsup-dts-rollup.js';

package/dist/index.js CHANGED Viewed

@@ -230,13 +230,14 @@ var CharacterTransformer = class extends TextTransformer {
     let currentPosition = 0;
     while (currentPosition < text.length) {
       let chunkEnd = currentPosition;
-      let currentChunk = "";
       while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
         chunkEnd++;
       }
-      currentChunk = text.slice(currentPosition, chunkEnd);
+      const currentChunk = text.slice(currentPosition, chunkEnd);
+      const chunkLength = this.lengthFunction(currentChunk);
       chunks.push(currentChunk);
-      currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
+      if (chunkEnd >= text.length) break;
+      currentPosition += Math.max(1, chunkLength - this.overlap);
     }
     return chunks;
   }
@@ -1823,19 +1824,35 @@ var vectorQuerySearch = async ({
 // src/utils/default-settings.ts
 var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
 var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
+var queryTextDescription = `The text query to search for in the vector database.
+- ALWAYS provide a non-empty query string
+- Must contain the user's question or search terms
+- Example: "market data" or "financial reports"
+- If the user's query is about a specific topic, use that topic as the queryText
+- Cannot be an empty string
+- Do not include quotes, just the text itself
+- Required for all searches`;
 var topKDescription = `Controls how many matching documents to return.
-- Must be a valid number
+- ALWAYS provide a value
+- If no value is provided, use the default (10)
+- Must be a valid and positive number
+- Cannot be NaN
 - Uses provided value if specified
-- Default: 10 results
-- Higher values provide more context
-- Lower values focus on best matches
+- Default: 10 results (use this if unsure)
+- Higher values (like 20) provide more context
+- Lower values (like 3) focus on best matches
 - Based on query requirements`;
 var filterDescription = `JSON-formatted criteria to refine search results.
-- Must be valid JSON format
+- ALWAYS provide a filter value
+- If no filter is provided, use the default ("{}")
+- MUST be a valid, complete JSON object with proper quotes and brackets
 - Uses provided filter if specified
 - Default: "{}" (no filtering)
+- Example for no filtering: "filter": "{}"
 - Example: '{"category": "health"}'
 - Based on query intent
+- Do NOT use single quotes or unquoted properties
+- IMPORTANT: Always ensure JSON is properly closed with matching brackets
 - Multiple filters can be combined`;
 // src/tools/graph-rag.ts
@@ -1857,32 +1874,40 @@ var createGraphRAGTool = ({
   const toolDescription = description || defaultGraphRagDescription();
   const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
   let isInitialized = false;
+  const baseSchema = {
+    queryText: z.string().describe(queryTextDescription),
+    topK: z.any().describe(topKDescription)
+  };
+  const inputSchema = enableFilter ? z.object({
+    ...baseSchema,
+    filter: z.string().describe(filterDescription)
+  }).passthrough() : z.object(baseSchema).passthrough();
   return createTool({
     id: toolId,
-    inputSchema: z.object({
-      queryText: z.string().describe("The text query to search for in the vector database"),
-      topK: z.number().describe(topKDescription),
-      filter: z.string().describe(filterDescription)
-    }),
+    inputSchema,
     outputSchema: z.object({
       relevantContext: z.any()
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       if (vectorStore) {
         let queryFilter = {};
         if (enableFilter) {
-          queryFilter = filter ? (() => {
+          queryFilter = (() => {
             try {
-              return JSON.parse(filter);
-            } catch {
-              return filter;
+              return typeof filter === "string" ? JSON.parse(filter) : filter;
+            } catch (error) {
+              if (mastra.logger) {
+                mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
+              }
+              return {};
             }
-          })() : filter;
+          })();
         }
         if (mastra.logger) {
-          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
+          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
         }
         const { results, queryEmbedding } = await vectorQuerySearch({
           indexName,
@@ -1890,7 +1915,7 @@ var createGraphRAGTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK,
+          topK: topKValue,
           includeVectors: true
         });
         if (!isInitialized) {
@@ -1906,7 +1931,7 @@ var createGraphRAGTool = ({
         }
         const rerankedResults = graphRag.query({
           query: queryEmbedding,
-          topK,
+          topK: topKValue,
           randomWalkSteps: graphOptions.randomWalkSteps,
           restartProb: graphOptions.restartProb
         });
@@ -1933,13 +1958,13 @@ var createVectorQueryTool = ({
   const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
   const toolDescription = description || defaultVectorQueryDescription();
   const baseSchema = {
-    queryText: z.string().describe("The text query to search for in the vector database"),
-    topK: z.coerce.number().describe(topKDescription)
+    queryText: z.string().describe(queryTextDescription),
+    topK: z.any().describe(topKDescription)
   };
   const inputSchema = enableFilter ? z.object({
     ...baseSchema,
-    filter: z.coerce.string().describe(filterDescription)
-  }).strict() : z.object(baseSchema).strict();
+    filter: z.string().describe(filterDescription)
+  }).passthrough() : z.object(baseSchema).passthrough();
   return createTool({
     id: toolId,
     inputSchema,
@@ -1948,20 +1973,24 @@ var createVectorQueryTool = ({
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       if (vectorStore) {
         let queryFilter = {};
         if (enableFilter && filter) {
           queryFilter = (() => {
             try {
-              return JSON.parse(filter);
-            } catch {
-              return filter;
+              return typeof filter === "string" ? JSON.parse(filter) : filter;
+            } catch (error) {
+              if (mastra.logger) {
+                mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
+              }
+              return {};
             }
           })();
         }
         if (mastra.logger) {
-          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
+          mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
         }
         const { results } = await vectorQuerySearch({
           indexName,
@@ -1969,12 +1998,12 @@ var createVectorQueryTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
-          topK
+          topK: topKValue
         });
         if (reranker) {
           const rerankedResults = await rerank(results, queryText, reranker.model, {
             ...reranker.options,
-            topK: reranker.options?.topK || topK
+            topK: reranker.options?.topK || topKValue
           });
           const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
           return { relevantContext: relevantChunks2 };
@@ -2710,4 +2739,4 @@ Example Complex Query:
   "inStock": true
 }`;
-export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, rerank, topKDescription };
+export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, queryTextDescription, rerank, topKDescription };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/rag",
-  "version": "0.1.10-alpha.0",
+  "version": "0.1.10-alpha.1",
   "description": "",
   "type": "module",
   "main": "dist/index.js",
@@ -29,7 +29,7 @@
     "node-html-better-parser": "^1.4.7",
     "pathe": "^2.0.3",
     "zod": "^3.24.2",
-    "@mastra/core": "^0.6.1-alpha.0"
+    "@mastra/core": "^0.6.1-alpha.1"
   },
   "peerDependencies": {
     "ai": "^4.0.0"

package/src/document/document.test.ts CHANGED Viewed

@@ -225,37 +225,152 @@ describe('MDocument', () => {
       });
     });
     it('should properly implement overlap in character chunking', async () => {
-      // Create a text that will definitely need character-level chunking
+      // Test basic overlap functionality
       const text = 'a'.repeat(500) + 'b'.repeat(500) + 'c'.repeat(500);
       const chunkSize = 600;
-      const overlapSize = 100;
-      const doc = MDocument.fromText(text, { meta: 'data' });
+      const overlap = 100;
+      const doc = MDocument.fromText(text);
-      await doc.chunk({
+      const result = await doc.chunk({
         strategy: 'character',
         size: chunkSize,
-        overlap: overlapSize,
+        overlap,
       });
-      const docs = doc.getDocs();
-      expect(docs.length).toBeGreaterThan(1); // Should create multiple chunks
-      for (let i = 1; i < docs.length; i++) {
-        const prevChunk = docs[i - 1]?.text;
-        const currentChunk = docs[i]?.text;
+      // Verify overlap between chunks
+      for (let i = 1; i < result.length; i++) {
+        const prevChunk = result[i - 1]?.text;
+        const currentChunk = result[i]?.text;
         if (prevChunk && currentChunk) {
-          // Get the end of the previous chunk
-          const prevEnd = prevChunk.slice(-overlapSize);
-          // Get the start of the current chunk
-          const currentStart = currentChunk.slice(0, overlapSize);
+          // Get the end of the previous chunk and start of current chunk
+          const prevEnd = prevChunk.slice(-overlap);
+          const currentStart = currentChunk.slice(0, overlap);
-          // Check if there's overlap between chunks using a more flexible approach
-          // Find common substring between the end of previous chunk and start of current chunk
-          const commonText = findCommonSubstring(prevEnd, currentStart);
-          expect(commonText.length).toBeGreaterThan(0);
+          // There should be a common substring of length >= min(overlap, chunk length)
+          const commonSubstring = findCommonSubstring(prevEnd, currentStart);
+          expect(commonSubstring.length).toBeGreaterThan(0);
+        }
+      }
+    });
+    it('should ensure character chunks never exceed size limit', async () => {
+      // Create text with varying content to test size limits
+      const text = 'a'.repeat(50) + 'b'.repeat(100) + 'c'.repeat(30);
+      const chunkSize = 50;
+      const overlap = 10;
+      const doc = MDocument.fromText(text);
+      const chunks = await doc.chunk({
+        strategy: 'character',
+        size: chunkSize,
+        overlap,
+      });
+      chunks.forEach((chunk, i) => {
+        if (i > 0) {
+          const prevChunk = chunks[i - 1]?.text;
+          const actualOverlap = chunk.text.slice(0, overlap);
+          const expectedOverlap = prevChunk?.slice(-overlap);
+          expect(actualOverlap).toBe(expectedOverlap);
+        }
+      });
+      // Verify each chunk's size
+      let allChunksValid = true;
+      for (const chunk of chunks) {
+        if (chunk.text.length > chunkSize) {
+          allChunksValid = false;
+        }
+      }
+      expect(allChunksValid).toBe(true);
+      // Verify overlaps between consecutive chunks
+      for (let i = 1; i < chunks.length; i++) {
+        const prevChunk = chunks[i - 1]!;
+        const currentChunk = chunks[i]!;
+        // The end of the previous chunk should match the start of the current chunk
+        const prevEnd = prevChunk.text.slice(-overlap);
+        const currentStart = currentChunk.text.slice(0, overlap);
+        expect(currentStart).toBe(prevEnd);
+        expect(currentStart.length).toBeLessThanOrEqual(overlap);
+      }
+    });
+    it('should handle end chunks properly in character chunking', async () => {
+      const text = 'This is a test document that needs to be split into chunks with proper handling of the end.';
+      const chunkSize = 20;
+      const overlap = 5;
+      const testDoc = MDocument.fromText(text);
+      const chunks = await testDoc.chunk({
+        strategy: 'character',
+        size: chunkSize,
+        overlap,
+      });
+      // Verify no tiny fragments at the end
+      const lastChunk = chunks[chunks.length - 1]?.text;
+      expect(lastChunk?.length).toBeGreaterThan(5);
+      // Verify each chunk respects size limit
+      let allChunksValid = true;
+      for (const chunk of chunks) {
+        if (chunk.text.length > chunkSize) {
+          allChunksValid = false;
         }
       }
+      expect(allChunksValid).toBe(true);
+      // Verify each chunk size explicitly
+      for (const chunk of chunks) {
+        expect(chunk.text.length).toBeLessThanOrEqual(chunkSize);
+      }
+      // Verify overlaps between consecutive chunks
+      for (let i = 1; i < chunks.length; i++) {
+        const prevChunk = chunks[i - 1]!;
+        const currentChunk = chunks[i]!;
+        // The end of the previous chunk should match the start of the current chunk
+        const prevEnd = prevChunk.text.slice(-overlap);
+        const currentStart = currentChunk.text.slice(0, overlap);
+        expect(currentStart).toBe(prevEnd);
+        expect(currentStart.length).toBeLessThanOrEqual(overlap);
+      }
+    });
+    it('should not create tiny chunks at the end', async () => {
+      const text = 'ABCDEFGHIJ'; // 10 characters
+      const chunkSize = 4;
+      const overlap = 2;
+      const doc = MDocument.fromText(text);
+      const chunks = await doc.chunk({
+        strategy: 'character',
+        size: chunkSize,
+        overlap,
+      });
+      // Verify we don't have tiny chunks
+      chunks.forEach(chunk => {
+        // Each chunk should be either:
+        // 1. Full size (chunkSize)
+        // 2. Or at least half the chunk size if it's the last chunk
+        const minSize = chunk === chunks[chunks.length - 1] ? Math.floor(chunkSize / 2) : chunkSize;
+        expect(chunk.text.length).toBeGreaterThanOrEqual(minSize);
+      });
+      // Verify overlaps are maintained
+      for (let i = 1; i < chunks.length; i++) {
+        const prevChunk = chunks[i - 1]!;
+        const currentChunk = chunks[i]!;
+        const actualOverlap = currentChunk.text.slice(0, overlap);
+        const expectedOverlap = prevChunk.text.slice(-overlap);
+        expect(actualOverlap).toBe(expectedOverlap);
+      }
     });
   });
@@ -376,10 +491,6 @@ describe('MDocument', () => {
       for (let i = 1; i < docs.length; i++) {
         const prevChunk = docs[i - 1]?.text;
         const currentChunk = docs[i]?.text;
-        console.log({
-          prevChunk,
-          currentChunk,
-        });
         if (prevChunk && currentChunk) {
           // Test using two methods:

package/src/document/transformers/character.ts CHANGED Viewed

@@ -100,18 +100,21 @@ export class CharacterTransformer extends TextTransformer {
     while (currentPosition < text.length) {
       let chunkEnd = currentPosition;
-      let currentChunk = '';
       // Build chunk up to max size
       while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
         chunkEnd++;
       }
-      currentChunk = text.slice(currentPosition, chunkEnd);
+      const currentChunk = text.slice(currentPosition, chunkEnd);
+      const chunkLength = this.lengthFunction(currentChunk);
       chunks.push(currentChunk);
+      // If we're at the end, break to avoid tiny chunks
+      if (chunkEnd >= text.length) break;
       // Move position forward by chunk size minus overlap
-      currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
+      currentPosition += Math.max(1, chunkLength - this.overlap);
     }
     return chunks;

package/src/tools/graph-rag.ts CHANGED Viewed

@@ -3,7 +3,13 @@ import type { EmbeddingModel } from 'ai';
 import { z } from 'zod';
 import { GraphRAG } from '../graph-rag';
-import { vectorQuerySearch, defaultGraphRagDescription, filterDescription, topKDescription } from '../utils';
+import {
+  vectorQuerySearch,
+  defaultGraphRagDescription,
+  filterDescription,
+  topKDescription,
+  queryTextDescription,
+} from '../utils';
 export const createGraphRAGTool = ({
   vectorStoreName,
@@ -38,35 +44,51 @@ export const createGraphRAGTool = ({
   const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
   let isInitialized = false;
+  const baseSchema = {
+    queryText: z.string().describe(queryTextDescription),
+    topK: z.any().describe(topKDescription),
+  };
+  const inputSchema = enableFilter
+    ? z
+        .object({
+          ...baseSchema,
+          filter: z.string().describe(filterDescription),
+        })
+        .passthrough()
+    : z.object(baseSchema).passthrough();
   return createTool({
     id: toolId,
-    inputSchema: z.object({
-      queryText: z.string().describe('The text query to search for in the vector database'),
-      topK: z.number().describe(topKDescription),
-      filter: z.string().describe(filterDescription),
-    }),
+    inputSchema,
     outputSchema: z.object({
       relevantContext: z.any(),
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue =
+        typeof topK === 'number' && !isNaN(topK)
+          ? topK
+          : typeof topK === 'string' && !isNaN(Number(topK))
+            ? Number(topK)
+            : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       if (vectorStore) {
         let queryFilter = {};
         if (enableFilter) {
-          queryFilter = filter
-            ? (() => {
-                try {
-                  return JSON.parse(filter);
-                } catch {
-                  return filter;
-                }
-              })()
-            : filter;
+          queryFilter = (() => {
+            try {
+              return typeof filter === 'string' ? JSON.parse(filter) : filter;
+            } catch (error) {
+              // Log the error and use empty object
+              if (mastra.logger) {
+                mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
+              }
+              return {};
+            }
+          })();
         }
         if (mastra.logger) {
-          mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
+          mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
         }
         const { results, queryEmbedding } = await vectorQuerySearch({
           indexName,
@@ -74,7 +96,7 @@ export const createGraphRAGTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
-          topK,
+          topK: topKValue,
           includeVectors: true,
         });
@@ -96,7 +118,7 @@ export const createGraphRAGTool = ({
         // Get reranked results using GraphRAG
         const rerankedResults = graphRag.query({
           query: queryEmbedding,
-          topK,
+          topK: topKValue,
           randomWalkSteps: graphOptions.randomWalkSteps,
           restartProb: graphOptions.restartProb,
         });
@@ -107,7 +129,6 @@ export const createGraphRAGTool = ({
           relevantContext: relevantChunks,
         };
       }
       return {
         relevantContext: [],
       };

package/src/tools/vector-query.test.ts CHANGED Viewed

@@ -15,6 +15,7 @@ vi.mock('@mastra/core/tools', () => ({
 vi.mock('../utils', () => ({
   vectorQuerySearch: vi.fn().mockResolvedValue({ results: [] }),
   defaultVectorQueryDescription: () => 'Default vector query description',
+  queryTextDescription: 'Query text description',
   filterDescription: 'Filter description',
   topKDescription: 'Top K description',
 }));
@@ -29,6 +30,8 @@ describe('createVectorQueryTool', () => {
     },
     logger: {
       debug: vi.fn(),
+      warn: vi.fn(),
+      info: vi.fn(),
     },
   };
@@ -37,7 +40,7 @@ describe('createVectorQueryTool', () => {
   });
   describe('input schema validation', () => {
-    it('should make filter invalid when enableFilter is false', () => {
+    it('should handle filter permissively when enableFilter is false', () => {
       // Create tool with enableFilter set to false
       const tool = createVectorQueryTool({
         vectorStoreName: 'testStore',
@@ -61,10 +64,10 @@ describe('createVectorQueryTool', () => {
         ...validInput,
         filter: '{"field": "value"}',
       };
-      expect(() => schema.parse(inputWithFilter)).toThrow();
+      expect(() => schema.parse(inputWithFilter)).not.toThrow();
     });
-    it('should handle filter permissively when enableFilter is true', () => {
+    it('should handle filter when enableFilter is true', () => {
       const tool = createVectorQueryTool({
         vectorStoreName: 'testStore',
         indexName: 'testIndex',
@@ -81,13 +84,14 @@ describe('createVectorQueryTool', () => {
         { filter: '{"field": "value"}' },
         { filter: '{}' },
         { filter: 'simple-string' },
-        // Object inputs (should be coerced to strings)
+        // Empty
+        { filter: '' },
+      ];
+      const invalidTestCases = [
         { filter: { field: 'value' } },
         { filter: {} },
-        // Numbers (should be coerced)
         { filter: 123 },
-        // Empty/null values (should be coerced)
-        { filter: '' },
         { filter: null },
         { filter: undefined },
       ];
@@ -102,6 +106,16 @@ describe('createVectorQueryTool', () => {
         ).not.toThrow();
       });
+      invalidTestCases.forEach(({ filter }) => {
+        expect(() =>
+          schema.parse({
+            queryText: 'test query',
+            topK: 5,
+            filter,
+          }),
+        ).toThrow();
+      });
       // Verify that all parsed values are strings
       testCases.forEach(({ filter }) => {
         const result = schema.parse({
@@ -113,7 +127,7 @@ describe('createVectorQueryTool', () => {
       });
     });
-    it('should reject unexpected properties in both modes', () => {
+    it('should not reject unexpected properties in both modes', () => {
       // Test with enableFilter false
       const toolWithoutFilter = createVectorQueryTool({
         vectorStoreName: 'testStore',
@@ -129,7 +143,7 @@ describe('createVectorQueryTool', () => {
           topK: 5,
           unexpectedProp: 'value',
         }),
-      ).toThrow();
+      ).not.toThrow();
       // Test with enableFilter true
       const toolWithFilter = createVectorQueryTool({
@@ -147,7 +161,7 @@ describe('createVectorQueryTool', () => {
           filter: '{}',
           unexpectedProp: 'value',
         }),
-      ).toThrow();
+      ).not.toThrow();
     });
   });
@@ -228,10 +242,10 @@ describe('createVectorQueryTool', () => {
         mastra: mockMastra,
       });
-      // Check that vectorQuerySearch was called with the string filter
+      // Since this is not a valid filter, it should be ignored
       expect(vectorQuerySearch).toHaveBeenCalledWith(
         expect.objectContaining({
-          queryFilter: 'string-filter',
+          queryFilter: undefined,
         }),
       );
     });

package/src/tools/vector-query.ts CHANGED Viewed

@@ -4,7 +4,13 @@ import { z } from 'zod';
 import { rerank } from '../rerank';
 import type { RerankConfig } from '../rerank';
-import { vectorQuerySearch, defaultVectorQueryDescription, filterDescription, topKDescription } from '../utils';
+import {
+  vectorQuerySearch,
+  defaultVectorQueryDescription,
+  filterDescription,
+  topKDescription,
+  queryTextDescription,
+} from '../utils';
 export const createVectorQueryTool = ({
   vectorStoreName,
@@ -27,17 +33,17 @@ export const createVectorQueryTool = ({
   const toolDescription = description || defaultVectorQueryDescription();
   // Create base schema with required fields
   const baseSchema = {
-    queryText: z.string().describe('The text query to search for in the vector database'),
-    topK: z.coerce.number().describe(topKDescription),
+    queryText: z.string().describe(queryTextDescription),
+    topK: z.any().describe(topKDescription),
   };
   const inputSchema = enableFilter
     ? z
         .object({
           ...baseSchema,
-          filter: z.coerce.string().describe(filterDescription),
+          filter: z.string().describe(filterDescription),
         })
-        .strict()
-    : z.object(baseSchema).strict();
+        .passthrough()
+    : z.object(baseSchema).passthrough();
   return createTool({
     id: toolId,
     inputSchema,
@@ -46,6 +52,13 @@ export const createVectorQueryTool = ({
     }),
     description: toolDescription,
     execute: async ({ context: { queryText, topK, filter }, mastra }) => {
+      const topKValue =
+        typeof topK === 'number' && !isNaN(topK)
+          ? topK
+          : typeof topK === 'string' && !isNaN(Number(topK))
+            ? Number(topK)
+            : 10;
       const vectorStore = mastra?.vectors?.[vectorStoreName];
       // Get relevant chunks from the vector database
@@ -54,14 +67,18 @@ export const createVectorQueryTool = ({
         if (enableFilter && filter) {
           queryFilter = (() => {
             try {
-              return JSON.parse(filter);
-            } catch {
-              return filter;
+              return typeof filter === 'string' ? JSON.parse(filter) : filter;
+            } catch (error) {
+              // Log the error and use empty object
+              if (mastra.logger) {
+                mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
+              }
+              return {};
             }
           })();
         }
         if (mastra.logger) {
-          mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
+          mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
         }
         const { results } = await vectorQuerySearch({
@@ -70,24 +87,22 @@ export const createVectorQueryTool = ({
           queryText,
           model,
           queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
-          topK,
+          topK: topKValue,
         });
         if (reranker) {
           const rerankedResults = await rerank(results, queryText, reranker.model, {
             ...reranker.options,
-            topK: reranker.options?.topK || topK,
+            topK: reranker.options?.topK || topKValue,
           });
           const relevantChunks = rerankedResults.map(({ result }) => result?.metadata);
           return { relevantContext: relevantChunks };
         }
         const relevantChunks = results.map(result => result?.metadata);
         return {
           relevantContext: relevantChunks,
         };
       }
       return {
         relevantContext: [],
       };

package/src/utils/default-settings.ts CHANGED Viewed

@@ -4,18 +4,35 @@ export const defaultVectorQueryDescription = () =>
 export const defaultGraphRagDescription = () =>
   `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
+export const queryTextDescription = `The text query to search for in the vector database.
+- ALWAYS provide a non-empty query string
+- Must contain the user's question or search terms
+- Example: "market data" or "financial reports"
+- If the user's query is about a specific topic, use that topic as the queryText
+- Cannot be an empty string
+- Do not include quotes, just the text itself
+- Required for all searches`;
 export const topKDescription = `Controls how many matching documents to return.
-- Must be a valid number
+- ALWAYS provide a value
+- If no value is provided, use the default (10)
+- Must be a valid and positive number
+- Cannot be NaN
 - Uses provided value if specified
-- Default: 10 results
-- Higher values provide more context
-- Lower values focus on best matches
+- Default: 10 results (use this if unsure)
+- Higher values (like 20) provide more context
+- Lower values (like 3) focus on best matches
 - Based on query requirements`;
 export const filterDescription = `JSON-formatted criteria to refine search results.
-- Must be valid JSON format
+- ALWAYS provide a filter value
+- If no filter is provided, use the default ("{}")
+- MUST be a valid, complete JSON object with proper quotes and brackets
 - Uses provided filter if specified
 - Default: "{}" (no filtering)
+- Example for no filtering: "filter": "{}"
 - Example: '{"category": "health"}'
 - Based on query intent
+- Do NOT use single quotes or unquoted properties
+- IMPORTANT: Always ensure JSON is properly closed with matching brackets
 - Multiple filters can be combined`;