@mastra/rag 0.1.10-alpha.0 → 0.1.10-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,23 @@
1
1
 
2
- > @mastra/rag@0.1.10-alpha.0 build /home/runner/work/mastra/mastra/packages/rag
2
+ > @mastra/rag@0.1.10-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
3
3
  > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.4.0
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 26351ms
9
+ TSC ⚡️ Build success in 25133ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.7.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts
14
14
  Analysis will use the bundled TypeScript version 5.7.3
15
15
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts
16
- DTS ⚡️ Build success in 37004ms
16
+ DTS ⚡️ Build success in 36170ms
17
17
  CLI Cleaning output folder
18
18
  ESM Build start
19
19
  CJS Build start
20
- ESM dist/index.js 90.58 KB
21
- ESM ⚡️ Build success in 2197ms
22
- CJS dist/index.cjs 91.27 KB
23
- CJS ⚡️ Build success in 2196ms
20
+ CJS dist/index.cjs 92.97 KB
21
+ CJS ⚡️ Build success in 1897ms
22
+ ESM dist/index.js 92.25 KB
23
+ ESM ⚡️ Build success in 1898ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.1.10-alpha.1
4
+
5
+ ### Patch Changes
6
+
7
+ - ff1a76c: Update Vector Query and GraphRag tool schema
8
+ - b195f6e: Update character chunking strategy
9
+ - Updated dependencies [f4854ee]
10
+ - Updated dependencies [afaf73f]
11
+ - Updated dependencies [44631b1]
12
+ - Updated dependencies [6e559a0]
13
+ - Updated dependencies [5f43505]
14
+ - @mastra/core@0.6.1-alpha.1
15
+
3
16
  ## 0.1.10-alpha.0
4
17
 
5
18
  ### Patch Changes
@@ -143,7 +143,7 @@ declare type ExtractParams = {
143
143
  export { ExtractParams }
144
144
  export { ExtractParams as ExtractParams_alias_1 }
145
145
 
146
- declare const filterDescription = "JSON-formatted criteria to refine search results.\n- Must be valid JSON format\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Multiple filters can be combined";
146
+ declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
147
147
  export { filterDescription }
148
148
  export { filterDescription as filterDescription_alias_1 }
149
149
  export { filterDescription as filterDescription_alias_2 }
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
348
348
  export { QDRANT_PROMPT }
349
349
  export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
350
350
 
351
+ declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
352
+ export { queryTextDescription }
353
+ export { queryTextDescription as queryTextDescription_alias_1 }
354
+ export { queryTextDescription as queryTextDescription_alias_2 }
355
+
351
356
  declare type QuestionAnswerExtractArgs = {
352
357
  llm?: LLM;
353
358
  questions?: number;
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
608
613
  }): TokenTransformer;
609
614
  }
610
615
 
611
- declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
616
+ declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
612
617
  export { topKDescription }
613
618
  export { topKDescription as topKDescription_alias_1 }
614
619
  export { topKDescription as topKDescription_alias_2 }
@@ -143,7 +143,7 @@ declare type ExtractParams = {
143
143
  export { ExtractParams }
144
144
  export { ExtractParams as ExtractParams_alias_1 }
145
145
 
146
- declare const filterDescription = "JSON-formatted criteria to refine search results.\n- Must be valid JSON format\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Multiple filters can be combined";
146
+ declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
147
147
  export { filterDescription }
148
148
  export { filterDescription as filterDescription_alias_1 }
149
149
  export { filterDescription as filterDescription_alias_2 }
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
348
348
  export { QDRANT_PROMPT }
349
349
  export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
350
350
 
351
+ declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
352
+ export { queryTextDescription }
353
+ export { queryTextDescription as queryTextDescription_alias_1 }
354
+ export { queryTextDescription as queryTextDescription_alias_2 }
355
+
351
356
  declare type QuestionAnswerExtractArgs = {
352
357
  llm?: LLM;
353
358
  questions?: number;
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
608
613
  }): TokenTransformer;
609
614
  }
610
615
 
611
- declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
616
+ declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
612
617
  export { topKDescription }
613
618
  export { topKDescription as topKDescription_alias_1 }
614
619
  export { topKDescription as topKDescription_alias_2 }
package/dist/index.cjs CHANGED
@@ -232,13 +232,14 @@ var CharacterTransformer = class extends TextTransformer {
232
232
  let currentPosition = 0;
233
233
  while (currentPosition < text.length) {
234
234
  let chunkEnd = currentPosition;
235
- let currentChunk = "";
236
235
  while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
237
236
  chunkEnd++;
238
237
  }
239
- currentChunk = text.slice(currentPosition, chunkEnd);
238
+ const currentChunk = text.slice(currentPosition, chunkEnd);
239
+ const chunkLength = this.lengthFunction(currentChunk);
240
240
  chunks.push(currentChunk);
241
- currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
241
+ if (chunkEnd >= text.length) break;
242
+ currentPosition += Math.max(1, chunkLength - this.overlap);
242
243
  }
243
244
  return chunks;
244
245
  }
@@ -1825,19 +1826,35 @@ var vectorQuerySearch = async ({
1825
1826
  // src/utils/default-settings.ts
1826
1827
  var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
1827
1828
  var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
1829
+ var queryTextDescription = `The text query to search for in the vector database.
1830
+ - ALWAYS provide a non-empty query string
1831
+ - Must contain the user's question or search terms
1832
+ - Example: "market data" or "financial reports"
1833
+ - If the user's query is about a specific topic, use that topic as the queryText
1834
+ - Cannot be an empty string
1835
+ - Do not include quotes, just the text itself
1836
+ - Required for all searches`;
1828
1837
  var topKDescription = `Controls how many matching documents to return.
1829
- - Must be a valid number
1838
+ - ALWAYS provide a value
1839
+ - If no value is provided, use the default (10)
1840
+ - Must be a valid and positive number
1841
+ - Cannot be NaN
1830
1842
  - Uses provided value if specified
1831
- - Default: 10 results
1832
- - Higher values provide more context
1833
- - Lower values focus on best matches
1843
+ - Default: 10 results (use this if unsure)
1844
+ - Higher values (like 20) provide more context
1845
+ - Lower values (like 3) focus on best matches
1834
1846
  - Based on query requirements`;
1835
1847
  var filterDescription = `JSON-formatted criteria to refine search results.
1836
- - Must be valid JSON format
1848
+ - ALWAYS provide a filter value
1849
+ - If no filter is provided, use the default ("{}")
1850
+ - MUST be a valid, complete JSON object with proper quotes and brackets
1837
1851
  - Uses provided filter if specified
1838
1852
  - Default: "{}" (no filtering)
1853
+ - Example for no filtering: "filter": "{}"
1839
1854
  - Example: '{"category": "health"}'
1840
1855
  - Based on query intent
1856
+ - Do NOT use single quotes or unquoted properties
1857
+ - IMPORTANT: Always ensure JSON is properly closed with matching brackets
1841
1858
  - Multiple filters can be combined`;
1842
1859
 
1843
1860
  // src/tools/graph-rag.ts
@@ -1859,32 +1876,40 @@ var createGraphRAGTool = ({
1859
1876
  const toolDescription = description || defaultGraphRagDescription();
1860
1877
  const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
1861
1878
  let isInitialized = false;
1879
+ const baseSchema = {
1880
+ queryText: zod.z.string().describe(queryTextDescription),
1881
+ topK: zod.z.any().describe(topKDescription)
1882
+ };
1883
+ const inputSchema = enableFilter ? zod.z.object({
1884
+ ...baseSchema,
1885
+ filter: zod.z.string().describe(filterDescription)
1886
+ }).passthrough() : zod.z.object(baseSchema).passthrough();
1862
1887
  return tools.createTool({
1863
1888
  id: toolId,
1864
- inputSchema: zod.z.object({
1865
- queryText: zod.z.string().describe("The text query to search for in the vector database"),
1866
- topK: zod.z.number().describe(topKDescription),
1867
- filter: zod.z.string().describe(filterDescription)
1868
- }),
1889
+ inputSchema,
1869
1890
  outputSchema: zod.z.object({
1870
1891
  relevantContext: zod.z.any()
1871
1892
  }),
1872
1893
  description: toolDescription,
1873
1894
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
1895
+ const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
1874
1896
  const vectorStore = mastra?.vectors?.[vectorStoreName];
1875
1897
  if (vectorStore) {
1876
1898
  let queryFilter = {};
1877
1899
  if (enableFilter) {
1878
- queryFilter = filter ? (() => {
1900
+ queryFilter = (() => {
1879
1901
  try {
1880
- return JSON.parse(filter);
1881
- } catch {
1882
- return filter;
1902
+ return typeof filter === "string" ? JSON.parse(filter) : filter;
1903
+ } catch (error) {
1904
+ if (mastra.logger) {
1905
+ mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
1906
+ }
1907
+ return {};
1883
1908
  }
1884
- })() : filter;
1909
+ })();
1885
1910
  }
1886
1911
  if (mastra.logger) {
1887
- mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
1912
+ mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
1888
1913
  }
1889
1914
  const { results, queryEmbedding } = await vectorQuerySearch({
1890
1915
  indexName,
@@ -1892,7 +1917,7 @@ var createGraphRAGTool = ({
1892
1917
  queryText,
1893
1918
  model,
1894
1919
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
1895
- topK,
1920
+ topK: topKValue,
1896
1921
  includeVectors: true
1897
1922
  });
1898
1923
  if (!isInitialized) {
@@ -1908,7 +1933,7 @@ var createGraphRAGTool = ({
1908
1933
  }
1909
1934
  const rerankedResults = graphRag.query({
1910
1935
  query: queryEmbedding,
1911
- topK,
1936
+ topK: topKValue,
1912
1937
  randomWalkSteps: graphOptions.randomWalkSteps,
1913
1938
  restartProb: graphOptions.restartProb
1914
1939
  });
@@ -1935,13 +1960,13 @@ var createVectorQueryTool = ({
1935
1960
  const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
1936
1961
  const toolDescription = description || defaultVectorQueryDescription();
1937
1962
  const baseSchema = {
1938
- queryText: zod.z.string().describe("The text query to search for in the vector database"),
1939
- topK: zod.z.coerce.number().describe(topKDescription)
1963
+ queryText: zod.z.string().describe(queryTextDescription),
1964
+ topK: zod.z.any().describe(topKDescription)
1940
1965
  };
1941
1966
  const inputSchema = enableFilter ? zod.z.object({
1942
1967
  ...baseSchema,
1943
- filter: zod.z.coerce.string().describe(filterDescription)
1944
- }).strict() : zod.z.object(baseSchema).strict();
1968
+ filter: zod.z.string().describe(filterDescription)
1969
+ }).passthrough() : zod.z.object(baseSchema).passthrough();
1945
1970
  return tools.createTool({
1946
1971
  id: toolId,
1947
1972
  inputSchema,
@@ -1950,20 +1975,24 @@ var createVectorQueryTool = ({
1950
1975
  }),
1951
1976
  description: toolDescription,
1952
1977
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
1978
+ const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
1953
1979
  const vectorStore = mastra?.vectors?.[vectorStoreName];
1954
1980
  if (vectorStore) {
1955
1981
  let queryFilter = {};
1956
1982
  if (enableFilter && filter) {
1957
1983
  queryFilter = (() => {
1958
1984
  try {
1959
- return JSON.parse(filter);
1960
- } catch {
1961
- return filter;
1985
+ return typeof filter === "string" ? JSON.parse(filter) : filter;
1986
+ } catch (error) {
1987
+ if (mastra.logger) {
1988
+ mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
1989
+ }
1990
+ return {};
1962
1991
  }
1963
1992
  })();
1964
1993
  }
1965
1994
  if (mastra.logger) {
1966
- mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
1995
+ mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
1967
1996
  }
1968
1997
  const { results } = await vectorQuerySearch({
1969
1998
  indexName,
@@ -1971,12 +2000,12 @@ var createVectorQueryTool = ({
1971
2000
  queryText,
1972
2001
  model,
1973
2002
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
1974
- topK
2003
+ topK: topKValue
1975
2004
  });
1976
2005
  if (reranker) {
1977
2006
  const rerankedResults = await rerank(results, queryText, reranker.model, {
1978
2007
  ...reranker.options,
1979
- topK: reranker.options?.topK || topK
2008
+ topK: reranker.options?.topK || topKValue
1980
2009
  });
1981
2010
  const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
1982
2011
  return { relevantContext: relevantChunks2 };
@@ -2728,5 +2757,6 @@ exports.createVectorQueryTool = createVectorQueryTool;
2728
2757
  exports.defaultGraphRagDescription = defaultGraphRagDescription;
2729
2758
  exports.defaultVectorQueryDescription = defaultVectorQueryDescription;
2730
2759
  exports.filterDescription = filterDescription;
2760
+ exports.queryTextDescription = queryTextDescription;
2731
2761
  exports.rerank = rerank;
2732
2762
  exports.topKDescription = topKDescription;
package/dist/index.d.cts CHANGED
@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.cjs';
18
18
  export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.cjs';
19
19
  export { defaultVectorQueryDescription } from './_tsup-dts-rollup.cjs';
20
20
  export { defaultGraphRagDescription } from './_tsup-dts-rollup.cjs';
21
+ export { queryTextDescription } from './_tsup-dts-rollup.cjs';
21
22
  export { topKDescription } from './_tsup-dts-rollup.cjs';
22
23
  export { filterDescription } from './_tsup-dts-rollup.cjs';
package/dist/index.d.ts CHANGED
@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.js';
18
18
  export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.js';
19
19
  export { defaultVectorQueryDescription } from './_tsup-dts-rollup.js';
20
20
  export { defaultGraphRagDescription } from './_tsup-dts-rollup.js';
21
+ export { queryTextDescription } from './_tsup-dts-rollup.js';
21
22
  export { topKDescription } from './_tsup-dts-rollup.js';
22
23
  export { filterDescription } from './_tsup-dts-rollup.js';
package/dist/index.js CHANGED
@@ -230,13 +230,14 @@ var CharacterTransformer = class extends TextTransformer {
230
230
  let currentPosition = 0;
231
231
  while (currentPosition < text.length) {
232
232
  let chunkEnd = currentPosition;
233
- let currentChunk = "";
234
233
  while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
235
234
  chunkEnd++;
236
235
  }
237
- currentChunk = text.slice(currentPosition, chunkEnd);
236
+ const currentChunk = text.slice(currentPosition, chunkEnd);
237
+ const chunkLength = this.lengthFunction(currentChunk);
238
238
  chunks.push(currentChunk);
239
- currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
239
+ if (chunkEnd >= text.length) break;
240
+ currentPosition += Math.max(1, chunkLength - this.overlap);
240
241
  }
241
242
  return chunks;
242
243
  }
@@ -1823,19 +1824,35 @@ var vectorQuerySearch = async ({
1823
1824
  // src/utils/default-settings.ts
1824
1825
  var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
1825
1826
  var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
1827
+ var queryTextDescription = `The text query to search for in the vector database.
1828
+ - ALWAYS provide a non-empty query string
1829
+ - Must contain the user's question or search terms
1830
+ - Example: "market data" or "financial reports"
1831
+ - If the user's query is about a specific topic, use that topic as the queryText
1832
+ - Cannot be an empty string
1833
+ - Do not include quotes, just the text itself
1834
+ - Required for all searches`;
1826
1835
  var topKDescription = `Controls how many matching documents to return.
1827
- - Must be a valid number
1836
+ - ALWAYS provide a value
1837
+ - If no value is provided, use the default (10)
1838
+ - Must be a valid and positive number
1839
+ - Cannot be NaN
1828
1840
  - Uses provided value if specified
1829
- - Default: 10 results
1830
- - Higher values provide more context
1831
- - Lower values focus on best matches
1841
+ - Default: 10 results (use this if unsure)
1842
+ - Higher values (like 20) provide more context
1843
+ - Lower values (like 3) focus on best matches
1832
1844
  - Based on query requirements`;
1833
1845
  var filterDescription = `JSON-formatted criteria to refine search results.
1834
- - Must be valid JSON format
1846
+ - ALWAYS provide a filter value
1847
+ - If no filter is provided, use the default ("{}")
1848
+ - MUST be a valid, complete JSON object with proper quotes and brackets
1835
1849
  - Uses provided filter if specified
1836
1850
  - Default: "{}" (no filtering)
1851
+ - Example for no filtering: "filter": "{}"
1837
1852
  - Example: '{"category": "health"}'
1838
1853
  - Based on query intent
1854
+ - Do NOT use single quotes or unquoted properties
1855
+ - IMPORTANT: Always ensure JSON is properly closed with matching brackets
1839
1856
  - Multiple filters can be combined`;
1840
1857
 
1841
1858
  // src/tools/graph-rag.ts
@@ -1857,32 +1874,40 @@ var createGraphRAGTool = ({
1857
1874
  const toolDescription = description || defaultGraphRagDescription();
1858
1875
  const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
1859
1876
  let isInitialized = false;
1877
+ const baseSchema = {
1878
+ queryText: z.string().describe(queryTextDescription),
1879
+ topK: z.any().describe(topKDescription)
1880
+ };
1881
+ const inputSchema = enableFilter ? z.object({
1882
+ ...baseSchema,
1883
+ filter: z.string().describe(filterDescription)
1884
+ }).passthrough() : z.object(baseSchema).passthrough();
1860
1885
  return createTool({
1861
1886
  id: toolId,
1862
- inputSchema: z.object({
1863
- queryText: z.string().describe("The text query to search for in the vector database"),
1864
- topK: z.number().describe(topKDescription),
1865
- filter: z.string().describe(filterDescription)
1866
- }),
1887
+ inputSchema,
1867
1888
  outputSchema: z.object({
1868
1889
  relevantContext: z.any()
1869
1890
  }),
1870
1891
  description: toolDescription,
1871
1892
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
1893
+ const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
1872
1894
  const vectorStore = mastra?.vectors?.[vectorStoreName];
1873
1895
  if (vectorStore) {
1874
1896
  let queryFilter = {};
1875
1897
  if (enableFilter) {
1876
- queryFilter = filter ? (() => {
1898
+ queryFilter = (() => {
1877
1899
  try {
1878
- return JSON.parse(filter);
1879
- } catch {
1880
- return filter;
1900
+ return typeof filter === "string" ? JSON.parse(filter) : filter;
1901
+ } catch (error) {
1902
+ if (mastra.logger) {
1903
+ mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
1904
+ }
1905
+ return {};
1881
1906
  }
1882
- })() : filter;
1907
+ })();
1883
1908
  }
1884
1909
  if (mastra.logger) {
1885
- mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
1910
+ mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
1886
1911
  }
1887
1912
  const { results, queryEmbedding } = await vectorQuerySearch({
1888
1913
  indexName,
@@ -1890,7 +1915,7 @@ var createGraphRAGTool = ({
1890
1915
  queryText,
1891
1916
  model,
1892
1917
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
1893
- topK,
1918
+ topK: topKValue,
1894
1919
  includeVectors: true
1895
1920
  });
1896
1921
  if (!isInitialized) {
@@ -1906,7 +1931,7 @@ var createGraphRAGTool = ({
1906
1931
  }
1907
1932
  const rerankedResults = graphRag.query({
1908
1933
  query: queryEmbedding,
1909
- topK,
1934
+ topK: topKValue,
1910
1935
  randomWalkSteps: graphOptions.randomWalkSteps,
1911
1936
  restartProb: graphOptions.restartProb
1912
1937
  });
@@ -1933,13 +1958,13 @@ var createVectorQueryTool = ({
1933
1958
  const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
1934
1959
  const toolDescription = description || defaultVectorQueryDescription();
1935
1960
  const baseSchema = {
1936
- queryText: z.string().describe("The text query to search for in the vector database"),
1937
- topK: z.coerce.number().describe(topKDescription)
1961
+ queryText: z.string().describe(queryTextDescription),
1962
+ topK: z.any().describe(topKDescription)
1938
1963
  };
1939
1964
  const inputSchema = enableFilter ? z.object({
1940
1965
  ...baseSchema,
1941
- filter: z.coerce.string().describe(filterDescription)
1942
- }).strict() : z.object(baseSchema).strict();
1966
+ filter: z.string().describe(filterDescription)
1967
+ }).passthrough() : z.object(baseSchema).passthrough();
1943
1968
  return createTool({
1944
1969
  id: toolId,
1945
1970
  inputSchema,
@@ -1948,20 +1973,24 @@ var createVectorQueryTool = ({
1948
1973
  }),
1949
1974
  description: toolDescription,
1950
1975
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
1976
+ const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
1951
1977
  const vectorStore = mastra?.vectors?.[vectorStoreName];
1952
1978
  if (vectorStore) {
1953
1979
  let queryFilter = {};
1954
1980
  if (enableFilter && filter) {
1955
1981
  queryFilter = (() => {
1956
1982
  try {
1957
- return JSON.parse(filter);
1958
- } catch {
1959
- return filter;
1983
+ return typeof filter === "string" ? JSON.parse(filter) : filter;
1984
+ } catch (error) {
1985
+ if (mastra.logger) {
1986
+ mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
1987
+ }
1988
+ return {};
1960
1989
  }
1961
1990
  })();
1962
1991
  }
1963
1992
  if (mastra.logger) {
1964
- mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
1993
+ mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
1965
1994
  }
1966
1995
  const { results } = await vectorQuerySearch({
1967
1996
  indexName,
@@ -1969,12 +1998,12 @@ var createVectorQueryTool = ({
1969
1998
  queryText,
1970
1999
  model,
1971
2000
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
1972
- topK
2001
+ topK: topKValue
1973
2002
  });
1974
2003
  if (reranker) {
1975
2004
  const rerankedResults = await rerank(results, queryText, reranker.model, {
1976
2005
  ...reranker.options,
1977
- topK: reranker.options?.topK || topK
2006
+ topK: reranker.options?.topK || topKValue
1978
2007
  });
1979
2008
  const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
1980
2009
  return { relevantContext: relevantChunks2 };
@@ -2710,4 +2739,4 @@ Example Complex Query:
2710
2739
  "inStock": true
2711
2740
  }`;
2712
2741
 
2713
- export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, rerank, topKDescription };
2742
+ export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, queryTextDescription, rerank, topKDescription };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/rag",
3
- "version": "0.1.10-alpha.0",
3
+ "version": "0.1.10-alpha.1",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,7 +29,7 @@
29
29
  "node-html-better-parser": "^1.4.7",
30
30
  "pathe": "^2.0.3",
31
31
  "zod": "^3.24.2",
32
- "@mastra/core": "^0.6.1-alpha.0"
32
+ "@mastra/core": "^0.6.1-alpha.1"
33
33
  },
34
34
  "peerDependencies": {
35
35
  "ai": "^4.0.0"
@@ -225,37 +225,152 @@ describe('MDocument', () => {
225
225
  });
226
226
  });
227
227
  it('should properly implement overlap in character chunking', async () => {
228
- // Create a text that will definitely need character-level chunking
228
+ // Test basic overlap functionality
229
229
  const text = 'a'.repeat(500) + 'b'.repeat(500) + 'c'.repeat(500);
230
230
  const chunkSize = 600;
231
- const overlapSize = 100;
232
- const doc = MDocument.fromText(text, { meta: 'data' });
231
+ const overlap = 100;
232
+ const doc = MDocument.fromText(text);
233
233
 
234
- await doc.chunk({
234
+ const result = await doc.chunk({
235
235
  strategy: 'character',
236
236
  size: chunkSize,
237
- overlap: overlapSize,
237
+ overlap,
238
238
  });
239
239
 
240
- const docs = doc.getDocs();
241
- expect(docs.length).toBeGreaterThan(1); // Should create multiple chunks
242
-
243
- for (let i = 1; i < docs.length; i++) {
244
- const prevChunk = docs[i - 1]?.text;
245
- const currentChunk = docs[i]?.text;
240
+ // Verify overlap between chunks
241
+ for (let i = 1; i < result.length; i++) {
242
+ const prevChunk = result[i - 1]?.text;
243
+ const currentChunk = result[i]?.text;
246
244
 
247
245
  if (prevChunk && currentChunk) {
248
- // Get the end of the previous chunk
249
- const prevEnd = prevChunk.slice(-overlapSize);
250
- // Get the start of the current chunk
251
- const currentStart = currentChunk.slice(0, overlapSize);
246
+ // Get the end of the previous chunk and start of current chunk
247
+ const prevEnd = prevChunk.slice(-overlap);
248
+ const currentStart = currentChunk.slice(0, overlap);
252
249
 
253
- // Check if there's overlap between chunks using a more flexible approach
254
- // Find common substring between the end of previous chunk and start of current chunk
255
- const commonText = findCommonSubstring(prevEnd, currentStart);
256
- expect(commonText.length).toBeGreaterThan(0);
250
+ // There should be a common substring of length >= min(overlap, chunk length)
251
+ const commonSubstring = findCommonSubstring(prevEnd, currentStart);
252
+ expect(commonSubstring.length).toBeGreaterThan(0);
253
+ }
254
+ }
255
+ });
256
+
257
+ it('should ensure character chunks never exceed size limit', async () => {
258
+ // Create text with varying content to test size limits
259
+ const text = 'a'.repeat(50) + 'b'.repeat(100) + 'c'.repeat(30);
260
+ const chunkSize = 50;
261
+ const overlap = 10;
262
+
263
+ const doc = MDocument.fromText(text);
264
+ const chunks = await doc.chunk({
265
+ strategy: 'character',
266
+ size: chunkSize,
267
+ overlap,
268
+ });
269
+
270
+ chunks.forEach((chunk, i) => {
271
+ if (i > 0) {
272
+ const prevChunk = chunks[i - 1]?.text;
273
+ const actualOverlap = chunk.text.slice(0, overlap);
274
+ const expectedOverlap = prevChunk?.slice(-overlap);
275
+ expect(actualOverlap).toBe(expectedOverlap);
276
+ }
277
+ });
278
+
279
+ // Verify each chunk's size
280
+ let allChunksValid = true;
281
+ for (const chunk of chunks) {
282
+ if (chunk.text.length > chunkSize) {
283
+ allChunksValid = false;
284
+ }
285
+ }
286
+ expect(allChunksValid).toBe(true);
287
+
288
+ // Verify overlaps between consecutive chunks
289
+ for (let i = 1; i < chunks.length; i++) {
290
+ const prevChunk = chunks[i - 1]!;
291
+ const currentChunk = chunks[i]!;
292
+
293
+ // The end of the previous chunk should match the start of the current chunk
294
+ const prevEnd = prevChunk.text.slice(-overlap);
295
+ const currentStart = currentChunk.text.slice(0, overlap);
296
+
297
+ expect(currentStart).toBe(prevEnd);
298
+ expect(currentStart.length).toBeLessThanOrEqual(overlap);
299
+ }
300
+ });
301
+
302
+ it('should handle end chunks properly in character chunking', async () => {
303
+ const text = 'This is a test document that needs to be split into chunks with proper handling of the end.';
304
+ const chunkSize = 20;
305
+ const overlap = 5;
306
+
307
+ const testDoc = MDocument.fromText(text);
308
+ const chunks = await testDoc.chunk({
309
+ strategy: 'character',
310
+ size: chunkSize,
311
+ overlap,
312
+ });
313
+
314
+ // Verify no tiny fragments at the end
315
+ const lastChunk = chunks[chunks.length - 1]?.text;
316
+ expect(lastChunk?.length).toBeGreaterThan(5);
317
+
318
+ // Verify each chunk respects size limit
319
+ let allChunksValid = true;
320
+ for (const chunk of chunks) {
321
+ if (chunk.text.length > chunkSize) {
322
+ allChunksValid = false;
257
323
  }
258
324
  }
325
+ expect(allChunksValid).toBe(true);
326
+
327
+ // Verify each chunk size explicitly
328
+ for (const chunk of chunks) {
329
+ expect(chunk.text.length).toBeLessThanOrEqual(chunkSize);
330
+ }
331
+
332
+ // Verify overlaps between consecutive chunks
333
+ for (let i = 1; i < chunks.length; i++) {
334
+ const prevChunk = chunks[i - 1]!;
335
+ const currentChunk = chunks[i]!;
336
+
337
+ // The end of the previous chunk should match the start of the current chunk
338
+ const prevEnd = prevChunk.text.slice(-overlap);
339
+ const currentStart = currentChunk.text.slice(0, overlap);
340
+
341
+ expect(currentStart).toBe(prevEnd);
342
+ expect(currentStart.length).toBeLessThanOrEqual(overlap);
343
+ }
344
+ });
345
+ it('should not create tiny chunks at the end', async () => {
346
+ const text = 'ABCDEFGHIJ'; // 10 characters
347
+ const chunkSize = 4;
348
+ const overlap = 2;
349
+
350
+ const doc = MDocument.fromText(text);
351
+ const chunks = await doc.chunk({
352
+ strategy: 'character',
353
+ size: chunkSize,
354
+ overlap,
355
+ });
356
+
357
+ // Verify we don't have tiny chunks
358
+ chunks.forEach(chunk => {
359
+ // Each chunk should be either:
360
+ // 1. Full size (chunkSize)
361
+ // 2. Or at least half the chunk size if it's the last chunk
362
+ const minSize = chunk === chunks[chunks.length - 1] ? Math.floor(chunkSize / 2) : chunkSize;
363
+ expect(chunk.text.length).toBeGreaterThanOrEqual(minSize);
364
+ });
365
+
366
+ // Verify overlaps are maintained
367
+ for (let i = 1; i < chunks.length; i++) {
368
+ const prevChunk = chunks[i - 1]!;
369
+ const currentChunk = chunks[i]!;
370
+ const actualOverlap = currentChunk.text.slice(0, overlap);
371
+ const expectedOverlap = prevChunk.text.slice(-overlap);
372
+ expect(actualOverlap).toBe(expectedOverlap);
373
+ }
259
374
  });
260
375
  });
261
376
 
@@ -376,10 +491,6 @@ describe('MDocument', () => {
376
491
  for (let i = 1; i < docs.length; i++) {
377
492
  const prevChunk = docs[i - 1]?.text;
378
493
  const currentChunk = docs[i]?.text;
379
- console.log({
380
- prevChunk,
381
- currentChunk,
382
- });
383
494
 
384
495
  if (prevChunk && currentChunk) {
385
496
  // Test using two methods:
@@ -100,18 +100,21 @@ export class CharacterTransformer extends TextTransformer {
100
100
 
101
101
  while (currentPosition < text.length) {
102
102
  let chunkEnd = currentPosition;
103
- let currentChunk = '';
104
103
 
105
104
  // Build chunk up to max size
106
105
  while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
107
106
  chunkEnd++;
108
107
  }
109
108
 
110
- currentChunk = text.slice(currentPosition, chunkEnd);
109
+ const currentChunk = text.slice(currentPosition, chunkEnd);
110
+ const chunkLength = this.lengthFunction(currentChunk);
111
111
  chunks.push(currentChunk);
112
112
 
113
+ // If we're at the end, break to avoid tiny chunks
114
+ if (chunkEnd >= text.length) break;
115
+
113
116
  // Move position forward by chunk size minus overlap
114
- currentPosition += Math.max(1, this.lengthFunction(currentChunk) - this.overlap);
117
+ currentPosition += Math.max(1, chunkLength - this.overlap);
115
118
  }
116
119
 
117
120
  return chunks;
@@ -3,7 +3,13 @@ import type { EmbeddingModel } from 'ai';
3
3
  import { z } from 'zod';
4
4
 
5
5
  import { GraphRAG } from '../graph-rag';
6
- import { vectorQuerySearch, defaultGraphRagDescription, filterDescription, topKDescription } from '../utils';
6
+ import {
7
+ vectorQuerySearch,
8
+ defaultGraphRagDescription,
9
+ filterDescription,
10
+ topKDescription,
11
+ queryTextDescription,
12
+ } from '../utils';
7
13
 
8
14
  export const createGraphRAGTool = ({
9
15
  vectorStoreName,
@@ -38,35 +44,51 @@ export const createGraphRAGTool = ({
38
44
  const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
39
45
  let isInitialized = false;
40
46
 
47
+ const baseSchema = {
48
+ queryText: z.string().describe(queryTextDescription),
49
+ topK: z.any().describe(topKDescription),
50
+ };
51
+ const inputSchema = enableFilter
52
+ ? z
53
+ .object({
54
+ ...baseSchema,
55
+ filter: z.string().describe(filterDescription),
56
+ })
57
+ .passthrough()
58
+ : z.object(baseSchema).passthrough();
41
59
  return createTool({
42
60
  id: toolId,
43
- inputSchema: z.object({
44
- queryText: z.string().describe('The text query to search for in the vector database'),
45
- topK: z.number().describe(topKDescription),
46
- filter: z.string().describe(filterDescription),
47
- }),
61
+ inputSchema,
48
62
  outputSchema: z.object({
49
63
  relevantContext: z.any(),
50
64
  }),
51
65
  description: toolDescription,
52
66
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
67
+ const topKValue =
68
+ typeof topK === 'number' && !isNaN(topK)
69
+ ? topK
70
+ : typeof topK === 'string' && !isNaN(Number(topK))
71
+ ? Number(topK)
72
+ : 10;
53
73
  const vectorStore = mastra?.vectors?.[vectorStoreName];
54
74
 
55
75
  if (vectorStore) {
56
76
  let queryFilter = {};
57
77
  if (enableFilter) {
58
- queryFilter = filter
59
- ? (() => {
60
- try {
61
- return JSON.parse(filter);
62
- } catch {
63
- return filter;
64
- }
65
- })()
66
- : filter;
78
+ queryFilter = (() => {
79
+ try {
80
+ return typeof filter === 'string' ? JSON.parse(filter) : filter;
81
+ } catch (error) {
82
+ // Log the error and use empty object
83
+ if (mastra.logger) {
84
+ mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
85
+ }
86
+ return {};
87
+ }
88
+ })();
67
89
  }
68
90
  if (mastra.logger) {
69
- mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
91
+ mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
70
92
  }
71
93
  const { results, queryEmbedding } = await vectorQuerySearch({
72
94
  indexName,
@@ -74,7 +96,7 @@ export const createGraphRAGTool = ({
74
96
  queryText,
75
97
  model,
76
98
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
77
- topK,
99
+ topK: topKValue,
78
100
  includeVectors: true,
79
101
  });
80
102
 
@@ -96,7 +118,7 @@ export const createGraphRAGTool = ({
96
118
  // Get reranked results using GraphRAG
97
119
  const rerankedResults = graphRag.query({
98
120
  query: queryEmbedding,
99
- topK,
121
+ topK: topKValue,
100
122
  randomWalkSteps: graphOptions.randomWalkSteps,
101
123
  restartProb: graphOptions.restartProb,
102
124
  });
@@ -107,7 +129,6 @@ export const createGraphRAGTool = ({
107
129
  relevantContext: relevantChunks,
108
130
  };
109
131
  }
110
-
111
132
  return {
112
133
  relevantContext: [],
113
134
  };
@@ -15,6 +15,7 @@ vi.mock('@mastra/core/tools', () => ({
15
15
  vi.mock('../utils', () => ({
16
16
  vectorQuerySearch: vi.fn().mockResolvedValue({ results: [] }),
17
17
  defaultVectorQueryDescription: () => 'Default vector query description',
18
+ queryTextDescription: 'Query text description',
18
19
  filterDescription: 'Filter description',
19
20
  topKDescription: 'Top K description',
20
21
  }));
@@ -29,6 +30,8 @@ describe('createVectorQueryTool', () => {
29
30
  },
30
31
  logger: {
31
32
  debug: vi.fn(),
33
+ warn: vi.fn(),
34
+ info: vi.fn(),
32
35
  },
33
36
  };
34
37
 
@@ -37,7 +40,7 @@ describe('createVectorQueryTool', () => {
37
40
  });
38
41
 
39
42
  describe('input schema validation', () => {
40
- it('should make filter invalid when enableFilter is false', () => {
43
+ it('should handle filter permissively when enableFilter is false', () => {
41
44
  // Create tool with enableFilter set to false
42
45
  const tool = createVectorQueryTool({
43
46
  vectorStoreName: 'testStore',
@@ -61,10 +64,10 @@ describe('createVectorQueryTool', () => {
61
64
  ...validInput,
62
65
  filter: '{"field": "value"}',
63
66
  };
64
- expect(() => schema.parse(inputWithFilter)).toThrow();
67
+ expect(() => schema.parse(inputWithFilter)).not.toThrow();
65
68
  });
66
69
 
67
- it('should handle filter permissively when enableFilter is true', () => {
70
+ it('should handle filter when enableFilter is true', () => {
68
71
  const tool = createVectorQueryTool({
69
72
  vectorStoreName: 'testStore',
70
73
  indexName: 'testIndex',
@@ -81,13 +84,14 @@ describe('createVectorQueryTool', () => {
81
84
  { filter: '{"field": "value"}' },
82
85
  { filter: '{}' },
83
86
  { filter: 'simple-string' },
84
- // Object inputs (should be coerced to strings)
87
+ // Empty
88
+ { filter: '' },
89
+ ];
90
+
91
+ const invalidTestCases = [
85
92
  { filter: { field: 'value' } },
86
93
  { filter: {} },
87
- // Numbers (should be coerced)
88
94
  { filter: 123 },
89
- // Empty/null values (should be coerced)
90
- { filter: '' },
91
95
  { filter: null },
92
96
  { filter: undefined },
93
97
  ];
@@ -102,6 +106,16 @@ describe('createVectorQueryTool', () => {
102
106
  ).not.toThrow();
103
107
  });
104
108
 
109
+ invalidTestCases.forEach(({ filter }) => {
110
+ expect(() =>
111
+ schema.parse({
112
+ queryText: 'test query',
113
+ topK: 5,
114
+ filter,
115
+ }),
116
+ ).toThrow();
117
+ });
118
+
105
119
  // Verify that all parsed values are strings
106
120
  testCases.forEach(({ filter }) => {
107
121
  const result = schema.parse({
@@ -113,7 +127,7 @@ describe('createVectorQueryTool', () => {
113
127
  });
114
128
  });
115
129
 
116
- it('should reject unexpected properties in both modes', () => {
130
+ it('should not reject unexpected properties in both modes', () => {
117
131
  // Test with enableFilter false
118
132
  const toolWithoutFilter = createVectorQueryTool({
119
133
  vectorStoreName: 'testStore',
@@ -129,7 +143,7 @@ describe('createVectorQueryTool', () => {
129
143
  topK: 5,
130
144
  unexpectedProp: 'value',
131
145
  }),
132
- ).toThrow();
146
+ ).not.toThrow();
133
147
 
134
148
  // Test with enableFilter true
135
149
  const toolWithFilter = createVectorQueryTool({
@@ -147,7 +161,7 @@ describe('createVectorQueryTool', () => {
147
161
  filter: '{}',
148
162
  unexpectedProp: 'value',
149
163
  }),
150
- ).toThrow();
164
+ ).not.toThrow();
151
165
  });
152
166
  });
153
167
 
@@ -228,10 +242,10 @@ describe('createVectorQueryTool', () => {
228
242
  mastra: mockMastra,
229
243
  });
230
244
 
231
- // Check that vectorQuerySearch was called with the string filter
245
+ // Since this is not a valid filter, it should be ignored
232
246
  expect(vectorQuerySearch).toHaveBeenCalledWith(
233
247
  expect.objectContaining({
234
- queryFilter: 'string-filter',
248
+ queryFilter: undefined,
235
249
  }),
236
250
  );
237
251
  });
@@ -4,7 +4,13 @@ import { z } from 'zod';
4
4
 
5
5
  import { rerank } from '../rerank';
6
6
  import type { RerankConfig } from '../rerank';
7
- import { vectorQuerySearch, defaultVectorQueryDescription, filterDescription, topKDescription } from '../utils';
7
+ import {
8
+ vectorQuerySearch,
9
+ defaultVectorQueryDescription,
10
+ filterDescription,
11
+ topKDescription,
12
+ queryTextDescription,
13
+ } from '../utils';
8
14
 
9
15
  export const createVectorQueryTool = ({
10
16
  vectorStoreName,
@@ -27,17 +33,17 @@ export const createVectorQueryTool = ({
27
33
  const toolDescription = description || defaultVectorQueryDescription();
28
34
  // Create base schema with required fields
29
35
  const baseSchema = {
30
- queryText: z.string().describe('The text query to search for in the vector database'),
31
- topK: z.coerce.number().describe(topKDescription),
36
+ queryText: z.string().describe(queryTextDescription),
37
+ topK: z.any().describe(topKDescription),
32
38
  };
33
39
  const inputSchema = enableFilter
34
40
  ? z
35
41
  .object({
36
42
  ...baseSchema,
37
- filter: z.coerce.string().describe(filterDescription),
43
+ filter: z.string().describe(filterDescription),
38
44
  })
39
- .strict()
40
- : z.object(baseSchema).strict();
45
+ .passthrough()
46
+ : z.object(baseSchema).passthrough();
41
47
  return createTool({
42
48
  id: toolId,
43
49
  inputSchema,
@@ -46,6 +52,13 @@ export const createVectorQueryTool = ({
46
52
  }),
47
53
  description: toolDescription,
48
54
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
55
+ const topKValue =
56
+ typeof topK === 'number' && !isNaN(topK)
57
+ ? topK
58
+ : typeof topK === 'string' && !isNaN(Number(topK))
59
+ ? Number(topK)
60
+ : 10;
61
+
49
62
  const vectorStore = mastra?.vectors?.[vectorStoreName];
50
63
 
51
64
  // Get relevant chunks from the vector database
@@ -54,14 +67,18 @@ export const createVectorQueryTool = ({
54
67
  if (enableFilter && filter) {
55
68
  queryFilter = (() => {
56
69
  try {
57
- return JSON.parse(filter);
58
- } catch {
59
- return filter;
70
+ return typeof filter === 'string' ? JSON.parse(filter) : filter;
71
+ } catch (error) {
72
+ // Log the error and use empty object
73
+ if (mastra.logger) {
74
+ mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
75
+ }
76
+ return {};
60
77
  }
61
78
  })();
62
79
  }
63
80
  if (mastra.logger) {
64
- mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
81
+ mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
65
82
  }
66
83
 
67
84
  const { results } = await vectorQuerySearch({
@@ -70,24 +87,22 @@ export const createVectorQueryTool = ({
70
87
  queryText,
71
88
  model,
72
89
  queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
73
- topK,
90
+ topK: topKValue,
74
91
  });
75
92
  if (reranker) {
76
93
  const rerankedResults = await rerank(results, queryText, reranker.model, {
77
94
  ...reranker.options,
78
- topK: reranker.options?.topK || topK,
95
+ topK: reranker.options?.topK || topKValue,
79
96
  });
80
97
  const relevantChunks = rerankedResults.map(({ result }) => result?.metadata);
81
98
  return { relevantContext: relevantChunks };
82
99
  }
83
100
 
84
101
  const relevantChunks = results.map(result => result?.metadata);
85
-
86
102
  return {
87
103
  relevantContext: relevantChunks,
88
104
  };
89
105
  }
90
-
91
106
  return {
92
107
  relevantContext: [],
93
108
  };
@@ -4,18 +4,35 @@ export const defaultVectorQueryDescription = () =>
4
4
  export const defaultGraphRagDescription = () =>
5
5
  `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
6
6
 
7
+ export const queryTextDescription = `The text query to search for in the vector database.
8
+ - ALWAYS provide a non-empty query string
9
+ - Must contain the user's question or search terms
10
+ - Example: "market data" or "financial reports"
11
+ - If the user's query is about a specific topic, use that topic as the queryText
12
+ - Cannot be an empty string
13
+ - Do not include quotes, just the text itself
14
+ - Required for all searches`;
15
+
7
16
  export const topKDescription = `Controls how many matching documents to return.
8
- - Must be a valid number
17
+ - ALWAYS provide a value
18
+ - If no value is provided, use the default (10)
19
+ - Must be a valid and positive number
20
+ - Cannot be NaN
9
21
  - Uses provided value if specified
10
- - Default: 10 results
11
- - Higher values provide more context
12
- - Lower values focus on best matches
22
+ - Default: 10 results (use this if unsure)
23
+ - Higher values (like 20) provide more context
24
+ - Lower values (like 3) focus on best matches
13
25
  - Based on query requirements`;
14
26
 
15
27
  export const filterDescription = `JSON-formatted criteria to refine search results.
16
- - Must be valid JSON format
28
+ - ALWAYS provide a filter value
29
+ - If no filter is provided, use the default ("{}")
30
+ - MUST be a valid, complete JSON object with proper quotes and brackets
17
31
  - Uses provided filter if specified
18
32
  - Default: "{}" (no filtering)
33
+ - Example for no filtering: "filter": "{}"
19
34
  - Example: '{"category": "health"}'
20
35
  - Based on query intent
36
+ - Do NOT use single quotes or unquoted properties
37
+ - IMPORTANT: Always ensure JSON is properly closed with matching brackets
21
38
  - Multiple filters can be combined`;