@mastra/rag 0.1.10-alpha.0 → 0.1.10-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +13 -0
- package/dist/_tsup-dts-rollup.d.cts +7 -2
- package/dist/_tsup-dts-rollup.d.ts +7 -2
- package/dist/index.cjs +61 -31
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +61 -32
- package/package.json +2 -2
- package/src/document/document.test.ts +134 -23
- package/src/document/transformers/character.ts +6 -3
- package/src/tools/graph-rag.ts +40 -19
- package/src/tools/vector-query.test.ts +26 -12
- package/src/tools/vector-query.ts +29 -14
- package/src/utils/default-settings.ts +22 -5
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/rag@0.1.10-alpha.
|
|
2
|
+
> @mastra/rag@0.1.10-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
|
|
3
3
|
> tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.4.0
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 25133ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
14
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
15
15
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts[39m
|
|
16
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 36170ms
|
|
17
17
|
[34mCLI[39m Cleaning output folder
|
|
18
18
|
[34mESM[39m Build start
|
|
19
19
|
[34mCJS[39m Build start
|
|
20
|
-
[
|
|
21
|
-
[
|
|
22
|
-
[
|
|
23
|
-
[
|
|
20
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m92.97 KB[39m
|
|
21
|
+
[32mCJS[39m ⚡️ Build success in 1897ms
|
|
22
|
+
[32mESM[39m [1mdist/index.js [22m[32m92.25 KB[39m
|
|
23
|
+
[32mESM[39m ⚡️ Build success in 1898ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 0.1.10-alpha.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- ff1a76c: Update Vector Query and GraphRag tool schema
|
|
8
|
+
- b195f6e: Update character chunking strategy
|
|
9
|
+
- Updated dependencies [f4854ee]
|
|
10
|
+
- Updated dependencies [afaf73f]
|
|
11
|
+
- Updated dependencies [44631b1]
|
|
12
|
+
- Updated dependencies [6e559a0]
|
|
13
|
+
- Updated dependencies [5f43505]
|
|
14
|
+
- @mastra/core@0.6.1-alpha.1
|
|
15
|
+
|
|
3
16
|
## 0.1.10-alpha.0
|
|
4
17
|
|
|
5
18
|
### Patch Changes
|
|
@@ -143,7 +143,7 @@ declare type ExtractParams = {
|
|
|
143
143
|
export { ExtractParams }
|
|
144
144
|
export { ExtractParams as ExtractParams_alias_1 }
|
|
145
145
|
|
|
146
|
-
declare const filterDescription = "JSON-formatted criteria to refine search results.\n-
|
|
146
|
+
declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
|
|
147
147
|
export { filterDescription }
|
|
148
148
|
export { filterDescription as filterDescription_alias_1 }
|
|
149
149
|
export { filterDescription as filterDescription_alias_2 }
|
|
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
|
|
|
348
348
|
export { QDRANT_PROMPT }
|
|
349
349
|
export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
|
|
350
350
|
|
|
351
|
+
declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
|
|
352
|
+
export { queryTextDescription }
|
|
353
|
+
export { queryTextDescription as queryTextDescription_alias_1 }
|
|
354
|
+
export { queryTextDescription as queryTextDescription_alias_2 }
|
|
355
|
+
|
|
351
356
|
declare type QuestionAnswerExtractArgs = {
|
|
352
357
|
llm?: LLM;
|
|
353
358
|
questions?: number;
|
|
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
608
613
|
}): TokenTransformer;
|
|
609
614
|
}
|
|
610
615
|
|
|
611
|
-
declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
|
|
616
|
+
declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
|
|
612
617
|
export { topKDescription }
|
|
613
618
|
export { topKDescription as topKDescription_alias_1 }
|
|
614
619
|
export { topKDescription as topKDescription_alias_2 }
|
|
@@ -143,7 +143,7 @@ declare type ExtractParams = {
|
|
|
143
143
|
export { ExtractParams }
|
|
144
144
|
export { ExtractParams as ExtractParams_alias_1 }
|
|
145
145
|
|
|
146
|
-
declare const filterDescription = "JSON-formatted criteria to refine search results.\n-
|
|
146
|
+
declare const filterDescription = "JSON-formatted criteria to refine search results.\n- ALWAYS provide a filter value\n- If no filter is provided, use the default (\"{}\")\n- MUST be a valid, complete JSON object with proper quotes and brackets\n- Uses provided filter if specified\n- Default: \"{}\" (no filtering)\n- Example for no filtering: \"filter\": \"{}\"\n- Example: '{\"category\": \"health\"}'\n- Based on query intent\n- Do NOT use single quotes or unquoted properties\n- IMPORTANT: Always ensure JSON is properly closed with matching brackets\n- Multiple filters can be combined";
|
|
147
147
|
export { filterDescription }
|
|
148
148
|
export { filterDescription as filterDescription_alias_1 }
|
|
149
149
|
export { filterDescription as filterDescription_alias_2 }
|
|
@@ -348,6 +348,11 @@ declare const QDRANT_PROMPT = "When querying Qdrant, you can ONLY use the operat
|
|
|
348
348
|
export { QDRANT_PROMPT }
|
|
349
349
|
export { QDRANT_PROMPT as QDRANT_PROMPT_alias_1 }
|
|
350
350
|
|
|
351
|
+
declare const queryTextDescription = "The text query to search for in the vector database.\n- ALWAYS provide a non-empty query string\n- Must contain the user's question or search terms\n- Example: \"market data\" or \"financial reports\"\n- If the user's query is about a specific topic, use that topic as the queryText\n- Cannot be an empty string\n- Do not include quotes, just the text itself\n- Required for all searches";
|
|
352
|
+
export { queryTextDescription }
|
|
353
|
+
export { queryTextDescription as queryTextDescription_alias_1 }
|
|
354
|
+
export { queryTextDescription as queryTextDescription_alias_2 }
|
|
355
|
+
|
|
351
356
|
declare type QuestionAnswerExtractArgs = {
|
|
352
357
|
llm?: LLM;
|
|
353
358
|
questions?: number;
|
|
@@ -608,7 +613,7 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
608
613
|
}): TokenTransformer;
|
|
609
614
|
}
|
|
610
615
|
|
|
611
|
-
declare const topKDescription = "Controls how many matching documents to return.\n- Must be a valid number\n- Uses provided value if specified\n- Default: 10 results\n- Higher values provide more context\n- Lower values focus on best matches\n- Based on query requirements";
|
|
616
|
+
declare const topKDescription = "Controls how many matching documents to return.\n- ALWAYS provide a value\n- If no value is provided, use the default (10)\n- Must be a valid and positive number\n- Cannot be NaN\n- Uses provided value if specified\n- Default: 10 results (use this if unsure)\n- Higher values (like 20) provide more context\n- Lower values (like 3) focus on best matches\n- Based on query requirements";
|
|
612
617
|
export { topKDescription }
|
|
613
618
|
export { topKDescription as topKDescription_alias_1 }
|
|
614
619
|
export { topKDescription as topKDescription_alias_2 }
|
package/dist/index.cjs
CHANGED
|
@@ -232,13 +232,14 @@ var CharacterTransformer = class extends TextTransformer {
|
|
|
232
232
|
let currentPosition = 0;
|
|
233
233
|
while (currentPosition < text.length) {
|
|
234
234
|
let chunkEnd = currentPosition;
|
|
235
|
-
let currentChunk = "";
|
|
236
235
|
while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
|
|
237
236
|
chunkEnd++;
|
|
238
237
|
}
|
|
239
|
-
currentChunk = text.slice(currentPosition, chunkEnd);
|
|
238
|
+
const currentChunk = text.slice(currentPosition, chunkEnd);
|
|
239
|
+
const chunkLength = this.lengthFunction(currentChunk);
|
|
240
240
|
chunks.push(currentChunk);
|
|
241
|
-
|
|
241
|
+
if (chunkEnd >= text.length) break;
|
|
242
|
+
currentPosition += Math.max(1, chunkLength - this.overlap);
|
|
242
243
|
}
|
|
243
244
|
return chunks;
|
|
244
245
|
}
|
|
@@ -1825,19 +1826,35 @@ var vectorQuerySearch = async ({
|
|
|
1825
1826
|
// src/utils/default-settings.ts
|
|
1826
1827
|
var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
|
|
1827
1828
|
var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
|
|
1829
|
+
var queryTextDescription = `The text query to search for in the vector database.
|
|
1830
|
+
- ALWAYS provide a non-empty query string
|
|
1831
|
+
- Must contain the user's question or search terms
|
|
1832
|
+
- Example: "market data" or "financial reports"
|
|
1833
|
+
- If the user's query is about a specific topic, use that topic as the queryText
|
|
1834
|
+
- Cannot be an empty string
|
|
1835
|
+
- Do not include quotes, just the text itself
|
|
1836
|
+
- Required for all searches`;
|
|
1828
1837
|
var topKDescription = `Controls how many matching documents to return.
|
|
1829
|
-
-
|
|
1838
|
+
- ALWAYS provide a value
|
|
1839
|
+
- If no value is provided, use the default (10)
|
|
1840
|
+
- Must be a valid and positive number
|
|
1841
|
+
- Cannot be NaN
|
|
1830
1842
|
- Uses provided value if specified
|
|
1831
|
-
- Default: 10 results
|
|
1832
|
-
- Higher values provide more context
|
|
1833
|
-
- Lower values focus on best matches
|
|
1843
|
+
- Default: 10 results (use this if unsure)
|
|
1844
|
+
- Higher values (like 20) provide more context
|
|
1845
|
+
- Lower values (like 3) focus on best matches
|
|
1834
1846
|
- Based on query requirements`;
|
|
1835
1847
|
var filterDescription = `JSON-formatted criteria to refine search results.
|
|
1836
|
-
-
|
|
1848
|
+
- ALWAYS provide a filter value
|
|
1849
|
+
- If no filter is provided, use the default ("{}")
|
|
1850
|
+
- MUST be a valid, complete JSON object with proper quotes and brackets
|
|
1837
1851
|
- Uses provided filter if specified
|
|
1838
1852
|
- Default: "{}" (no filtering)
|
|
1853
|
+
- Example for no filtering: "filter": "{}"
|
|
1839
1854
|
- Example: '{"category": "health"}'
|
|
1840
1855
|
- Based on query intent
|
|
1856
|
+
- Do NOT use single quotes or unquoted properties
|
|
1857
|
+
- IMPORTANT: Always ensure JSON is properly closed with matching brackets
|
|
1841
1858
|
- Multiple filters can be combined`;
|
|
1842
1859
|
|
|
1843
1860
|
// src/tools/graph-rag.ts
|
|
@@ -1859,32 +1876,40 @@ var createGraphRAGTool = ({
|
|
|
1859
1876
|
const toolDescription = description || defaultGraphRagDescription();
|
|
1860
1877
|
const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
|
|
1861
1878
|
let isInitialized = false;
|
|
1879
|
+
const baseSchema = {
|
|
1880
|
+
queryText: zod.z.string().describe(queryTextDescription),
|
|
1881
|
+
topK: zod.z.any().describe(topKDescription)
|
|
1882
|
+
};
|
|
1883
|
+
const inputSchema = enableFilter ? zod.z.object({
|
|
1884
|
+
...baseSchema,
|
|
1885
|
+
filter: zod.z.string().describe(filterDescription)
|
|
1886
|
+
}).passthrough() : zod.z.object(baseSchema).passthrough();
|
|
1862
1887
|
return tools.createTool({
|
|
1863
1888
|
id: toolId,
|
|
1864
|
-
inputSchema
|
|
1865
|
-
queryText: zod.z.string().describe("The text query to search for in the vector database"),
|
|
1866
|
-
topK: zod.z.number().describe(topKDescription),
|
|
1867
|
-
filter: zod.z.string().describe(filterDescription)
|
|
1868
|
-
}),
|
|
1889
|
+
inputSchema,
|
|
1869
1890
|
outputSchema: zod.z.object({
|
|
1870
1891
|
relevantContext: zod.z.any()
|
|
1871
1892
|
}),
|
|
1872
1893
|
description: toolDescription,
|
|
1873
1894
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
1895
|
+
const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
|
|
1874
1896
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
1875
1897
|
if (vectorStore) {
|
|
1876
1898
|
let queryFilter = {};
|
|
1877
1899
|
if (enableFilter) {
|
|
1878
|
-
queryFilter =
|
|
1900
|
+
queryFilter = (() => {
|
|
1879
1901
|
try {
|
|
1880
|
-
return JSON.parse(filter);
|
|
1881
|
-
} catch {
|
|
1882
|
-
|
|
1902
|
+
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
1903
|
+
} catch (error) {
|
|
1904
|
+
if (mastra.logger) {
|
|
1905
|
+
mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
|
|
1906
|
+
}
|
|
1907
|
+
return {};
|
|
1883
1908
|
}
|
|
1884
|
-
})()
|
|
1909
|
+
})();
|
|
1885
1910
|
}
|
|
1886
1911
|
if (mastra.logger) {
|
|
1887
|
-
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
|
|
1912
|
+
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
|
|
1888
1913
|
}
|
|
1889
1914
|
const { results, queryEmbedding } = await vectorQuerySearch({
|
|
1890
1915
|
indexName,
|
|
@@ -1892,7 +1917,7 @@ var createGraphRAGTool = ({
|
|
|
1892
1917
|
queryText,
|
|
1893
1918
|
model,
|
|
1894
1919
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
|
|
1895
|
-
topK,
|
|
1920
|
+
topK: topKValue,
|
|
1896
1921
|
includeVectors: true
|
|
1897
1922
|
});
|
|
1898
1923
|
if (!isInitialized) {
|
|
@@ -1908,7 +1933,7 @@ var createGraphRAGTool = ({
|
|
|
1908
1933
|
}
|
|
1909
1934
|
const rerankedResults = graphRag.query({
|
|
1910
1935
|
query: queryEmbedding,
|
|
1911
|
-
topK,
|
|
1936
|
+
topK: topKValue,
|
|
1912
1937
|
randomWalkSteps: graphOptions.randomWalkSteps,
|
|
1913
1938
|
restartProb: graphOptions.restartProb
|
|
1914
1939
|
});
|
|
@@ -1935,13 +1960,13 @@ var createVectorQueryTool = ({
|
|
|
1935
1960
|
const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
|
|
1936
1961
|
const toolDescription = description || defaultVectorQueryDescription();
|
|
1937
1962
|
const baseSchema = {
|
|
1938
|
-
queryText: zod.z.string().describe(
|
|
1939
|
-
topK: zod.z.
|
|
1963
|
+
queryText: zod.z.string().describe(queryTextDescription),
|
|
1964
|
+
topK: zod.z.any().describe(topKDescription)
|
|
1940
1965
|
};
|
|
1941
1966
|
const inputSchema = enableFilter ? zod.z.object({
|
|
1942
1967
|
...baseSchema,
|
|
1943
|
-
filter: zod.z.
|
|
1944
|
-
}).
|
|
1968
|
+
filter: zod.z.string().describe(filterDescription)
|
|
1969
|
+
}).passthrough() : zod.z.object(baseSchema).passthrough();
|
|
1945
1970
|
return tools.createTool({
|
|
1946
1971
|
id: toolId,
|
|
1947
1972
|
inputSchema,
|
|
@@ -1950,20 +1975,24 @@ var createVectorQueryTool = ({
|
|
|
1950
1975
|
}),
|
|
1951
1976
|
description: toolDescription,
|
|
1952
1977
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
1978
|
+
const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
|
|
1953
1979
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
1954
1980
|
if (vectorStore) {
|
|
1955
1981
|
let queryFilter = {};
|
|
1956
1982
|
if (enableFilter && filter) {
|
|
1957
1983
|
queryFilter = (() => {
|
|
1958
1984
|
try {
|
|
1959
|
-
return JSON.parse(filter);
|
|
1960
|
-
} catch {
|
|
1961
|
-
|
|
1985
|
+
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
1986
|
+
} catch (error) {
|
|
1987
|
+
if (mastra.logger) {
|
|
1988
|
+
mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
|
|
1989
|
+
}
|
|
1990
|
+
return {};
|
|
1962
1991
|
}
|
|
1963
1992
|
})();
|
|
1964
1993
|
}
|
|
1965
1994
|
if (mastra.logger) {
|
|
1966
|
-
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
|
|
1995
|
+
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
|
|
1967
1996
|
}
|
|
1968
1997
|
const { results } = await vectorQuerySearch({
|
|
1969
1998
|
indexName,
|
|
@@ -1971,12 +2000,12 @@ var createVectorQueryTool = ({
|
|
|
1971
2000
|
queryText,
|
|
1972
2001
|
model,
|
|
1973
2002
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
|
|
1974
|
-
topK
|
|
2003
|
+
topK: topKValue
|
|
1975
2004
|
});
|
|
1976
2005
|
if (reranker) {
|
|
1977
2006
|
const rerankedResults = await rerank(results, queryText, reranker.model, {
|
|
1978
2007
|
...reranker.options,
|
|
1979
|
-
topK: reranker.options?.topK ||
|
|
2008
|
+
topK: reranker.options?.topK || topKValue
|
|
1980
2009
|
});
|
|
1981
2010
|
const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
|
|
1982
2011
|
return { relevantContext: relevantChunks2 };
|
|
@@ -2728,5 +2757,6 @@ exports.createVectorQueryTool = createVectorQueryTool;
|
|
|
2728
2757
|
exports.defaultGraphRagDescription = defaultGraphRagDescription;
|
|
2729
2758
|
exports.defaultVectorQueryDescription = defaultVectorQueryDescription;
|
|
2730
2759
|
exports.filterDescription = filterDescription;
|
|
2760
|
+
exports.queryTextDescription = queryTextDescription;
|
|
2731
2761
|
exports.rerank = rerank;
|
|
2732
2762
|
exports.topKDescription = topKDescription;
|
package/dist/index.d.cts
CHANGED
|
@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.cjs';
|
|
|
18
18
|
export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.cjs';
|
|
19
19
|
export { defaultVectorQueryDescription } from './_tsup-dts-rollup.cjs';
|
|
20
20
|
export { defaultGraphRagDescription } from './_tsup-dts-rollup.cjs';
|
|
21
|
+
export { queryTextDescription } from './_tsup-dts-rollup.cjs';
|
|
21
22
|
export { topKDescription } from './_tsup-dts-rollup.cjs';
|
|
22
23
|
export { filterDescription } from './_tsup-dts-rollup.cjs';
|
package/dist/index.d.ts
CHANGED
|
@@ -18,5 +18,6 @@ export { UPSTASH_PROMPT } from './_tsup-dts-rollup.js';
|
|
|
18
18
|
export { VECTORIZE_PROMPT } from './_tsup-dts-rollup.js';
|
|
19
19
|
export { defaultVectorQueryDescription } from './_tsup-dts-rollup.js';
|
|
20
20
|
export { defaultGraphRagDescription } from './_tsup-dts-rollup.js';
|
|
21
|
+
export { queryTextDescription } from './_tsup-dts-rollup.js';
|
|
21
22
|
export { topKDescription } from './_tsup-dts-rollup.js';
|
|
22
23
|
export { filterDescription } from './_tsup-dts-rollup.js';
|
package/dist/index.js
CHANGED
|
@@ -230,13 +230,14 @@ var CharacterTransformer = class extends TextTransformer {
|
|
|
230
230
|
let currentPosition = 0;
|
|
231
231
|
while (currentPosition < text.length) {
|
|
232
232
|
let chunkEnd = currentPosition;
|
|
233
|
-
let currentChunk = "";
|
|
234
233
|
while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
|
|
235
234
|
chunkEnd++;
|
|
236
235
|
}
|
|
237
|
-
currentChunk = text.slice(currentPosition, chunkEnd);
|
|
236
|
+
const currentChunk = text.slice(currentPosition, chunkEnd);
|
|
237
|
+
const chunkLength = this.lengthFunction(currentChunk);
|
|
238
238
|
chunks.push(currentChunk);
|
|
239
|
-
|
|
239
|
+
if (chunkEnd >= text.length) break;
|
|
240
|
+
currentPosition += Math.max(1, chunkLength - this.overlap);
|
|
240
241
|
}
|
|
241
242
|
return chunks;
|
|
242
243
|
}
|
|
@@ -1823,19 +1824,35 @@ var vectorQuerySearch = async ({
|
|
|
1823
1824
|
// src/utils/default-settings.ts
|
|
1824
1825
|
var defaultVectorQueryDescription = () => `Access the knowledge base to find information needed to answer user questions.`;
|
|
1825
1826
|
var defaultGraphRagDescription = () => `Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
|
|
1827
|
+
var queryTextDescription = `The text query to search for in the vector database.
|
|
1828
|
+
- ALWAYS provide a non-empty query string
|
|
1829
|
+
- Must contain the user's question or search terms
|
|
1830
|
+
- Example: "market data" or "financial reports"
|
|
1831
|
+
- If the user's query is about a specific topic, use that topic as the queryText
|
|
1832
|
+
- Cannot be an empty string
|
|
1833
|
+
- Do not include quotes, just the text itself
|
|
1834
|
+
- Required for all searches`;
|
|
1826
1835
|
var topKDescription = `Controls how many matching documents to return.
|
|
1827
|
-
-
|
|
1836
|
+
- ALWAYS provide a value
|
|
1837
|
+
- If no value is provided, use the default (10)
|
|
1838
|
+
- Must be a valid and positive number
|
|
1839
|
+
- Cannot be NaN
|
|
1828
1840
|
- Uses provided value if specified
|
|
1829
|
-
- Default: 10 results
|
|
1830
|
-
- Higher values provide more context
|
|
1831
|
-
- Lower values focus on best matches
|
|
1841
|
+
- Default: 10 results (use this if unsure)
|
|
1842
|
+
- Higher values (like 20) provide more context
|
|
1843
|
+
- Lower values (like 3) focus on best matches
|
|
1832
1844
|
- Based on query requirements`;
|
|
1833
1845
|
var filterDescription = `JSON-formatted criteria to refine search results.
|
|
1834
|
-
-
|
|
1846
|
+
- ALWAYS provide a filter value
|
|
1847
|
+
- If no filter is provided, use the default ("{}")
|
|
1848
|
+
- MUST be a valid, complete JSON object with proper quotes and brackets
|
|
1835
1849
|
- Uses provided filter if specified
|
|
1836
1850
|
- Default: "{}" (no filtering)
|
|
1851
|
+
- Example for no filtering: "filter": "{}"
|
|
1837
1852
|
- Example: '{"category": "health"}'
|
|
1838
1853
|
- Based on query intent
|
|
1854
|
+
- Do NOT use single quotes or unquoted properties
|
|
1855
|
+
- IMPORTANT: Always ensure JSON is properly closed with matching brackets
|
|
1839
1856
|
- Multiple filters can be combined`;
|
|
1840
1857
|
|
|
1841
1858
|
// src/tools/graph-rag.ts
|
|
@@ -1857,32 +1874,40 @@ var createGraphRAGTool = ({
|
|
|
1857
1874
|
const toolDescription = description || defaultGraphRagDescription();
|
|
1858
1875
|
const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
|
|
1859
1876
|
let isInitialized = false;
|
|
1877
|
+
const baseSchema = {
|
|
1878
|
+
queryText: z.string().describe(queryTextDescription),
|
|
1879
|
+
topK: z.any().describe(topKDescription)
|
|
1880
|
+
};
|
|
1881
|
+
const inputSchema = enableFilter ? z.object({
|
|
1882
|
+
...baseSchema,
|
|
1883
|
+
filter: z.string().describe(filterDescription)
|
|
1884
|
+
}).passthrough() : z.object(baseSchema).passthrough();
|
|
1860
1885
|
return createTool({
|
|
1861
1886
|
id: toolId,
|
|
1862
|
-
inputSchema
|
|
1863
|
-
queryText: z.string().describe("The text query to search for in the vector database"),
|
|
1864
|
-
topK: z.number().describe(topKDescription),
|
|
1865
|
-
filter: z.string().describe(filterDescription)
|
|
1866
|
-
}),
|
|
1887
|
+
inputSchema,
|
|
1867
1888
|
outputSchema: z.object({
|
|
1868
1889
|
relevantContext: z.any()
|
|
1869
1890
|
}),
|
|
1870
1891
|
description: toolDescription,
|
|
1871
1892
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
1893
|
+
const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
|
|
1872
1894
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
1873
1895
|
if (vectorStore) {
|
|
1874
1896
|
let queryFilter = {};
|
|
1875
1897
|
if (enableFilter) {
|
|
1876
|
-
queryFilter =
|
|
1898
|
+
queryFilter = (() => {
|
|
1877
1899
|
try {
|
|
1878
|
-
return JSON.parse(filter);
|
|
1879
|
-
} catch {
|
|
1880
|
-
|
|
1900
|
+
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
1901
|
+
} catch (error) {
|
|
1902
|
+
if (mastra.logger) {
|
|
1903
|
+
mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
|
|
1904
|
+
}
|
|
1905
|
+
return {};
|
|
1881
1906
|
}
|
|
1882
|
-
})()
|
|
1907
|
+
})();
|
|
1883
1908
|
}
|
|
1884
1909
|
if (mastra.logger) {
|
|
1885
|
-
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
|
|
1910
|
+
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
|
|
1886
1911
|
}
|
|
1887
1912
|
const { results, queryEmbedding } = await vectorQuerySearch({
|
|
1888
1913
|
indexName,
|
|
@@ -1890,7 +1915,7 @@ var createGraphRAGTool = ({
|
|
|
1890
1915
|
queryText,
|
|
1891
1916
|
model,
|
|
1892
1917
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
|
|
1893
|
-
topK,
|
|
1918
|
+
topK: topKValue,
|
|
1894
1919
|
includeVectors: true
|
|
1895
1920
|
});
|
|
1896
1921
|
if (!isInitialized) {
|
|
@@ -1906,7 +1931,7 @@ var createGraphRAGTool = ({
|
|
|
1906
1931
|
}
|
|
1907
1932
|
const rerankedResults = graphRag.query({
|
|
1908
1933
|
query: queryEmbedding,
|
|
1909
|
-
topK,
|
|
1934
|
+
topK: topKValue,
|
|
1910
1935
|
randomWalkSteps: graphOptions.randomWalkSteps,
|
|
1911
1936
|
restartProb: graphOptions.restartProb
|
|
1912
1937
|
});
|
|
@@ -1933,13 +1958,13 @@ var createVectorQueryTool = ({
|
|
|
1933
1958
|
const toolId = id || `VectorQuery ${vectorStoreName} ${indexName} Tool`;
|
|
1934
1959
|
const toolDescription = description || defaultVectorQueryDescription();
|
|
1935
1960
|
const baseSchema = {
|
|
1936
|
-
queryText: z.string().describe(
|
|
1937
|
-
topK: z.
|
|
1961
|
+
queryText: z.string().describe(queryTextDescription),
|
|
1962
|
+
topK: z.any().describe(topKDescription)
|
|
1938
1963
|
};
|
|
1939
1964
|
const inputSchema = enableFilter ? z.object({
|
|
1940
1965
|
...baseSchema,
|
|
1941
|
-
filter: z.
|
|
1942
|
-
}).
|
|
1966
|
+
filter: z.string().describe(filterDescription)
|
|
1967
|
+
}).passthrough() : z.object(baseSchema).passthrough();
|
|
1943
1968
|
return createTool({
|
|
1944
1969
|
id: toolId,
|
|
1945
1970
|
inputSchema,
|
|
@@ -1948,20 +1973,24 @@ var createVectorQueryTool = ({
|
|
|
1948
1973
|
}),
|
|
1949
1974
|
description: toolDescription,
|
|
1950
1975
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
1976
|
+
const topKValue = typeof topK === "number" && !isNaN(topK) ? topK : typeof topK === "string" && !isNaN(Number(topK)) ? Number(topK) : 10;
|
|
1951
1977
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
1952
1978
|
if (vectorStore) {
|
|
1953
1979
|
let queryFilter = {};
|
|
1954
1980
|
if (enableFilter && filter) {
|
|
1955
1981
|
queryFilter = (() => {
|
|
1956
1982
|
try {
|
|
1957
|
-
return JSON.parse(filter);
|
|
1958
|
-
} catch {
|
|
1959
|
-
|
|
1983
|
+
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
1984
|
+
} catch (error) {
|
|
1985
|
+
if (mastra.logger) {
|
|
1986
|
+
mastra.logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
|
|
1987
|
+
}
|
|
1988
|
+
return {};
|
|
1960
1989
|
}
|
|
1961
1990
|
})();
|
|
1962
1991
|
}
|
|
1963
1992
|
if (mastra.logger) {
|
|
1964
|
-
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK });
|
|
1993
|
+
mastra.logger.debug("Using this filter and topK:", { queryFilter, topK: topKValue });
|
|
1965
1994
|
}
|
|
1966
1995
|
const { results } = await vectorQuerySearch({
|
|
1967
1996
|
indexName,
|
|
@@ -1969,12 +1998,12 @@ var createVectorQueryTool = ({
|
|
|
1969
1998
|
queryText,
|
|
1970
1999
|
model,
|
|
1971
2000
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : void 0,
|
|
1972
|
-
topK
|
|
2001
|
+
topK: topKValue
|
|
1973
2002
|
});
|
|
1974
2003
|
if (reranker) {
|
|
1975
2004
|
const rerankedResults = await rerank(results, queryText, reranker.model, {
|
|
1976
2005
|
...reranker.options,
|
|
1977
|
-
topK: reranker.options?.topK ||
|
|
2006
|
+
topK: reranker.options?.topK || topKValue
|
|
1978
2007
|
});
|
|
1979
2008
|
const relevantChunks2 = rerankedResults.map(({ result }) => result?.metadata);
|
|
1980
2009
|
return { relevantContext: relevantChunks2 };
|
|
@@ -2710,4 +2739,4 @@ Example Complex Query:
|
|
|
2710
2739
|
"inStock": true
|
|
2711
2740
|
}`;
|
|
2712
2741
|
|
|
2713
|
-
export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, rerank, topKDescription };
|
|
2742
|
+
export { ASTRA_PROMPT, CHROMA_PROMPT, GraphRAG, LIBSQL_PROMPT, MDocument, PGVECTOR_PROMPT, PINECONE_PROMPT, QDRANT_PROMPT, UPSTASH_PROMPT, VECTORIZE_PROMPT, createDocumentChunkerTool, createGraphRAGTool, createVectorQueryTool, defaultGraphRagDescription, defaultVectorQueryDescription, filterDescription, queryTextDescription, rerank, topKDescription };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/rag",
|
|
3
|
-
"version": "0.1.10-alpha.
|
|
3
|
+
"version": "0.1.10-alpha.1",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"node-html-better-parser": "^1.4.7",
|
|
30
30
|
"pathe": "^2.0.3",
|
|
31
31
|
"zod": "^3.24.2",
|
|
32
|
-
"@mastra/core": "^0.6.1-alpha.
|
|
32
|
+
"@mastra/core": "^0.6.1-alpha.1"
|
|
33
33
|
},
|
|
34
34
|
"peerDependencies": {
|
|
35
35
|
"ai": "^4.0.0"
|
|
@@ -225,37 +225,152 @@ describe('MDocument', () => {
|
|
|
225
225
|
});
|
|
226
226
|
});
|
|
227
227
|
it('should properly implement overlap in character chunking', async () => {
|
|
228
|
-
//
|
|
228
|
+
// Test basic overlap functionality
|
|
229
229
|
const text = 'a'.repeat(500) + 'b'.repeat(500) + 'c'.repeat(500);
|
|
230
230
|
const chunkSize = 600;
|
|
231
|
-
const
|
|
232
|
-
const doc = MDocument.fromText(text
|
|
231
|
+
const overlap = 100;
|
|
232
|
+
const doc = MDocument.fromText(text);
|
|
233
233
|
|
|
234
|
-
await doc.chunk({
|
|
234
|
+
const result = await doc.chunk({
|
|
235
235
|
strategy: 'character',
|
|
236
236
|
size: chunkSize,
|
|
237
|
-
overlap
|
|
237
|
+
overlap,
|
|
238
238
|
});
|
|
239
239
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
const prevChunk = docs[i - 1]?.text;
|
|
245
|
-
const currentChunk = docs[i]?.text;
|
|
240
|
+
// Verify overlap between chunks
|
|
241
|
+
for (let i = 1; i < result.length; i++) {
|
|
242
|
+
const prevChunk = result[i - 1]?.text;
|
|
243
|
+
const currentChunk = result[i]?.text;
|
|
246
244
|
|
|
247
245
|
if (prevChunk && currentChunk) {
|
|
248
|
-
// Get the end of the previous chunk
|
|
249
|
-
const prevEnd = prevChunk.slice(-overlapSize);
|
|
250
|
-
|
|
251
|
-
const currentStart = currentChunk.slice(0, overlapSize);
|
|
246
|
+
// Get the end of the previous chunk and start of current chunk
|
|
247
|
+
const prevEnd = prevChunk.slice(-overlap);
|
|
248
|
+
const currentStart = currentChunk.slice(0, overlap);
|
|
252
249
|
|
|
253
|
-
//
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
250
|
+
// There should be a common substring of length >= min(overlap, chunk length)
|
|
251
|
+
const commonSubstring = findCommonSubstring(prevEnd, currentStart);
|
|
252
|
+
expect(commonSubstring.length).toBeGreaterThan(0);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
it('should ensure character chunks never exceed size limit', async () => {
|
|
258
|
+
// Create text with varying content to test size limits
|
|
259
|
+
const text = 'a'.repeat(50) + 'b'.repeat(100) + 'c'.repeat(30);
|
|
260
|
+
const chunkSize = 50;
|
|
261
|
+
const overlap = 10;
|
|
262
|
+
|
|
263
|
+
const doc = MDocument.fromText(text);
|
|
264
|
+
const chunks = await doc.chunk({
|
|
265
|
+
strategy: 'character',
|
|
266
|
+
size: chunkSize,
|
|
267
|
+
overlap,
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
chunks.forEach((chunk, i) => {
|
|
271
|
+
if (i > 0) {
|
|
272
|
+
const prevChunk = chunks[i - 1]?.text;
|
|
273
|
+
const actualOverlap = chunk.text.slice(0, overlap);
|
|
274
|
+
const expectedOverlap = prevChunk?.slice(-overlap);
|
|
275
|
+
expect(actualOverlap).toBe(expectedOverlap);
|
|
276
|
+
}
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
// Verify each chunk's size
|
|
280
|
+
let allChunksValid = true;
|
|
281
|
+
for (const chunk of chunks) {
|
|
282
|
+
if (chunk.text.length > chunkSize) {
|
|
283
|
+
allChunksValid = false;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
expect(allChunksValid).toBe(true);
|
|
287
|
+
|
|
288
|
+
// Verify overlaps between consecutive chunks
|
|
289
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
290
|
+
const prevChunk = chunks[i - 1]!;
|
|
291
|
+
const currentChunk = chunks[i]!;
|
|
292
|
+
|
|
293
|
+
// The end of the previous chunk should match the start of the current chunk
|
|
294
|
+
const prevEnd = prevChunk.text.slice(-overlap);
|
|
295
|
+
const currentStart = currentChunk.text.slice(0, overlap);
|
|
296
|
+
|
|
297
|
+
expect(currentStart).toBe(prevEnd);
|
|
298
|
+
expect(currentStart.length).toBeLessThanOrEqual(overlap);
|
|
299
|
+
}
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
it('should handle end chunks properly in character chunking', async () => {
|
|
303
|
+
const text = 'This is a test document that needs to be split into chunks with proper handling of the end.';
|
|
304
|
+
const chunkSize = 20;
|
|
305
|
+
const overlap = 5;
|
|
306
|
+
|
|
307
|
+
const testDoc = MDocument.fromText(text);
|
|
308
|
+
const chunks = await testDoc.chunk({
|
|
309
|
+
strategy: 'character',
|
|
310
|
+
size: chunkSize,
|
|
311
|
+
overlap,
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
// Verify no tiny fragments at the end
|
|
315
|
+
const lastChunk = chunks[chunks.length - 1]?.text;
|
|
316
|
+
expect(lastChunk?.length).toBeGreaterThan(5);
|
|
317
|
+
|
|
318
|
+
// Verify each chunk respects size limit
|
|
319
|
+
let allChunksValid = true;
|
|
320
|
+
for (const chunk of chunks) {
|
|
321
|
+
if (chunk.text.length > chunkSize) {
|
|
322
|
+
allChunksValid = false;
|
|
257
323
|
}
|
|
258
324
|
}
|
|
325
|
+
expect(allChunksValid).toBe(true);
|
|
326
|
+
|
|
327
|
+
// Verify each chunk size explicitly
|
|
328
|
+
for (const chunk of chunks) {
|
|
329
|
+
expect(chunk.text.length).toBeLessThanOrEqual(chunkSize);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
// Verify overlaps between consecutive chunks
|
|
333
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
334
|
+
const prevChunk = chunks[i - 1]!;
|
|
335
|
+
const currentChunk = chunks[i]!;
|
|
336
|
+
|
|
337
|
+
// The end of the previous chunk should match the start of the current chunk
|
|
338
|
+
const prevEnd = prevChunk.text.slice(-overlap);
|
|
339
|
+
const currentStart = currentChunk.text.slice(0, overlap);
|
|
340
|
+
|
|
341
|
+
expect(currentStart).toBe(prevEnd);
|
|
342
|
+
expect(currentStart.length).toBeLessThanOrEqual(overlap);
|
|
343
|
+
}
|
|
344
|
+
});
|
|
345
|
+
it('should not create tiny chunks at the end', async () => {
|
|
346
|
+
const text = 'ABCDEFGHIJ'; // 10 characters
|
|
347
|
+
const chunkSize = 4;
|
|
348
|
+
const overlap = 2;
|
|
349
|
+
|
|
350
|
+
const doc = MDocument.fromText(text);
|
|
351
|
+
const chunks = await doc.chunk({
|
|
352
|
+
strategy: 'character',
|
|
353
|
+
size: chunkSize,
|
|
354
|
+
overlap,
|
|
355
|
+
});
|
|
356
|
+
|
|
357
|
+
// Verify we don't have tiny chunks
|
|
358
|
+
chunks.forEach(chunk => {
|
|
359
|
+
// Each chunk should be either:
|
|
360
|
+
// 1. Full size (chunkSize)
|
|
361
|
+
// 2. Or at least half the chunk size if it's the last chunk
|
|
362
|
+
const minSize = chunk === chunks[chunks.length - 1] ? Math.floor(chunkSize / 2) : chunkSize;
|
|
363
|
+
expect(chunk.text.length).toBeGreaterThanOrEqual(minSize);
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
// Verify overlaps are maintained
|
|
367
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
368
|
+
const prevChunk = chunks[i - 1]!;
|
|
369
|
+
const currentChunk = chunks[i]!;
|
|
370
|
+
const actualOverlap = currentChunk.text.slice(0, overlap);
|
|
371
|
+
const expectedOverlap = prevChunk.text.slice(-overlap);
|
|
372
|
+
expect(actualOverlap).toBe(expectedOverlap);
|
|
373
|
+
}
|
|
259
374
|
});
|
|
260
375
|
});
|
|
261
376
|
|
|
@@ -376,10 +491,6 @@ describe('MDocument', () => {
|
|
|
376
491
|
for (let i = 1; i < docs.length; i++) {
|
|
377
492
|
const prevChunk = docs[i - 1]?.text;
|
|
378
493
|
const currentChunk = docs[i]?.text;
|
|
379
|
-
console.log({
|
|
380
|
-
prevChunk,
|
|
381
|
-
currentChunk,
|
|
382
|
-
});
|
|
383
494
|
|
|
384
495
|
if (prevChunk && currentChunk) {
|
|
385
496
|
// Test using two methods:
|
|
@@ -100,18 +100,21 @@ export class CharacterTransformer extends TextTransformer {
|
|
|
100
100
|
|
|
101
101
|
while (currentPosition < text.length) {
|
|
102
102
|
let chunkEnd = currentPosition;
|
|
103
|
-
let currentChunk = '';
|
|
104
103
|
|
|
105
104
|
// Build chunk up to max size
|
|
106
105
|
while (chunkEnd < text.length && this.lengthFunction(text.slice(currentPosition, chunkEnd + 1)) <= this.size) {
|
|
107
106
|
chunkEnd++;
|
|
108
107
|
}
|
|
109
108
|
|
|
110
|
-
currentChunk = text.slice(currentPosition, chunkEnd);
|
|
109
|
+
const currentChunk = text.slice(currentPosition, chunkEnd);
|
|
110
|
+
const chunkLength = this.lengthFunction(currentChunk);
|
|
111
111
|
chunks.push(currentChunk);
|
|
112
112
|
|
|
113
|
+
// If we're at the end, break to avoid tiny chunks
|
|
114
|
+
if (chunkEnd >= text.length) break;
|
|
115
|
+
|
|
113
116
|
// Move position forward by chunk size minus overlap
|
|
114
|
-
currentPosition += Math.max(1,
|
|
117
|
+
currentPosition += Math.max(1, chunkLength - this.overlap);
|
|
115
118
|
}
|
|
116
119
|
|
|
117
120
|
return chunks;
|
package/src/tools/graph-rag.ts
CHANGED
|
@@ -3,7 +3,13 @@ import type { EmbeddingModel } from 'ai';
|
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
|
|
5
5
|
import { GraphRAG } from '../graph-rag';
|
|
6
|
-
import {
|
|
6
|
+
import {
|
|
7
|
+
vectorQuerySearch,
|
|
8
|
+
defaultGraphRagDescription,
|
|
9
|
+
filterDescription,
|
|
10
|
+
topKDescription,
|
|
11
|
+
queryTextDescription,
|
|
12
|
+
} from '../utils';
|
|
7
13
|
|
|
8
14
|
export const createGraphRAGTool = ({
|
|
9
15
|
vectorStoreName,
|
|
@@ -38,35 +44,51 @@ export const createGraphRAGTool = ({
|
|
|
38
44
|
const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
|
|
39
45
|
let isInitialized = false;
|
|
40
46
|
|
|
47
|
+
const baseSchema = {
|
|
48
|
+
queryText: z.string().describe(queryTextDescription),
|
|
49
|
+
topK: z.any().describe(topKDescription),
|
|
50
|
+
};
|
|
51
|
+
const inputSchema = enableFilter
|
|
52
|
+
? z
|
|
53
|
+
.object({
|
|
54
|
+
...baseSchema,
|
|
55
|
+
filter: z.string().describe(filterDescription),
|
|
56
|
+
})
|
|
57
|
+
.passthrough()
|
|
58
|
+
: z.object(baseSchema).passthrough();
|
|
41
59
|
return createTool({
|
|
42
60
|
id: toolId,
|
|
43
|
-
inputSchema: z.object({
|
|
44
|
-
queryText: z.string().describe('The text query to search for in the vector database'),
|
|
45
|
-
topK: z.number().describe(topKDescription),
|
|
46
|
-
filter: z.string().describe(filterDescription),
|
|
47
|
-
}),
|
|
61
|
+
inputSchema,
|
|
48
62
|
outputSchema: z.object({
|
|
49
63
|
relevantContext: z.any(),
|
|
50
64
|
}),
|
|
51
65
|
description: toolDescription,
|
|
52
66
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
67
|
+
const topKValue =
|
|
68
|
+
typeof topK === 'number' && !isNaN(topK)
|
|
69
|
+
? topK
|
|
70
|
+
: typeof topK === 'string' && !isNaN(Number(topK))
|
|
71
|
+
? Number(topK)
|
|
72
|
+
: 10;
|
|
53
73
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
54
74
|
|
|
55
75
|
if (vectorStore) {
|
|
56
76
|
let queryFilter = {};
|
|
57
77
|
if (enableFilter) {
|
|
58
|
-
queryFilter =
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
|
|
78
|
+
queryFilter = (() => {
|
|
79
|
+
try {
|
|
80
|
+
return typeof filter === 'string' ? JSON.parse(filter) : filter;
|
|
81
|
+
} catch (error) {
|
|
82
|
+
// Log the error and use empty object
|
|
83
|
+
if (mastra.logger) {
|
|
84
|
+
mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
|
|
85
|
+
}
|
|
86
|
+
return {};
|
|
87
|
+
}
|
|
88
|
+
})();
|
|
67
89
|
}
|
|
68
90
|
if (mastra.logger) {
|
|
69
|
-
mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
|
|
91
|
+
mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
|
|
70
92
|
}
|
|
71
93
|
const { results, queryEmbedding } = await vectorQuerySearch({
|
|
72
94
|
indexName,
|
|
@@ -74,7 +96,7 @@ export const createGraphRAGTool = ({
|
|
|
74
96
|
queryText,
|
|
75
97
|
model,
|
|
76
98
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
|
|
77
|
-
topK,
|
|
99
|
+
topK: topKValue,
|
|
78
100
|
includeVectors: true,
|
|
79
101
|
});
|
|
80
102
|
|
|
@@ -96,7 +118,7 @@ export const createGraphRAGTool = ({
|
|
|
96
118
|
// Get reranked results using GraphRAG
|
|
97
119
|
const rerankedResults = graphRag.query({
|
|
98
120
|
query: queryEmbedding,
|
|
99
|
-
topK,
|
|
121
|
+
topK: topKValue,
|
|
100
122
|
randomWalkSteps: graphOptions.randomWalkSteps,
|
|
101
123
|
restartProb: graphOptions.restartProb,
|
|
102
124
|
});
|
|
@@ -107,7 +129,6 @@ export const createGraphRAGTool = ({
|
|
|
107
129
|
relevantContext: relevantChunks,
|
|
108
130
|
};
|
|
109
131
|
}
|
|
110
|
-
|
|
111
132
|
return {
|
|
112
133
|
relevantContext: [],
|
|
113
134
|
};
|
|
@@ -15,6 +15,7 @@ vi.mock('@mastra/core/tools', () => ({
|
|
|
15
15
|
vi.mock('../utils', () => ({
|
|
16
16
|
vectorQuerySearch: vi.fn().mockResolvedValue({ results: [] }),
|
|
17
17
|
defaultVectorQueryDescription: () => 'Default vector query description',
|
|
18
|
+
queryTextDescription: 'Query text description',
|
|
18
19
|
filterDescription: 'Filter description',
|
|
19
20
|
topKDescription: 'Top K description',
|
|
20
21
|
}));
|
|
@@ -29,6 +30,8 @@ describe('createVectorQueryTool', () => {
|
|
|
29
30
|
},
|
|
30
31
|
logger: {
|
|
31
32
|
debug: vi.fn(),
|
|
33
|
+
warn: vi.fn(),
|
|
34
|
+
info: vi.fn(),
|
|
32
35
|
},
|
|
33
36
|
};
|
|
34
37
|
|
|
@@ -37,7 +40,7 @@ describe('createVectorQueryTool', () => {
|
|
|
37
40
|
});
|
|
38
41
|
|
|
39
42
|
describe('input schema validation', () => {
|
|
40
|
-
it('should
|
|
43
|
+
it('should handle filter permissively when enableFilter is false', () => {
|
|
41
44
|
// Create tool with enableFilter set to false
|
|
42
45
|
const tool = createVectorQueryTool({
|
|
43
46
|
vectorStoreName: 'testStore',
|
|
@@ -61,10 +64,10 @@ describe('createVectorQueryTool', () => {
|
|
|
61
64
|
...validInput,
|
|
62
65
|
filter: '{"field": "value"}',
|
|
63
66
|
};
|
|
64
|
-
expect(() => schema.parse(inputWithFilter)).toThrow();
|
|
67
|
+
expect(() => schema.parse(inputWithFilter)).not.toThrow();
|
|
65
68
|
});
|
|
66
69
|
|
|
67
|
-
it('should handle filter
|
|
70
|
+
it('should handle filter when enableFilter is true', () => {
|
|
68
71
|
const tool = createVectorQueryTool({
|
|
69
72
|
vectorStoreName: 'testStore',
|
|
70
73
|
indexName: 'testIndex',
|
|
@@ -81,13 +84,14 @@ describe('createVectorQueryTool', () => {
|
|
|
81
84
|
{ filter: '{"field": "value"}' },
|
|
82
85
|
{ filter: '{}' },
|
|
83
86
|
{ filter: 'simple-string' },
|
|
84
|
-
//
|
|
87
|
+
// Empty
|
|
88
|
+
{ filter: '' },
|
|
89
|
+
];
|
|
90
|
+
|
|
91
|
+
const invalidTestCases = [
|
|
85
92
|
{ filter: { field: 'value' } },
|
|
86
93
|
{ filter: {} },
|
|
87
|
-
// Numbers (should be coerced)
|
|
88
94
|
{ filter: 123 },
|
|
89
|
-
// Empty/null values (should be coerced)
|
|
90
|
-
{ filter: '' },
|
|
91
95
|
{ filter: null },
|
|
92
96
|
{ filter: undefined },
|
|
93
97
|
];
|
|
@@ -102,6 +106,16 @@ describe('createVectorQueryTool', () => {
|
|
|
102
106
|
).not.toThrow();
|
|
103
107
|
});
|
|
104
108
|
|
|
109
|
+
invalidTestCases.forEach(({ filter }) => {
|
|
110
|
+
expect(() =>
|
|
111
|
+
schema.parse({
|
|
112
|
+
queryText: 'test query',
|
|
113
|
+
topK: 5,
|
|
114
|
+
filter,
|
|
115
|
+
}),
|
|
116
|
+
).toThrow();
|
|
117
|
+
});
|
|
118
|
+
|
|
105
119
|
// Verify that all parsed values are strings
|
|
106
120
|
testCases.forEach(({ filter }) => {
|
|
107
121
|
const result = schema.parse({
|
|
@@ -113,7 +127,7 @@ describe('createVectorQueryTool', () => {
|
|
|
113
127
|
});
|
|
114
128
|
});
|
|
115
129
|
|
|
116
|
-
it('should reject unexpected properties in both modes', () => {
|
|
130
|
+
it('should not reject unexpected properties in both modes', () => {
|
|
117
131
|
// Test with enableFilter false
|
|
118
132
|
const toolWithoutFilter = createVectorQueryTool({
|
|
119
133
|
vectorStoreName: 'testStore',
|
|
@@ -129,7 +143,7 @@ describe('createVectorQueryTool', () => {
|
|
|
129
143
|
topK: 5,
|
|
130
144
|
unexpectedProp: 'value',
|
|
131
145
|
}),
|
|
132
|
-
).toThrow();
|
|
146
|
+
).not.toThrow();
|
|
133
147
|
|
|
134
148
|
// Test with enableFilter true
|
|
135
149
|
const toolWithFilter = createVectorQueryTool({
|
|
@@ -147,7 +161,7 @@ describe('createVectorQueryTool', () => {
|
|
|
147
161
|
filter: '{}',
|
|
148
162
|
unexpectedProp: 'value',
|
|
149
163
|
}),
|
|
150
|
-
).toThrow();
|
|
164
|
+
).not.toThrow();
|
|
151
165
|
});
|
|
152
166
|
});
|
|
153
167
|
|
|
@@ -228,10 +242,10 @@ describe('createVectorQueryTool', () => {
|
|
|
228
242
|
mastra: mockMastra,
|
|
229
243
|
});
|
|
230
244
|
|
|
231
|
-
//
|
|
245
|
+
// Since this is not a valid filter, it should be ignored
|
|
232
246
|
expect(vectorQuerySearch).toHaveBeenCalledWith(
|
|
233
247
|
expect.objectContaining({
|
|
234
|
-
queryFilter:
|
|
248
|
+
queryFilter: undefined,
|
|
235
249
|
}),
|
|
236
250
|
);
|
|
237
251
|
});
|
|
@@ -4,7 +4,13 @@ import { z } from 'zod';
|
|
|
4
4
|
|
|
5
5
|
import { rerank } from '../rerank';
|
|
6
6
|
import type { RerankConfig } from '../rerank';
|
|
7
|
-
import {
|
|
7
|
+
import {
|
|
8
|
+
vectorQuerySearch,
|
|
9
|
+
defaultVectorQueryDescription,
|
|
10
|
+
filterDescription,
|
|
11
|
+
topKDescription,
|
|
12
|
+
queryTextDescription,
|
|
13
|
+
} from '../utils';
|
|
8
14
|
|
|
9
15
|
export const createVectorQueryTool = ({
|
|
10
16
|
vectorStoreName,
|
|
@@ -27,17 +33,17 @@ export const createVectorQueryTool = ({
|
|
|
27
33
|
const toolDescription = description || defaultVectorQueryDescription();
|
|
28
34
|
// Create base schema with required fields
|
|
29
35
|
const baseSchema = {
|
|
30
|
-
queryText: z.string().describe(
|
|
31
|
-
topK: z.
|
|
36
|
+
queryText: z.string().describe(queryTextDescription),
|
|
37
|
+
topK: z.any().describe(topKDescription),
|
|
32
38
|
};
|
|
33
39
|
const inputSchema = enableFilter
|
|
34
40
|
? z
|
|
35
41
|
.object({
|
|
36
42
|
...baseSchema,
|
|
37
|
-
filter: z.
|
|
43
|
+
filter: z.string().describe(filterDescription),
|
|
38
44
|
})
|
|
39
|
-
.
|
|
40
|
-
: z.object(baseSchema).
|
|
45
|
+
.passthrough()
|
|
46
|
+
: z.object(baseSchema).passthrough();
|
|
41
47
|
return createTool({
|
|
42
48
|
id: toolId,
|
|
43
49
|
inputSchema,
|
|
@@ -46,6 +52,13 @@ export const createVectorQueryTool = ({
|
|
|
46
52
|
}),
|
|
47
53
|
description: toolDescription,
|
|
48
54
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
55
|
+
const topKValue =
|
|
56
|
+
typeof topK === 'number' && !isNaN(topK)
|
|
57
|
+
? topK
|
|
58
|
+
: typeof topK === 'string' && !isNaN(Number(topK))
|
|
59
|
+
? Number(topK)
|
|
60
|
+
: 10;
|
|
61
|
+
|
|
49
62
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
50
63
|
|
|
51
64
|
// Get relevant chunks from the vector database
|
|
@@ -54,14 +67,18 @@ export const createVectorQueryTool = ({
|
|
|
54
67
|
if (enableFilter && filter) {
|
|
55
68
|
queryFilter = (() => {
|
|
56
69
|
try {
|
|
57
|
-
return JSON.parse(filter);
|
|
58
|
-
} catch {
|
|
59
|
-
|
|
70
|
+
return typeof filter === 'string' ? JSON.parse(filter) : filter;
|
|
71
|
+
} catch (error) {
|
|
72
|
+
// Log the error and use empty object
|
|
73
|
+
if (mastra.logger) {
|
|
74
|
+
mastra.logger.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
|
|
75
|
+
}
|
|
76
|
+
return {};
|
|
60
77
|
}
|
|
61
78
|
})();
|
|
62
79
|
}
|
|
63
80
|
if (mastra.logger) {
|
|
64
|
-
mastra.logger.debug('Using this filter and topK:', { queryFilter, topK });
|
|
81
|
+
mastra.logger.debug('Using this filter and topK:', { queryFilter, topK: topKValue });
|
|
65
82
|
}
|
|
66
83
|
|
|
67
84
|
const { results } = await vectorQuerySearch({
|
|
@@ -70,24 +87,22 @@ export const createVectorQueryTool = ({
|
|
|
70
87
|
queryText,
|
|
71
88
|
model,
|
|
72
89
|
queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
|
|
73
|
-
topK,
|
|
90
|
+
topK: topKValue,
|
|
74
91
|
});
|
|
75
92
|
if (reranker) {
|
|
76
93
|
const rerankedResults = await rerank(results, queryText, reranker.model, {
|
|
77
94
|
...reranker.options,
|
|
78
|
-
topK: reranker.options?.topK ||
|
|
95
|
+
topK: reranker.options?.topK || topKValue,
|
|
79
96
|
});
|
|
80
97
|
const relevantChunks = rerankedResults.map(({ result }) => result?.metadata);
|
|
81
98
|
return { relevantContext: relevantChunks };
|
|
82
99
|
}
|
|
83
100
|
|
|
84
101
|
const relevantChunks = results.map(result => result?.metadata);
|
|
85
|
-
|
|
86
102
|
return {
|
|
87
103
|
relevantContext: relevantChunks,
|
|
88
104
|
};
|
|
89
105
|
}
|
|
90
|
-
|
|
91
106
|
return {
|
|
92
107
|
relevantContext: [],
|
|
93
108
|
};
|
|
@@ -4,18 +4,35 @@ export const defaultVectorQueryDescription = () =>
|
|
|
4
4
|
export const defaultGraphRagDescription = () =>
|
|
5
5
|
`Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.`;
|
|
6
6
|
|
|
7
|
+
export const queryTextDescription = `The text query to search for in the vector database.
|
|
8
|
+
- ALWAYS provide a non-empty query string
|
|
9
|
+
- Must contain the user's question or search terms
|
|
10
|
+
- Example: "market data" or "financial reports"
|
|
11
|
+
- If the user's query is about a specific topic, use that topic as the queryText
|
|
12
|
+
- Cannot be an empty string
|
|
13
|
+
- Do not include quotes, just the text itself
|
|
14
|
+
- Required for all searches`;
|
|
15
|
+
|
|
7
16
|
export const topKDescription = `Controls how many matching documents to return.
|
|
8
|
-
-
|
|
17
|
+
- ALWAYS provide a value
|
|
18
|
+
- If no value is provided, use the default (10)
|
|
19
|
+
- Must be a valid and positive number
|
|
20
|
+
- Cannot be NaN
|
|
9
21
|
- Uses provided value if specified
|
|
10
|
-
- Default: 10 results
|
|
11
|
-
- Higher values provide more context
|
|
12
|
-
- Lower values focus on best matches
|
|
22
|
+
- Default: 10 results (use this if unsure)
|
|
23
|
+
- Higher values (like 20) provide more context
|
|
24
|
+
- Lower values (like 3) focus on best matches
|
|
13
25
|
- Based on query requirements`;
|
|
14
26
|
|
|
15
27
|
export const filterDescription = `JSON-formatted criteria to refine search results.
|
|
16
|
-
-
|
|
28
|
+
- ALWAYS provide a filter value
|
|
29
|
+
- If no filter is provided, use the default ("{}")
|
|
30
|
+
- MUST be a valid, complete JSON object with proper quotes and brackets
|
|
17
31
|
- Uses provided filter if specified
|
|
18
32
|
- Default: "{}" (no filtering)
|
|
33
|
+
- Example for no filtering: "filter": "{}"
|
|
19
34
|
- Example: '{"category": "health"}'
|
|
20
35
|
- Based on query intent
|
|
36
|
+
- Do NOT use single quotes or unquoted properties
|
|
37
|
+
- IMPORTANT: Always ensure JSON is properly closed with matching brackets
|
|
21
38
|
- Multiple filters can be combined`;
|