@mastra/rag 1.2.2 → 1.2.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +22 -0
  2. package/dist/index.cjs +25 -9
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.js +25 -9
  5. package/dist/index.js.map +1 -1
  6. package/dist/tools/graph-rag.d.ts.map +1 -1
  7. package/dist/tools/types.d.ts +18 -5
  8. package/dist/tools/types.d.ts.map +1 -1
  9. package/dist/tools/vector-query.d.ts.map +1 -1
  10. package/dist/utils/vector-search.d.ts +6 -7
  11. package/dist/utils/vector-search.d.ts.map +1 -1
  12. package/package.json +19 -6
  13. package/.turbo/turbo-build.log +0 -4
  14. package/docker-compose.yaml +0 -22
  15. package/eslint.config.js +0 -6
  16. package/src/document/document.test.ts +0 -2975
  17. package/src/document/document.ts +0 -335
  18. package/src/document/extractors/base.ts +0 -30
  19. package/src/document/extractors/index.ts +0 -5
  20. package/src/document/extractors/keywords.test.ts +0 -125
  21. package/src/document/extractors/keywords.ts +0 -126
  22. package/src/document/extractors/questions.test.ts +0 -120
  23. package/src/document/extractors/questions.ts +0 -111
  24. package/src/document/extractors/summary.test.ts +0 -107
  25. package/src/document/extractors/summary.ts +0 -122
  26. package/src/document/extractors/title.test.ts +0 -121
  27. package/src/document/extractors/title.ts +0 -185
  28. package/src/document/extractors/types.ts +0 -40
  29. package/src/document/index.ts +0 -2
  30. package/src/document/prompts/base.ts +0 -77
  31. package/src/document/prompts/format.ts +0 -9
  32. package/src/document/prompts/index.ts +0 -15
  33. package/src/document/prompts/prompt.ts +0 -60
  34. package/src/document/prompts/types.ts +0 -29
  35. package/src/document/schema/index.ts +0 -3
  36. package/src/document/schema/node.ts +0 -187
  37. package/src/document/schema/types.ts +0 -40
  38. package/src/document/transformers/character.ts +0 -267
  39. package/src/document/transformers/html.ts +0 -346
  40. package/src/document/transformers/json.ts +0 -536
  41. package/src/document/transformers/latex.ts +0 -11
  42. package/src/document/transformers/markdown.ts +0 -239
  43. package/src/document/transformers/semantic-markdown.ts +0 -227
  44. package/src/document/transformers/sentence.ts +0 -314
  45. package/src/document/transformers/text.ts +0 -158
  46. package/src/document/transformers/token.ts +0 -137
  47. package/src/document/transformers/transformer.ts +0 -5
  48. package/src/document/types.ts +0 -145
  49. package/src/document/validation.ts +0 -158
  50. package/src/graph-rag/index.test.ts +0 -235
  51. package/src/graph-rag/index.ts +0 -306
  52. package/src/index.ts +0 -8
  53. package/src/rerank/index.test.ts +0 -150
  54. package/src/rerank/index.ts +0 -198
  55. package/src/rerank/relevance/cohere/index.ts +0 -56
  56. package/src/rerank/relevance/index.ts +0 -3
  57. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  58. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  59. package/src/tools/README.md +0 -153
  60. package/src/tools/document-chunker.ts +0 -34
  61. package/src/tools/graph-rag.test.ts +0 -115
  62. package/src/tools/graph-rag.ts +0 -154
  63. package/src/tools/index.ts +0 -3
  64. package/src/tools/types.ts +0 -110
  65. package/src/tools/vector-query-database-config.test.ts +0 -190
  66. package/src/tools/vector-query.test.ts +0 -418
  67. package/src/tools/vector-query.ts +0 -169
  68. package/src/utils/convert-sources.ts +0 -43
  69. package/src/utils/default-settings.ts +0 -38
  70. package/src/utils/index.ts +0 -3
  71. package/src/utils/tool-schemas.ts +0 -38
  72. package/src/utils/vector-prompts.ts +0 -832
  73. package/src/utils/vector-search.ts +0 -117
  74. package/tsconfig.build.json +0 -9
  75. package/tsconfig.json +0 -5
  76. package/tsup.config.ts +0 -17
  77. package/vitest.config.ts +0 -8
@@ -1,169 +0,0 @@
1
- import { createTool } from '@mastra/core/tools';
2
- import type { MastraVector } from '@mastra/core/vector';
3
- import type { EmbeddingModel } from 'ai';
4
- import { z } from 'zod';
5
-
6
- import { rerank, rerankWithScorer } from '../rerank';
7
- import type { RerankConfig, RerankResult } from '../rerank';
8
- import { vectorQuerySearch, defaultVectorQueryDescription, filterSchema, outputSchema, baseSchema } from '../utils';
9
- import type { RagTool } from '../utils';
10
- import { convertToSources } from '../utils/convert-sources';
11
- import type { VectorQueryToolOptions } from './types';
12
-
13
/**
 * Type guard for values usable as a metadata filter: non-null, non-array objects.
 */
const isFilterObject = (value: unknown): value is Record<string, unknown> =>
  typeof value === 'object' && value !== null && !Array.isArray(value);

/**
 * Creates a tool that embeds a text query, searches a vector index and returns the
 * matching chunks' metadata (`relevantContext`) plus, optionally, the full results (`sources`).
 *
 * Most settings can be overridden per call through the runtime context
 * (`indexName`, `vectorStoreName`, `includeVectors`, `includeSources`, `reranker`,
 * `databaseConfig`, `model`, `topK`, `filter`); runtime values win over `options`.
 *
 * @param options Tool configuration: id/description, index name, vector store (instance or name),
 *   embedding model and optional filter/reranker settings.
 * @returns The configured tool. Its `execute` resolves to `{ relevantContext, sources }` and
 *   resolves to empty arrays when the vector store is missing or the search fails.
 * @throws Error from `execute` when no index name or vector store name can be resolved.
 */
export const createVectorQueryTool = (options: VectorQueryToolOptions) => {
  const { id, description } = options;
  const storeName = options['vectorStoreName'] ? options.vectorStoreName : 'DirectVectorStore';

  const toolId = id || `VectorQuery ${storeName} ${options.indexName} Tool`;
  const toolDescription = description || defaultVectorQueryDescription();
  // The `filter` input field is only exposed to the caller when filtering is enabled.
  const inputSchema = options.enableFilter ? filterSchema : z.object(baseSchema).passthrough();

  return createTool({
    id: toolId,
    description: toolDescription,
    inputSchema,
    outputSchema,
    execute: async ({ context, mastra, runtimeContext }) => {
      // Runtime context values take precedence over the options captured at creation time.
      const indexName: string = runtimeContext.get('indexName') ?? options.indexName;
      // A directly supplied store instance is never overridden by a runtime store name.
      const vectorStoreName: string =
        'vectorStore' in options ? storeName : (runtimeContext.get('vectorStoreName') ?? storeName);
      const includeVectors: boolean = runtimeContext.get('includeVectors') ?? options.includeVectors ?? false;
      const includeSources: boolean = runtimeContext.get('includeSources') ?? options.includeSources ?? true;
      const reranker: RerankConfig = runtimeContext.get('reranker') ?? options.reranker;
      const databaseConfig = runtimeContext.get('databaseConfig') ?? options.databaseConfig;
      const model: EmbeddingModel<string> = runtimeContext.get('model') ?? options.model;

      if (!indexName) throw new Error(`indexName is required, got: ${indexName}`);
      // `storeName` always has a value, so this only triggers when the runtime context
      // supplies a falsy, non-nullish store name (e.g. an empty string).
      if (!vectorStoreName) throw new Error(`vectorStoreName is required, got: ${vectorStoreName}`);

      const topK: number = runtimeContext.get('topK') ?? context.topK ?? 10;
      const filter: Record<string, any> = runtimeContext.get('filter') ?? context.filter;
      const queryText = context.queryText;
      // A filter placed on the runtime context enables filtering even if the option is off.
      const enableFilter = !!runtimeContext.get('filter') || (options.enableFilter ?? false);

      const logger = mastra?.getLogger();
      if (logger) {
        logger.debug('[VectorQueryTool] execute called with:', { queryText, topK, filter, databaseConfig });
      } else {
        console.warn(
          '[VectorQueryTool] Logger not initialized: no debug or error logs will be recorded for this tool execution.',
        );
      }

      try {
        // Runtime values are untyped, so accept numeric strings and fall back to 10 otherwise.
        const topKValue =
          typeof topK === 'number' && !isNaN(topK)
            ? topK
            : typeof topK === 'string' && !isNaN(Number(topK))
              ? Number(topK)
              : 10;

        let vectorStore: MastraVector | undefined = undefined;
        if ('vectorStore' in options) {
          vectorStore = options.vectorStore;
        } else if (mastra) {
          vectorStore = mastra.getVector(vectorStoreName);
        }
        if (!vectorStore) {
          logger?.error('Vector store not found', { vectorStoreName });
          return { relevantContext: [], sources: [] };
        }

        // Build the metadata filter. String filters are parsed as JSON; anything that does
        // not end up as a plain object is discarded in favour of the empty filter.
        let queryFilter = {};
        if (enableFilter && filter) {
          let parsedFilter: unknown = filter;
          if (typeof filter === 'string') {
            try {
              parsedFilter = JSON.parse(filter);
            } catch (error) {
              logger?.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
              parsedFilter = {};
            }
          }
          if (isFilterObject(parsedFilter)) {
            queryFilter = parsedFilter;
          } else {
            // e.g. '"text"', '[1, 2]' or 'null': valid JSON, but not a filter object.
            logger?.warn('Filter is not a JSON object, using empty filter', { filter });
          }
        }
        logger?.debug('Prepared vector query parameters', { queryText, topK: topKValue, queryFilter, databaseConfig });

        const { results } = await vectorQuerySearch({
          indexName,
          vectorStore,
          queryText,
          model,
          // An empty filter is sent as `undefined`.
          queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
          topK: topKValue,
          includeVectors,
          databaseConfig,
        });
        logger?.debug('vectorQuerySearch returned results', { count: results.length });

        if (reranker) {
          logger?.debug('Reranking results', { rerankerModel: reranker.model, rerankerOptions: reranker.options });

          let rerankedResults: RerankResult[] = [];
          // Falsy reranker topK (including 0) falls back to the query topK.
          const rerankOptions = {
            ...reranker.options,
            topK: reranker.options?.topK || topKValue,
          };

          // A model exposing `getRelevanceScore` is used as a scorer directly; anything else
          // goes through `rerank`. The null guard keeps the `in` check from throwing.
          if (
            typeof reranker.model === 'object' &&
            reranker.model !== null &&
            'getRelevanceScore' in reranker.model
          ) {
            rerankedResults = await rerankWithScorer({
              results,
              query: queryText,
              scorer: reranker.model,
              options: rerankOptions,
            });
          } else {
            rerankedResults = await rerank(results, queryText, reranker.model, rerankOptions);
          }

          logger?.debug('Reranking complete', { rerankedCount: rerankedResults.length });

          const relevantChunks = rerankedResults.map(({ result }) => result?.metadata);

          logger?.debug('Returning reranked relevant context chunks', { count: relevantChunks.length });

          const sources = includeSources ? convertToSources(rerankedResults) : [];

          return { relevantContext: relevantChunks, sources };
        }

        const relevantChunks = results.map(result => result?.metadata);

        logger?.debug('Returning relevant context chunks', { count: relevantChunks.length });

        // `sources` exposes the full retrieval objects
        const sources = includeSources ? convertToSources(results) : [];
        return {
          relevantContext: relevantChunks,
          sources,
        };
      } catch (err) {
        // Failures are reported through the logger (when present) and surfaced as an empty result.
        logger?.error('Unexpected error in VectorQueryTool execute', {
          error: err,
          errorMessage: err instanceof Error ? err.message : String(err),
          errorStack: err instanceof Error ? err.stack : undefined,
        });
        return { relevantContext: [], sources: [] };
      }
    },
    // Use any for output schema as the structure of the output causes type inference issues
  }) as RagTool<typeof inputSchema, any>;
};
@@ -1,43 +0,0 @@
1
- import type { QueryResult } from '@mastra/core/vector';
2
- import type { RankedNode } from '../graph-rag';
3
- import type { RerankResult } from '../rerank';
4
-
5
- type SourceInput = QueryResult | RankedNode | RerankResult;
6
-
7
/**
 * Normalizes retrieval results of any supported kind into one uniform source shape.
 *
 * - RankedNode (has `content`): `embedding` becomes `vector`, `content` becomes `document`.
 * - RerankResult (has `result`): fields come from the wrapped result, `score` from the wrapper.
 * - QueryResult: fields are copied as-is.
 *
 * Missing vectors default to `[]` and missing document text defaults to `''`.
 *
 * @param results Array of source inputs to convert.
 * @returns Array of `{ id, vector, score, metadata, document }` objects, in input order.
 */
export const convertToSources = (results: SourceInput[]) => {
  return results.map(entry => {
    if ('content' in entry) {
      // RankedNode
      const { id, embedding, score, metadata, content } = entry;
      return { id, vector: embedding || [], score, metadata, document: content || '' };
    }

    // RerankResult wraps the underlying hit; a QueryResult is the hit itself.
    const hit = 'result' in entry ? entry.result : entry;
    return {
      id: hit.id,
      vector: hit.vector || [],
      // For a RerankResult this is the wrapper's score, not the wrapped result's.
      score: entry.score,
      metadata: hit.metadata,
      document: hit.document || '',
    };
  });
};
@@ -1,38 +0,0 @@
1
/**
 * Returns the stock description text for a vector query tool.
 */
export function defaultVectorQueryDescription(): string {
  return 'Access the knowledge base to find information needed to answer user questions.';
}
3
-
4
/**
 * Returns the stock description text for a graph RAG tool.
 */
export function defaultGraphRagDescription(): string {
  return 'Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.';
}
6
-
7
/**
 * Guidance text describing the `queryText` input field, one rule per line.
 */
export const queryTextDescription = [
  'The text query to search for in the vector database.',
  '- ALWAYS provide a non-empty query string',
  "- Must contain the user's question or search terms",
  '- Example: "market data" or "financial reports"',
  "- If the user's query is about a specific topic, use that topic as the queryText",
  '- Cannot be an empty string',
  '- Do not include quotes, just the text itself',
  '- Required for all searches',
].join('\n');
15
-
16
/**
 * Guidance text describing the `topK` input field, one rule per line.
 */
export const topKDescription = [
  'Controls how many matching documents to return.',
  '- ALWAYS provide a value',
  '- If no value is provided, use the default (10)',
  '- Must be a valid and positive number',
  '- Cannot be NaN',
  '- Uses provided value if specified',
  '- Default: 10 results (use this if unsure)',
  '- Higher values (like 20) provide more context',
  '- Lower values (like 3) focus on best matches',
  '- Based on query requirements',
].join('\n');
26
-
27
/**
 * Guidance text describing the `filter` input field, one rule per line.
 */
export const filterDescription = [
  'JSON-formatted criteria to refine search results.',
  '- ALWAYS provide a filter value',
  '- If no filter is provided, use the default ("{}")',
  '- MUST be a valid, complete JSON object with proper quotes and brackets',
  '- Uses provided filter if specified',
  '- Default: "{}" (no filtering)',
  '- Example for no filtering: "filter": "{}"',
  `- Example: '{"category": "health"}'`,
  '- Based on query intent',
  '- Do NOT use single quotes or unquoted properties',
  '- IMPORTANT: Always ensure JSON is properly closed with matching brackets',
  '- Multiple filters can be combined',
].join('\n');
@@ -1,3 +0,0 @@
1
- export * from './vector-search';
2
- export * from './default-settings';
3
- export * from './tool-schemas';
@@ -1,38 +0,0 @@
1
- import type { Tool } from '@mastra/core/tools';
2
- import { z } from 'zod';
3
- import { queryTextDescription, topKDescription, filterDescription } from './default-settings';
4
-
5
// Free-text search query.
const queryTextField = z.string().describe(queryTextDescription);
// Number of matches to return; `coerce` converts non-number input (e.g. "5") to a number.
const topKField = z.coerce.number().describe(topKDescription);

/**
 * Raw zod shape (not yet wrapped in `z.object`) holding the `queryText` and `topK` input fields.
 */
export const baseSchema = {
  queryText: queryTextField,
  topK: topKField,
};
9
-
10
// One full retrieval result: everything needed to reference the original
// document, the chunk, and its similarity score.
const retrievedSourceSchema = z.object({
  id: z.string(), // Unique chunk/document identifier
  metadata: z.any(), // All metadata fields (document ID, etc.)
  vector: z.array(z.number()), // Embedding vector (if available)
  score: z.number(), // Similarity score for this retrieval
  document: z.string(), // Full chunk/document text (if available)
});

/**
 * Tool output: `relevantContext` (array of metadata or content, kept loosely typed for
 * compatibility with prior usage) and `sources`, the full set of retrieved chunks.
 */
export const outputSchema = z.object({
  relevantContext: z.any(),
  sources: z.array(retrievedSourceSchema),
});
27
-
28
// Filter criteria as a string; `coerce` stringifies whatever value is supplied.
const filterField = z.coerce.string().describe(filterDescription);

/**
 * Input schema with the base fields plus a string `filter` field.
 */
export const filterSchema = z.object({
  ...baseSchema,
  filter: filterField,
});
32
-
33
// Constraint shared by both RagTool type parameters: any zod schema, or none.
type OptionalZodSchema = z.ZodType<any, z.ZodTypeDef, any> | undefined;

/**
 * A `Tool` whose `execute` is guaranteed to be present (non-nullable).
 *
 * @typeParam TInput Input schema of the tool, or `undefined`.
 * @typeParam TOutput Output schema of the tool, or `undefined`.
 */
export type RagTool<TInput extends OptionalZodSchema, TOutput extends OptionalZodSchema> = Tool<TInput, TOutput> & {
  execute: NonNullable<Tool<TInput, TOutput>['execute']>;
};