@mastra/rag 1.2.2 → 1.2.3-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/index.cjs +25 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +25 -9
- package/dist/index.js.map +1 -1
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/types.d.ts +18 -5
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/vector-search.d.ts +6 -7
- package/dist/utils/vector-search.d.ts.map +1 -1
- package/package.json +19 -6
- package/.turbo/turbo-build.log +0 -4
- package/docker-compose.yaml +0 -22
- package/eslint.config.js +0 -6
- package/src/document/document.test.ts +0 -2975
- package/src/document/document.ts +0 -335
- package/src/document/extractors/base.ts +0 -30
- package/src/document/extractors/index.ts +0 -5
- package/src/document/extractors/keywords.test.ts +0 -125
- package/src/document/extractors/keywords.ts +0 -126
- package/src/document/extractors/questions.test.ts +0 -120
- package/src/document/extractors/questions.ts +0 -111
- package/src/document/extractors/summary.test.ts +0 -107
- package/src/document/extractors/summary.ts +0 -122
- package/src/document/extractors/title.test.ts +0 -121
- package/src/document/extractors/title.ts +0 -185
- package/src/document/extractors/types.ts +0 -40
- package/src/document/index.ts +0 -2
- package/src/document/prompts/base.ts +0 -77
- package/src/document/prompts/format.ts +0 -9
- package/src/document/prompts/index.ts +0 -15
- package/src/document/prompts/prompt.ts +0 -60
- package/src/document/prompts/types.ts +0 -29
- package/src/document/schema/index.ts +0 -3
- package/src/document/schema/node.ts +0 -187
- package/src/document/schema/types.ts +0 -40
- package/src/document/transformers/character.ts +0 -267
- package/src/document/transformers/html.ts +0 -346
- package/src/document/transformers/json.ts +0 -536
- package/src/document/transformers/latex.ts +0 -11
- package/src/document/transformers/markdown.ts +0 -239
- package/src/document/transformers/semantic-markdown.ts +0 -227
- package/src/document/transformers/sentence.ts +0 -314
- package/src/document/transformers/text.ts +0 -158
- package/src/document/transformers/token.ts +0 -137
- package/src/document/transformers/transformer.ts +0 -5
- package/src/document/types.ts +0 -145
- package/src/document/validation.ts +0 -158
- package/src/graph-rag/index.test.ts +0 -235
- package/src/graph-rag/index.ts +0 -306
- package/src/index.ts +0 -8
- package/src/rerank/index.test.ts +0 -150
- package/src/rerank/index.ts +0 -198
- package/src/rerank/relevance/cohere/index.ts +0 -56
- package/src/rerank/relevance/index.ts +0 -3
- package/src/rerank/relevance/mastra-agent/index.ts +0 -32
- package/src/rerank/relevance/zeroentropy/index.ts +0 -26
- package/src/tools/README.md +0 -153
- package/src/tools/document-chunker.ts +0 -34
- package/src/tools/graph-rag.test.ts +0 -115
- package/src/tools/graph-rag.ts +0 -154
- package/src/tools/index.ts +0 -3
- package/src/tools/types.ts +0 -110
- package/src/tools/vector-query-database-config.test.ts +0 -190
- package/src/tools/vector-query.test.ts +0 -418
- package/src/tools/vector-query.ts +0 -169
- package/src/utils/convert-sources.ts +0 -43
- package/src/utils/default-settings.ts +0 -38
- package/src/utils/index.ts +0 -3
- package/src/utils/tool-schemas.ts +0 -38
- package/src/utils/vector-prompts.ts +0 -832
- package/src/utils/vector-search.ts +0 -117
- package/tsconfig.build.json +0 -9
- package/tsconfig.json +0 -5
- package/tsup.config.ts +0 -17
- package/vitest.config.ts +0 -8
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
import { createTool } from '@mastra/core/tools';
|
|
2
|
-
import type { MastraVector } from '@mastra/core/vector';
|
|
3
|
-
import type { EmbeddingModel } from 'ai';
|
|
4
|
-
import { z } from 'zod';
|
|
5
|
-
|
|
6
|
-
import { rerank, rerankWithScorer } from '../rerank';
|
|
7
|
-
import type { RerankConfig, RerankResult } from '../rerank';
|
|
8
|
-
import { vectorQuerySearch, defaultVectorQueryDescription, filterSchema, outputSchema, baseSchema } from '../utils';
|
|
9
|
-
import type { RagTool } from '../utils';
|
|
10
|
-
import { convertToSources } from '../utils/convert-sources';
|
|
11
|
-
import type { VectorQueryToolOptions } from './types';
|
|
12
|
-
|
|
13
|
-
/**
 * Creates a tool that runs a text query against a vector index and returns the
 * metadata of the matching results, plus (optionally) the full source records.
 *
 * Most settings can be overridden per call through `runtimeContext`; a value
 * found there wins over the corresponding entry in `options`.
 *
 * @param options Tool configuration (index name, vector store or store name,
 *   embedding model, optional reranker, filter toggle, id/description overrides).
 * @returns The tool produced by `createTool`, typed as a `RagTool` whose
 *   `execute` is always defined.
 */
export const createVectorQueryTool = (options: VectorQueryToolOptions) => {
  const storeName = options.vectorStoreName || 'DirectVectorStore';
  const toolId = options.id || `VectorQuery ${storeName} ${options.indexName} Tool`;
  const toolDescription = options.description || defaultVectorQueryDescription();
  // The `filter` input field is only advertised when filtering was enabled at creation time.
  const inputSchema = options.enableFilter ? filterSchema : z.object(baseSchema).passthrough();

  return createTool({
    id: toolId,
    description: toolDescription,
    inputSchema,
    outputSchema,
    execute: async ({ context, mastra, runtimeContext }) => {
      // --- Resolve settings: runtime context first, creation-time options second ---
      const indexName: string = runtimeContext.get('indexName') ?? options.indexName;
      // A directly supplied vector store pins the store name; otherwise it may be overridden at runtime.
      const vectorStoreName: string =
        'vectorStore' in options ? storeName : (runtimeContext.get('vectorStoreName') ?? storeName);
      const includeVectors: boolean = runtimeContext.get('includeVectors') ?? options.includeVectors ?? false;
      const includeSources: boolean = runtimeContext.get('includeSources') ?? options.includeSources ?? true;
      const reranker: RerankConfig = runtimeContext.get('reranker') ?? options.reranker;
      const databaseConfig = runtimeContext.get('databaseConfig') ?? options.databaseConfig;
      const model: EmbeddingModel<string> = runtimeContext.get('model') ?? options.model;

      // These two checks sit outside the try/catch below, so they reject instead of returning an empty result.
      if (!indexName) {
        throw new Error(`indexName is required, got: ${indexName}`);
      }
      if (!vectorStoreName) {
        // Not expected to trigger: `storeName` always falls back to a non-empty default.
        throw new Error(`vectorStoreName is required, got: ${vectorStoreName}`);
      }

      const topK: number = runtimeContext.get('topK') ?? context.topK ?? 10;
      const filter: Record<string, any> = runtimeContext.get('filter') ?? context.filter;
      const queryText = context.queryText;
      // A filter supplied through the runtime context switches filtering on even if the option is off.
      const enableFilter = Boolean(runtimeContext.get('filter')) || (options.enableFilter ?? false);

      const logger = mastra?.getLogger();
      if (!logger) {
        console.warn(
          '[VectorQueryTool] Logger not initialized: no debug or error logs will be recorded for this tool execution.',
        );
      }
      logger?.debug('[VectorQueryTool] execute called with:', { queryText, topK, filter, databaseConfig });

      try {
        // Accept a number or a numeric string; anything else falls back to 10.
        let topKValue = 10;
        if (typeof topK === 'number' && !isNaN(topK)) {
          topKValue = topK;
        } else if (typeof topK === 'string' && !isNaN(Number(topK))) {
          topKValue = Number(topK);
        }

        // Prefer the store passed in the options; otherwise look it up by name on the Mastra instance.
        const vectorStore: MastraVector | undefined =
          'vectorStore' in options ? options.vectorStore : mastra?.getVector(vectorStoreName);
        if (!vectorStore) {
          logger?.error('Vector store not found', { vectorStoreName });
          return { relevantContext: [], sources: [] };
        }

        // Build the filter handed to the vector database; an unparsable filter degrades to "no filter".
        let queryFilter = {};
        if (enableFilter && filter) {
          try {
            queryFilter = typeof filter === 'string' ? JSON.parse(filter) : filter;
          } catch (error) {
            logger?.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
            queryFilter = {};
          }
        }
        logger?.debug('Prepared vector query parameters', { queryText, topK: topKValue, queryFilter, databaseConfig });

        const hasFilterKeys = Object.keys(queryFilter || {}).length > 0;
        const { results } = await vectorQuerySearch({
          indexName,
          vectorStore,
          queryText,
          model,
          queryFilter: hasFilterKeys ? queryFilter : undefined,
          topK: topKValue,
          includeVectors,
          databaseConfig,
        });
        logger?.debug('vectorQuerySearch returned results', { count: results.length });

        // Without a reranker the raw query results are returned directly.
        if (!reranker) {
          const plainChunks = results.map(hit => hit?.metadata);
          logger?.debug('Returning relevant context chunks', { count: plainChunks.length });

          // `sources` exposes the full retrieval objects
          return {
            relevantContext: plainChunks,
            sources: includeSources ? convertToSources(results) : [],
          };
        }

        logger?.debug('Reranking results', { rerankerModel: reranker.model, rerankerOptions: reranker.options });

        let rerankedResults: RerankResult[] = [];
        // An object exposing `getRelevanceScore` is used as a scorer; anything else goes through `rerank`.
        if (typeof reranker.model === 'object' && 'getRelevanceScore' in reranker.model) {
          rerankedResults = await rerankWithScorer({
            results,
            query: queryText,
            scorer: reranker.model,
            options: {
              ...reranker.options,
              topK: reranker.options?.topK || topKValue,
            },
          });
        } else {
          rerankedResults = await rerank(results, queryText, reranker.model, {
            ...reranker.options,
            topK: reranker.options?.topK || topKValue,
          });
        }
        logger?.debug('Reranking complete', { rerankedCount: rerankedResults.length });

        const rerankedChunks = rerankedResults.map(ranked => ranked.result?.metadata);
        logger?.debug('Returning reranked relevant context chunks', { count: rerankedChunks.length });

        return {
          relevantContext: rerankedChunks,
          sources: includeSources ? convertToSources(rerankedResults) : [],
        };
      } catch (err) {
        // Best effort: a failure is logged (when a logger exists) and reported as "nothing found".
        const isError = err instanceof Error;
        logger?.error('Unexpected error in VectorQueryTool execute', {
          error: err,
          errorMessage: isError ? err.message : String(err),
          errorStack: isError ? err.stack : undefined,
        });
        return { relevantContext: [], sources: [] };
      }
    },
    // Use any for output schema as the structure of the output causes type inference issues
  }) as RagTool<typeof inputSchema, any>;
};
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import type { QueryResult } from '@mastra/core/vector';
|
|
2
|
-
import type { RankedNode } from '../graph-rag';
|
|
3
|
-
import type { RerankResult } from '../rerank';
|
|
4
|
-
|
|
5
|
-
type SourceInput = QueryResult | RankedNode | RerankResult;
|
|
6
|
-
|
|
7
|
-
/**
 * Flattens retrieval results into a uniform
 * `{ id, vector, score, metadata, document }` record.
 *
 * The input flavour is detected per element:
 * - has a `content` key: treated as a RankedNode (`embedding` -> `vector`, `content` -> `document`)
 * - has a `result` key: treated as a RerankResult (fields come from the nested `result`, score from the wrapper)
 * - otherwise: treated as a QueryResult and copied field by field
 *
 * A missing vector becomes `[]` and a missing document becomes `''`.
 *
 * @param results Array of source inputs to convert.
 * @returns Array of sources, in the same order as `results`.
 */
export const convertToSources = (results: SourceInput[]) =>
  results.map(entry => {
    if ('content' in entry) {
      // RankedNode
      const { id, embedding, score, metadata, content } = entry;
      return { id, vector: embedding || [], score, metadata, document: content || '' };
    }

    if ('result' in entry) {
      // RerankResult: the wrapper carries the rerank score, the payload everything else
      const { result: hit, score } = entry;
      return {
        id: hit.id,
        vector: hit.vector || [],
        score,
        metadata: hit.metadata,
        document: hit.document || '',
      };
    }

    // QueryResult
    const { id, vector, score, metadata, document: text } = entry;
    return { id, vector: vector || [], score, metadata, document: text || '' };
  });
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/**
 * Fallback description for the vector query tool when the caller supplies none.
 *
 * @returns The default description text.
 */
export const defaultVectorQueryDescription = () => {
  return 'Access the knowledge base to find information needed to answer user questions.';
};
|
|
3
|
-
|
|
4
|
-
/**
 * Fallback description for the graph RAG tool when the caller supplies none.
 *
 * @returns The default description text.
 */
export const defaultGraphRagDescription = () => {
  return 'Access and analyze relationships between information in the knowledge base to answer complex questions about connections and patterns.';
};
|
|
6
|
-
|
|
7
|
-
/**
 * Description attached to the `queryText` input field.
 * Stored one entry per line and joined with `\n`.
 */
export const queryTextDescription = [
  'The text query to search for in the vector database.',
  '- ALWAYS provide a non-empty query string',
  "- Must contain the user's question or search terms",
  '- Example: "market data" or "financial reports"',
  "- If the user's query is about a specific topic, use that topic as the queryText",
  '- Cannot be an empty string',
  '- Do not include quotes, just the text itself',
  '- Required for all searches',
].join('\n');
|
|
15
|
-
|
|
16
|
-
/**
 * Description attached to the `topK` input field.
 * Stored one entry per line and joined with `\n`.
 */
export const topKDescription = [
  'Controls how many matching documents to return.',
  '- ALWAYS provide a value',
  '- If no value is provided, use the default (10)',
  '- Must be a valid and positive number',
  '- Cannot be NaN',
  '- Uses provided value if specified',
  '- Default: 10 results (use this if unsure)',
  '- Higher values (like 20) provide more context',
  '- Lower values (like 3) focus on best matches',
  '- Based on query requirements',
].join('\n');
|
|
26
|
-
|
|
27
|
-
/**
 * Description attached to the `filter` input field.
 * Stored one entry per line and joined with `\n`.
 */
export const filterDescription = [
  'JSON-formatted criteria to refine search results.',
  '- ALWAYS provide a filter value',
  '- If no filter is provided, use the default ("{}")',
  '- MUST be a valid, complete JSON object with proper quotes and brackets',
  '- Uses provided filter if specified',
  '- Default: "{}" (no filtering)',
  '- Example for no filtering: "filter": "{}"',
  // Contains both quote styles, so this entry is a template literal.
  `- Example: '{"category": "health"}'`,
  '- Based on query intent',
  '- Do NOT use single quotes or unquoted properties',
  '- IMPORTANT: Always ensure JSON is properly closed with matching brackets',
  '- Multiple filters can be combined',
].join('\n');
|
package/src/utils/index.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import type { Tool } from '@mastra/core/tools';
|
|
2
|
-
import { z } from 'zod';
|
|
3
|
-
import { queryTextDescription, topKDescription, filterDescription } from './default-settings';
|
|
4
|
-
|
|
5
|
-
// Field validators for the inputs used by every RAG tool.
const queryTextField = z.string().describe(queryTextDescription);
// `z.coerce` converts the incoming value with `Number(...)` before validating it.
const topKField = z.coerce.number().describe(topKDescription);

/**
 * Plain shape object (not yet wrapped in `z.object`) with the `queryText` and
 * `topK` fields, so callers can spread it into a larger schema.
 */
export const baseSchema = {
  queryText: queryTextField,
  topK: topKField,
};
|
|
9
|
-
|
|
10
|
-
// One entry of `sources`: a full retrieval result with everything needed to
// reference the original document, chunk, and similarity score.
const sourceSchema = z.object({
  id: z.string(), // Unique chunk/document identifier
  metadata: z.any(), // All metadata fields (document ID, etc.)
  vector: z.array(z.number()), // Embedding vector (if available)
  score: z.number(), // Similarity score for this retrieval
  document: z.string(), // Full chunk/document text (if available)
});

/**
 * Output schema of the RAG tools.
 * - `relevantContext`: array of metadata or content, left untyped for compatibility with prior usage
 * - `sources`: the full set of retrieved chunks
 */
export const outputSchema = z.object({
  relevantContext: z.any(),
  sources: z.array(sourceSchema),
});
|
|
27
|
-
|
|
28
|
-
// Base fields plus a `filter` field; `z.coerce.string()` converts the incoming value with `String(...)`.
const filterShape = {
  ...baseSchema,
  filter: z.coerce.string().describe(filterDescription),
};

/** Input schema that adds a `filter` field to the base `queryText` / `topK` fields. */
export const filterSchema = z.object(filterShape);
|
|
32
|
-
|
|
33
|
-
/** Constraint shared by both `RagTool` type parameters: any zod schema, or none. */
type RagToolSchema = z.ZodType<any, z.ZodTypeDef, any> | undefined;

/**
 * A `Tool` whose `execute` member is non-nullable, so callers can invoke it
 * without a presence check.
 */
export type RagTool<TInput extends RagToolSchema, TOutput extends RagToolSchema> = Tool<TInput, TOutput> & {
  execute: NonNullable<Tool<TInput, TOutput>['execute']>;
};
|