@mastra/rag 1.2.2 → 1.2.3-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/index.cjs +25 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +25 -9
- package/dist/index.js.map +1 -1
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/types.d.ts +18 -5
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/vector-search.d.ts +6 -7
- package/dist/utils/vector-search.d.ts.map +1 -1
- package/package.json +19 -6
- package/.turbo/turbo-build.log +0 -4
- package/docker-compose.yaml +0 -22
- package/eslint.config.js +0 -6
- package/src/document/document.test.ts +0 -2975
- package/src/document/document.ts +0 -335
- package/src/document/extractors/base.ts +0 -30
- package/src/document/extractors/index.ts +0 -5
- package/src/document/extractors/keywords.test.ts +0 -125
- package/src/document/extractors/keywords.ts +0 -126
- package/src/document/extractors/questions.test.ts +0 -120
- package/src/document/extractors/questions.ts +0 -111
- package/src/document/extractors/summary.test.ts +0 -107
- package/src/document/extractors/summary.ts +0 -122
- package/src/document/extractors/title.test.ts +0 -121
- package/src/document/extractors/title.ts +0 -185
- package/src/document/extractors/types.ts +0 -40
- package/src/document/index.ts +0 -2
- package/src/document/prompts/base.ts +0 -77
- package/src/document/prompts/format.ts +0 -9
- package/src/document/prompts/index.ts +0 -15
- package/src/document/prompts/prompt.ts +0 -60
- package/src/document/prompts/types.ts +0 -29
- package/src/document/schema/index.ts +0 -3
- package/src/document/schema/node.ts +0 -187
- package/src/document/schema/types.ts +0 -40
- package/src/document/transformers/character.ts +0 -267
- package/src/document/transformers/html.ts +0 -346
- package/src/document/transformers/json.ts +0 -536
- package/src/document/transformers/latex.ts +0 -11
- package/src/document/transformers/markdown.ts +0 -239
- package/src/document/transformers/semantic-markdown.ts +0 -227
- package/src/document/transformers/sentence.ts +0 -314
- package/src/document/transformers/text.ts +0 -158
- package/src/document/transformers/token.ts +0 -137
- package/src/document/transformers/transformer.ts +0 -5
- package/src/document/types.ts +0 -145
- package/src/document/validation.ts +0 -158
- package/src/graph-rag/index.test.ts +0 -235
- package/src/graph-rag/index.ts +0 -306
- package/src/index.ts +0 -8
- package/src/rerank/index.test.ts +0 -150
- package/src/rerank/index.ts +0 -198
- package/src/rerank/relevance/cohere/index.ts +0 -56
- package/src/rerank/relevance/index.ts +0 -3
- package/src/rerank/relevance/mastra-agent/index.ts +0 -32
- package/src/rerank/relevance/zeroentropy/index.ts +0 -26
- package/src/tools/README.md +0 -153
- package/src/tools/document-chunker.ts +0 -34
- package/src/tools/graph-rag.test.ts +0 -115
- package/src/tools/graph-rag.ts +0 -154
- package/src/tools/index.ts +0 -3
- package/src/tools/types.ts +0 -110
- package/src/tools/vector-query-database-config.test.ts +0 -190
- package/src/tools/vector-query.test.ts +0 -418
- package/src/tools/vector-query.ts +0 -169
- package/src/utils/convert-sources.ts +0 -43
- package/src/utils/default-settings.ts +0 -38
- package/src/utils/index.ts +0 -3
- package/src/utils/tool-schemas.ts +0 -38
- package/src/utils/vector-prompts.ts +0 -832
- package/src/utils/vector-search.ts +0 -117
- package/tsconfig.build.json +0 -9
- package/tsconfig.json +0 -5
- package/tsup.config.ts +0 -17
- package/vitest.config.ts +0 -8
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import type { RelevanceScoreProvider } from '@mastra/core/relevance';
|
|
2
|
-
|
|
3
|
-
/**
 * Shape this module expects from the JSON body of Cohere's `POST /v2/rerank` response.
 * Only `results[].relevance_score` is read by the scorer below.
 */
interface CohereRerankingResponse {
  results: Array<{
    index: number;
    relevance_score: number;
  }>;
  id: string;
  meta: {
    api_version: {
      version: string;
      is_experimental: boolean;
    };
    billed_units: {
      search_units: number;
    };
  };
}

/**
 * Relevance scorer that asks Cohere's rerank endpoint to score a single text against a query.
 */
export class CohereRelevanceScorer implements RelevanceScoreProvider {
  private model: string;
  private apiKey?: string;

  /**
   * @param model - Rerank model identifier sent as `model` in the request body.
   * @param apiKey - Key sent as the `Authorization: Bearer …` header value.
   */
  constructor(model: string, apiKey?: string) {
    this.apiKey = apiKey;
    this.model = model;
  }

  /**
   * Scores `text` against `query` by sending it as the only document of a rerank request.
   *
   * @param query - The search query.
   * @param text - The candidate passage to score.
   * @returns The `relevance_score` of the first result, including a legitimate score of `0`.
   * @throws Error when the HTTP response is not OK (message carries status and body text),
   *   or when the response holds no numeric relevance score.
   */
  async getRelevanceScore(query: string, text: string): Promise<number> {
    const response = await fetch(`https://api.cohere.com/v2/rerank`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        query,
        documents: [text],
        model: this.model,
        top_n: 1,
      }),
    });

    if (!response.ok) {
      throw new Error(`Cohere API error: ${response.status} ${await response.text()}`);
    }

    const data = (await response.json()) as CohereRerankingResponse;
    const relevanceScore = data.results?.[0]?.relevance_score;

    // Check for absence explicitly: a truthiness test would reject a valid score of 0.
    if (typeof relevanceScore !== 'number' || Number.isNaN(relevanceScore)) {
      throw new Error('No relevance score found on Cohere response');
    }

    return relevanceScore;
  }
}
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { Agent } from '@mastra/core/agent';
|
|
2
|
-
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
3
|
-
import { createSimilarityPrompt } from '@mastra/core/relevance';
|
|
4
|
-
import type { RelevanceScoreProvider } from '@mastra/core/relevance';
|
|
5
|
-
|
|
6
|
-
// Mastra Agent implementation
|
|
7
|
-
/**
 * Relevance scorer that prompts a Mastra `Agent` to rate how well a text answers a query.
 */
export class MastraAgentRelevanceScorer implements RelevanceScoreProvider {
  private agent: Agent;

  /**
   * @param name - Suffix used in the agent's display name (`Relevance Scorer <name>`).
   * @param model - Language model the scoring agent runs on.
   */
  constructor(name: string, model: MastraLanguageModel) {
    this.agent = new Agent({
      name: `Relevance Scorer ${name}`,
      instructions: `You are a specialized agent for evaluating the relevance of text to queries.
Your task is to rate how well a text passage answers a given query.
Output only a number between 0 and 1, where:
1.0 = Perfectly relevant, directly answers the query
0.0 = Completely irrelevant
Consider:
- Direct relevance to the question
- Completeness of information
- Quality and specificity
Always return just the number, no explanation.`,
      model,
    });
  }

  /**
   * Asks the agent to score `text` against `query` and parses the reply as a number.
   *
   * @param query - The search query.
   * @param text - The candidate passage to score.
   * @returns The number parsed from the agent's reply.
   * @throws Error when the reply does not start with a finite number, instead of
   *   letting `NaN` flow into downstream score arithmetic.
   */
  async getRelevanceScore(query: string, text: string): Promise<number> {
    const prompt = createSimilarityPrompt(query, text);
    const response = await this.agent.generate(prompt);
    const score = parseFloat(response.text);

    // The model is only instructed to answer with a number; nothing guarantees it.
    if (!Number.isFinite(score)) {
      throw new Error(`Relevance scorer agent returned a non-numeric response: ${JSON.stringify(response.text)}`);
    }

    return score;
  }
}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import type { RelevanceScoreProvider } from '@mastra/core/relevance';
|
|
2
|
-
import ZeroEntropy from 'zeroentropy';
|
|
3
|
-
|
|
4
|
-
// ZeroEntropy implementation
|
|
5
|
-
/**
 * Relevance scorer that delegates to the ZeroEntropy client's `models.rerank` call.
 */
export class ZeroEntropyRelevanceScorer implements RelevanceScoreProvider {
  private client: ZeroEntropy;
  private model: string;

  /**
   * @param model - Rerank model name; falls back to `'zerank-1'` when omitted or empty.
   * @param apiKey - API key; falls back to `process.env.ZEROENTROPY_API_KEY`, then to an empty string.
   */
  constructor(model?: string, apiKey?: string) {
    const resolvedKey = apiKey || process.env.ZEROENTROPY_API_KEY || '';
    this.client = new ZeroEntropy({ apiKey: resolvedKey });
    this.model = model || 'zerank-1';
  }

  /**
   * Reranks `text` as the single candidate document for `query`.
   *
   * @returns The first result's `relevance_score`, or `0` when there is no first result
   *   or it carries no score.
   */
  async getRelevanceScore(query: string, text: string): Promise<number> {
    const request = {
      query,
      documents: [text],
      model: this.model,
      top_n: 1,
    };
    const reranked = await this.client.models.rerank(request);
    const top = reranked.results[0];

    return top?.relevance_score ?? 0;
  }
}
|
package/src/tools/README.md
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
# Vector Query Tool with Database-Specific Configurations
|
|
2
|
-
|
|
3
|
-
The `createVectorQueryTool` function now supports database-specific configurations to handle unique properties and optimizations for different vector databases.
|
|
4
|
-
|
|
5
|
-
## Database Configuration Types
|
|
6
|
-
|
|
7
|
-
### Pinecone Configuration
|
|
8
|
-
|
|
9
|
-
```typescript
|
|
10
|
-
import { createVectorQueryTool } from '@mastra/rag/tools';
|
|
11
|
-
|
|
12
|
-
const pineconeVectorTool = createVectorQueryTool({
|
|
13
|
-
id: 'pinecone-search',
|
|
14
|
-
indexName: 'my-index',
|
|
15
|
-
vectorStoreName: 'pinecone',
|
|
16
|
-
model: embedModel,
|
|
17
|
-
databaseConfig: {
|
|
18
|
-
pinecone: {
|
|
19
|
-
namespace: 'my-namespace', // Pinecone namespace
|
|
20
|
-
sparseVector: {
|
|
21
|
-
// For hybrid search
|
|
22
|
-
indices: [0, 1, 2],
|
|
23
|
-
values: [0.1, 0.2, 0.3],
|
|
24
|
-
},
|
|
25
|
-
},
|
|
26
|
-
},
|
|
27
|
-
});
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
### pgVector Configuration
|
|
31
|
-
|
|
32
|
-
```typescript
|
|
33
|
-
const pgVectorTool = createVectorQueryTool({
|
|
34
|
-
id: 'pgvector-search',
|
|
35
|
-
indexName: 'my-index',
|
|
36
|
-
vectorStoreName: 'postgres',
|
|
37
|
-
model: embedModel,
|
|
38
|
-
databaseConfig: {
|
|
39
|
-
pgvector: {
|
|
40
|
-
minScore: 0.7, // Minimum similarity score
|
|
41
|
-
ef: 200, // HNSW search parameter
|
|
42
|
-
probes: 10, // IVFFlat probe parameter
|
|
43
|
-
},
|
|
44
|
-
},
|
|
45
|
-
});
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
### Chroma Configuration
|
|
49
|
-
|
|
50
|
-
```typescript
|
|
51
|
-
const chromaTool = createVectorQueryTool({
|
|
52
|
-
id: 'chroma-search',
|
|
53
|
-
indexName: 'my-index',
|
|
54
|
-
vectorStoreName: 'chroma',
|
|
55
|
-
model: embedModel,
|
|
56
|
-
databaseConfig: {
|
|
57
|
-
chroma: {
|
|
58
|
-
where: {
|
|
59
|
-
// Metadata filtering
|
|
60
|
-
category: 'documents',
|
|
61
|
-
},
|
|
62
|
-
whereDocument: {
|
|
63
|
-
// Document content filtering
|
|
64
|
-
$contains: 'important',
|
|
65
|
-
},
|
|
66
|
-
},
|
|
67
|
-
},
|
|
68
|
-
});
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
## Runtime Configuration Override
|
|
72
|
-
|
|
73
|
-
You can also override database configurations at runtime using the runtime context:
|
|
74
|
-
|
|
75
|
-
```typescript
|
|
76
|
-
import { RuntimeContext } from '@mastra/core/runtime-context';
|
|
77
|
-
|
|
78
|
-
const runtimeContext = new RuntimeContext();
|
|
79
|
-
|
|
80
|
-
// Override Pinecone namespace at runtime
|
|
81
|
-
runtimeContext.set('databaseConfig', {
|
|
82
|
-
pinecone: {
|
|
83
|
-
namespace: 'runtime-namespace',
|
|
84
|
-
},
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
await vectorTool.execute({
|
|
88
|
-
context: { queryText: 'search query' },
|
|
89
|
-
mastra,
|
|
90
|
-
runtimeContext,
|
|
91
|
-
});
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
## Extensibility for New Databases
|
|
95
|
-
|
|
96
|
-
The system is designed to be extensible. For new vector databases, you can:
|
|
97
|
-
|
|
98
|
-
1. Add configuration types:
|
|
99
|
-
|
|
100
|
-
```typescript
|
|
101
|
-
export interface NewDatabaseConfig {
|
|
102
|
-
customParam1?: string;
|
|
103
|
-
customParam2?: number;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export type DatabaseConfig = {
|
|
107
|
-
pinecone?: PineconeConfig;
|
|
108
|
-
pgvector?: PgVectorConfig;
|
|
109
|
-
chroma?: ChromaConfig;
|
|
110
|
-
newdatabase?: NewDatabaseConfig; // Add your config here
|
|
111
|
-
[key: string]: any;
|
|
112
|
-
};
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
2. The configuration will be automatically passed through to the vector store's query method.
|
|
116
|
-
|
|
117
|
-
## Type Safety
|
|
118
|
-
|
|
119
|
-
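The built-in database configurations (`pinecone`, `pgvector`, `chroma`) are fully typed, providing IntelliSense and compile-time checking; any other key is accepted through the `[key: string]: any` index signature and is not type-checked: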
|
|
120
|
-
|
|
121
|
-
```typescript
|
|
122
|
-
const config: DatabaseConfig = {
|
|
123
|
-
pinecone: {
|
|
124
|
-
namespace: 'valid-namespace',
|
|
125
|
-
sparseVector: {
|
|
126
|
-
indices: [1, 2, 3],
|
|
127
|
-
values: [0.1, 0.2, 0.3],
|
|
128
|
-
},
|
|
129
|
-
},
|
|
130
|
-
pgvector: {
|
|
131
|
-
minScore: 0.8,
|
|
132
|
-
ef: 100,
|
|
133
|
-
probes: 5,
|
|
134
|
-
},
|
|
135
|
-
};
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
## Migration Guide
|
|
139
|
-
|
|
140
|
-
Existing code will continue to work without changes. To add database-specific configurations:
|
|
141
|
-
|
|
142
|
-
```diff
|
|
143
|
-
const vectorTool = createVectorQueryTool({
|
|
144
|
-
indexName: 'my-index',
|
|
145
|
-
vectorStoreName: 'pinecone',
|
|
146
|
-
model: embedModel,
|
|
147
|
-
+ databaseConfig: {
|
|
148
|
-
+ pinecone: {
|
|
149
|
-
+ namespace: 'my-namespace'
|
|
150
|
-
+ }
|
|
151
|
-
+ }
|
|
152
|
-
});
|
|
153
|
-
```
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import { createTool } from '@mastra/core/tools';
|
|
2
|
-
import { z } from 'zod';
|
|
3
|
-
|
|
4
|
-
import type { MDocument, ChunkParams } from '../document';
|
|
5
|
-
|
|
6
|
-
/** Chunking parameters applied when the caller does not pass `params`. */
const DEFAULT_CHUNK_PARAMS = {
  strategy: 'recursive' as const,
  maxSize: 512,
  overlap: 50,
  separators: ['\n'],
} satisfies ChunkParams;

/**
 * Creates a tool that chunks a fixed document with fixed parameters.
 *
 * The tool takes no input (empty object schema). Its id and description are derived
 * from the chunking strategy, max size and overlap at creation time.
 *
 * @param doc - Document whose `chunk` method is invoked on every execution.
 * @param params - Chunking parameters; defaults to `DEFAULT_CHUNK_PARAMS`.
 * @returns A tool whose `execute` resolves to `{ chunks }`.
 */
export const createDocumentChunkerTool = ({
  doc,
  params = DEFAULT_CHUNK_PARAMS,
}: {
  doc: MDocument;
  params?: ChunkParams;
}): ReturnType<typeof createTool> => {
  const toolId = `Document Chunker ${params.strategy} ${params.maxSize}`;
  // A missing (or zero) overlap is reported as 0 in the description.
  const overlapLabel = params.overlap || 0;
  const toolDescription = `Chunks document using ${params.strategy} strategy with maxSize ${params.maxSize} and ${overlapLabel} overlap`;

  return createTool({
    id: toolId,
    inputSchema: z.object({}),
    description: toolDescription,
    execute: async () => ({
      chunks: await doc.chunk(params),
    }),
  });
};
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
import { RuntimeContext } from '@mastra/core/runtime-context';
|
|
2
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
|
-
import { GraphRAG } from '../graph-rag';
|
|
4
|
-
import { vectorQuerySearch } from '../utils';
|
|
5
|
-
import { createGraphRAGTool } from './graph-rag';
|
|
6
|
-
|
|
7
|
-
// Swap the vector search helper for a stub that always resolves to two canned hits.
vi.mock('../utils', async importOriginal => {
  const realUtils: any = await importOriginal();
  const cannedSearch = {
    results: [
      { metadata: { text: 'foo' }, vector: [1, 2, 3] },
      { metadata: { text: 'bar' }, vector: [4, 5, 6] },
    ],
    queryEmbedding: [1, 2, 3],
  };
  return {
    ...realUtils,
    vectorQuerySearch: vi.fn().mockResolvedValue(cannedSearch),
  };
});

// Swap GraphRAG for a constructor mock whose instances expose spy-able createGraph/query.
vi.mock('../graph-rag', async importOriginal => {
  const realGraphRag: any = await importOriginal();
  const buildFakeGraph = () => ({
    createGraph: vi.fn(),
    query: vi.fn(() => [
      { content: 'foo', metadata: { text: 'foo' } },
      { content: 'bar', metadata: { text: 'bar' } },
    ]),
  });
  return {
    ...realGraphRag,
    GraphRAG: vi.fn().mockImplementation(buildFakeGraph),
  };
});

const mockModel = { name: 'test-model' } as any;

// Minimal Mastra stand-in: getVector echoes the requested store name back as a key so
// assertions can tell which store was looked up.
const mockMastra = {
  getVector: vi.fn(requestedStore => ({
    [requestedStore]: {},
  })),
  getLogger: vi.fn(() => ({
    debug: vi.fn(),
    warn: vi.fn(),
    info: vi.fn(),
    error: vi.fn(),
  })),
};

describe('createGraphRAGTool', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('validates input schema', () => {
    const tool = createGraphRAGTool({
      id: 'test',
      model: mockModel,
      vectorStoreName: 'testStore',
      indexName: 'testIndex',
    });

    const parseValid = () => tool.inputSchema?.parse({ queryText: 'foo', topK: 10 });
    const parseEmpty = () => tool.inputSchema?.parse({});

    expect(parseValid).not.toThrow();
    expect(parseEmpty).toThrow();
  });

  describe('runtimeContext', () => {
    it('calls vectorQuerySearch and GraphRAG with runtimeContext params', async () => {
      const tool = createGraphRAGTool({
        id: 'test',
        model: mockModel,
        indexName: 'testIndex',
        vectorStoreName: 'testStore',
      });

      // Every value below differs from the tool options / call input so the assertions
      // prove the runtime context takes precedence.
      const runtimeContext = new RuntimeContext();
      runtimeContext.set('indexName', 'anotherIndex');
      runtimeContext.set('vectorStoreName', 'anotherStore');
      runtimeContext.set('topK', 5);
      runtimeContext.set('filter', { foo: 'bar' });
      runtimeContext.set('randomWalkSteps', 99);
      runtimeContext.set('restartProb', 0.42);

      const result = await tool.execute({
        context: { queryText: 'foo', topK: 2 },
        mastra: mockMastra as any,
        runtimeContext,
      });

      expect(result.relevantContext).toEqual(['foo', 'bar']);
      expect(result.sources.length).toBe(2);

      const expectedSearchArgs = expect.objectContaining({
        indexName: 'anotherIndex',
        vectorStore: {
          anotherStore: {},
        },
        queryText: 'foo',
        model: mockModel,
        queryFilter: { foo: 'bar' },
        topK: 5,
        includeVectors: true,
      });
      expect(vectorQuerySearch).toHaveBeenCalledWith(expectedSearchArgs);

      // The tool must have built a GraphRAG instance, created its graph and queried it.
      expect(GraphRAG).toHaveBeenCalled();
      const graphInstance = (GraphRAG as any).mock.results[0].value;
      expect(graphInstance.createGraph).toHaveBeenCalled();

      const expectedQueryArgs = expect.objectContaining({
        query: [1, 2, 3],
        topK: 5,
        randomWalkSteps: 99,
        restartProb: 0.42,
      });
      expect(graphInstance.query).toHaveBeenCalledWith(expectedQueryArgs);
    });
  });
});
|
package/src/tools/graph-rag.ts
DELETED
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import { createTool } from '@mastra/core/tools';
|
|
2
|
-
import { z } from 'zod';
|
|
3
|
-
|
|
4
|
-
import { GraphRAG } from '../graph-rag';
|
|
5
|
-
import { vectorQuerySearch, defaultGraphRagDescription, filterSchema, outputSchema, baseSchema } from '../utils';
|
|
6
|
-
import type { RagTool } from '../utils';
|
|
7
|
-
import { convertToSources } from '../utils/convert-sources';
|
|
8
|
-
import type { GraphRagToolOptions } from './types';
|
|
9
|
-
import { defaultGraphOptions } from './types';
|
|
10
|
-
|
|
11
|
-
/**
 * Creates a RAG tool that runs a vector search and re-ranks the hits through a GraphRAG instance.
 *
 * Per-call settings are resolved with the runtime context taking precedence: `indexName`,
 * `vectorStoreName`, `includeSources`, `randomWalkSteps`, `restartProb`, `topK` and `filter`
 * are each read from `runtimeContext` first, then fall back to the tool options or call input.
 *
 * @param options - Tool configuration; `options.graphOptions` is merged over `defaultGraphOptions`.
 * @returns A tool whose `execute` resolves to `{ relevantContext, sources }`. Both arrays are
 *   empty when the vector store cannot be found or when any error is thrown during retrieval.
 * @throws Error (from `execute`) when no index name or no vector store name can be resolved.
 */
export const createGraphRAGTool = (options: GraphRagToolOptions) => {
  const { model, id, description } = options;

  const toolId = id || `GraphRAG ${options.vectorStoreName} ${options.indexName} Tool`;
  const toolDescription = description || defaultGraphRagDescription();
  const graphOptions = {
    ...defaultGraphOptions,
    ...(options.graphOptions || {}),
  };
  // One GraphRAG instance is shared by every execution of this tool. Its graph is built
  // once, from the results of the first execution that reaches graph construction, and
  // reused afterwards.
  // NOTE(review): later queries are therefore ranked over the first query's chunks only;
  // confirm this is intended.
  const graphRag = new GraphRAG(graphOptions.dimension, graphOptions.threshold);
  let isInitialized = false;

  const inputSchema = options.enableFilter ? filterSchema : z.object(baseSchema).passthrough();

  return createTool({
    id: toolId,
    inputSchema,
    outputSchema,
    description: toolDescription,
    execute: async ({ context, mastra, runtimeContext }) => {
      const indexName: string = runtimeContext.get('indexName') ?? options.indexName;
      const vectorStoreName: string = runtimeContext.get('vectorStoreName') ?? options.vectorStoreName;
      if (!indexName) throw new Error(`indexName is required, got: ${indexName}`);
      if (!vectorStoreName) throw new Error(`vectorStoreName is required, got: ${vectorStoreName}`);
      const includeSources: boolean = runtimeContext.get('includeSources') ?? options.includeSources ?? true;
      const randomWalkSteps: number | undefined = runtimeContext.get('randomWalkSteps') ?? graphOptions.randomWalkSteps;
      const restartProb: number | undefined = runtimeContext.get('restartProb') ?? graphOptions.restartProb;
      const topK: number = runtimeContext.get('topK') ?? context.topK ?? 10;
      const filter: Record<string, any> = runtimeContext.get('filter') ?? context.filter;
      const queryText = context.queryText;

      // A filter supplied through the runtime context enables filtering even when the
      // tool was created without `enableFilter`.
      const enableFilter = !!runtimeContext.get('filter') || (options.enableFilter ?? false);

      const logger = mastra?.getLogger();
      if (!logger) {
        console.warn(
          '[GraphRAGTool] Logger not initialized: no debug or error logs will be recorded for this tool execution.',
        );
      }
      logger?.debug('[GraphRAGTool] execute called with:', { queryText, topK, filter });

      try {
        // topK may arrive as a numeric string at runtime despite its declared type;
        // anything that is not a usable number falls back to 10.
        const topKValue =
          typeof topK === 'number' && !isNaN(topK)
            ? topK
            : typeof topK === 'string' && !isNaN(Number(topK))
              ? Number(topK)
              : 10;
        const vectorStore = mastra?.getVector(vectorStoreName);

        if (!vectorStore) {
          logger?.error('Vector store not found', { vectorStoreName });
          return { relevantContext: [], sources: [] };
        }

        let queryFilter = {};
        if (enableFilter) {
          try {
            // The filter may be passed as a JSON string or as an already-parsed object.
            queryFilter = typeof filter === 'string' ? JSON.parse(filter) : filter;
          } catch (error) {
            // Unparseable filter: log and search unfiltered rather than failing the call.
            logger?.warn('Failed to parse filter as JSON, using empty filter', { filter, error });
            queryFilter = {};
          }
        }
        logger?.debug('Prepared vector query parameters:', { queryFilter, topK: topKValue });

        const { results, queryEmbedding } = await vectorQuerySearch({
          indexName,
          vectorStore,
          queryText,
          model,
          // An empty (or nullish) filter is sent as `undefined`, i.e. no filter at all.
          queryFilter: Object.keys(queryFilter || {}).length > 0 ? queryFilter : undefined,
          topK: topKValue,
          // Vectors are needed to build the graph below.
          includeVectors: true,
        });
        logger?.debug('vectorQuerySearch returned results', { count: results.length });

        // Initialize graph if not done yet
        if (!isInitialized) {
          // Get all chunks and embeddings for graph construction
          const chunks = results.map(result => ({
            text: result?.metadata?.text,
            metadata: result.metadata ?? {},
          }));
          const embeddings = results.map(result => ({
            vector: result.vector || [],
          }));

          logger?.debug('Initializing graph', { chunkCount: chunks.length, embeddingCount: embeddings.length });
          graphRag.createGraph(chunks, embeddings);
          isInitialized = true;
        } else {
          logger?.debug('Graph already initialized, skipping graph construction');
        }

        // Get reranked results using GraphRAG
        const rerankedResults = graphRag.query({
          query: queryEmbedding,
          topK: topKValue,
          randomWalkSteps,
          restartProb,
        });
        logger?.debug('GraphRAG query returned results', { count: rerankedResults.length });

        // Extract and combine relevant chunks
        const relevantChunks = rerankedResults.map(result => result.content);
        logger?.debug('Returning relevant context chunks', { count: relevantChunks.length });

        // `sources` exposes the full retrieval objects
        const sources = includeSources ? convertToSources(rerankedResults) : [];
        return {
          relevantContext: relevantChunks,
          sources,
        };
      } catch (err) {
        // Best-effort tool: failures are logged and reported as an empty result.
        logger?.error('Unexpected error in GraphRAGTool execute', {
          error: err,
          errorMessage: err instanceof Error ? err.message : String(err),
          errorStack: err instanceof Error ? err.stack : undefined,
        });
        return { relevantContext: [], sources: [] };
      }
    },
    // Use any for output schema as the structure of the output causes type inference issues
  }) as RagTool<typeof inputSchema, any>;
};
|
package/src/tools/index.ts
DELETED
package/src/tools/types.ts
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
import type { MastraVector } from '@mastra/core/vector';
|
|
2
|
-
import type { EmbeddingModel } from 'ai';
|
|
3
|
-
import type { RerankConfig } from '../rerank';
|
|
4
|
-
|
|
5
|
-
/** Pinecone-specific query options. */
export interface PineconeConfig {
  /** Pinecone namespace to query. */
  namespace?: string;
  /** Sparse vector used for hybrid search: parallel arrays of indices and their values. */
  sparseVector?: {
    indices: Array<number>;
    values: Array<number>;
  };
}
|
|
12
|
-
|
|
13
|
-
/** pgvector-specific query options. */
export interface PgVectorConfig {
  /** Minimum similarity score. */
  minScore?: number;
  /** HNSW search parameter. */
  ef?: number;
  /** IVFFlat probe parameter. */
  probes?: number;
}
|
|
18
|
-
|
|
19
|
-
// Chroma filter building blocks (module-private).
type LiteralValue = string | number | boolean;
type ListLiteralValue = Array<LiteralValue>;
type LiteralNumber = number;
type LogicalOperator = '$and' | '$or';
type InclusionOperator = '$in' | '$nin';
type WhereOperator = '$gt' | '$gte' | '$lt' | '$lte' | '$ne' | '$eq';

/** Operator-to-operand map, e.g. `{ $gte: 3 }` or `{ $in: ['a', 'b'] }`. */
type OperatorExpression = Partial<
  Record<WhereOperator | InclusionOperator | LogicalOperator, LiteralValue | ListLiteralValue>
>;

/** Field-level conditions: each key maps to a literal value or an operator expression. */
type BaseWhere = Record<string, LiteralValue | OperatorExpression>;

/** Logical combination (`$and` / `$or`) of nested `Where` clauses. */
type LogicalWhere = {
  [key in LogicalOperator]?: Where[];
};
type Where = BaseWhere | LogicalWhere;

type WhereDocumentOperator = '$contains' | '$not_contains' | LogicalOperator;

/** Document-content conditions; values may be literals or nested `WhereDocument` lists. */
type WhereDocument = {
  [key in WhereDocumentOperator]?: LiteralValue | LiteralNumber | WhereDocument[];
};

/** Chroma-specific query options. */
export interface ChromaConfig {
  /** Metadata filter. */
  where?: Where;
  /** Document content filter. */
  whereDocument?: WhereDocument;
}
|
|
46
|
-
|
|
47
|
-
/**
 * Container for all database-specific configs, keyed by database name.
 *
 * The three named keys are typed; the index signature lets any other key through
 * as `any` so that configs for additional databases can be supplied.
 */
export type DatabaseConfig = {
  /** Options for Pinecone. */
  pinecone?: PineconeConfig;
  /** Options for pgvector. */
  pgvector?: PgVectorConfig;
  /** Options for Chroma. */
  chroma?: ChromaConfig;
  // Further database configs can be added above as needed.
  /** Allow for future database extensions. */
  [key: string]: any;
};
|
|
55
|
-
|
|
56
|
-
/**
 * Options for the vector query tool.
 *
 * The vector store is identified in one of two ways: by `vectorStoreName` alone, or by
 * passing a `vectorStore` instance directly (in which case the name is optional).
 */
export type VectorQueryToolOptions = {
  id?: string;
  description?: string;
  indexName: string;
  model: EmbeddingModel<string>;
  enableFilter?: boolean;
  includeVectors?: boolean;
  includeSources?: boolean;
  reranker?: RerankConfig;
  /** Database-specific configuration options */
  databaseConfig?: DatabaseConfig;
} & (
  | { vectorStoreName: string }
  | { vectorStoreName?: string; vectorStore: MastraVector }
);
|
|
76
|
-
|
|
77
|
-
/**
 * Options for the GraphRAG tool.
 */
export type GraphRagToolOptions = {
  id?: string;
  description?: string;
  indexName: string;
  vectorStoreName: string;
  model: EmbeddingModel<string>;
  enableFilter?: boolean;
  includeSources?: boolean;
  /** Graph settings; see `defaultGraphOptions` for the default values. */
  graphOptions?: {
    dimension?: number;
    randomWalkSteps?: number;
    restartProb?: number;
    threshold?: number;
  };
};
|
|
92
|
-
|
|
93
|
-
/**
 * Default options for GraphRAG.
 *
 * @default
 * ```json
 * {
 *   "dimension": 1536,
 *   "randomWalkSteps": 100,
 *   "restartProb": 0.15,
 *   "threshold": 0.7
 * }
 * ```
 */
export const defaultGraphOptions = {
  dimension: 1536,
  randomWalkSteps: 100,
  restartProb: 0.15,
  threshold: 0.7,
};
|