@mastra/rag 1.2.2 → 1.2.3-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/index.cjs +25 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +25 -9
- package/dist/index.js.map +1 -1
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/types.d.ts +18 -5
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/vector-search.d.ts +6 -7
- package/dist/utils/vector-search.d.ts.map +1 -1
- package/package.json +19 -6
- package/.turbo/turbo-build.log +0 -4
- package/docker-compose.yaml +0 -22
- package/eslint.config.js +0 -6
- package/src/document/document.test.ts +0 -2975
- package/src/document/document.ts +0 -335
- package/src/document/extractors/base.ts +0 -30
- package/src/document/extractors/index.ts +0 -5
- package/src/document/extractors/keywords.test.ts +0 -125
- package/src/document/extractors/keywords.ts +0 -126
- package/src/document/extractors/questions.test.ts +0 -120
- package/src/document/extractors/questions.ts +0 -111
- package/src/document/extractors/summary.test.ts +0 -107
- package/src/document/extractors/summary.ts +0 -122
- package/src/document/extractors/title.test.ts +0 -121
- package/src/document/extractors/title.ts +0 -185
- package/src/document/extractors/types.ts +0 -40
- package/src/document/index.ts +0 -2
- package/src/document/prompts/base.ts +0 -77
- package/src/document/prompts/format.ts +0 -9
- package/src/document/prompts/index.ts +0 -15
- package/src/document/prompts/prompt.ts +0 -60
- package/src/document/prompts/types.ts +0 -29
- package/src/document/schema/index.ts +0 -3
- package/src/document/schema/node.ts +0 -187
- package/src/document/schema/types.ts +0 -40
- package/src/document/transformers/character.ts +0 -267
- package/src/document/transformers/html.ts +0 -346
- package/src/document/transformers/json.ts +0 -536
- package/src/document/transformers/latex.ts +0 -11
- package/src/document/transformers/markdown.ts +0 -239
- package/src/document/transformers/semantic-markdown.ts +0 -227
- package/src/document/transformers/sentence.ts +0 -314
- package/src/document/transformers/text.ts +0 -158
- package/src/document/transformers/token.ts +0 -137
- package/src/document/transformers/transformer.ts +0 -5
- package/src/document/types.ts +0 -145
- package/src/document/validation.ts +0 -158
- package/src/graph-rag/index.test.ts +0 -235
- package/src/graph-rag/index.ts +0 -306
- package/src/index.ts +0 -8
- package/src/rerank/index.test.ts +0 -150
- package/src/rerank/index.ts +0 -198
- package/src/rerank/relevance/cohere/index.ts +0 -56
- package/src/rerank/relevance/index.ts +0 -3
- package/src/rerank/relevance/mastra-agent/index.ts +0 -32
- package/src/rerank/relevance/zeroentropy/index.ts +0 -26
- package/src/tools/README.md +0 -153
- package/src/tools/document-chunker.ts +0 -34
- package/src/tools/graph-rag.test.ts +0 -115
- package/src/tools/graph-rag.ts +0 -154
- package/src/tools/index.ts +0 -3
- package/src/tools/types.ts +0 -110
- package/src/tools/vector-query-database-config.test.ts +0 -190
- package/src/tools/vector-query.test.ts +0 -418
- package/src/tools/vector-query.ts +0 -169
- package/src/utils/convert-sources.ts +0 -43
- package/src/utils/default-settings.ts +0 -38
- package/src/utils/index.ts +0 -3
- package/src/utils/tool-schemas.ts +0 -38
- package/src/utils/vector-prompts.ts +0 -832
- package/src/utils/vector-search.ts +0 -117
- package/tsconfig.build.json +0 -9
- package/tsconfig.json +0 -5
- package/tsup.config.ts +0 -17
- package/vitest.config.ts +0 -8
package/src/document/types.ts
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
|
|
2
|
-
import type {
|
|
3
|
-
TitleExtractorsArgs,
|
|
4
|
-
SummaryExtractArgs,
|
|
5
|
-
QuestionAnswerExtractArgs,
|
|
6
|
-
KeywordExtractArgs,
|
|
7
|
-
} from './extractors';
|
|
8
|
-
|
|
9
|
-
/**
 * String identifiers accepted by the `language` field of
 * `RecursiveChunkOptions`.
 *
 * Each member's value is the lowercase string form of its name.
 */
export enum Language {
  // Programming languages
  CPP = 'cpp',
  GO = 'go',
  JAVA = 'java',
  KOTLIN = 'kotlin',
  JS = 'js',
  TS = 'ts',
  PHP = 'php',
  PROTO = 'proto',
  PYTHON = 'python',
  RST = 'rst',
  RUBY = 'ruby',
  RUST = 'rust',
  SCALA = 'scala',
  SWIFT = 'swift',

  // Markup formats
  MARKDOWN = 'markdown',
  LATEX = 'latex',
  HTML = 'html',

  // Further languages
  SOL = 'sol',
  CSHARP = 'csharp',
  COBOL = 'cobol',
  C = 'c',
  LUA = 'lua',
  PERL = 'perl',
  HASKELL = 'haskell',
  ELIXIR = 'elixir',
  POWERSHELL = 'powershell',
}
|
|
37
|
-
|
|
38
|
-
/**
 * Shape of the `extract` option on `ChunkParams`: one optional entry per
 * extractor, each either that extractor's argument object or a boolean.
 */
export type ExtractParams = {
  title?: TitleExtractorsArgs | boolean;
  summary?: SummaryExtractArgs | boolean;
  questions?: QuestionAnswerExtractArgs | boolean;
  keywords?: KeywordExtractArgs | boolean;
};

/** Options shared by every chunking strategy. */
export type BaseChunkOptions = {
  /**
   * @deprecated Use `maxSize` instead. Will be removed in next major version.
   */
  size?: number;
  maxSize?: number;
  overlap?: number;
  lengthFunction?: (text: string) => number;
  keepSeparator?: boolean | 'start' | 'end';
  addStartIndex?: boolean;
  stripWhitespace?: boolean;
};

/** List of `[string, string]` pairs used by the header/section options. */
type StringPairList = [string, string][];

/** Tokenizer fields shared by the `token` and `semantic-markdown` strategies. */
type TiktokenOptionFields = {
  encodingName?: TiktokenEncoding;
  modelName?: TiktokenModel;
  allowedSpecial?: Set<string> | 'all';
  disallowedSpecial?: Set<string> | 'all';
};

/** Options for the `character` strategy. */
export type CharacterChunkOptions = BaseChunkOptions & {
  separator?: string;
  isSeparatorRegex?: boolean;
};

/** Options for the `recursive` strategy. */
export type RecursiveChunkOptions = BaseChunkOptions & {
  separators?: string[];
  isSeparatorRegex?: boolean;
  language?: Language;
};

/** Options for the `token` strategy. */
export type TokenChunkOptions = BaseChunkOptions & TiktokenOptionFields;

/** Options for the `markdown` strategy. */
export type MarkdownChunkOptions = BaseChunkOptions & {
  headers?: StringPairList;
  returnEachLine?: boolean;
  stripHeaders?: boolean;
};

/** Options for the `semantic-markdown` strategy. */
export type SemanticMarkdownChunkOptions = BaseChunkOptions & { joinThreshold?: number } & TiktokenOptionFields;

/**
 * Options for the `html` strategy. Exactly one of `headers` or `sections`
 * must be supplied; the `never` markers make the two mutually exclusive.
 */
export type HTMLChunkOptions = BaseChunkOptions & { returnEachLine?: boolean } & (
    | { headers: StringPairList; sections?: never }
    | { sections: StringPairList; headers?: never }
  );

/** Options for the `json` strategy. */
export type JsonChunkOptions = BaseChunkOptions & {
  minSize?: number;
  ensureAscii?: boolean;
  convertLists?: boolean;
};

/** Options for the `latex` strategy; adds nothing beyond the base options. */
export type LatexChunkOptions = BaseChunkOptions & {};

/** Options for the `sentence` strategy. */
export type SentenceChunkOptions = BaseChunkOptions & {
  maxSize: number; // Required here, unlike the optional base field
  minSize?: number;
  targetSize?: number;
  sentenceEnders?: string[];
  fallbackToWords?: boolean;
  fallbackToCharacters?: boolean;
};

/** Maps each strategy name to its options type. */
export type StrategyOptions = {
  recursive: RecursiveChunkOptions;
  character: CharacterChunkOptions;
  token: TokenChunkOptions;
  markdown: MarkdownChunkOptions;
  html: HTMLChunkOptions;
  json: JsonChunkOptions;
  latex: LatexChunkOptions;
  sentence: SentenceChunkOptions;
  'semantic-markdown': SemanticMarkdownChunkOptions;
};

/** Union of every supported strategy name (the keys of `StrategyOptions`). */
export type ChunkStrategy = keyof StrategyOptions;

/** Optional extractor configuration accepted alongside any strategy's options. */
type WithExtract = { extract?: ExtractParams };

/**
 * Full parameter object for chunking, discriminated by `strategy`.
 * `strategy` may be omitted only in the `character` variant.
 */
export type ChunkParams =
  | ({ strategy?: 'character' } & CharacterChunkOptions & WithExtract)
  | ({ strategy: 'recursive' } & RecursiveChunkOptions & WithExtract)
  | ({ strategy: 'token' } & TokenChunkOptions & WithExtract)
  | ({ strategy: 'markdown' } & MarkdownChunkOptions & WithExtract)
  | ({ strategy: 'html' } & HTMLChunkOptions & WithExtract)
  | ({ strategy: 'json' } & JsonChunkOptions & WithExtract)
  | ({ strategy: 'latex' } & LatexChunkOptions & WithExtract)
  | ({ strategy: 'sentence' } & SentenceChunkOptions & WithExtract)
  | ({ strategy: 'semantic-markdown' } & SemanticMarkdownChunkOptions & WithExtract);
|
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
import type { ChunkStrategy } from './types';
|
|
3
|
-
|
|
4
|
-
/**
 * Folds the deprecated `size` option into `maxSize` and strips `size` from
 * the result.
 *
 * When `size` is defined a deprecation warning is logged. Its value is used
 * as `maxSize` only if `maxSize` itself is undefined; an explicit `maxSize`
 * always wins. The input object is never modified.
 *
 * @param data - Options object that may carry `size` and/or `maxSize`.
 * @returns A new object with every property of `data` except `size`.
 */
function handleDeprecatedSize<T extends { size?: number; maxSize?: number }>(data: T): Omit<T, 'size'> {
  const { size, ...rest } = data;

  if (size === undefined) {
    return rest;
  }

  console.warn(
    '[DEPRECATION] `size` is deprecated. Use `maxSize` instead. This will be removed in the next major version.',
  );

  // Build a fresh object instead of assigning onto `data`, so the caller's
  // object is left untouched.
  return data.maxSize === undefined ? { ...rest, maxSize: size } : rest;
}
|
|
18
|
-
|
|
19
|
-
// Small factories for the field shapes repeated below; each call returns a fresh schema.
const optionalPositiveNumber = () => z.number().positive().optional();
const optionalBoolean = () => z.boolean().optional();

// Base options that apply to all strategies
const baseChunkOptionsSchema = z.object({
  size: optionalPositiveNumber(),
  maxSize: optionalPositiveNumber(),
  overlap: z.number().min(0).optional(),
  lengthFunction: z.function().optional(),
  keepSeparator: z.union([z.boolean(), z.literal('start'), z.literal('end')]).optional(),
  addStartIndex: optionalBoolean(),
  stripWhitespace: optionalBoolean(),
});

// Strategy-specific schemas. Each one extends the base options and is made
// strict so that unrecognized keys are rejected.

// `character` strategy
const characterChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    separator: z.string().optional(),
    isSeparatorRegex: optionalBoolean(),
  })
  .strict();

// `recursive` strategy
const recursiveChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    separators: z.array(z.string()).optional(),
    isSeparatorRegex: optionalBoolean(),
    language: z.string().optional(),
  })
  .strict();

// `sentence` strategy: `maxSize` is overridden to be required here.
const sentenceChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    maxSize: z.number().positive(),
    minSize: optionalPositiveNumber(),
    targetSize: optionalPositiveNumber(),
    sentenceEnders: z.array(z.string()).optional(),
    fallbackToWords: optionalBoolean(),
    fallbackToCharacters: optionalBoolean(),
  })
  .strict();
|
|
56
|
-
|
|
57
|
-
// Duck-type predicate for Set-like objects: a non-null object exposing
// has/add/delete/clear as functions and a numeric `size`.
const isSetLike = (value: unknown): value is Set<any> => {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  const candidate = value as Set<any>;
  const requiredMethods = ['has', 'add', 'delete', 'clear'] as const;

  // `every` stops at the first missing method, matching a chained `&&`.
  const hasAllMethods = requiredMethods.every(method => typeof candidate[method] === 'function');

  return hasAllMethods && typeof candidate.size === 'number';
};
|
|
69
|
-
|
|
70
|
-
// Zod schema for an optional value that is either the literal 'all' or a
// Set-like object (as decided by isSetLike).
const setOrAllSchema = z
  .any()
  .refine(candidate => candidate === 'all' || isSetLike(candidate), {
    message: "Must be a Set object or the literal 'all'",
  })
  .optional();

// Tokenizer fields shared by the token and semantic-markdown strategies.
const tiktokenOptionFields = {
  encodingName: z.string().optional(),
  modelName: z.string().optional(),
  allowedSpecial: setOrAllSchema,
  disallowedSpecial: setOrAllSchema,
};

// Array of [string, string] tuples, used for the header/section options.
const stringPairListSchema = z.array(z.tuple([z.string(), z.string()]));

// `token` strategy
const tokenChunkOptionsSchema = baseChunkOptionsSchema.extend(tiktokenOptionFields).strict();

// `json` strategy
const jsonChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    minSize: z.number().positive().optional(),
    ensureAscii: z.boolean().optional(),
    convertLists: z.boolean().optional(),
  })
  .strict();

// `html` strategy
const htmlChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    headers: stringPairListSchema.optional(),
    sections: stringPairListSchema.optional(),
    returnEachLine: z.boolean().optional(),
  })
  .strict();

// `markdown` strategy
const markdownChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    headers: stringPairListSchema.optional(),
    returnEachLine: z.boolean().optional(),
    stripHeaders: z.boolean().optional(),
  })
  .strict();

// `semantic-markdown` strategy
const semanticMarkdownChunkOptionsSchema = baseChunkOptionsSchema
  .extend({
    joinThreshold: z.number().positive().optional(),
    ...tiktokenOptionFields,
  })
  .strict();

// `latex` strategy: base options only, with unknown keys rejected.
const latexChunkOptionsSchema = baseChunkOptionsSchema.strict();

// Strategy-specific validation schemas, keyed by strategy name. Every schema
// passes its parsed output through handleDeprecatedSize.
const validationSchemas = {
  character: characterChunkOptionsSchema.transform(handleDeprecatedSize),
  recursive: recursiveChunkOptionsSchema.transform(handleDeprecatedSize),
  sentence: sentenceChunkOptionsSchema.transform(handleDeprecatedSize),
  token: tokenChunkOptionsSchema.transform(handleDeprecatedSize),
  json: jsonChunkOptionsSchema.transform(handleDeprecatedSize),
  html: htmlChunkOptionsSchema.transform(handleDeprecatedSize),
  markdown: markdownChunkOptionsSchema.transform(handleDeprecatedSize),
  'semantic-markdown': semanticMarkdownChunkOptionsSchema.transform(handleDeprecatedSize),
  latex: latexChunkOptionsSchema.transform(handleDeprecatedSize),
} as const;
|
|
135
|
-
|
|
136
|
-
/**
 * Validates chunking options against the schema registered for `strategy`.
 *
 * Returns normally when the options are valid. The parsed value is
 * discarded; this function only checks.
 *
 * @param strategy - Name of the chunking strategy whose schema is applied.
 * @param params - Options object to validate.
 * @throws Error if no schema is registered for `strategy`.
 * @throws Error if `params` contains keys the strategy does not support; the
 *   message lists those keys.
 * @throws Error for any other validation failure; the message lists each
 *   issue as `path: message`.
 */
export function validateChunkParams(strategy: ChunkStrategy, params: unknown): void {
  const schema = validationSchemas[strategy];
  if (!schema) {
    throw new Error(`Unknown chunking strategy: ${strategy}`);
  }

  const result = schema.safeParse(params);
  if (result.success) {
    return;
  }

  // Read `issues` rather than `errors`: `issues` is the documented list on
  // ZodError, while `errors` is only an alias and should not be relied on
  // across Zod major versions.
  const { issues } = result.error;

  // Unsupported keys get a dedicated, shorter message.
  const unrecognized = issues.find(issue => issue.code === 'unrecognized_keys');
  if (unrecognized && 'keys' in unrecognized) {
    const keys = unrecognized.keys.join(', ');
    throw new Error(`Invalid parameters for ${strategy} strategy: '${keys}' not supported`);
  }

  // Fallback to a general message for every other validation issue. Issues
  // with an empty path are labelled 'parameter'.
  const errorMessage = issues
    .map(issue => `${issue.path.length > 0 ? issue.path.join('.') : 'parameter'}: ${issue.message}`)
    .join(', ');

  throw new Error(`Invalid parameters for ${strategy} strategy: ${errorMessage}`);
}
|
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
|
|
3
|
-
import type { GraphChunk, GraphEdge, GraphEmbedding, GraphNode } from './';
|
|
4
|
-
import { GraphRAG } from './';
|
|
5
|
-
|
|
6
|
-
describe('GraphRAG', () => {
  // Builds a node whose content is "Node <id>".
  const makeNode = (id: string, embedding: number[]): GraphNode => ({
    id,
    content: `Node ${id}`,
    embedding,
  });

  // Builds a 'semantic' edge between two node ids.
  const makeEdge = (source: string, target: string, weight: number): GraphEdge => ({
    source,
    target,
    weight,
    type: 'semantic',
  });

  beforeEach(() => {
    // Clear any mock state before each test
    vi.clearAllMocks();
  });

  describe('addNode', () => {
    it('should throw an error if node does not have an embedding', () => {
      const graph = new GraphRAG();
      const nodeWithoutEmbedding = { id: '1', content: 'Node 1' };

      expect(() => graph.addNode(nodeWithoutEmbedding)).toThrow('Node must have an embedding');
    });

    it('should throw an error if node embedding dimension is not equal to the graph dimension', () => {
      const graph = new GraphRAG(2);
      const threeDimensionalNode = makeNode('1', [1, 2, 3]);

      expect(() => graph.addNode(threeDimensionalNode)).toThrow('Embedding dimension must be 2');
    });

    it('should add a node to the graph', () => {
      const graph = new GraphRAG(3);

      graph.addNode(makeNode('1', [1, 2, 3]));

      expect(graph['nodes'].size).toBe(1);
    });
  });

  describe('addEdge', () => {
    it('should throw an error if either source or target node does not exist', () => {
      const graph = new GraphRAG();
      const danglingEdge = makeEdge('1', '2', 0.5);

      expect(() => graph.addEdge(danglingEdge)).toThrow('Both source and target nodes must exist');
    });

    it('should add an edge between two nodes', () => {
      const graph = new GraphRAG(3);
      graph.addNode(makeNode('1', [1, 2, 3]));
      graph.addNode(makeNode('2', [4, 5, 6]));

      graph.addEdge(makeEdge('1', '2', 0.5));

      // Adding one edge is expected to leave two entries in the edge list.
      expect(graph['edges'].length).toBe(2);
    });
  });

  describe('createGraph', () => {
    it("chunks and embeddings can't be empty", () => {
      const graph = new GraphRAG(3);
      const noChunks: GraphChunk[] = [];
      const noEmbeddings: GraphEmbedding[] = [];

      expect(() => graph.createGraph(noChunks, noEmbeddings)).toThrowError(
        'Chunks and embeddings arrays must not be empty',
      );
    });

    it('chunks and embeddings must have the same length', () => {
      const graph = new GraphRAG(3);
      const twoChunks: GraphChunk[] = ['Chunk 1', 'Chunk 2'].map(text => ({ text, metadata: {} }));
      const oneEmbedding: GraphEmbedding[] = [{ vector: [1, 2, 3] }];

      expect(() => graph.createGraph(twoChunks, oneEmbedding)).toThrowError(
        'Chunks and embeddings must have the same length',
      );
    });

    it('should return the top ranked nodes', () => {
      const vectors = [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
      ];
      // Each chunk's text is mirrored into its metadata.
      const chunks = vectors.map((_, index) => {
        const metadata = { text: `Chunk ${index + 1}` };
        return { text: metadata.text, metadata };
      });
      const embeddings = vectors.map(vector => ({ vector }));

      const graph = new GraphRAG(3);
      graph.createGraph(chunks, embeddings);

      const nodes = graph.getNodes();
      expect(nodes.length).toBe(3);
      ['0', '1', '2'].forEach((expectedId, index) => {
        expect(nodes[index]?.id).toBe(expectedId);
      });

      expect(graph.getEdges().length).toBe(6);
    });
  });

  describe('query', () => {
    // Arguments that pass every validation check; each test overrides one of them.
    const validArgs = { query: [1, 2, 3], topK: 2, randomWalkSteps: 3, restartProb: 0.1 };

    it("query embedding can't be empty", () => {
      const graph = new GraphRAG(3);
      const emptyEmbedding: number[] = [];

      expect(() => graph.query({ ...validArgs, query: emptyEmbedding })).toThrowError(
        `Query embedding must have dimension ${3}`,
      );
    });

    it('topK must be greater than 0', () => {
      const graph = new GraphRAG(3);

      expect(() => graph.query({ ...validArgs, topK: 0 })).toThrowError('TopK must be greater than 0');
    });

    it('randomWalkSteps must be greater than 0', () => {
      const graph = new GraphRAG(3);

      expect(() => graph.query({ ...validArgs, randomWalkSteps: 0 })).toThrowError(
        'Random walk steps must be greater than 0',
      );
    });

    it('restartProb must be between 0 and 1', () => {
      const graph = new GraphRAG(3);

      expect(() => graph.query({ ...validArgs, restartProb: -0.1 })).toThrowError(
        'Restart probability must be between 0 and 1',
      );
    });

    it('should return the top ranked nodes', () => {
      const graph = new GraphRAG(3);
      const nodes = [makeNode('1', [1, 2, 3]), makeNode('2', [11, 12, 13]), makeNode('3', [21, 22, 23])];
      for (const node of nodes) {
        graph.addNode(node);
      }
      graph.addEdge(makeEdge('1', '2', 0.5));
      graph.addEdge(makeEdge('2', '3', 0.7));

      const rerankedResults = graph.query({ ...validArgs, query: [15, 16, 17] });

      expect(rerankedResults.length).toBe(2);
    });
  });
});
|