@echoes-io/mcp-server 4.1.0 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/index.d.ts +2 -0
- package/cli/index.js +186 -0
- package/package.json +2 -1
- package/src/database/index.d.ts +6 -0
- package/src/database/index.js +26 -0
- package/src/database/relations.d.ts +744 -0
- package/src/database/relations.js +52 -0
- package/src/database/schema.d.ts +733 -0
- package/src/database/schema.js +69 -0
- package/src/database/vector.d.ts +25 -0
- package/src/database/vector.js +98 -0
- package/src/index.d.ts +5 -0
- package/src/index.js +5 -0
- package/src/rag/character-ner.d.ts +36 -0
- package/src/rag/character-ner.js +416 -0
- package/src/rag/database-sync.d.ts +38 -0
- package/src/rag/database-sync.js +158 -0
- package/src/rag/embeddings.d.ts +74 -0
- package/src/rag/embeddings.js +164 -0
- package/src/rag/graph-rag.d.ts +69 -0
- package/src/rag/graph-rag.js +311 -0
- package/src/rag/hybrid-rag.d.ts +109 -0
- package/src/rag/hybrid-rag.js +255 -0
- package/src/rag/index.d.ts +16 -0
- package/src/rag/index.js +33 -0
- package/src/server.d.ts +43 -0
- package/src/server.js +177 -0
- package/src/tools/index-rag.d.ts +19 -0
- package/src/tools/index-rag.js +85 -0
- package/src/tools/index-tracker.d.ts +17 -0
- package/src/tools/index-tracker.js +89 -0
- package/src/tools/index.d.ts +5 -0
- package/src/tools/index.js +5 -0
- package/src/tools/rag-context.d.ts +34 -0
- package/src/tools/rag-context.js +51 -0
- package/src/tools/rag-search.d.ts +35 -0
- package/src/tools/rag-search.js +60 -0
- package/src/tools/words-count.d.ts +15 -0
- package/src/tools/words-count.js +28 -0
- package/src/types/frontmatter.d.ts +35 -0
- package/src/types/frontmatter.js +1 -0
- package/src/utils/index.d.ts +1 -0
- package/src/utils/index.js +1 -0
- package/src/utils/markdown.d.ts +6 -0
- package/src/utils/markdown.js +36 -0
- package/src/utils/timeline-detection.d.ts +13 -0
- package/src/utils/timeline-detection.js +76 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GraphRAG implementation for Echoes storytelling platform
|
|
3
|
+
* Based on Mastra.ai GraphRAG with adaptations for chapter relationships
|
|
4
|
+
*/
|
|
5
|
+
export class GraphRAG {
|
|
6
|
+
nodes;
|
|
7
|
+
edges;
|
|
8
|
+
dimension;
|
|
9
|
+
threshold;
|
|
10
|
+
constructor(dimension = 384, threshold = 0.7) {
|
|
11
|
+
this.nodes = new Map();
|
|
12
|
+
this.edges = [];
|
|
13
|
+
this.dimension = dimension;
|
|
14
|
+
this.threshold = threshold;
|
|
15
|
+
}
|
|
16
|
+
// Add a node to the graph
|
|
17
|
+
addNode(node) {
|
|
18
|
+
if (!node.embedding) {
|
|
19
|
+
throw new Error('Node must have an embedding');
|
|
20
|
+
}
|
|
21
|
+
if (node.embedding.length !== this.dimension) {
|
|
22
|
+
throw new Error(`Embedding dimension must be ${this.dimension}`);
|
|
23
|
+
}
|
|
24
|
+
this.nodes.set(node.id, node);
|
|
25
|
+
}
|
|
26
|
+
// Add an edge between two nodes
|
|
27
|
+
addEdge(edge) {
|
|
28
|
+
if (!this.nodes.has(edge.source) || !this.nodes.has(edge.target)) {
|
|
29
|
+
throw new Error('Both source and target nodes must exist');
|
|
30
|
+
}
|
|
31
|
+
this.edges.push(edge);
|
|
32
|
+
// Add reverse edge for undirected relationships
|
|
33
|
+
if (edge.type === 'semantic' || edge.type === 'character') {
|
|
34
|
+
this.edges.push({
|
|
35
|
+
source: edge.target,
|
|
36
|
+
target: edge.source,
|
|
37
|
+
weight: edge.weight,
|
|
38
|
+
type: edge.type,
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
// Helper methods
|
|
43
|
+
getNodes() {
|
|
44
|
+
return Array.from(this.nodes.values());
|
|
45
|
+
}
|
|
46
|
+
getEdges() {
|
|
47
|
+
return this.edges;
|
|
48
|
+
}
|
|
49
|
+
getEdgesByType(type) {
|
|
50
|
+
return this.edges.filter((edge) => edge.type === type);
|
|
51
|
+
}
|
|
52
|
+
clear() {
|
|
53
|
+
this.nodes.clear();
|
|
54
|
+
this.edges = [];
|
|
55
|
+
}
|
|
56
|
+
updateNodeContent(id, newContent) {
|
|
57
|
+
const node = this.nodes.get(id);
|
|
58
|
+
if (!node) {
|
|
59
|
+
throw new Error(`Node ${id} not found`);
|
|
60
|
+
}
|
|
61
|
+
node.content = newContent;
|
|
62
|
+
}
|
|
63
|
+
// Get neighbors of a node
|
|
64
|
+
getNeighbors(nodeId, edgeType) {
|
|
65
|
+
return this.edges
|
|
66
|
+
.filter((edge) => edge.source === nodeId && (!edgeType || edge.type === edgeType))
|
|
67
|
+
.map((edge) => ({
|
|
68
|
+
id: edge.target,
|
|
69
|
+
weight: edge.weight,
|
|
70
|
+
}));
|
|
71
|
+
}
|
|
72
|
+
// Calculate cosine similarity between two vectors
|
|
73
|
+
cosineSimilarity(vec1, vec2) {
|
|
74
|
+
if (!vec1 || !vec2) {
|
|
75
|
+
throw new Error('Vectors must not be null or undefined');
|
|
76
|
+
}
|
|
77
|
+
if (vec1.length !== vec2.length) {
|
|
78
|
+
throw new Error(`Vector dimensions must match: vec1(${vec1.length}) !== vec2(${vec2.length})`);
|
|
79
|
+
}
|
|
80
|
+
let dotProduct = 0;
|
|
81
|
+
let normVec1 = 0;
|
|
82
|
+
let normVec2 = 0;
|
|
83
|
+
for (let i = 0; i < vec1.length; i++) {
|
|
84
|
+
const a = vec1[i];
|
|
85
|
+
const b = vec2[i];
|
|
86
|
+
dotProduct += a * b;
|
|
87
|
+
normVec1 += a * a;
|
|
88
|
+
normVec2 += b * b;
|
|
89
|
+
}
|
|
90
|
+
const magnitudeProduct = Math.sqrt(normVec1 * normVec2);
|
|
91
|
+
if (magnitudeProduct === 0)
|
|
92
|
+
return 0;
|
|
93
|
+
const similarity = dotProduct / magnitudeProduct;
|
|
94
|
+
return Math.max(-1, Math.min(1, similarity));
|
|
95
|
+
}
|
|
96
|
+
// Create graph from chapters with multiple edge types
|
|
97
|
+
createGraph(chunks, embeddings) {
|
|
98
|
+
if (!chunks?.length || !embeddings?.length) {
|
|
99
|
+
throw new Error('Chunks and embeddings arrays must not be empty');
|
|
100
|
+
}
|
|
101
|
+
if (chunks.length !== embeddings.length) {
|
|
102
|
+
throw new Error('Chunks and embeddings must have the same length');
|
|
103
|
+
}
|
|
104
|
+
// Create nodes from chunks
|
|
105
|
+
chunks.forEach((chunk, index) => {
|
|
106
|
+
const node = {
|
|
107
|
+
id: index.toString(),
|
|
108
|
+
content: chunk.text,
|
|
109
|
+
embedding: embeddings[index]?.vector,
|
|
110
|
+
metadata: chunk.metadata,
|
|
111
|
+
};
|
|
112
|
+
this.addNode(node);
|
|
113
|
+
});
|
|
114
|
+
// Create semantic edges based on cosine similarity (high threshold)
|
|
115
|
+
this.createSemanticEdges(embeddings);
|
|
116
|
+
// Create character relationship edges
|
|
117
|
+
this.createCharacterEdges();
|
|
118
|
+
// Create temporal edges (sequential chapters)
|
|
119
|
+
this.createTemporalEdges();
|
|
120
|
+
// Create location edges
|
|
121
|
+
this.createLocationEdges();
|
|
122
|
+
}
|
|
123
|
+
createSemanticEdges(embeddings) {
|
|
124
|
+
for (let i = 0; i < embeddings.length; i++) {
|
|
125
|
+
const firstEmbedding = embeddings[i]?.vector;
|
|
126
|
+
for (let j = i + 1; j < embeddings.length; j++) {
|
|
127
|
+
const secondEmbedding = embeddings[j]?.vector;
|
|
128
|
+
const similarity = this.cosineSimilarity(firstEmbedding, secondEmbedding);
|
|
129
|
+
if (similarity > this.threshold) {
|
|
130
|
+
this.addEdge({
|
|
131
|
+
source: i.toString(),
|
|
132
|
+
target: j.toString(),
|
|
133
|
+
weight: similarity,
|
|
134
|
+
type: 'semantic',
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
createCharacterEdges() {
|
|
141
|
+
const nodes = Array.from(this.nodes.values());
|
|
142
|
+
for (let i = 0; i < nodes.length; i++) {
|
|
143
|
+
for (let j = i + 1; j < nodes.length; j++) {
|
|
144
|
+
const node1 = nodes[i];
|
|
145
|
+
const node2 = nodes[j];
|
|
146
|
+
// Find common characters (only real character names, not noise)
|
|
147
|
+
const chars1 = new Set(node1.metadata.characters.filter((char) => char.length >= 3 && char[0] === char[0].toUpperCase()));
|
|
148
|
+
const chars2 = new Set(node2.metadata.characters.filter((char) => char.length >= 3 && char[0] === char[0].toUpperCase()));
|
|
149
|
+
const commonChars = [...chars1].filter((char) => chars2.has(char));
|
|
150
|
+
// Only create edge if there are 2+ common characters (meaningful relationship)
|
|
151
|
+
if (commonChars.length >= 2) {
|
|
152
|
+
// Weight based on number of common characters, but capped
|
|
153
|
+
const weight = Math.min(commonChars.length / Math.max(chars1.size, chars2.size), 0.8);
|
|
154
|
+
this.addEdge({
|
|
155
|
+
source: node1.id,
|
|
156
|
+
target: node2.id,
|
|
157
|
+
weight: weight,
|
|
158
|
+
type: 'character',
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
createTemporalEdges() {
|
|
165
|
+
const nodes = Array.from(this.nodes.values());
|
|
166
|
+
// Group by arc and episode
|
|
167
|
+
const episodeGroups = new Map();
|
|
168
|
+
nodes.forEach((node) => {
|
|
169
|
+
const key = `${node.metadata.arc}-${node.metadata.episode}`;
|
|
170
|
+
if (!episodeGroups.has(key)) {
|
|
171
|
+
episodeGroups.set(key, []);
|
|
172
|
+
}
|
|
173
|
+
episodeGroups.get(key)?.push(node);
|
|
174
|
+
});
|
|
175
|
+
// Create temporal edges within episodes
|
|
176
|
+
episodeGroups.forEach((episodeNodes) => {
|
|
177
|
+
episodeNodes.sort((a, b) => a.metadata.chapter - b.metadata.chapter);
|
|
178
|
+
for (let i = 0; i < episodeNodes.length - 1; i++) {
|
|
179
|
+
this.addEdge({
|
|
180
|
+
source: episodeNodes[i].id,
|
|
181
|
+
target: episodeNodes[i + 1].id,
|
|
182
|
+
weight: 0.8, // High weight for sequential chapters
|
|
183
|
+
type: 'temporal',
|
|
184
|
+
});
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
createLocationEdges() {
|
|
189
|
+
const nodes = Array.from(this.nodes.values());
|
|
190
|
+
for (let i = 0; i < nodes.length; i++) {
|
|
191
|
+
for (let j = i + 1; j < nodes.length; j++) {
|
|
192
|
+
const node1 = nodes[i];
|
|
193
|
+
const node2 = nodes[j];
|
|
194
|
+
if (node1.metadata.location &&
|
|
195
|
+
node2.metadata.location &&
|
|
196
|
+
node1.metadata.location === node2.metadata.location) {
|
|
197
|
+
this.addEdge({
|
|
198
|
+
source: node1.id,
|
|
199
|
+
target: node2.id,
|
|
200
|
+
weight: 0.6, // Moderate weight for same location
|
|
201
|
+
type: 'location',
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
// Weighted random neighbor selection for random walk
|
|
208
|
+
selectWeightedNeighbor(neighbors) {
|
|
209
|
+
const totalWeight = neighbors.reduce((sum, n) => sum + n.weight, 0);
|
|
210
|
+
let remainingWeight = Math.random() * totalWeight;
|
|
211
|
+
for (const neighbor of neighbors) {
|
|
212
|
+
remainingWeight -= neighbor.weight;
|
|
213
|
+
if (remainingWeight <= 0) {
|
|
214
|
+
return neighbor.id;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
return neighbors[neighbors.length - 1]?.id || '';
|
|
218
|
+
}
|
|
219
|
+
// Random walk with restart for node ranking
|
|
220
|
+
randomWalkWithRestart(startNodeId, steps, restartProb, allowedNodeIds) {
|
|
221
|
+
const visits = new Map();
|
|
222
|
+
let currentNodeId = startNodeId;
|
|
223
|
+
for (let step = 0; step < steps; step++) {
|
|
224
|
+
visits.set(currentNodeId, (visits.get(currentNodeId) || 0) + 1);
|
|
225
|
+
if (Math.random() < restartProb) {
|
|
226
|
+
currentNodeId = startNodeId;
|
|
227
|
+
continue;
|
|
228
|
+
}
|
|
229
|
+
let neighbors = this.getNeighbors(currentNodeId);
|
|
230
|
+
if (allowedNodeIds) {
|
|
231
|
+
neighbors = neighbors.filter((n) => allowedNodeIds.has(n.id));
|
|
232
|
+
}
|
|
233
|
+
if (neighbors.length === 0) {
|
|
234
|
+
currentNodeId = startNodeId;
|
|
235
|
+
continue;
|
|
236
|
+
}
|
|
237
|
+
currentNodeId = this.selectWeightedNeighbor(neighbors);
|
|
238
|
+
}
|
|
239
|
+
// Normalize visits
|
|
240
|
+
const totalVisits = Array.from(visits.values()).reduce((a, b) => a + b, 0);
|
|
241
|
+
const normalizedVisits = new Map();
|
|
242
|
+
for (const [nodeId, count] of visits) {
|
|
243
|
+
normalizedVisits.set(nodeId, count / totalVisits);
|
|
244
|
+
}
|
|
245
|
+
return normalizedVisits;
|
|
246
|
+
}
|
|
247
|
+
// Main query method with character and metadata filtering
|
|
248
|
+
query({ query, topK = 10, randomWalkSteps = 100, restartProb = 0.15, characters, allCharacters = false, arc, pov, }) {
|
|
249
|
+
if (!query || query.length !== this.dimension) {
|
|
250
|
+
throw new Error(`Query embedding must have dimension ${this.dimension}`);
|
|
251
|
+
}
|
|
252
|
+
// Filter nodes based on metadata
|
|
253
|
+
const nodesToSearch = Array.from(this.nodes.values()).filter((node) => {
|
|
254
|
+
// Arc filter
|
|
255
|
+
if (arc && node.metadata.arc !== arc)
|
|
256
|
+
return false;
|
|
257
|
+
// POV filter
|
|
258
|
+
if (pov && node.metadata.pov !== pov)
|
|
259
|
+
return false;
|
|
260
|
+
// Character filter
|
|
261
|
+
if (characters?.length) {
|
|
262
|
+
const nodeChars = new Set(node.metadata.characters);
|
|
263
|
+
if (allCharacters) {
|
|
264
|
+
// All characters must be present
|
|
265
|
+
return characters.every((char) => nodeChars.has(char));
|
|
266
|
+
}
|
|
267
|
+
else {
|
|
268
|
+
// At least one character must be present
|
|
269
|
+
return characters.some((char) => nodeChars.has(char));
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
return true;
|
|
273
|
+
});
|
|
274
|
+
if (nodesToSearch.length === 0) {
|
|
275
|
+
return [];
|
|
276
|
+
}
|
|
277
|
+
// Calculate initial similarities
|
|
278
|
+
const similarities = nodesToSearch
|
|
279
|
+
.filter((node) => !!node.embedding)
|
|
280
|
+
.map((node) => ({
|
|
281
|
+
node,
|
|
282
|
+
similarity: this.cosineSimilarity(query, node.embedding),
|
|
283
|
+
}));
|
|
284
|
+
similarities.sort((a, b) => b.similarity - a.similarity);
|
|
285
|
+
const topNodes = similarities.slice(0, topK);
|
|
286
|
+
// Re-rank using random walk
|
|
287
|
+
const allowedNodeIds = new Set(nodesToSearch.map((n) => n.id));
|
|
288
|
+
const rerankedNodes = new Map();
|
|
289
|
+
for (const { node, similarity } of topNodes) {
|
|
290
|
+
const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb, allowedNodeIds);
|
|
291
|
+
for (const [nodeId, walkScore] of walkScores) {
|
|
292
|
+
const graphNode = this.nodes.get(nodeId);
|
|
293
|
+
if (!graphNode)
|
|
294
|
+
continue;
|
|
295
|
+
const existingScore = rerankedNodes.get(nodeId)?.score || 0;
|
|
296
|
+
rerankedNodes.set(nodeId, {
|
|
297
|
+
node: graphNode,
|
|
298
|
+
score: existingScore + similarity * walkScore,
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
// Return final ranked results
|
|
303
|
+
return Array.from(rerankedNodes.values())
|
|
304
|
+
.sort((a, b) => b.score - a.score)
|
|
305
|
+
.slice(0, topK)
|
|
306
|
+
.map((item) => ({
|
|
307
|
+
...item.node,
|
|
308
|
+
score: item.score,
|
|
309
|
+
}));
|
|
310
|
+
}
|
|
311
|
+
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid RAG System
|
|
3
|
+
* Combines GraphRAG (primary) with sqlite-vec (fallback) for maximum reliability
|
|
4
|
+
*/
|
|
5
|
+
import type { DatabaseType } from '../database/index.js';
|
|
6
|
+
import { type EmbeddingConfig } from './embeddings.js';
|
|
7
|
+
/**
 * Settings bundle handed to the HybridRAG constructor.
 */
export interface HybridRAGConfig {
    /** Forwarded to the embedding-provider factory. */
    embedding: EmbeddingConfig;
    /** Tuning knobs for the in-memory graph. */
    graphRAG: {
        /** Passed to the GraphRAG constructor as its similarity threshold. */
        threshold: number;
        /** Passed to every GraphRAG query as the number of random-walk steps. */
        randomWalkSteps: number;
        /** Passed to every GraphRAG query as the restart probability. */
        restartProb: number;
    };
    /** Controls the GraphRAG attempt / vector fallback in `HybridRAG.search`. */
    fallback: {
        /**
         * Read by `HybridRAG.search`: the GraphRAG attempt is made only while
         * this is `true`; otherwise the vector store is queried directly.
         */
        enabled: boolean;
        /** Delay, in milliseconds, of the timer raced against the GraphRAG query. */
        timeout: number;
    };
}
|
|
19
|
+
/**
 * Optional filters and switches accepted by `HybridRAG.search`.
 */
export interface SearchOptions {
    /** Maximum number of results; `search` defaults it to 10. */
    topK?: number;
    /** Character names to filter on. */
    characters?: string[];
    /** When `true` every listed character must be present; `search` defaults it to `false`. */
    allCharacters?: boolean;
    /** Arc filter. */
    arc?: string;
    /** Point-of-view filter. */
    pov?: string;
    /** Set to `false` to skip the GraphRAG attempt; `search` defaults it to `true`. */
    useGraphRAG?: boolean;
}
|
|
27
|
+
/**
 * One hit returned by `HybridRAG.search`.
 */
export interface SearchResult {
    /** Identifier of the hit within the backend that produced it. */
    id: string;
    /** Identifier of the chapter the hit belongs to. */
    chapterId: string;
    /** Text of the hit. */
    content: string;
    /** Character names attached to the hit. */
    characters: string[];
    /** Remaining metadata, untyped. */
    metadata: Record<string, unknown>;
    /** Ranking score; its scale depends on `source`. */
    score: number;
    /** Which backend produced the hit. */
    source: 'graphrag' | 'vector';
}
|
|
36
|
+
/**
 * Search facade that pairs an in-memory GraphRAG index with a database-backed
 * vector store.
 */
export declare class HybridRAG {
    private graphRAG;
    private vectorStore;
    private embedder;
    private dbSync;
    private config;
    private isGraphReady;
    /**
     * @param db Database handle shared by the vector store and the sync helper.
     * @param config Embedding, graph and fallback settings.
     */
    constructor(db: DatabaseType, config: HybridRAGConfig);
    /**
     * Index chapters into both GraphRAG and the vector store.
     *
     * @param chapters Chapters to index; an empty array yields all-zero counts.
     * @returns How many graph nodes and vector rows were written, plus the
     *          counters reported by the database sync step.
     */
    indexChapters(chapters: Array<{
        id: string;
        content: string;
        characters: string[];
        metadata: {
            chapterId: string;
            arc: string;
            episode: number;
            chapter: number;
            pov: string;
            location?: string;
            timeline?: string;
            title?: string;
            summary?: string;
            filePath?: string;
            [key: string]: unknown;
        };
    }>): Promise<{
        graphNodes: number;
        vectorEmbeddings: number;
        dbSync: {
            timelines: number;
            arcs: number;
            episodes: number;
            chapters: number;
        };
    }>;
    /**
     * Search with the hybrid strategy: GraphRAG first, vector store as fallback.
     *
     * @param query Free-text query; it is embedded before searching.
     * @param options Optional filters and switches.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
    private searchWithGraphRAG;
    private searchWithVector;
    /**
     * List the characters that appear together with `character`.
     *
     * @returns The co-occurring names; empty when the lookup fails.
     */
    getCoOccurringCharacters(character: string): Promise<string[]>;
    /**
     * Report readiness and size of each component.
     */
    getStatus(): {
        graphRAG: {
            ready: boolean;
            nodes: number;
            edges: number;
        };
        vectorStore: {
            ready: boolean;
        };
        embedder: {
            name: string;
            dimension: number;
        };
    };
    /**
     * Reset the in-memory graph and mark it as not ready. The vector store is
     * not touched by this call.
     */
    clear(): Promise<void>;
}
|
|
106
|
+
/**
|
|
107
|
+
* Default configuration for hybrid RAG
|
|
108
|
+
*/
|
|
109
|
+
// Values assigned in hybrid-rag.js: 'bge-base' embeddings (batch 32, 512 max tokens),
// graph threshold 0.8, 100 random-walk steps, restart probability 0.15,
// fallback enabled with a 5000 ms timeout.
export declare const DEFAULT_HYBRID_CONFIG: HybridRAGConfig;
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid RAG System
|
|
3
|
+
* Combines GraphRAG (primary) with sqlite-vec (fallback) for maximum reliability
|
|
4
|
+
*/
|
|
5
|
+
import { VectorStore } from '../database/vector.js';
|
|
6
|
+
import { DatabaseSync } from './database-sync.js';
|
|
7
|
+
import { createEmbeddingProvider, } from './embeddings.js';
|
|
8
|
+
import { GraphRAG } from './graph-rag.js';
|
|
9
|
+
/**
 * Search facade that pairs an in-memory GraphRAG index (tried first) with a
 * database-backed vector store (used when GraphRAG is skipped, fails, times
 * out or returns nothing).
 */
export class HybridRAG {
    /** In-memory graph index. */
    graphRAG;
    /** Database-backed vector store. */
    vectorStore;
    /** Embedding provider created from `config.embedding`. */
    embedder;
    /** Helper that keeps timeline/arc/episode/chapter rows in sync. */
    dbSync;
    /** The configuration object passed to the constructor. */
    config;
    /** True only while the graph holds a completely built index. */
    isGraphReady = false;
    /**
     * @param db Database handle shared by the vector store and the sync helper.
     * @param config Embedding, graph and fallback settings.
     */
    constructor(db, config) {
        this.config = config;
        this.embedder = createEmbeddingProvider(config.embedding);
        this.graphRAG = new GraphRAG(this.embedder.dimension, config.graphRAG.threshold);
        this.vectorStore = new VectorStore(db);
        this.dbSync = new DatabaseSync(db);
    }
    /**
     * Index chapters into both GraphRAG and vector store.
     *
     * The graph is rebuilt from scratch on every call; vector rows are inserted
     * one chapter at a time, keyed by the id looked up through the sync helper.
     * @param chapters Chapters with `content`, `characters` and `metadata`.
     * @returns Counts of graph nodes, vector rows and the sync statistics.
     * @throws Re-throws any error after logging it.
     */
    async indexChapters(chapters) {
        if (chapters.length === 0) {
            return {
                graphNodes: 0,
                vectorEmbeddings: 0,
                dbSync: { timelines: 0, arcs: 0, episodes: 0, chapters: 0 },
            };
        }
        try {
            // 1. Sync database first - ensure all timeline/arc/episode/chapter records exist
            const chapterRecords = chapters.map((ch) => ({
                chapterId: ch.metadata.chapterId,
                timeline: ch.metadata.timeline || 'default',
                arc: ch.metadata.arc,
                episode: ch.metadata.episode,
                chapter: ch.metadata.chapter,
                pov: ch.metadata.pov,
                title: ch.metadata.title,
                summary: ch.metadata.summary,
                location: ch.metadata.location,
                filePath: ch.metadata.filePath,
            }));
            const syncStats = await this.dbSync.syncChapters(chapterRecords);
            // 2. Generate embeddings for all chapters
            const texts = chapters.map((ch) => ch.content);
            const embeddings = await this.embedder.embed(texts);
            // 3. Index into GraphRAG
            const chunks = chapters.map((ch) => ({
                text: ch.content,
                metadata: {
                    ...ch.metadata,
                    characters: ch.characters,
                    timeline: ch.metadata.timeline || 'default',
                },
            }));
            const graphEmbeddings = embeddings.map((emb) => ({ vector: emb }));
            // Mark the graph unavailable while it is rebuilt: if createGraph throws
            // after clear(), searches must not run against an empty/partial graph.
            this.isGraphReady = false;
            this.graphRAG.clear();
            this.graphRAG.createGraph(chunks, graphEmbeddings);
            this.isGraphReady = true;
            // 4. Index into vector store (fallback) - get real chapter IDs from database
            let vectorCount = 0;
            for (let i = 0; i < chapters.length; i++) {
                const ch = chapters[i];
                const realChapterId = await this.dbSync.getChapterId(ch.metadata.timeline || 'default', ch.metadata.arc, ch.metadata.episode, ch.metadata.chapter);
                if (realChapterId) {
                    await this.vectorStore.insert(realChapterId, ch.content, new Float32Array(embeddings[i]), ch.characters, ch.metadata);
                    vectorCount++;
                }
                else {
                    console.warn(`Could not find database ID for chapter ${ch.metadata.chapterId}`);
                }
            }
            return {
                graphNodes: chunks.length,
                vectorEmbeddings: vectorCount,
                dbSync: syncStats,
            };
        }
        catch (error) {
            console.error('Failed to index chapters:', error);
            throw error;
        }
    }
    /**
     * Search using hybrid approach: GraphRAG first, vector fallback.
     *
     * @param query Free-text query; it is embedded before searching.
     * @param options Optional `topK` (default 10), `characters`,
     *        `allCharacters` (default false), `arc`, `pov` and `useGraphRAG`
     *        (default true).
     * @returns GraphRAG hits when that path yields at least one result,
     *          otherwise the vector-store hits.
     * @throws Re-throws embedding errors after logging them.
     */
    async search(query, options = {}) {
        const { topK = 10, characters, allCharacters = false, arc, pov, useGraphRAG = true } = options;
        try {
            // Generate query embedding
            const queryEmbedding = await this.embedder.embedSingle(query);
            // Try GraphRAG first (if enabled and ready)
            // NOTE(review): the GraphRAG attempt is gated on `fallback.enabled`, so
            // disabling the fallback skips GraphRAG rather than the vector search -
            // confirm this is the intended meaning of the flag.
            if (useGraphRAG && this.isGraphReady && this.config.fallback.enabled) {
                let timeoutHandle;
                try {
                    // NOTE(review): GraphRAG.query in graph-rag.js is synchronous, so
                    // the graph promise appears to settle before the timer can fire -
                    // confirm whether this timeout can ever take effect.
                    const graphResults = await Promise.race([
                        this.searchWithGraphRAG(queryEmbedding, {
                            topK,
                            characters,
                            allCharacters,
                            arc,
                            pov,
                        }),
                        new Promise((_, reject) => {
                            timeoutHandle = setTimeout(() => reject(new Error('GraphRAG timeout')), this.config.fallback.timeout);
                        }),
                    ]);
                    if (graphResults.length > 0) {
                        return graphResults.map((result) => ({
                            ...result,
                            source: 'graphrag',
                        }));
                    }
                }
                catch (error) {
                    console.warn('GraphRAG failed, falling back to vector search:', error);
                }
                finally {
                    // Without this the timer stays pending for `timeout` ms after
                    // every search and keeps the process alive.
                    clearTimeout(timeoutHandle);
                }
            }
            // Fallback to vector search
            return await this.searchWithVector(queryEmbedding, {
                topK,
                characters,
                allCharacters,
                arc,
                pov,
            });
        }
        catch (error) {
            console.error('Hybrid search failed:', error);
            throw error;
        }
    }
    /**
     * Run the graph query and map its ranked nodes to search results.
     * @param queryEmbedding Embedding of the query text.
     * @param options `topK`, `characters`, `allCharacters`, `arc`, `pov`.
     */
    async searchWithGraphRAG(queryEmbedding, options) {
        const results = this.graphRAG.query({
            query: queryEmbedding,
            topK: options.topK,
            randomWalkSteps: this.config.graphRAG.randomWalkSteps,
            restartProb: this.config.graphRAG.restartProb,
            characters: options.characters,
            allCharacters: options.allCharacters,
            arc: options.arc,
            pov: options.pov,
        });
        return results.map((result) => ({
            id: result.id,
            chapterId: result.metadata.chapterId,
            content: result.content,
            characters: result.metadata.characters,
            metadata: result.metadata,
            score: result.score,
            source: 'graphrag',
        }));
    }
    /**
     * Query the vector store and map its rows to search results.
     * Failures are logged and reported as an empty result list.
     * @param queryEmbedding Embedding of the query text.
     * @param options `topK`, `characters`, `allCharacters`, `arc`, `pov`.
     */
    async searchWithVector(queryEmbedding, options) {
        try {
            // NOTE(review): `arc` and `pov` are accepted but not forwarded here, so
            // fallback results are not filtered by them - confirm against the
            // VectorStore.search signature.
            const results = await this.vectorStore.search(new Float32Array(queryEmbedding), {
                characters: options.characters,
                allCharacters: options.allCharacters,
                limit: options.topK || 10,
            });
            return results.map((result) => ({
                id: result.id,
                chapterId: result.chapterId,
                content: result.content,
                characters: result.characters,
                metadata: result.metadata,
                score: result.similarity,
                source: 'vector',
            }));
        }
        catch (error) {
            console.warn('Vector search failed:', error);
            return [];
        }
    }
    /**
     * Get characters that co-occur with a specific character.
     *
     * Scans the graph nodes when the graph is ready; if that finds nothing (or
     * the graph is not ready) the vector store is asked instead.
     * @returns Sorted names from the graph, or the vector store's answer;
     *          an empty array when the lookup throws.
     */
    async getCoOccurringCharacters(character) {
        try {
            // Try GraphRAG approach first
            if (this.isGraphReady) {
                const coOccurring = new Set();
                for (const node of this.graphRAG.getNodes()) {
                    // A node without a character list contributes nothing instead of
                    // aborting the whole lookup.
                    const names = node.metadata?.characters || [];
                    if (!names.includes(character)) {
                        continue;
                    }
                    for (const name of names) {
                        if (name !== character) {
                            coOccurring.add(name);
                        }
                    }
                }
                if (coOccurring.size > 0) {
                    return Array.from(coOccurring).sort();
                }
            }
            // Fallback to vector store
            return await this.vectorStore.getCharacters(character);
        }
        catch (error) {
            console.error('Failed to get co-occurring characters:', error);
            return [];
        }
    }
    /**
     * Get system status and statistics.
     * @returns Graph readiness and node/edge counts, a constant `ready: true`
     *          for the vector store, and the embedder's name and dimension.
     */
    getStatus() {
        return {
            graphRAG: {
                ready: this.isGraphReady,
                nodes: this.graphRAG.getNodes().length,
                edges: this.graphRAG.getEdges().length,
            },
            vectorStore: {
                ready: true, // Vector store is always ready if DB is available
            },
            embedder: {
                name: this.embedder.name,
                dimension: this.embedder.dimension,
            },
        };
    }
    /**
     * Clear the in-memory graph index and mark it as not ready.
     */
    async clear() {
        this.graphRAG.clear();
        this.isGraphReady = false;
        // Note: Vector store clearing would require database operations
        // This is handled at the database level
    }
}
|
|
237
|
+
/**
|
|
238
|
+
* Default configuration for hybrid RAG
|
|
239
|
+
*/
|
|
240
|
+
export const DEFAULT_HYBRID_CONFIG = {
    // Embedding provider selection and batching limits.
    embedding: { provider: 'bge-base', batchSize: 32, maxTokens: 512 },
    // Graph construction and random-walk parameters.
    graphRAG: {
        // Higher threshold for more selective connections
        threshold: 0.8,
        randomWalkSteps: 100,
        restartProb: 0.15,
    },
    // Timeout is expressed in milliseconds (5 seconds).
    fallback: { enabled: true, timeout: 5 * 1000 },
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RAG Module - GraphRAG + Vector Search for Echoes
|
|
3
|
+
*
|
|
4
|
+
* Provides hybrid search capabilities combining:
|
|
5
|
+
* - GraphRAG: Semantic relationships, character connections, temporal sequences
|
|
6
|
+
* - Vector Search: Fast similarity search with sqlite-vec fallback
|
|
7
|
+
*/
|
|
8
|
+
export { BGEBaseEmbedding, batchArray, cosineSimilarity, createEmbeddingProvider, E5SmallEmbedding, type EmbeddingConfig, type EmbeddingProvider, GeminiEmbedding, normalizeEmbedding, } from './embeddings.js';
|
|
9
|
+
export { type GraphChunk, type GraphEdge, type GraphEmbedding, type GraphNode, GraphRAG, type RankedNode, type SupportedEdgeType, } from './graph-rag.js';
|
|
10
|
+
export { DEFAULT_HYBRID_CONFIG, HybridRAG, type HybridRAGConfig, type SearchOptions, type SearchResult, } from './hybrid-rag.js';
|
|
11
|
+
/**
|
|
12
|
+
* Quick setup function for common use cases
|
|
13
|
+
*/
|
|
14
|
+
import type { DatabaseType } from '../database/index.js';
|
|
15
|
+
import { HybridRAG, type HybridRAGConfig } from './hybrid-rag.js';
|
|
16
|
+
// Builds a HybridRAG for `db`. `config` is optional and may be partial;
// presumably the omitted parts fall back to DEFAULT_HYBRID_CONFIG - the
// implementation is not part of this declaration file, so verify there.
export declare function createHybridRAG(db: DatabaseType, config?: Partial<HybridRAGConfig>): HybridRAG;
|