@prih/mcp-graph-memory 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +512 -0
- package/dist/api/index.js +473 -0
- package/dist/api/rest/code.js +78 -0
- package/dist/api/rest/docs.js +80 -0
- package/dist/api/rest/files.js +64 -0
- package/dist/api/rest/graph.js +56 -0
- package/dist/api/rest/index.js +117 -0
- package/dist/api/rest/knowledge.js +238 -0
- package/dist/api/rest/skills.js +284 -0
- package/dist/api/rest/tasks.js +272 -0
- package/dist/api/rest/tools.js +126 -0
- package/dist/api/rest/validation.js +191 -0
- package/dist/api/rest/websocket.js +65 -0
- package/dist/api/tools/code/get-file-symbols.js +30 -0
- package/dist/api/tools/code/get-symbol.js +22 -0
- package/dist/api/tools/code/list-files.js +18 -0
- package/dist/api/tools/code/search-code.js +27 -0
- package/dist/api/tools/code/search-files.js +22 -0
- package/dist/api/tools/context/get-context.js +19 -0
- package/dist/api/tools/docs/cross-references.js +76 -0
- package/dist/api/tools/docs/explain-symbol.js +55 -0
- package/dist/api/tools/docs/find-examples.js +52 -0
- package/dist/api/tools/docs/get-node.js +24 -0
- package/dist/api/tools/docs/get-toc.js +22 -0
- package/dist/api/tools/docs/list-snippets.js +46 -0
- package/dist/api/tools/docs/list-topics.js +18 -0
- package/dist/api/tools/docs/search-files.js +22 -0
- package/dist/api/tools/docs/search-snippets.js +43 -0
- package/dist/api/tools/docs/search.js +27 -0
- package/dist/api/tools/file-index/get-file-info.js +21 -0
- package/dist/api/tools/file-index/list-all-files.js +28 -0
- package/dist/api/tools/file-index/search-all-files.js +24 -0
- package/dist/api/tools/knowledge/add-attachment.js +31 -0
- package/dist/api/tools/knowledge/create-note.js +20 -0
- package/dist/api/tools/knowledge/create-relation.js +29 -0
- package/dist/api/tools/knowledge/delete-note.js +19 -0
- package/dist/api/tools/knowledge/delete-relation.js +23 -0
- package/dist/api/tools/knowledge/find-linked-notes.js +25 -0
- package/dist/api/tools/knowledge/get-note.js +20 -0
- package/dist/api/tools/knowledge/list-notes.js +18 -0
- package/dist/api/tools/knowledge/list-relations.js +17 -0
- package/dist/api/tools/knowledge/remove-attachment.js +19 -0
- package/dist/api/tools/knowledge/search-notes.js +25 -0
- package/dist/api/tools/knowledge/update-note.js +34 -0
- package/dist/api/tools/skills/add-attachment.js +31 -0
- package/dist/api/tools/skills/bump-usage.js +19 -0
- package/dist/api/tools/skills/create-skill-link.js +25 -0
- package/dist/api/tools/skills/create-skill.js +26 -0
- package/dist/api/tools/skills/delete-skill-link.js +23 -0
- package/dist/api/tools/skills/delete-skill.js +20 -0
- package/dist/api/tools/skills/find-linked-skills.js +25 -0
- package/dist/api/tools/skills/get-skill.js +21 -0
- package/dist/api/tools/skills/link-skill.js +23 -0
- package/dist/api/tools/skills/list-skills.js +20 -0
- package/dist/api/tools/skills/recall-skills.js +18 -0
- package/dist/api/tools/skills/remove-attachment.js +19 -0
- package/dist/api/tools/skills/search-skills.js +25 -0
- package/dist/api/tools/skills/update-skill.js +58 -0
- package/dist/api/tools/tasks/add-attachment.js +31 -0
- package/dist/api/tools/tasks/create-task-link.js +25 -0
- package/dist/api/tools/tasks/create-task.js +25 -0
- package/dist/api/tools/tasks/delete-task-link.js +23 -0
- package/dist/api/tools/tasks/delete-task.js +20 -0
- package/dist/api/tools/tasks/find-linked-tasks.js +25 -0
- package/dist/api/tools/tasks/get-task.js +20 -0
- package/dist/api/tools/tasks/link-task.js +23 -0
- package/dist/api/tools/tasks/list-tasks.js +24 -0
- package/dist/api/tools/tasks/move-task.js +38 -0
- package/dist/api/tools/tasks/remove-attachment.js +19 -0
- package/dist/api/tools/tasks/search-tasks.js +25 -0
- package/dist/api/tools/tasks/update-task.js +55 -0
- package/dist/cli/index.js +451 -0
- package/dist/cli/indexer.js +277 -0
- package/dist/graphs/attachment-types.js +74 -0
- package/dist/graphs/code-types.js +10 -0
- package/dist/graphs/code.js +172 -0
- package/dist/graphs/docs.js +198 -0
- package/dist/graphs/file-index-types.js +10 -0
- package/dist/graphs/file-index.js +310 -0
- package/dist/graphs/file-lang.js +119 -0
- package/dist/graphs/knowledge-types.js +32 -0
- package/dist/graphs/knowledge.js +764 -0
- package/dist/graphs/manager-types.js +87 -0
- package/dist/graphs/skill-types.js +10 -0
- package/dist/graphs/skill.js +1013 -0
- package/dist/graphs/task-types.js +17 -0
- package/dist/graphs/task.js +960 -0
- package/dist/lib/embedder.js +101 -0
- package/dist/lib/events-log.js +400 -0
- package/dist/lib/file-import.js +327 -0
- package/dist/lib/file-mirror.js +446 -0
- package/dist/lib/frontmatter.js +17 -0
- package/dist/lib/mirror-watcher.js +637 -0
- package/dist/lib/multi-config.js +254 -0
- package/dist/lib/parsers/code.js +246 -0
- package/dist/lib/parsers/codeblock.js +66 -0
- package/dist/lib/parsers/docs.js +196 -0
- package/dist/lib/project-manager.js +418 -0
- package/dist/lib/promise-queue.js +22 -0
- package/dist/lib/search/bm25.js +167 -0
- package/dist/lib/search/code.js +103 -0
- package/dist/lib/search/docs.js +108 -0
- package/dist/lib/search/file-index.js +31 -0
- package/dist/lib/search/files.js +61 -0
- package/dist/lib/search/knowledge.js +101 -0
- package/dist/lib/search/skills.js +104 -0
- package/dist/lib/search/tasks.js +103 -0
- package/dist/lib/watcher.js +67 -0
- package/package.json +83 -0
- package/ui/README.md +54 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* BM25 keyword search index with incremental updates.
|
|
4
|
+
* Used alongside vector cosine similarity for hybrid search.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.BM25Index = void 0;
|
|
8
|
+
exports.tokenize = tokenize;
|
|
9
|
+
exports.rrfFuse = rrfFuse;
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Tokenizer
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Breaks camelCase/PascalCase words apart, then splits on any run of
 * non-alphanumeric characters and lowercases the pieces.
 * "getUserById" → ["get", "user", "by", "id"]
 * "JWT tokens" → ["jwt", "tokens"]
 *
 * @param {string} text - input text (null/undefined/empty → [])
 * @returns {string[]} lowercase tokens, empty strings removed
 */
function tokenize(text) {
    if (!text) {
        return [];
    }
    // Insert spaces at camelCase and acronym boundaries before splitting:
    // "camelCase" → "camel Case", "XMLParser" → "XML Parser".
    const spaced = text
        .replace(/([a-z])([A-Z])/g, '$1 $2')
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
    const tokens = [];
    for (const piece of spaced.split(/[^a-zA-Z0-9]+/)) {
        if (piece.length > 0) {
            tokens.push(piece.toLowerCase());
        }
    }
    return tokens;
}
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// BM25 Index
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Incremental BM25 keyword index.
 *
 * Documents are added and removed one at a time; term statistics (per-term
 * document frequency, total token length) are maintained incrementally so
 * the index never needs a full rebuild. Scoring uses the standard Okapi
 * BM25 formula with tunable k1 (term-frequency saturation) and b
 * (document-length normalization).
 */
class BM25Index {
    /**
     * @param textExtractor - maps a document's attributes to its searchable text
     * @param opts - optional { k1, b } BM25 parameters (defaults 1.2 / 0.75)
     */
    constructor(textExtractor, opts) {
        this.textExtractor = textExtractor;
        this.k1 = opts?.k1 ?? 1.2;
        this.b = opts?.b ?? 0.75;
        // id → { termFreqs: Map<term, count>, length: token count }
        this.docs = new Map();
        // term → number of documents containing it
        this.df = new Map();
        // Sum of all document token lengths (for average-length computation).
        this.totalLength = 0;
    }
    /** Number of indexed documents. */
    get size() {
        return this.docs.size;
    }
    /** Whether a document with this id is currently indexed. */
    hasDocument(id) {
        return this.docs.has(id);
    }
    /** Index (or re-index) a document under `id`. */
    addDocument(id, attrs) {
        // Drop any previous version first; removeDocument is a no-op if absent.
        this.removeDocument(id);
        const tokens = tokenize(this.textExtractor(attrs));
        const termFreqs = new Map();
        for (const token of tokens) {
            termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
        }
        // Each unique term in this document bumps its document frequency once.
        for (const term of termFreqs.keys()) {
            this.df.set(term, (this.df.get(term) ?? 0) + 1);
        }
        this.docs.set(id, { termFreqs, length: tokens.length });
        this.totalLength += tokens.length;
    }
    /** Remove a document and roll back its term statistics. No-op if absent. */
    removeDocument(id) {
        const doc = this.docs.get(id);
        if (doc === undefined) {
            return;
        }
        for (const term of doc.termFreqs.keys()) {
            const count = this.df.get(term) ?? 0;
            if (count > 1) {
                this.df.set(term, count - 1);
            }
            else {
                // Last document carrying this term: drop the entry entirely.
                this.df.delete(term);
            }
        }
        this.totalLength -= doc.length;
        this.docs.delete(id);
    }
    /** Re-index a document (remove + add). */
    updateDocument(id, attrs) {
        this.removeDocument(id);
        this.addDocument(id, attrs);
    }
    /** Drop all documents and statistics. */
    clear() {
        this.docs.clear();
        this.df.clear();
        this.totalLength = 0;
    }
    /**
     * Compute BM25 scores for all documents matching the query.
     * Returns only documents with score > 0 (at least one query term matches).
     *
     * @param {string} query - free-text query, tokenized the same way as documents
     * @returns {Map} document id → BM25 score
     */
    score(query) {
        const results = new Map();
        const terms = tokenize(query);
        const N = this.docs.size;
        if (terms.length === 0 || N === 0) {
            return results;
        }
        const avgDl = this.totalLength / N;
        for (const [id, doc] of this.docs) {
            let total = 0;
            for (const term of terms) {
                const tf = doc.termFreqs.get(term) ?? 0;
                if (tf === 0)
                    continue;
                const docFreq = this.df.get(term) ?? 0;
                // IDF: log((N - df + 0.5) / (df + 0.5) + 1)
                const idf = Math.log((N - docFreq + 0.5) / (docFreq + 0.5) + 1);
                // TF saturation: (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * dl/avgdl))
                const lenNorm = 1 - this.b + (this.b * doc.length) / avgDl;
                total += idf * ((tf * (this.k1 + 1)) / (tf + this.k1 * lenNorm));
            }
            if (total > 0) {
                results.set(id, total);
            }
        }
        return results;
    }
}
|
|
127
|
+
exports.BM25Index = BM25Index;
|
|
128
|
+
// ---------------------------------------------------------------------------
// Reciprocal Rank Fusion
// ---------------------------------------------------------------------------
/**
 * Fuse two ranked score lists via Reciprocal Rank Fusion (RRF).
 *
 * Each document's fused score is the sum of 1/(k + rank) over the lists in
 * which it appears, where ranks are 1-based and ordered by descending score.
 * A document present in only one list contributes a single 1/(k + rank) term.
 *
 * @param {Map} vectorScores - document id → vector-similarity score
 * @param {Map} bm25Scores - document id → BM25 score
 * @param {number} k - RRF damping constant (default 60)
 * @returns {Map} document id → fused score
 */
function rrfFuse(vectorScores, bm25Scores, k = 60) {
    const vectorRanks = buildRankMap(vectorScores);
    const bm25Ranks = buildRankMap(bm25Scores);
    // Union of ids across both lists.
    const ids = new Set([...vectorScores.keys(), ...bm25Scores.keys()]);
    const fused = new Map();
    for (const id of ids) {
        let total = 0;
        const vRank = vectorRanks.get(id);
        if (vRank != null) {
            total += 1 / (k + vRank);
        }
        const bRank = bm25Ranks.get(id);
        if (bRank != null) {
            total += 1 / (k + bRank);
        }
        fused.set(id, total);
    }
    return fused;
}
/**
 * Map each id to its 1-based rank when entries are sorted by descending score.
 */
function buildRankMap(scores) {
    const ranks = new Map();
    let rank = 1;
    for (const [id] of [...scores.entries()].sort((x, y) => y[1] - x[1])) {
        ranks.set(id, rank);
        rank += 1;
    }
    return ranks;
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchCode = searchCode;
|
|
4
|
+
const embedder_1 = require("../../lib/embedder");
|
|
5
|
+
const bm25_1 = require("../../lib/search/bm25");
|
|
6
|
+
/**
 * Semantic search over the code graph.
 *
 * 1. Score every node by cosine similarity to the query embedding.
 * 2. Filter seeds below `minScore`, take top `topK`.
 * 3. BFS expansion via graph edges up to `bfsDepth` hops with score decay.
 * 4. De-duplicate, re-filter, sort, cap at `maxResults`.
 *
 * @param graph - code-symbol graph; must provide forEachNode, outNeighbors,
 *   inNeighbors and getNodeAttributes — presumably a graphology Graph, TODO confirm
 * @param queryEmbedding - embedding vector of the query text
 * @param options - { topK, bfsDepth, maxResults, minScore, bfsDecay,
 *   queryText, bm25Index, searchMode: 'hybrid'|'vector'|'keyword', rrfK }
 * @returns matches { id, fileId, kind, name, signature, docComment,
 *   startLine, endLine, score }, sorted by descending score
 */
function searchCode(graph, queryEmbedding, options = {}) {
    const { topK = 5, bfsDepth = 1, maxResults = 20, minScore = 0.5, bfsDecay = 0.8, queryText, bm25Index, searchMode = 'hybrid', rrfK = 60 } = options;
    // 'keyword' mode disables vector scoring; BM25 participates only when
    // both the raw query text and a BM25 index were supplied.
    const useVector = searchMode !== 'keyword';
    const useBm25 = searchMode !== 'vector' && !!queryText && !!bm25Index;
    // --- 1. Score all nodes ---
    const scored = [];
    if (useVector) {
        graph.forEachNode((id, attrs) => {
            // Nodes without an embedding cannot be vector-scored.
            if (attrs.embedding.length === 0)
                return;
            scored.push({ id, score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.embedding) });
        });
    }
    if (useBm25) {
        const bm25Scores = bm25Index.score(queryText);
        // Only vector hits with positive similarity take part in fusion.
        const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
        if (positiveScored.length > 0) {
            // Hybrid: fuse the two ranked lists with Reciprocal Rank Fusion.
            const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
            const fused = (0, bm25_1.rrfFuse)(vectorMap, bm25Scores, rrfK);
            scored.length = 0;
            for (const [id, score] of fused)
                scored.push({ id, score });
        }
        else {
            // Keyword-only mode, or vector search found nothing positive:
            // fall back to the raw BM25 scores.
            scored.length = 0;
            for (const [id, score] of bm25Scores)
                scored.push({ id, score });
        }
        // Normalize scores to 0–1 so minScore threshold works uniformly
        // across the cosine, BM25 and RRF score scales.
        const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
        if (maxScore > 0) {
            for (const s of scored)
                s.score /= maxScore;
        }
    }
    if (scored.length === 0)
        return [];
    scored.sort((a, b) => b.score - a.score);
    // --- 2. Filter seeds ---
    const minS = minScore;
    const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
    if (seeds.length === 0)
        return [];
    // --- 3. BFS expansion ---
    // scoreMap keeps the best score each node has received across all runs.
    const scoreMap = new Map(seeds.map(s => [s.id, s.score]));
    function bfs(startId, seedScore) {
        const queue = [
            { id: startId, depth: 0, score: seedScore },
        ];
        // Visited set is per-seed, so a later higher-scoring seed can still
        // improve a node that an earlier BFS run already reached.
        const visited = new Set();
        while (queue.length > 0) {
            const item = queue.shift();
            if (visited.has(item.id))
                continue;
            visited.add(item.id);
            const prev = scoreMap.get(item.id) ?? -Infinity;
            if (item.score > prev)
                scoreMap.set(item.id, item.score);
            if (item.depth >= bfsDepth)
                continue;
            // Prune: one more decayed hop could not pass the threshold.
            if (item.score * bfsDecay < minS)
                continue;
            const nextScore = item.score * bfsDecay;
            // Traverse both edge directions so related symbols on either side
            // of an edge are pulled in.
            graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
            graph.inNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
        }
    }
    for (const seed of seeds) {
        bfs(seed.id, seed.score);
    }
    // --- 4. Build results ---
    return [...scoreMap.entries()]
        .filter(([, score]) => score >= minS)
        .map(([id, score]) => {
        const attrs = graph.getNodeAttributes(id);
        return {
            id,
            fileId: attrs.fileId,
            kind: attrs.kind,
            name: attrs.name,
            signature: attrs.signature,
            docComment: attrs.docComment,
            startLine: attrs.startLine,
            endLine: attrs.endLine,
            score,
        };
    })
        .sort((a, b) => b.score - a.score)
        .slice(0, maxResults);
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.search = search;
|
|
4
|
+
const embedder_1 = require("../../lib/embedder");
|
|
5
|
+
const bm25_1 = require("../../lib/search/bm25");
|
|
6
|
+
/**
 * Semantic search over the graph.
 *
 * 1. Score every node by cosine similarity to the query embedding.
 * 2. Discard seeds below `minScore` (default 0.5 — see the destructuring below).
 * 3. Take the top `topK` remaining seeds.
 * 4. BFS from each seed up to `bfsDepth` hops; BFS nodes inherit the seed's
 *    score multiplied by `bfsDecay` per hop (default 0.8), so deeper nodes
 *    rank lower and are filtered by `minScore` too.
 * 5. De-duplicate and return results sorted by score, capped at `maxResults`.
 *
 * @param graph - docs graph of section chunks; must provide forEachNode,
 *   outNeighbors, inNeighbors and getNodeAttributes — presumably graphology, TODO confirm
 * @param queryEmbedding - embedding vector of the query text
 * @param options - { topK, bfsDepth, maxResults, minScore, bfsDecay,
 *   queryText, bm25Index, searchMode: 'hybrid'|'vector'|'keyword', rrfK }
 * @returns matches { id, fileId, title, content, level, score }, best first
 */
function search(graph, queryEmbedding, options = {}) {
    const { topK = 5, bfsDepth = 1, maxResults = 20, minScore = 0.5, bfsDecay = 0.8, queryText, bm25Index, searchMode = 'hybrid', rrfK = 60 } = options;
    // 'keyword' mode disables vector scoring; BM25 participates only when
    // both the raw query text and a BM25 index were supplied.
    const useVector = searchMode !== 'keyword';
    const useBm25 = searchMode !== 'vector' && !!queryText && !!bm25Index;
    // --- 1. Score all nodes ---
    const scored = [];
    if (useVector) {
        graph.forEachNode((id, attrs) => {
            // Nodes without an embedding cannot be vector-scored.
            if (attrs.embedding.length === 0)
                return;
            scored.push({ id, score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.embedding) });
        });
    }
    if (useBm25) {
        const bm25Scores = bm25Index.score(queryText);
        // Only include vector results with positive scores for fusion
        const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
        if (positiveScored.length > 0) {
            // RRF fusion
            const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
            const fused = (0, bm25_1.rrfFuse)(vectorMap, bm25Scores, rrfK);
            scored.length = 0;
            for (const [id, score] of fused)
                scored.push({ id, score });
        }
        else {
            // BM25-only or vector returned nothing — use BM25 as fallback
            scored.length = 0;
            for (const [id, score] of bm25Scores)
                scored.push({ id, score });
        }
        // Normalize scores to 0–1 so minScore threshold works uniformly
        // across the cosine, BM25 and RRF score scales.
        const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
        if (maxScore > 0) {
            for (const s of scored)
                s.score /= maxScore;
        }
    }
    if (scored.length === 0)
        return [];
    scored.sort((a, b) => b.score - a.score);
    // --- 2. Filter seeds by minScore, then take topK ---
    const minS = minScore;
    const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
    if (seeds.length === 0)
        return [];
    // --- 3. BFS expansion with score decay ---
    // scoreMap holds the best score seen for each node
    const scoreMap = new Map(seeds.map(s => [s.id, s.score]));
    function bfs(startId, seedScore) {
        const queue = [
            { id: startId, depth: 0, score: seedScore },
        ];
        // Visited set is per-seed, so a later higher-scoring seed can still
        // improve a node that an earlier BFS run already reached.
        const localVisited = new Set();
        while (queue.length > 0) {
            const item = queue.shift();
            if (localVisited.has(item.id))
                continue;
            localVisited.add(item.id);
            // Keep the best score this node has received across all BFS runs
            const prev = scoreMap.get(item.id) ?? -Infinity;
            if (item.score > prev)
                scoreMap.set(item.id, item.score);
            if (item.depth >= bfsDepth)
                continue;
            if (item.score * bfsDecay < minS)
                continue; // prune: deeper hops won't pass threshold
            const nextScore = item.score * bfsDecay;
            // Traverse both edge directions so related chunks on either side
            // of a link are pulled in.
            graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
            graph.inNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
        }
    }
    for (const seed of seeds) {
        bfs(seed.id, seed.score);
    }
    // --- 4. Build results from scoreMap, apply minScore filter, sort, cap ---
    return [...scoreMap.entries()]
        .filter(([, score]) => score >= minS)
        .map(([id, score]) => {
        const attrs = graph.getNodeAttributes(id);
        return {
            id,
            fileId: attrs.fileId,
            title: attrs.title,
            content: attrs.content,
            level: attrs.level,
            score,
        };
    })
        .sort((a, b) => b.score - a.score)
        .slice(0, maxResults);
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchFileIndex = searchFileIndex;
|
|
4
|
+
const embedder_1 = require("../../lib/embedder");
|
|
5
|
+
/**
 * Rank file nodes by cosine similarity between the query embedding and each
 * file's path embedding. Directory nodes and files without an embedding are
 * skipped; there is no graph expansion — pure similarity ranking.
 *
 * @param graph - file-index graph; must provide forEachNode
 * @param queryEmbedding - embedding vector of the query
 * @param options - { topK = 10, minScore = 0.3 }
 * @returns up to topK matches { filePath, fileName, extension, language,
 *   size, score }, sorted by descending score
 */
function searchFileIndex(graph, queryEmbedding, options = {}) {
    const { topK = 10, minScore = 0.3 } = options;
    const matches = [];
    graph.forEachNode((_, attrs) => {
        // Only file nodes carry a usable path embedding.
        if (attrs.kind !== 'file')
            return;
        if (attrs.embedding.length === 0)
            return;
        const similarity = (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.embedding);
        if (similarity >= minScore) {
            matches.push({
                filePath: attrs.filePath,
                fileName: attrs.fileName,
                extension: attrs.extension,
                language: attrs.language,
                size: attrs.size,
                score: similarity,
            });
        }
    });
    matches.sort((left, right) => right.score - left.score);
    return matches.slice(0, topK);
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchDocFiles = searchDocFiles;
|
|
4
|
+
exports.searchCodeFiles = searchCodeFiles;
|
|
5
|
+
const embedder_1 = require("../../lib/embedder");
|
|
6
|
+
/**
 * Rank documentation files by cosine similarity of their file-level
 * embedding. Only root chunks (level === 1) carry a `fileEmbedding`; the
 * result also reports how many graph nodes share each file's `fileId`.
 *
 * @param graph - docs graph; must provide forEachNode
 * @param queryEmbedding - embedding vector of the query
 * @param options - { topK = 10, minScore = 0.3 }
 * @returns up to topK entries { fileId, title, chunks, score }, best first
 */
function searchDocFiles(graph, queryEmbedding, options = {}) {
    const { topK = 10, minScore = 0.3 } = options;
    const fileScores = [];
    const chunkTally = new Map();
    // Single pass: tally nodes per file and score each root chunk that has
    // a file-level embedding.
    graph.forEachNode((_, attrs) => {
        chunkTally.set(attrs.fileId, (chunkTally.get(attrs.fileId) ?? 0) + 1);
        if (attrs.level === 1 && attrs.fileEmbedding.length > 0) {
            fileScores.push({
                fileId: attrs.fileId,
                title: attrs.title,
                score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.fileEmbedding),
            });
        }
    });
    return fileScores
        .filter(entry => entry.score >= minScore)
        .sort((left, right) => right.score - left.score)
        .slice(0, topK)
        .map(entry => ({
            fileId: entry.fileId,
            title: entry.title,
            chunks: chunkTally.get(entry.fileId) ?? 0,
            score: entry.score,
        }));
}
|
|
35
|
+
/**
 * Rank code files by cosine similarity of their file-level embedding.
 * Only nodes with kind === 'file' carry a `fileEmbedding`; the result also
 * reports how many graph nodes share each file's `fileId` (reported as
 * `symbolCount` — note the file node itself is included in the tally).
 *
 * @param graph - code graph; must provide forEachNode
 * @param queryEmbedding - embedding vector of the query
 * @param options - { topK = 10, minScore = 0.3 }
 * @returns up to topK entries { fileId, symbolCount, score }, best first
 */
function searchCodeFiles(graph, queryEmbedding, options = {}) {
    const { topK = 10, minScore = 0.3 } = options;
    const fileScores = [];
    const nodeTally = new Map();
    // Single pass: tally nodes per file and score each file node that has
    // a file-level embedding.
    graph.forEachNode((_, attrs) => {
        nodeTally.set(attrs.fileId, (nodeTally.get(attrs.fileId) ?? 0) + 1);
        if (attrs.kind === 'file' && attrs.fileEmbedding.length > 0) {
            fileScores.push({
                fileId: attrs.fileId,
                score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.fileEmbedding),
            });
        }
    });
    return fileScores
        .filter(entry => entry.score >= minScore)
        .sort((left, right) => right.score - left.score)
        .slice(0, topK)
        .map(entry => ({
            fileId: entry.fileId,
            symbolCount: nodeTally.get(entry.fileId) ?? 0,
            score: entry.score,
        }));
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchKnowledge = searchKnowledge;
|
|
4
|
+
const embedder_1 = require("../../lib/embedder");
|
|
5
|
+
const bm25_1 = require("../../lib/search/bm25");
|
|
6
|
+
/**
 * Hybrid semantic/keyword search over the knowledge graph.
 *
 * Pipeline: score candidate notes (cosine similarity, BM25, or an RRF fusion
 * of both, depending on `searchMode`), keep the best `topK` seeds at or above
 * `minScore`, expand each seed through relation edges up to `bfsDepth` hops
 * with a per-hop `bfsDecay` penalty, then return the de-duplicated matches
 * sorted by descending score, capped at `maxResults`. Proxy nodes are
 * excluded from both scoring and results.
 *
 * @param graph - knowledge graph; must provide forEachNode, outNeighbors,
 *   inNeighbors, getNodeAttributes and getNodeAttribute
 * @param queryEmbedding - embedding vector of the query text
 * @param options - { topK, bfsDepth, maxResults, minScore, bfsDecay,
 *   queryText, bm25Index, searchMode: 'hybrid'|'vector'|'keyword', rrfK }
 * @returns matches { id, title, content, tags, score }, best first
 */
function searchKnowledge(graph, queryEmbedding, options = {}) {
    const { topK = 5, bfsDepth = 1, maxResults = 20, minScore = 0.5, bfsDecay = 0.8, queryText, bm25Index, searchMode = 'hybrid', rrfK = 60 } = options;
    const vectorEnabled = searchMode !== 'keyword';
    const keywordEnabled = searchMode !== 'vector' && !!queryText && !!bm25Index;
    // --- 1. Candidate scoring (proxy nodes never participate) ---
    let candidates = [];
    if (vectorEnabled) {
        graph.forEachNode((nodeId, attrs) => {
            if (attrs.proxyFor || attrs.embedding.length === 0)
                return;
            candidates.push({ id: nodeId, score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.embedding) });
        });
    }
    if (keywordEnabled) {
        const keywordScores = bm25Index.score(queryText);
        // Only vector hits with positive similarity take part in fusion.
        const positive = vectorEnabled ? candidates.filter(c => c.score > 0) : [];
        if (positive.length === 0) {
            // Keyword-only mode, or vector search produced nothing usable.
            candidates = [...keywordScores].map(([id, score]) => ({ id, score }));
        }
        else {
            const fused = (0, bm25_1.rrfFuse)(new Map(positive.map(c => [c.id, c.score])), keywordScores, rrfK);
            candidates = [...fused].map(([id, score]) => ({ id, score }));
        }
        // Bring scores onto a 0–1 scale so minScore applies uniformly.
        const top = candidates.reduce((best, c) => Math.max(best, c.score), 0);
        if (top > 0) {
            candidates.forEach(c => { c.score = c.score / top; });
        }
    }
    if (candidates.length === 0)
        return [];
    candidates.sort((left, right) => right.score - left.score);
    // --- 2. Seed selection ---
    const seeds = candidates.filter(c => c.score >= minScore).slice(0, topK);
    if (seeds.length === 0)
        return [];
    // --- 3. BFS expansion with per-hop decay ---
    // bestScore keeps the highest score each node received across all runs.
    const bestScore = new Map(seeds.map(c => [c.id, c.score]));
    const expand = (rootId, rootScore) => {
        const seen = new Set();
        const frontier = [{ id: rootId, depth: 0, score: rootScore }];
        while (frontier.length > 0) {
            const current = frontier.shift();
            if (seen.has(current.id))
                continue;
            seen.add(current.id);
            if (current.score > (bestScore.get(current.id) ?? -Infinity))
                bestScore.set(current.id, current.score);
            if (current.depth >= bfsDepth)
                continue;
            const decayed = current.score * bfsDecay;
            // Prune: the decayed hop could not pass the threshold anyway.
            if (decayed < minScore)
                continue;
            for (const neighbor of graph.outNeighbors(current.id))
                frontier.push({ id: neighbor, depth: current.depth + 1, score: decayed });
            for (const neighbor of graph.inNeighbors(current.id))
                frontier.push({ id: neighbor, depth: current.depth + 1, score: decayed });
        }
    };
    seeds.forEach(seed => expand(seed.id, seed.score));
    // --- 4. Assemble results (proxies excluded) ---
    const results = [];
    for (const [id, score] of bestScore) {
        if (!(score >= minScore) || graph.getNodeAttribute(id, 'proxyFor'))
            continue;
        const attrs = graph.getNodeAttributes(id);
        results.push({
            id,
            title: attrs.title,
            content: attrs.content,
            tags: attrs.tags,
            score,
        });
    }
    return results
        .sort((left, right) => right.score - left.score)
        .slice(0, maxResults);
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchSkills = searchSkills;
|
|
4
|
+
const embedder_1 = require("../../lib/embedder");
|
|
5
|
+
const bm25_1 = require("../../lib/search/bm25");
|
|
6
|
+
/**
 * Semantic search over the skill graph.
 *
 * 1. Score every node by cosine similarity to the query embedding.
 * 2. Filter seeds below `minScore`, take top `topK`.
 * 3. BFS expansion via relation edges up to `bfsDepth` hops with score decay.
 * 4. De-duplicate, re-filter, sort, cap at `maxResults`.
 *
 * Proxy nodes (attrs.proxyFor set) are skipped during scoring and excluded
 * from the final results.
 *
 * @param graph - skill graph; must provide forEachNode, outNeighbors,
 *   inNeighbors, getNodeAttributes and getNodeAttribute — presumably graphology, TODO confirm
 * @param queryEmbedding - embedding vector of the query text
 * @param options - { topK, bfsDepth, maxResults, minScore, bfsDecay,
 *   queryText, bm25Index, searchMode: 'hybrid'|'vector'|'keyword', rrfK }
 * @returns matches { id, title, description, source, confidence, usageCount,
 *   tags, score }, sorted by descending score
 */
function searchSkills(graph, queryEmbedding, options = {}) {
    const { topK = 5, bfsDepth = 1, maxResults = 20, minScore = 0.5, bfsDecay = 0.8, queryText, bm25Index, searchMode = 'hybrid', rrfK = 60 } = options;
    // 'keyword' mode disables vector scoring; BM25 participates only when
    // both the raw query text and a BM25 index were supplied.
    const useVector = searchMode !== 'keyword';
    const useBm25 = searchMode !== 'vector' && !!queryText && !!bm25Index;
    // --- 1. Score all nodes (skip proxy nodes) ---
    const scored = [];
    if (useVector) {
        graph.forEachNode((id, attrs) => {
            if (attrs.proxyFor)
                return;
            // Nodes without an embedding cannot be vector-scored.
            if (attrs.embedding.length === 0)
                return;
            scored.push({ id, score: (0, embedder_1.cosineSimilarity)(queryEmbedding, attrs.embedding) });
        });
    }
    if (useBm25) {
        const bm25Scores = bm25Index.score(queryText);
        // Only vector hits with positive similarity take part in fusion.
        const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
        if (positiveScored.length > 0) {
            // Hybrid: fuse the two ranked lists with Reciprocal Rank Fusion.
            const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
            const fused = (0, bm25_1.rrfFuse)(vectorMap, bm25Scores, rrfK);
            scored.length = 0;
            for (const [id, score] of fused)
                scored.push({ id, score });
        }
        else {
            // Keyword-only mode, or vector search found nothing positive:
            // fall back to the raw BM25 scores.
            scored.length = 0;
            for (const [id, score] of bm25Scores)
                scored.push({ id, score });
        }
        // Normalize scores to 0–1 so minScore threshold works uniformly
        // across the cosine, BM25 and RRF score scales.
        const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
        if (maxScore > 0) {
            for (const s of scored)
                s.score /= maxScore;
        }
    }
    if (scored.length === 0)
        return [];
    scored.sort((a, b) => b.score - a.score);
    // --- 2. Filter seeds ---
    const minS = minScore;
    const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
    if (seeds.length === 0)
        return [];
    // --- 3. BFS expansion ---
    // scoreMap keeps the best score each node has received across all runs.
    const scoreMap = new Map(seeds.map(s => [s.id, s.score]));
    function bfs(startId, seedScore) {
        const queue = [
            { id: startId, depth: 0, score: seedScore },
        ];
        // Visited set is per-seed, so a later higher-scoring seed can still
        // improve a node that an earlier BFS run already reached.
        const visited = new Set();
        while (queue.length > 0) {
            const item = queue.shift();
            if (visited.has(item.id))
                continue;
            visited.add(item.id);
            const prev = scoreMap.get(item.id) ?? -Infinity;
            if (item.score > prev)
                scoreMap.set(item.id, item.score);
            if (item.depth >= bfsDepth)
                continue;
            // Prune: one more decayed hop could not pass the threshold.
            if (item.score * bfsDecay < minS)
                continue;
            const nextScore = item.score * bfsDecay;
            // Traverse both edge directions so related skills on either side
            // of a relation are pulled in.
            graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
            graph.inNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
        }
    }
    for (const seed of seeds) {
        bfs(seed.id, seed.score);
    }
    // --- 4. Build results (exclude proxy nodes) ---
    return [...scoreMap.entries()]
        .filter(([id, score]) => score >= minS && !graph.getNodeAttribute(id, 'proxyFor'))
        .map(([id, score]) => {
        const attrs = graph.getNodeAttributes(id);
        return {
            id,
            title: attrs.title,
            description: attrs.description,
            source: attrs.source,
            confidence: attrs.confidence,
            usageCount: attrs.usageCount,
            tags: attrs.tags,
            score,
        };
    })
        .sort((a, b) => b.score - a.score)
        .slice(0, maxResults);
}
|