codebase-contextualizer-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +242 -0
- package/bin/contextualizer.js +3 -0
- package/index.js +248 -0
- package/package.json +43 -0
- package/scripts/benchmark.js +189 -0
- package/src/abort.js +27 -0
- package/src/cache.js +100 -0
- package/src/concurrency.js +20 -0
- package/src/database.js +172 -0
- package/src/file-filter.js +113 -0
- package/src/file-hash.js +17 -0
- package/src/indexer.js +290 -0
- package/src/parser.js +116 -0
- package/src/paths.js +14 -0
- package/src/search.js +134 -0
- package/src/shutdown.js +34 -0
- package/src/walker.js +214 -0
- package/src/worker-pool.js +178 -0
- package/src/worker.js +124 -0
package/src/indexer.js
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
const fs = require("fs/promises");
|
|
2
|
+
const path = require("path");
|
|
3
|
+
const { createAbortError, throwIfAborted } = require("./abort");
|
|
4
|
+
const { loadCache, saveCache } = require("./cache");
|
|
5
|
+
const { mapLimit } = require("./concurrency");
|
|
6
|
+
const { databaseExists, writeEmbeddingResults } = require("./database");
|
|
7
|
+
const { hashFile } = require("./file-hash");
|
|
8
|
+
const { WorkerPool, getDefaultWorkerCount } = require("./worker-pool");
|
|
9
|
+
const { walkSourceFiles } = require("./walker");
|
|
10
|
+
|
|
11
|
+
/**
 * Resolves `target` to an absolute path and checks that it is a directory.
 *
 * @param {string} target - Path handed to `path.resolve`.
 * @returns {Promise<string>} The absolute directory path.
 * @throws {Error} When the resolved path exists but is not a directory.
 *   Errors raised by `fs.stat` (for example for a missing path) propagate
 *   unchanged.
 */
async function resolveTargetRoot(target) {
  const absoluteTarget = path.resolve(target);
  const targetStat = await fs.stat(absoluteTarget);

  if (targetStat.isDirectory()) {
    return absoluteTarget;
  }

  throw new Error(`Target is not a directory: ${absoluteTarget}`);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Summarizes a change set as per-category counts.
 *
 * @param {{new: Array, modified: Array, unchanged: Array, removed: Array}} changes
 *   Lists of paths grouped by change status.
 * @returns {{scanned: number, new: number, modified: number, unchanged: number, removed: number}}
 *   `scanned` is the sum of the new, modified and unchanged counts; removed
 *   entries are counted separately and are not part of `scanned`.
 */
function createCounts(changes) {
  const { new: added, modified, unchanged, removed } = changes;
  const scanned = added.length + modified.length + unchanged.length;

  return {
    scanned,
    new: added.length,
    modified: modified.length,
    unchanged: unchanged.length,
    removed: removed.length,
  };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Scans the target directory, hashes every walked file and classifies each
 * one against the cached state as new, modified, unchanged or removed.
 *
 * The abort signal is checked before and after every awaited step, so an
 * abort surfaces as soon as the current step finishes.
 *
 * @param {string} target - Directory to scan (resolved by `resolveTargetRoot`).
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - Optional abort signal.
 * @param {number} [options.hashConcurrency] - Limit handed to `mapLimit`
 *   for hashing; falls back to 16 when falsy.
 * @returns {Promise<object>} Object with `root`, `cachePath`, `cacheExists`,
 *   `previousCache`, `currentFiles`, `filesByRelativePath`, `changes`,
 *   `counts` and `errors`.
 */
async function collectState(target, options = {}) {
  const { signal } = options;
  throwIfAborted(signal);

  const root = await resolveTargetRoot(target);
  throwIfAborted(signal);

  const cacheState = await loadCache(root);
  throwIfAborted(signal);

  const walkResult = await walkSourceFiles(root, { signal });
  throwIfAborted(signal);

  // Hashes one file. A hashing failure is reported as an `error` entry rather
  // than thrown, unless the signal was aborted in the meantime.
  const hashOne = async (file) => {
    throwIfAborted(signal);

    try {
      const hash = await hashFile(file.absolutePath);
      throwIfAborted(signal);

      return { file: { ...file, hash } };
    } catch (error) {
      if (signal && signal.aborted) {
        throw createAbortError(signal);
      }

      return {
        error: {
          path: file.absolutePath,
          message: error.message,
        },
      };
    }
  };

  const hashResults = await mapLimit(walkResult.files, options.hashConcurrency || 16, hashOne);

  // Split the per-file outcomes into successfully hashed files and errors.
  const hashedFiles = [];
  const hashErrors = [];

  for (const outcome of hashResults) {
    if (outcome.file) {
      hashedFiles.push(outcome.file);
    }

    if (outcome.error) {
      hashErrors.push(outcome.error);
    }
  }

  const errors = [...walkResult.errors, ...hashErrors];

  const previousFiles = cacheState.cache.files;
  const currentFiles = {};
  const filesByRelativePath = {};
  const changes = {
    new: [],
    modified: [],
    unchanged: [],
    removed: [],
  };

  for (const file of hashedFiles) {
    const previous = previousFiles[file.relativePath];
    let status;

    if (!previous) {
      status = "new";
    } else if (previous.hash === file.hash) {
      status = "unchanged";
    } else {
      status = "modified";
    }

    currentFiles[file.relativePath] = {
      hash: file.hash,
      size: file.size,
      mtimeMs: file.mtimeMs,
      // Only an unchanged file keeps its earlier timestamp; "unchanged"
      // implies that a previous cache entry exists.
      indexedAt: status === "unchanged" ? previous.indexedAt || null : null,
    };
    filesByRelativePath[file.relativePath] = file;
    changes[status].push(file.relativePath);
  }

  // Anything cached that was not seen in this scan counts as removed.
  for (const cachedPath of Object.keys(previousFiles)) {
    if (currentFiles[cachedPath]) {
      continue;
    }

    changes.removed.push(cachedPath);
  }

  Object.values(changes).forEach((paths) => {
    paths.sort();
  });

  return {
    root,
    cachePath: cacheState.cachePath,
    cacheExists: cacheState.exists,
    previousCache: cacheState.cache,
    currentFiles,
    filesByRelativePath,
    changes,
    counts: createCounts(changes),
    errors,
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Builds the summary object for a list of per-file embedding results.
 *
 * A result counts as succeeded when `ok` is truthy and as failed otherwise.
 * Succeeded results with a truthy `skipped` flag are counted as skipped;
 * the remaining succeeded results contribute their `chunks.length` to the
 * chunk total.
 *
 * @param {Array<object>} results - Per-file embedding results.
 * @returns {{files: Array<object>, counts: {queued: number, succeeded: number, failed: number, skipped: number, chunks: number}}}
 *   `files` is the same array instance that was passed in.
 */
function createEmbeddingSummary(results) {
  let succeededCount = 0;
  let failedCount = 0;
  let skippedCount = 0;
  let chunkTotal = 0;

  for (const entry of results) {
    if (!entry.ok) {
      failedCount += 1;
      continue;
    }

    succeededCount += 1;

    if (entry.skipped) {
      skippedCount += 1;
      continue;
    }

    chunkTotal += entry.chunks.length;
  }

  return {
    files: results,
    counts: {
      queued: results.length,
      succeeded: succeededCount,
      failed: failedCount,
      skipped: skippedCount,
      chunks: chunkTotal,
    },
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Runs every requested file through a worker pool and summarizes the results.
 *
 * Paths are de-duplicated and sorted first. Paths without an entry in
 * `result.filesByRelativePath` are dropped silently. A failing `pool.run`
 * becomes an `ok: false` result instead of rejecting the whole batch, unless
 * the signal has been aborted, in which case an abort error is thrown. The
 * pool is always closed before this function settles.
 *
 * @param {object} result - State object; only `filesByRelativePath` is read.
 * @param {Iterable<string>} pathsToEmbed - Relative paths to process.
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - Optional abort signal; aborting
 *   closes the pool.
 * @param {number} [options.workerCount] - Pool size; falls back to
 *   `getDefaultWorkerCount()` when falsy.
 * @returns {Promise<object>} Summary produced by `createEmbeddingSummary`.
 */
async function embedFiles(result, pathsToEmbed, options = {}) {
  const { signal } = options;
  const uniquePaths = Array.from(new Set(pathsToEmbed)).sort();

  if (uniquePaths.length === 0) {
    return createEmbeddingSummary([]);
  }

  const pool = new WorkerPool({
    size: options.workerCount || getDefaultWorkerCount(),
  });
  let detachAbortListener = null;

  if (signal) {
    if (signal.aborted) {
      await pool.close();
      throw createAbortError(signal);
    }

    // Fire-and-forget: the listener cannot await, the pool is closed again
    // (and awaited) in the `finally` below.
    const handleAbort = () => {
      void pool.close();
    };

    signal.addEventListener("abort", handleAbort, { once: true });
    detachAbortListener = () => signal.removeEventListener("abort", handleAbort);
  }

  // Processes a single path; resolves to null when the path is unknown.
  const embedOne = async (relativePath) => {
    throwIfAborted(signal);

    const file = result.filesByRelativePath[relativePath];

    if (!file) {
      return null;
    }

    try {
      const workerResult = await pool.run({
        filePath: file.absolutePath,
        relativePath,
      });
      throwIfAborted(signal);

      // Worker fields are spread after `ok: true`, so a worker-provided `ok`
      // would take precedence.
      return { ok: true, ...workerResult };
    } catch (error) {
      if (signal && signal.aborted) {
        throw createAbortError(signal);
      }

      return {
        ok: false,
        filePath: file.absolutePath,
        relativePath,
        error: error.message,
        chunks: [],
      };
    }
  };

  try {
    const outcomes = await Promise.all(uniquePaths.map((relativePath) => embedOne(relativePath)));

    return createEmbeddingSummary(outcomes.filter(Boolean));
  } finally {
    if (detachAbortListener) {
      detachAbortListener();
    }

    await pool.close();
  }
}
|
|
220
|
+
|
|
221
|
+
/**
 * Indexes the target directory: collects the current state, embeds the files
 * that need it, writes the results to the database and saves the cache.
 *
 * When no database exists yet for the root, every scanned file is embedded;
 * otherwise only new and modified files are. Files whose embedding failed
 * keep their previous cache entry (or are left out of the cache when they
 * have none).
 *
 * @param {string} target - Directory to index.
 * @param {object} [options] - Forwarded to `collectState` and `embedFiles`.
 * @param {AbortSignal} [options.signal] - Optional abort signal, checked
 *   between the major steps.
 * @returns {Promise<object>} The state from `collectState`, extended with
 *   `embeddings` and `database` summaries.
 */
async function indexTarget(target, options = {}) {
  const { signal } = options;
  const result = await collectState(target, options);
  throwIfAborted(signal);

  const changedPaths = [...result.changes.new, ...result.changes.modified];
  const pathsToEmbed = databaseExists(result.root)
    ? changedPaths
    : Object.keys(result.filesByRelativePath);
  const embeddingSummary = await embedFiles(result, pathsToEmbed, options);
  throwIfAborted(signal);

  const databaseSummary = writeEmbeddingResults(
    result.root,
    result.filesByRelativePath,
    embeddingSummary,
    result.changes.removed,
  );
  throwIfAborted(signal);

  const indexedAt = new Date().toISOString();
  const failedPaths = new Set();

  for (const fileResult of embeddingSummary.files) {
    if (!fileResult.ok) {
      failedPaths.add(fileResult.relativePath);
    }
  }

  // Successfully embedded new/modified files get the timestamp of this run.
  for (const filePath of changedPaths) {
    if (!failedPaths.has(filePath)) {
      result.currentFiles[filePath].indexedAt = indexedAt;
    }
  }

  // Unchanged files keep their earlier timestamp when they have one.
  for (const filePath of result.changes.unchanged) {
    const { indexedAt: previousIndexedAt } = result.previousCache.files[filePath];
    result.currentFiles[filePath].indexedAt = previousIndexedAt || indexedAt;
  }

  const filesToSave = { ...result.currentFiles };

  // A failed file must not be recorded with its new hash.
  for (const filePath of failedPaths) {
    const previous = result.previousCache.files[filePath];

    if (!previous) {
      delete filesToSave[filePath];
      continue;
    }

    filesToSave[filePath] = previous;
  }

  await saveCache(result.root, filesToSave);

  result.embeddings = embeddingSummary;
  result.database = databaseSummary;

  return result;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Reports the current state of the target directory without indexing it.
 *
 * @param {string} target - Directory to inspect.
 * @param {object} [options] - Forwarded unchanged to `collectState`.
 * @returns {Promise<object>} The state object produced by `collectState`.
 */
async function statusTarget(target, options = {}) {
  const state = await collectState(target, options);

  return state;
}
|
|
285
|
+
|
|
286
|
+
module.exports = {
|
|
287
|
+
getDefaultWorkerCount,
|
|
288
|
+
indexTarget,
|
|
289
|
+
statusTarget,
|
|
290
|
+
};
|
package/src/parser.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
const path = require("path");
|
|
2
|
+
const Parser = require("tree-sitter");
|
|
3
|
+
const JavaScript = require("tree-sitter-javascript");
|
|
4
|
+
|
|
5
|
+
// One parser instance, configured with the JavaScript grammar, shared by all
// parse calls in this module.
const javascriptParser = new Parser();
javascriptParser.setLanguage(JavaScript);

// Maps a lower-cased file extension (leading dot included) to the function
// that parses files with that extension.
const parsers = Object.fromEntries(
  [".cjs", ".js", ".jsx", ".mjs"].map((extension) => [extension, parseJavaScript]),
);
|
|
14
|
+
|
|
15
|
+
/**
 * Looks up the parse function registered for a file's extension.
 *
 * @param {string} filePath - Path whose extension selects the parser.
 * @returns {Function|undefined} The entry of `parsers` for the lower-cased
 *   extension, or `undefined` when there is none.
 */
function getParser(filePath) {
  const extension = path.extname(filePath).toLowerCase();

  return parsers[extension];
}
|
|
18
|
+
|
|
19
|
+
/**
 * Returns the text of a node's `name` field.
 *
 * @param {object} node - Syntax node exposing `childForFieldName`.
 * @returns {string} The name child's text, or "(anonymous)" when the node
 *   has no `name` child.
 */
function getNodeName(node) {
  const identifier = node.childForFieldName("name");

  if (identifier) {
    return identifier.text;
  }

  return "(anonymous)";
}
|
|
23
|
+
|
|
24
|
+
/**
 * Returns the trimmed text of the comment directly preceding a node.
 *
 * The previous named sibling is preferred; the previous sibling of any kind
 * is only consulted when there is no named one.
 *
 * @param {object} node - Syntax node exposing `previousNamedSibling` and
 *   `previousSibling`.
 * @returns {string} The comment text, or "" when the chosen sibling is
 *   missing or is not of type "comment".
 */
function getLeadingComment(node) {
  const sibling = node.previousNamedSibling || node.previousSibling;
  const isComment = Boolean(sibling) && sibling.type === "comment";

  return isComment ? sibling.text.trim() : "";
}
|
|
33
|
+
|
|
34
|
+
/**
 * Builds the single-line text that describes a chunk for embedding.
 *
 * The result joins "File", "Symbol", an optional "Context" (only when
 * `chunk.commentText` is truthy) and "Code" segments with " | ".
 *
 * @param {string} relativePath - Path reported in the "File" segment.
 * @param {{name: string, commentText: string, code: string}} chunk
 * @returns {string} The joined description.
 */
function createSemanticText(relativePath, chunk) {
  const header = [`File: ${relativePath}`, `Symbol: ${chunk.name}`];
  const context = chunk.commentText ? [`Context: ${chunk.commentText}`] : [];

  return [...header, ...context, `Code: ${chunk.code}`].join(" | ");
}
|
|
48
|
+
|
|
49
|
+
/**
 * Builds the chunk record for one syntax node.
 *
 * @param {string} relativePath - File path used in the chunk's semantic text.
 * @param {string} code - Full source text the node was parsed from.
 * @param {object} node - Syntax node; its `startIndex`/`endIndex` select the
 *   chunk's code and its `startPosition`/`endPosition` rows give the lines.
 * @returns {object} Chunk with `name`, `type`, 1-based `startLine` and
 *   `endLine` (row + 1), `commentText`, `code` and `semanticText`.
 */
function createChunk(relativePath, code, node) {
  const nodeSource = code.slice(node.startIndex, node.endIndex);
  const leadingComment = getLeadingComment(node);
  const chunk = {
    name: getNodeName(node),
    type: node.type,
    startLine: node.startPosition.row + 1,
    endLine: node.endPosition.row + 1,
    commentText: leadingComment,
    code: nodeSource,
  };

  // The semantic text is derived from the fields above, so it is added last.
  chunk.semanticText = createSemanticText(relativePath, chunk);

  return chunk;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Parses JavaScript source and returns one chunk per function declaration,
 * class declaration and method definition found anywhere in the tree.
 *
 * Matching nodes are still descended into, so a class yields a chunk for
 * itself and one for each of its methods.
 *
 * @param {object} input
 * @param {string} input.code - Source text to parse.
 * @param {string} input.relativePath - Path recorded in each chunk.
 * @returns {Array<object>} Chunks in document (pre-order) order.
 */
function parseJavaScript({ code, relativePath }) {
  const chunkNodeTypes = new Set([
    "function_declaration",
    "class_declaration",
    "method_definition",
  ]);
  const chunks = [];
  const pending = [javascriptParser.parse(code).rootNode];

  // Iterative depth-first walk with an explicit stack instead of recursion.
  while (pending.length > 0) {
    const node = pending.pop();

    if (chunkNodeTypes.has(node.type)) {
      chunks.push(createChunk(relativePath, code, node));
    }

    // Children are pushed last-to-first so the first child is popped next.
    let index = node.childCount;

    while (index > 0) {
      index -= 1;
      pending.push(node.child(index));
    }
  }

  return chunks;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Parses a source file with the parser registered for its extension.
 *
 * @param {object} input
 * @param {string} input.filePath - Path whose extension selects the parser.
 * @param {string} input.relativePath - Path forwarded to the parser.
 * @param {string} input.code - Source text to parse.
 * @returns {{supported: boolean, reason?: string, chunks: Array<object>}}
 *   For an extension without a parser: `supported: false`, a `reason` and no
 *   chunks. Otherwise `supported: true` and the parser's chunks.
 */
function parseSourceFile({ filePath, relativePath, code }) {
  const parse = getParser(filePath);

  if (parse) {
    return {
      supported: true,
      chunks: parse({ code, relativePath }),
    };
  }

  const extension = path.extname(filePath).toLowerCase();

  return {
    supported: false,
    reason: `Unsupported source extension: ${extension || "(none)"}`,
    chunks: [],
  };
}
|
|
110
|
+
|
|
111
|
+
module.exports = {
|
|
112
|
+
getParser,
|
|
113
|
+
parseJavaScript,
|
|
114
|
+
parseSourceFile,
|
|
115
|
+
parsers,
|
|
116
|
+
};
|
package/src/paths.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
const path = require("path");
|
|
2
|
+
|
|
3
|
+
/**
 * Replaces every platform path separator in `value` with a forward slash.
 *
 * @param {string} value - Path using the platform separator (`path.sep`).
 * @returns {string} The same path with "/" as separator.
 */
function toPosixPath(value) {
  const segments = value.split(path.sep);

  return segments.join(path.posix.sep);
}
|
|
6
|
+
|
|
7
|
+
/**
 * Computes the path of `target` relative to `root`, using "/" as separator.
 *
 * @param {string} root - Base path.
 * @param {string} target - Path to express relative to `root`.
 * @returns {string} Result of `path.relative`, converted by `toPosixPath`.
 */
function toRelativePosixPath(root, target) {
  const relativePath = path.relative(root, target);

  return toPosixPath(relativePath);
}
|
|
10
|
+
|
|
11
|
+
module.exports = {
|
|
12
|
+
toPosixPath,
|
|
13
|
+
toRelativePosixPath,
|
|
14
|
+
};
|
package/src/search.js
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
const path = require("path");
|
|
2
|
+
const { pipeline } = require("@xenova/transformers");
|
|
3
|
+
const { throwIfAborted } = require("./abort");
|
|
4
|
+
const { getDatabasePath, openDatabase } = require("./database");
|
|
5
|
+
|
|
6
|
+
// Model identifier handed to `pipeline` for feature extraction.
const MODEL_ID = "Xenova/all-MiniLM-L6-v2";

// Memoized promise of the embedding pipeline; unset until first requested
// and reset when loading fails.
let embedderPromise;

/**
 * Returns the shared feature-extraction pipeline, creating it on first use.
 *
 * The promise is memoized so concurrent and later callers share a single
 * load. A rejected load is not kept: the memo is cleared so the next call
 * starts a fresh attempt instead of returning the same rejection forever.
 *
 * @returns {Promise<Function>} Promise of the pipeline returned by
 *   `pipeline("feature-extraction", MODEL_ID)`; it rejects when loading
 *   fails.
 */
function getEmbedder() {
  if (!embedderPromise) {
    const pending = pipeline("feature-extraction", MODEL_ID);
    embedderPromise = pending;

    // Observe the failure on a derived promise: callers still receive the
    // original rejection from `pending`, while the memo is released. The
    // identity check avoids clearing a newer attempt.
    Promise.resolve(pending).catch(() => {
      if (embedderPromise === pending) {
        embedderPromise = undefined;
      }
    });
  }

  return embedderPromise;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Converts an embedding output into a new `Float32Array`.
 *
 * @param {Float32Array|ArrayBufferView|Array<number>} value - A
 *   `Float32Array` (copied), any other ArrayBuffer view, or a plain array
 *   (both converted with `Float32Array.from`).
 * @returns {Float32Array} A new array; the input is never returned as-is.
 * @throws {Error} When `value` is none of the supported kinds.
 */
function toFloat32Array(value) {
  if (value instanceof Float32Array) {
    return new Float32Array(value);
  }

  const isConvertible = ArrayBuffer.isView(value) || Array.isArray(value);

  if (!isConvertible) {
    throw new Error("Embedding output could not be converted to Float32Array");
  }

  return Float32Array.from(value);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Embeds a search query with the shared feature-extraction pipeline.
 *
 * The pipeline is called with mean pooling and normalization enabled.
 *
 * @param {string} query - Text to embed.
 * @returns {Promise<Float32Array>} The `data` of the pipeline output,
 *   converted by `toFloat32Array`.
 */
async function embedQuery(query) {
  const extractFeatures = await getEmbedder();
  const { data } = await extractFeatures(query, {
    pooling: "mean",
    normalize: true,
  });

  return toFloat32Array(data);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Interprets the bytes of a binary blob as 32-bit floats.
 *
 * A `Float32Array` view can only start at a byte offset that is a multiple
 * of four. Node.js Buffers that are slices of a larger allocation may start
 * at any offset, so an unaligned blob is copied into fresh storage first
 * instead of failing with a RangeError. Aligned blobs are viewed in place
 * without copying. Trailing bytes that do not form a whole float are ignored.
 *
 * @param {Buffer|Uint8Array} blob - Byte view holding the float data in
 *   platform byte order.
 * @returns {Float32Array} The floats: a view over `blob`'s memory when the
 *   blob is aligned, otherwise an independent copy.
 */
function blobToFloat32Array(blob) {
  const elementSize = Float32Array.BYTES_PER_ELEMENT;
  const elementCount = Math.floor(blob.byteLength / elementSize);

  if (blob.byteOffset % elementSize === 0) {
    return new Float32Array(blob.buffer, blob.byteOffset, elementCount);
  }

  // Unaligned start: copy the whole floats into a new buffer, which always
  // begins at offset 0.
  const alignedBytes = new Uint8Array(elementCount * elementSize);
  alignedBytes.set(new Uint8Array(blob.buffer, blob.byteOffset, alignedBytes.length));

  return new Float32Array(alignedBytes.buffer);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} left - First vector.
 * @param {ArrayLike<number>} right - Second vector.
 * @returns {number} Dot product divided by the product of the magnitudes;
 *   `Number.NEGATIVE_INFINITY` when the lengths differ, and 0 when either
 *   vector has zero magnitude (which includes two empty vectors).
 */
function cosineSimilarity(left, right) {
  const dimension = left.length;

  if (dimension !== right.length) {
    return Number.NEGATIVE_INFINITY;
  }

  let dotProduct = 0;
  let leftSquared = 0;
  let rightSquared = 0;

  for (let position = 0; position < dimension; position += 1) {
    const leftValue = left[position];
    const rightValue = right[position];

    dotProduct += leftValue * rightValue;
    leftSquared += leftValue * leftValue;
    rightSquared += rightValue * rightValue;
  }

  // A zero-length vector has no direction; report "no similarity" rather
  // than dividing by zero.
  if (leftSquared === 0 || rightSquared === 0) {
    return 0;
  }

  return dotProduct / (Math.sqrt(leftSquared) * Math.sqrt(rightSquared));
}
|
|
73
|
+
|
|
74
|
+
/**
 * Loads every stored chunk together with the path of the file it belongs to.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`, whose result
 *   exposes `all()`.
 * @returns {Array<object>} Rows with `id`, `file`, `name`, `type`,
 *   `startLine`, `endLine`, `code` and `embedding`.
 */
function loadChunks(db) {
  // Query text is kept verbatim.
  const statement = db.prepare(`
SELECT
chunks.id,
files.path AS file,
chunks.name,
chunks.type,
chunks.startLine,
chunks.endLine,
chunks.code,
chunks.embedding
FROM chunks
INNER JOIN files ON files.id = chunks.file_id
`);

  return statement.all();
}
|
|
89
|
+
|
|
90
|
+
/**
 * Searches the indexed chunks of a target directory for a query.
 *
 * The query is embedded, every stored chunk is scored against it with
 * cosine similarity, chunks with a non-finite score are dropped, and the
 * best-scoring chunks are returned in descending score order. The database
 * is closed before the function settles, also on failure.
 *
 * @param {string} [target] - Directory whose database is searched; a falsy
 *   value means ".".
 * @param {string} query - Search text.
 * @param {object} [options]
 * @param {number} [options.limit] - Maximum number of results; falls back
 *   to 5 when falsy.
 * @param {AbortSignal} [options.signal] - Optional abort signal, checked
 *   between the major steps.
 * @returns {Promise<{root: string, databasePath: string, query: string, results: Array<object>}>}
 */
async function searchTarget(target, query, options = {}) {
  const { signal } = options;
  const root = path.resolve(target || ".");
  const limit = options.limit || 5;
  throwIfAborted(signal);

  const queryEmbedding = await embedQuery(query);
  throwIfAborted(signal);

  const db = openDatabase(root);

  try {
    const rows = loadChunks(db);
    throwIfAborted(signal);

    const scored = [];

    for (const row of rows) {
      const score = cosineSimilarity(queryEmbedding, blobToFloat32Array(row.embedding));

      // Vectors of a different length score -Infinity and are skipped here.
      if (!Number.isFinite(score)) {
        continue;
      }

      const { id, file, name, type, startLine, endLine, code } = row;
      scored.push({ id, file, name, type, startLine, endLine, code, score });
    }

    scored.sort((left, right) => right.score - left.score);

    return {
      root,
      databasePath: getDatabasePath(root),
      query,
      results: scored.slice(0, limit),
    };
  } finally {
    db.close();
  }
}
|
|
129
|
+
|
|
130
|
+
module.exports = {
|
|
131
|
+
blobToFloat32Array,
|
|
132
|
+
cosineSimilarity,
|
|
133
|
+
searchTarget,
|
|
134
|
+
};
|
package/src/shutdown.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
// Cleanup callbacks that are currently registered. A Set, so registering the
// same function twice keeps a single entry.
const cleanupTasks = new Set();

// Promise of the cleanup pass in progress, or null when none is running.
let cleanupPromise = null;

/**
 * Registers a callback to be invoked by `runCleanup`.
 *
 * @param {Function} cleanup - Callback; may return a promise.
 * @returns {Function} Call it to unregister the callback again.
 */
function registerCleanup(cleanup) {
  cleanupTasks.add(cleanup);

  return () => {
    cleanupTasks.delete(cleanup);
  };
}

/**
 * Invokes every registered cleanup callback once and waits for all of them.
 *
 * Callbacks are started in reverse registration order without waiting for
 * one another, and the registry is emptied before they run. Failures are
 * ignored, whether a callback rejects or throws synchronously, so one broken
 * callback cannot prevent the others from running. A call made while a pass
 * is in progress waits for that pass instead of starting another.
 *
 * @returns {Promise<void>} Resolves when every callback has settled.
 */
async function runCleanup() {
  if (cleanupPromise) {
    return cleanupPromise;
  }

  cleanupPromise = (async () => {
    const tasks = Array.from(cleanupTasks).reverse();
    cleanupTasks.clear();

    // The async wrapper turns a synchronous throw into a rejection, which
    // `allSettled` absorbs; a bare `cleanup()` call would abort the `map`
    // and skip every remaining callback.
    await Promise.allSettled(tasks.map(async (cleanup) => cleanup()));
  })();

  try {
    return await cleanupPromise;
  } finally {
    cleanupPromise = null;
  }
}
|
|
30
|
+
|
|
31
|
+
module.exports = {
|
|
32
|
+
registerCleanup,
|
|
33
|
+
runCleanup,
|
|
34
|
+
};
|