@mars167/git-ai 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +364 -0
- package/README.zh-CN.md +361 -0
- package/assets/hooks/post-checkout +28 -0
- package/assets/hooks/post-merge +28 -0
- package/assets/hooks/pre-commit +17 -0
- package/assets/hooks/pre-push +29 -0
- package/dist/bin/git-ai.js +62 -0
- package/dist/src/commands/ai.js +30 -0
- package/dist/src/commands/checkIndex.js +19 -0
- package/dist/src/commands/dsr.js +156 -0
- package/dist/src/commands/graph.js +203 -0
- package/dist/src/commands/hooks.js +125 -0
- package/dist/src/commands/index.js +92 -0
- package/dist/src/commands/pack.js +31 -0
- package/dist/src/commands/query.js +139 -0
- package/dist/src/commands/semantic.js +134 -0
- package/dist/src/commands/serve.js +14 -0
- package/dist/src/commands/status.js +78 -0
- package/dist/src/commands/trae.js +75 -0
- package/dist/src/commands/unpack.js +28 -0
- package/dist/src/core/archive.js +91 -0
- package/dist/src/core/astGraph.js +127 -0
- package/dist/src/core/astGraphQuery.js +142 -0
- package/dist/src/core/cozo.js +266 -0
- package/dist/src/core/cpg/astLayer.js +56 -0
- package/dist/src/core/cpg/callGraph.js +483 -0
- package/dist/src/core/cpg/cfgLayer.js +490 -0
- package/dist/src/core/cpg/dfgLayer.js +237 -0
- package/dist/src/core/cpg/index.js +80 -0
- package/dist/src/core/cpg/types.js +108 -0
- package/dist/src/core/crypto.js +10 -0
- package/dist/src/core/dsr/generate.js +308 -0
- package/dist/src/core/dsr/gitContext.js +74 -0
- package/dist/src/core/dsr/indexMaterialize.js +106 -0
- package/dist/src/core/dsr/paths.js +26 -0
- package/dist/src/core/dsr/query.js +73 -0
- package/dist/src/core/dsr/snapshotParser.js +73 -0
- package/dist/src/core/dsr/state.js +27 -0
- package/dist/src/core/dsr/types.js +2 -0
- package/dist/src/core/embedding/fusion.js +52 -0
- package/dist/src/core/embedding/index.js +43 -0
- package/dist/src/core/embedding/parser.js +14 -0
- package/dist/src/core/embedding/semantic.js +254 -0
- package/dist/src/core/embedding/structural.js +97 -0
- package/dist/src/core/embedding/symbolic.js +117 -0
- package/dist/src/core/embedding/tokenizer.js +91 -0
- package/dist/src/core/embedding/types.js +2 -0
- package/dist/src/core/embedding.js +36 -0
- package/dist/src/core/git.js +49 -0
- package/dist/src/core/gitDiff.js +73 -0
- package/dist/src/core/indexCheck.js +131 -0
- package/dist/src/core/indexer.js +185 -0
- package/dist/src/core/indexerIncremental.js +303 -0
- package/dist/src/core/indexing/config.js +51 -0
- package/dist/src/core/indexing/hnsw.js +568 -0
- package/dist/src/core/indexing/index.js +17 -0
- package/dist/src/core/indexing/monitor.js +82 -0
- package/dist/src/core/indexing/parallel.js +252 -0
- package/dist/src/core/lancedb.js +111 -0
- package/dist/src/core/lfs.js +27 -0
- package/dist/src/core/log.js +62 -0
- package/dist/src/core/manifest.js +88 -0
- package/dist/src/core/parser/adapter.js +2 -0
- package/dist/src/core/parser/c.js +93 -0
- package/dist/src/core/parser/chunkRelations.js +178 -0
- package/dist/src/core/parser/chunker.js +274 -0
- package/dist/src/core/parser/go.js +98 -0
- package/dist/src/core/parser/java.js +80 -0
- package/dist/src/core/parser/markdown.js +76 -0
- package/dist/src/core/parser/python.js +81 -0
- package/dist/src/core/parser/rust.js +103 -0
- package/dist/src/core/parser/typescript.js +98 -0
- package/dist/src/core/parser/utils.js +62 -0
- package/dist/src/core/parser/yaml.js +53 -0
- package/dist/src/core/parser.js +75 -0
- package/dist/src/core/paths.js +10 -0
- package/dist/src/core/repoMap.js +164 -0
- package/dist/src/core/retrieval/cache.js +31 -0
- package/dist/src/core/retrieval/classifier.js +74 -0
- package/dist/src/core/retrieval/expander.js +80 -0
- package/dist/src/core/retrieval/fuser.js +40 -0
- package/dist/src/core/retrieval/index.js +32 -0
- package/dist/src/core/retrieval/reranker.js +304 -0
- package/dist/src/core/retrieval/types.js +2 -0
- package/dist/src/core/retrieval/weights.js +42 -0
- package/dist/src/core/search.js +41 -0
- package/dist/src/core/sq8.js +65 -0
- package/dist/src/core/symbolSearch.js +143 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/workspace.js +116 -0
- package/dist/src/mcp/server.js +794 -0
- package/docs/README.md +44 -0
- package/docs/cross-encoder.md +157 -0
- package/docs/embedding.md +158 -0
- package/docs/logo.png +0 -0
- package/docs/windows-setup.md +67 -0
- package/docs/zh-CN/DESIGN.md +102 -0
- package/docs/zh-CN/README.md +46 -0
- package/docs/zh-CN/advanced.md +26 -0
- package/docs/zh-CN/architecture_explained.md +116 -0
- package/docs/zh-CN/cli.md +109 -0
- package/docs/zh-CN/dsr.md +91 -0
- package/docs/zh-CN/graph_scenarios.md +173 -0
- package/docs/zh-CN/hooks.md +14 -0
- package/docs/zh-CN/manifests.md +136 -0
- package/docs/zh-CN/mcp.md +205 -0
- package/docs/zh-CN/quickstart.md +35 -0
- package/docs/zh-CN/rules.md +7 -0
- package/docs/zh-CN/technical-details.md +454 -0
- package/docs/zh-CN/troubleshooting.md +19 -0
- package/docs/zh-CN/windows-setup.md +67 -0
- package/install.sh +183 -0
- package/package.json +97 -0
- package/skills/git-ai-mcp/SKILL.md +86 -0
- package/skills/git-ai-mcp/references/constraints.md +143 -0
- package/skills/git-ai-mcp/references/tools.md +263 -0
- package/templates/agents/common/documents/Fix EISDIR error and enable multi-language indexing.md +14 -0
- package/templates/agents/common/documents/Fix git-ai index error in CodaGraph directory.md +13 -0
- package/templates/agents/common/skills/git-ai-mcp/SKILL.md +86 -0
- package/templates/agents/common/skills/git-ai-mcp/references/constraints.md +143 -0
- package/templates/agents/common/skills/git-ai-mcp/references/tools.md +263 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.HNSWIndex = void 0;
|
|
7
|
+
exports.clampHnswParameters = clampHnswParameters;
|
|
8
|
+
exports.quantize = quantize;
|
|
9
|
+
exports.dequantize = dequantize;
|
|
10
|
+
exports.cosineSimilarity = cosineSimilarity;
|
|
11
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
12
|
+
const path_1 = __importDefault(require("path"));
|
|
13
|
+
const sq8_1 = require("../sq8");
|
|
14
|
+
const HNSW_MAGIC = Buffer.from('HNSW');
|
|
15
|
+
const HNSW_VERSION = 1;
|
|
16
|
+
/**
 * Encodes a number as a 4-byte little-endian unsigned integer.
 *
 * @param {number} value - Number to encode; it is coerced to an unsigned
 *   32-bit integer with `>>> 0` before being written.
 * @returns {Buffer} A new 4-byte buffer holding the encoded value.
 */
function writeUInt32(value) {
    const encoded = Buffer.allocUnsafe(4);
    // `>>> 0` wraps negatives and truncates fractions so the write never throws a range error.
    const unsigned = value >>> 0;
    encoded.writeUInt32LE(unsigned, 0);
    return encoded;
}
|
|
21
|
+
/**
 * Encodes a number as a 4-byte little-endian IEEE-754 single-precision float.
 *
 * @param {number} value - Number to encode.
 * @returns {Buffer} A new 4-byte buffer holding the encoded value.
 */
function writeFloat32(value) {
    const encoded = Buffer.allocUnsafe(4);
    encoded.writeFloatLE(value, 0);
    return encoded;
}
|
|
26
|
+
/**
 * Encodes a string as a length-prefixed UTF-8 byte sequence.
 *
 * @param {string} value - Text to encode.
 * @returns {Buffer[]} Two buffers: the byte length as a uint32, then the UTF-8 bytes.
 */
function writeString(value) {
    const payload = Buffer.from(value, 'utf-8');
    // The prefix counts bytes, not characters, so multi-byte text round-trips.
    const lengthPrefix = writeUInt32(payload.length);
    return [lengthPrefix, payload];
}
|
|
30
|
+
/**
 * Guards a read of `size` bytes starting at `offset`.
 *
 * @param {Buffer} buf - Buffer being read.
 * @param {number} offset - Start position of the intended read.
 * @param {number} size - Number of bytes the read needs.
 * @throws {Error} When the requested range extends past the end of `buf`.
 */
function assertAvailable(buf, offset, size) {
    const overruns = offset + size > buf.length;
    if (!overruns) {
        return;
    }
    throw new Error('HNSW index file is truncated');
}
|
|
35
|
+
/**
 * Reads a little-endian uint32 at the cursor and advances the cursor by 4.
 *
 * @param {Buffer} buf - Buffer being decoded.
 * @param {{offset: number}} state - Mutable read cursor; updated in place.
 * @returns {number} The decoded unsigned integer.
 * @throws {Error} When fewer than 4 bytes remain at the cursor.
 */
function readUInt32(buf, state) {
    const start = state.offset;
    assertAvailable(buf, start, 4);
    const decoded = buf.readUInt32LE(start);
    // The cursor only moves once the read has succeeded.
    state.offset = start + 4;
    return decoded;
}
|
|
41
|
+
/**
 * Reads a little-endian float32 at the cursor and advances the cursor by 4.
 *
 * @param {Buffer} buf - Buffer being decoded.
 * @param {{offset: number}} state - Mutable read cursor; updated in place.
 * @returns {number} The decoded single-precision value.
 * @throws {Error} When fewer than 4 bytes remain at the cursor.
 */
function readFloat32(buf, state) {
    const start = state.offset;
    assertAvailable(buf, start, 4);
    const decoded = buf.readFloatLE(start);
    // The cursor only moves once the read has succeeded.
    state.offset = start + 4;
    return decoded;
}
|
|
47
|
+
/**
 * Reads a length-prefixed UTF-8 string at the cursor and advances past it.
 *
 * The encoding is a uint32 byte length followed by that many UTF-8 bytes;
 * a zero length decodes to the empty string.
 *
 * @param {Buffer} buf - Buffer being decoded.
 * @param {{offset: number}} state - Mutable read cursor; updated in place.
 * @returns {string} The decoded text.
 * @throws {Error} When the prefix or the announced payload runs past the end of `buf`.
 */
function readString(buf, state) {
    const byteLength = readUInt32(buf, state);
    if (byteLength !== 0) {
        const begin = state.offset;
        assertAvailable(buf, begin, byteLength);
        const end = begin + byteLength;
        const text = buf.toString('utf-8', begin, end);
        state.offset = end;
        return text;
    }
    return '';
}
|
|
56
|
+
/**
 * Copies a byte range out of a buffer into a fresh Int8Array.
 *
 * @param {Buffer} buf - Source buffer.
 * @param {number} offset - Index of the first byte to copy.
 * @param {number} length - Number of bytes to copy.
 * @returns {Int8Array} A new array that does not share memory with `buf`;
 *   bytes above 127 are reinterpreted as negative values.
 * @throws {Error} When the requested range runs past the end of `buf`.
 */
function copyInt8Slice(buf, offset, length) {
    assertAvailable(buf, offset, length);
    const copy = new Int8Array(length);
    copy.set(buf.subarray(offset, offset + length));
    return copy;
}
|
|
63
|
+
/**
 * Hierarchical navigable small-world (HNSW) graph index over quantized vectors.
 *
 * Stored vectors are objects of the form `{ dim, scale, q }` where `q` holds the
 * quantized components. Vectors are dequantized with `dequantizeSQ8` and compared
 * by cosine similarity, so a higher score means a closer match. Edge weights kept
 * in the neighbor maps are distances, computed as `1 - cosine similarity`.
 *
 * Nodes live in `this.nodes` (id -> `{ id, vector, level, neighbors }`), where
 * `neighbors` maps a layer number to a `Map<neighborId, distance>`.
 */
class HNSWIndex {
    /**
     * @param {object} config - Graph parameters (`M`, `efConstruction`, `efSearch`,
     *   `quantizationBits`), normalised through `clampHnswParameters`, plus the
     *   optional `dim` (vector dimension) and `maxElements` (capacity limit).
     */
    constructor(config) {
        const clamped = clampHnswParameters(config);
        const dim = typeof config.dim === 'number' && Number.isFinite(config.dim) ? config.dim : 0;
        this.config = { ...clamped, dim, maxElements: config.maxElements };
        this.nodes = new Map();
        this.entryPoint = null;
        this.maxLevel = 0;
        this.levelMult = this.computeLevelMult();
        // An unknown dimension is stored as `undefined` and adopted from the first vector seen.
        this.dim = dim > 0 ? dim : undefined;
        this.levelCap = this.computeLevelCap();
    }

    /** @returns {object} A shallow copy of the effective configuration. */
    getConfig() {
        return { ...this.config };
    }

    /** @returns {number} Number of vectors currently stored. */
    getCount() {
        return this.nodes.size;
    }

    /** @returns {number} Alias of {@link HNSWIndex#getCount}. */
    size() {
        return this.getCount();
    }

    /**
     * Inserts one vector. Accepts either `add(id, vector)` or `add({ id, vector })`.
     *
     * @param {string|{id: string, vector: object}} arg1 - The id, or a complete entry.
     * @param {object} [arg2] - The quantized vector when `arg1` is an id.
     * @throws {Error} When the id or vector is missing, the id already exists, the
     *   index has reached `maxElements`, or the vector fails dimension validation.
     */
    add(arg1, arg2) {
        const entry = typeof arg1 === 'string' ? { id: arg1, vector: arg2 } : arg1;
        if (!entry?.id)
            throw new Error('HNSW entry id is required');
        if (!entry.vector)
            throw new Error('HNSW entry vector is required');
        if (this.nodes.has(entry.id))
            throw new Error(`HNSW entry already exists: ${entry.id}`);
        if (this.config.maxElements && this.nodes.size >= this.config.maxElements) {
            throw new Error('HNSW index is full');
        }
        this.ensureDim(entry.vector);
        const level = this.selectLevel();
        const node = {
            id: entry.id,
            vector: entry.vector,
            level,
            neighbors: new Map(),
        };
        this.nodes.set(node.id, node);
        // The very first node becomes the entry point and has nothing to link to.
        if (!this.entryPoint) {
            this.entryPoint = { nodeId: node.id, level: node.level };
            this.maxLevel = node.level;
            return;
        }
        const entryPointLevel = this.entryPoint.level;
        // Layers above the current entry point do not exist yet, so linking starts below them.
        const insertLevel = Math.min(level, entryPointLevel);
        let current = this.findInsertionPoint(entry.vector, insertLevel);
        for (let layer = insertLevel; layer >= 0; layer--) {
            const candidates = this.searchLayer(entry.vector, current, this.config.efConstruction, layer);
            const neighbors = this.selectNeighbors(candidates, this.config.M, node.id);
            this.connectNeighbors(node.id, neighbors, layer);
            // Continue the descent from the best match found on this layer.
            if (candidates.length > 0)
                current = candidates[0].id;
        }
        if (node.level > this.maxLevel) {
            this.entryPoint = { nodeId: node.id, level: node.level };
            this.maxLevel = node.level;
        }
    }

    /**
     * Inserts several entries in order by calling {@link HNSWIndex#add} on each.
     *
     * @param {Array<{id: string, vector: object}>} entries - Entries to insert.
     */
    addBatch(entries) {
        if (entries.length === 0)
            return;
        for (const entry of entries)
            this.add(entry);
    }

    /**
     * Finds the approximate nearest neighbors of a query vector.
     *
     * @param {object} query - Quantized query vector.
     * @param {number} k - Maximum number of results; values `<= 0` yield no results.
     * @returns {Array<{id: string, score: number}>} Matches ordered by descending score.
     * @throws {Error} When the query fails dimension validation.
     */
    search(query, k) {
        if (!this.entryPoint)
            return [];
        if (k <= 0)
            return [];
        const limit = Math.max(1, k);
        this.ensureDim(query);
        let current = this.entryPoint.nodeId;
        // Greedy descent through the upper layers, keeping a single best node per layer.
        for (let level = this.entryPoint.level; level > 0; level--) {
            const nearest = this.searchLayer(query, current, 1, level);
            if (nearest.length > 0)
                current = nearest[0].id;
        }
        const ef = Math.max(limit, this.config.efSearch);
        const results = this.searchLayer(query, current, ef, 0);
        return results.slice(0, limit);
    }

    /**
     * Runs {@link HNSWIndex#search} for each query.
     *
     * @param {object[]} queries - Quantized query vectors.
     * @param {number} k - Maximum number of results per query.
     * @returns {Array<Array<{id: string, score: number}>>} One result list per query.
     */
    searchBatch(queries, k) {
        if (queries.length === 0)
            return [];
        return queries.map((query) => this.search(query, k));
    }

    /**
     * Serializes the index to a binary file, creating the parent directory if needed.
     *
     * Layout (all integers uint32 little-endian, floats float32 little-endian):
     * magic `HNSW`, version, M, efConstruction, efSearch, quantizationBits, dim,
     * maxElements (0 when unset), node count, max level; then per node: id string,
     * level, vector dim, scale, raw quantized bytes, neighbor layers (layer number,
     * count, then id string + distance per neighbor); finally the entry point id
     * string and level.
     *
     * @param {string} filePath - Destination path.
     * @returns {Promise<void>}
     */
    async save(filePath) {
        const pieces = [];
        pieces.push(HNSW_MAGIC);
        pieces.push(writeUInt32(HNSW_VERSION));
        pieces.push(writeUInt32(this.config.M));
        pieces.push(writeUInt32(this.config.efConstruction));
        pieces.push(writeUInt32(this.config.efSearch));
        pieces.push(writeUInt32(this.config.quantizationBits));
        pieces.push(writeUInt32(this.dim ?? this.config.dim ?? 0));
        pieces.push(writeUInt32(this.config.maxElements ?? 0));
        pieces.push(writeUInt32(this.nodes.size));
        pieces.push(writeUInt32(this.maxLevel));
        for (const node of this.nodes.values()) {
            pieces.push(...writeString(node.id));
            pieces.push(writeUInt32(node.level));
            pieces.push(writeUInt32(node.vector.dim));
            pieces.push(writeFloat32(node.vector.scale));
            // View over the quantized bytes; no copy is made until Buffer.concat below.
            const qBuffer = Buffer.from(node.vector.q.buffer, node.vector.q.byteOffset, node.vector.q.byteLength);
            pieces.push(qBuffer);
            const neighborsByLevel = Array.from(node.neighbors.entries());
            pieces.push(writeUInt32(neighborsByLevel.length));
            for (const [level, neighbors] of neighborsByLevel) {
                pieces.push(writeUInt32(level));
                pieces.push(writeUInt32(neighbors.size));
                for (const [neighborId, distance] of neighbors) {
                    pieces.push(...writeString(neighborId));
                    pieces.push(writeFloat32(distance));
                }
            }
        }
        if (this.entryPoint) {
            pieces.push(...writeString(this.entryPoint.nodeId));
            pieces.push(writeUInt32(this.entryPoint.level));
        }
        else {
            // No entry point: a zero-length id string followed by level 0.
            pieces.push(writeUInt32(0));
            pieces.push(writeUInt32(0));
        }
        const output = Buffer.concat(pieces);
        await fs_extra_1.default.ensureDir(path_1.default.dirname(filePath));
        await fs_extra_1.default.writeFile(filePath, output);
    }

    /**
     * Replaces the contents of this index with a file written by {@link HNSWIndex#save}.
     *
     * Instance state is only assigned after the whole file has been parsed, so a
     * parse failure leaves the current contents untouched.
     *
     * @param {string} filePath - Source path.
     * @returns {Promise<void>}
     * @throws {Error} When the file is truncated, has a wrong magic or version, a
     *   node's dimension disagrees with the header, or the entry point is unknown
     *   or claims a level above its node's level.
     */
    async load(filePath) {
        const data = await fs_extra_1.default.readFile(filePath);
        const state = { offset: 0 };
        assertAvailable(data, state.offset, HNSW_MAGIC.length);
        const magic = data.subarray(state.offset, state.offset + HNSW_MAGIC.length);
        state.offset += HNSW_MAGIC.length;
        if (!magic.equals(HNSW_MAGIC)) {
            throw new Error('Invalid HNSW index file');
        }
        const version = readUInt32(data, state);
        if (version !== HNSW_VERSION) {
            throw new Error(`Unsupported HNSW index version: ${version}`);
        }
        const M = readUInt32(data, state);
        const efConstruction = readUInt32(data, state);
        const efSearch = readUInt32(data, state);
        const quantizationBits = readUInt32(data, state);
        const dim = readUInt32(data, state);
        // 0 on disk means "no limit".
        const maxElements = readUInt32(data, state) || undefined;
        const nodeCount = readUInt32(data, state);
        const headerMaxLevel = readUInt32(data, state);
        const config = {
            M,
            efConstruction,
            efSearch,
            quantizationBits,
            dim: dim || undefined,
            maxElements,
        };
        const nodes = new Map();
        let maxLevel = headerMaxLevel;
        // Highest-level node seen so far; used as entry point if the file stores none.
        let highest = null;
        for (let i = 0; i < nodeCount; i++) {
            const id = readString(data, state);
            const level = readUInt32(data, state);
            const vecDim = readUInt32(data, state);
            const scale = readFloat32(data, state);
            const q = copyInt8Slice(data, state.offset, vecDim);
            state.offset += vecDim;
            if (dim && vecDim !== dim) {
                throw new Error(`HNSW node dim mismatch: expected ${dim}, got ${vecDim}`);
            }
            const neighborsByLevelCount = readUInt32(data, state);
            const neighbors = new Map();
            for (let j = 0; j < neighborsByLevelCount; j++) {
                const levelId = readUInt32(data, state);
                const neighborCount = readUInt32(data, state);
                const map = new Map();
                for (let k = 0; k < neighborCount; k++) {
                    const neighborId = readString(data, state);
                    const distance = readFloat32(data, state);
                    map.set(neighborId, distance);
                }
                neighbors.set(levelId, map);
            }
            const node = {
                id,
                level,
                vector: { dim: vecDim, scale, q },
                neighbors,
            };
            nodes.set(id, node);
            if (!highest || level > highest.level)
                highest = { nodeId: id, level };
            if (level > maxLevel)
                maxLevel = level;
        }
        const entryId = readString(data, state);
        const entryLevel = readUInt32(data, state);
        let entryPoint = null;
        if (entryId) {
            if (!nodes.has(entryId)) {
                throw new Error(`HNSW entry point not found: ${entryId}`);
            }
            const entryNode = nodes.get(entryId);
            if (entryNode && entryLevel > entryNode.level) {
                throw new Error(`HNSW entry point level mismatch: ${entryLevel} > ${entryNode.level}`);
            }
            entryPoint = { nodeId: entryId, level: entryLevel };
        }
        else if (highest) {
            entryPoint = highest;
        }
        const clamped = clampHnswParameters(config);
        const resolvedDim = dim || this.dim || 0;
        this.config = { ...clamped, dim: resolvedDim, maxElements };
        this.nodes = nodes;
        this.entryPoint = entryPoint;
        this.maxLevel = maxLevel;
        this.dim = resolvedDim > 0 ? resolvedDim : undefined;
        // Header carried no dimension: infer it from the first stored vector.
        if (!this.dim && nodes.size > 0) {
            const first = nodes.values().next().value;
            this.dim = first?.vector.dim;
        }
        this.config.dim = this.dim ?? 0;
        this.levelMult = this.computeLevelMult();
        this.levelCap = this.computeLevelCap();
    }

    /** Removes every node and the entry point; the configuration is kept. */
    clear() {
        this.nodes.clear();
        this.entryPoint = null;
        this.maxLevel = 0;
        // Same normalisation as the constructor: a non-positive dimension means "unknown".
        this.dim = this.config.dim > 0 ? this.config.dim : undefined;
    }

    /**
     * Summarises the graph.
     *
     * @returns {{nodeCount: number, edgeCount: number, maxLevel: number, memoryUsage: number}}
     *   `edgeCount` counts directed neighbor entries; `memoryUsage` is a rough byte
     *   estimate that only adds up id bytes, quantized vector bytes and a fixed
     *   8 bytes per vector and per neighbor entry.
     */
    stats() {
        let edgeCount = 0;
        let memoryUsage = 0;
        for (const node of this.nodes.values()) {
            memoryUsage += Buffer.byteLength(node.id, 'utf-8');
            memoryUsage += node.vector.q.byteLength + 8;
            for (const neighbors of node.neighbors.values()) {
                edgeCount += neighbors.size;
                for (const neighborId of neighbors.keys()) {
                    memoryUsage += Buffer.byteLength(neighborId, 'utf-8') + 8;
                }
            }
        }
        return {
            nodeCount: this.nodes.size,
            edgeCount,
            maxLevel: this.maxLevel,
            memoryUsage,
        };
    }

    /**
     * Exports the index as a plain object, with quantized vectors base64-encoded.
     *
     * @returns {{config: object, nodes: object[], entryPoint: object|null, maxLevel: number}}
     */
    toSnapshot() {
        return {
            config: { ...this.config },
            nodes: Array.from(this.nodes.values()).map((node) => ({
                id: node.id,
                level: node.level,
                dim: node.vector.dim,
                scale: node.vector.scale,
                qvec_b64: Buffer.from(node.vector.q).toString('base64'),
                neighbors: Array.from(node.neighbors.entries()).map(([level, neighbors]) => ({
                    level,
                    items: Array.from(neighbors.entries()).map(([id, distance]) => ({ id, distance })),
                })),
            })),
            entryPoint: this.entryPoint ? { ...this.entryPoint } : null,
            maxLevel: this.maxLevel,
        };
    }

    /**
     * Builds an index from a snapshot object.
     *
     * Two shapes are accepted. When `snapshot.entries` is non-empty, each entry is
     * re-inserted through {@link HNSWIndex#add}, which rebuilds the graph. Otherwise
     * `snapshot.nodes` (the shape produced by {@link HNSWIndex#toSnapshot}) is
     * restored as-is, including levels and neighbor maps.
     *
     * @param {object} snapshot - Snapshot with `config` and either `entries` or `nodes`.
     * @returns {HNSWIndex} The reconstructed index.
     */
    static fromSnapshot(snapshot) {
        const index = new HNSWIndex(snapshot.config);
        if (snapshot.entries && snapshot.entries.length > 0) {
            for (const entry of snapshot.entries) {
                index.add({
                    id: entry.id,
                    vector: {
                        dim: entry.dim,
                        scale: entry.scale,
                        q: new Int8Array(Buffer.from(entry.qvec_b64, 'base64')),
                    },
                });
            }
            return index;
        }
        const nodes = snapshot.nodes ?? [];
        for (const node of nodes) {
            index.nodes.set(node.id, {
                id: node.id,
                level: node.level,
                vector: {
                    dim: node.dim,
                    scale: node.scale,
                    q: new Int8Array(Buffer.from(node.qvec_b64, 'base64')),
                },
                neighbors: new Map(node.neighbors.map((layer) => [
                    layer.level,
                    new Map(layer.items.map((item) => [item.id, item.distance])),
                ])),
            });
        }
        let entryPoint = snapshot.entryPoint ? { ...snapshot.entryPoint } : null;
        if (!entryPoint) {
            // Same fallback as load(): without a stored entry point, start from the
            // highest-level node so a non-empty index stays searchable.
            for (const node of index.nodes.values()) {
                if (!entryPoint || node.level > entryPoint.level)
                    entryPoint = { nodeId: node.id, level: node.level };
            }
        }
        index.entryPoint = entryPoint;
        index.maxLevel = snapshot.maxLevel ?? entryPoint?.level ?? 0;
        if (index.nodes.size > 0 && index.maxLevel === 0) {
            for (const node of index.nodes.values()) {
                if (node.level > index.maxLevel)
                    index.maxLevel = node.level;
            }
        }
        if (!index.dim && index.nodes.size > 0) {
            const first = index.nodes.values().next().value;
            index.dim = first?.vector.dim;
        }
        index.config.dim = index.dim ?? 0;
        index.levelMult = index.computeLevelMult();
        index.levelCap = index.computeLevelCap();
        return index;
    }

    /**
     * Validates a vector against the index dimension, adopting the vector's
     * dimension when the index does not have one yet.
     *
     * @param {{dim: number, q: ArrayLike<number>}} vector - Quantized vector to check.
     * @throws {Error} When `vector.dim` differs from the index dimension, or when
     *   `vector.q.length` differs from `vector.dim`.
     */
    ensureDim(vector) {
        const dimKnown = Boolean(this.dim);
        if (dimKnown && vector.dim !== this.dim) {
            throw new Error(`HNSW vector dim mismatch: expected ${this.dim}, got ${vector.dim}`);
        }
        // Checked for every vector, including the first one: save() writes `dim`
        // followed by all of `q`, so a mismatch would produce an unreadable file.
        if (vector.q.length !== vector.dim) {
            throw new Error(`HNSW quantized vector length mismatch: expected ${vector.dim}, got ${vector.q.length}`);
        }
        if (!dimKnown) {
            this.dim = vector.dim;
            this.config.dim = vector.dim;
        }
    }

    /** @returns {number} Level multiplier `1 / ln(M)`, or 1 if that is not usable. */
    computeLevelMult() {
        const M = Math.max(2, this.config.M);
        const base = Math.log(M);
        if (!Number.isFinite(base) || base === 0)
            return 1;
        return 1 / base;
    }

    /**
     * @returns {number|undefined} Highest level a node may be assigned,
     *   `ceil(log_M(maxElements))`, or `undefined` when no capacity limit is set.
     */
    computeLevelCap() {
        if (!this.config.maxElements || this.config.maxElements <= 0)
            return undefined;
        const base = Math.log(Math.max(2, this.config.M));
        if (!Number.isFinite(base) || base === 0)
            return undefined;
        const level = Math.ceil(Math.log(this.config.maxElements) / base);
        return Math.max(0, level);
    }

    /** @returns {number} A random level for a new node, limited by `levelCap` when set. */
    selectLevel() {
        // Math.random() can return 0; the floor keeps the logarithm finite.
        const r = Math.max(Number.EPSILON, Math.random());
        const level = Math.floor(-Math.log(r) * this.levelMult);
        if (this.levelCap == null)
            return level;
        return Math.min(level, this.levelCap);
    }

    /**
     * Picks the best-scoring distinct candidates as neighbors.
     *
     * @param {Array<{id: string, score: number}>} candidates - Scored candidates.
     * @param {number} M - Maximum number of neighbors (at least one is allowed).
     * @param {string} excludeId - Id that must not be selected (the node itself).
     * @returns {string[]} Selected neighbor ids, best first.
     */
    selectNeighbors(candidates, M, excludeId) {
        const limit = Math.max(1, M);
        const sorted = candidates
            .filter((c) => c.id !== excludeId)
            .sort((a, b) => b.score - a.score);
        const neighbors = [];
        const seen = new Set();
        for (const candidate of sorted) {
            if (seen.has(candidate.id))
                continue;
            seen.add(candidate.id);
            neighbors.push(candidate.id);
            if (neighbors.length >= limit)
                break;
        }
        return neighbors;
    }

    /**
     * Best-first search within a single layer.
     *
     * @param {object} query - Quantized query vector.
     * @param {string} entryPoint - Id of the node to start from.
     * @param {number} ef - Size of the result set to maintain (at least 1).
     * @param {number} level - Layer whose neighbor maps are followed.
     * @returns {Array<{id: string, score: number}>} Up to `ef` results ordered by
     *   descending score; empty when the start node does not exist.
     */
    searchLayer(query, entryPoint, ef, level) {
        const entryNode = this.nodes.get(entryPoint);
        if (!entryNode)
            return [];
        const efSearch = Math.max(1, ef);
        // Dequantize the query once instead of once per comparison.
        const queryVector = (0, sq8_1.dequantizeSQ8)(query);
        const visited = new Set();
        // `candidates` are nodes still to expand; `top` is the best result set so far.
        const candidates = [];
        const top = [];
        const entryScore = this.scoreWithQuery(queryVector, entryNode.vector);
        const entryResult = { id: entryPoint, score: entryScore };
        candidates.push(entryResult);
        top.push(entryResult);
        visited.add(entryPoint);
        while (candidates.length > 0) {
            candidates.sort((a, b) => b.score - a.score);
            const current = candidates.shift();
            top.sort((a, b) => b.score - a.score);
            const worstTop = top[top.length - 1];
            // Stop once the best unexpanded candidate cannot improve a full result set.
            if (worstTop && current.score < worstTop.score && top.length >= efSearch) {
                break;
            }
            const currentNode = this.nodes.get(current.id);
            if (!currentNode)
                continue;
            const neighborMap = currentNode.neighbors.get(level);
            if (!neighborMap)
                continue;
            for (const neighborId of neighborMap.keys()) {
                if (visited.has(neighborId))
                    continue;
                visited.add(neighborId);
                // Neighbor ids may reference nodes that are not present; skip them.
                const neighborNode = this.nodes.get(neighborId);
                if (!neighborNode)
                    continue;
                const score = this.scoreWithQuery(queryVector, neighborNode.vector);
                const candidate = { id: neighborId, score };
                if (top.length < efSearch) {
                    candidates.push(candidate);
                    top.push(candidate);
                    continue;
                }
                top.sort((a, b) => b.score - a.score);
                const worst = top[top.length - 1];
                if (worst && score > worst.score) {
                    candidates.push(candidate);
                    top.push(candidate);
                    top.sort((a, b) => b.score - a.score);
                    while (top.length > efSearch)
                        top.pop();
                }
            }
        }
        top.sort((a, b) => b.score - a.score);
        return top;
    }

    /**
     * Descends greedily from the entry point through the layers above `level`.
     *
     * @param {object} query - Quantized vector being inserted.
     * @param {number} level - Layer at which linking will start.
     * @returns {string} Id of the node to start the layer search from.
     * @throws {Error} When the index has no entry point.
     */
    findInsertionPoint(query, level) {
        if (!this.entryPoint)
            throw new Error('HNSW index is empty');
        let current = this.entryPoint.nodeId;
        for (let l = this.entryPoint.level; l > level; l--) {
            const results = this.searchLayer(query, current, 1, l);
            if (results.length > 0)
                current = results[0].id;
        }
        return current;
    }

    /**
     * Links a node with the given neighbors in both directions on one layer,
     * pruning any neighbor map that grows beyond `M` entries.
     *
     * @param {string} nodeId - Id of the node being linked.
     * @param {string[]} neighbors - Ids to link to; unknown ids are skipped.
     * @param {number} level - Layer on which the links are created.
     */
    connectNeighbors(nodeId, neighbors, level) {
        const node = this.nodes.get(nodeId);
        if (!node)
            return;
        const nodeNeighbors = this.getNeighborMap(node, level);
        for (const neighborId of neighbors) {
            const neighborNode = this.nodes.get(neighborId);
            if (!neighborNode)
                continue;
            const distance = this.distanceBetweenVectors(node.vector, neighborNode.vector);
            nodeNeighbors.set(neighborId, distance);
            const neighborMap = this.getNeighborMap(neighborNode, level);
            neighborMap.set(nodeId, distance);
            if (neighborMap.size > this.config.M) {
                neighborNode.neighbors.set(level, this.pruneNeighbors(neighborMap, this.config.M));
            }
        }
        if (nodeNeighbors.size > this.config.M) {
            node.neighbors.set(level, this.pruneNeighbors(nodeNeighbors, this.config.M));
        }
    }

    /**
     * Returns the neighbor map of a node on a layer, creating it if absent.
     *
     * @param {object} node - Graph node.
     * @param {number} level - Layer number.
     * @returns {Map<string, number>} Neighbor id -> distance.
     */
    getNeighborMap(node, level) {
        const existing = node.neighbors.get(level);
        if (existing)
            return existing;
        const map = new Map();
        node.neighbors.set(level, map);
        return map;
    }

    /**
     * Keeps the `max` closest neighbors.
     *
     * @param {Map<string, number>} neighbors - Neighbor id -> distance.
     * @param {number} max - Maximum number of entries to keep.
     * @returns {Map<string, number>} The input map when already small enough,
     *   otherwise a new map holding the entries with the smallest distances.
     */
    pruneNeighbors(neighbors, max) {
        if (neighbors.size <= max)
            return neighbors;
        const sorted = Array.from(neighbors.entries()).sort((a, b) => a[1] - b[1]).slice(0, max);
        return new Map(sorted);
    }

    /**
     * @param {*} queryVector - Already dequantized query.
     * @param {object} vector - Quantized stored vector.
     * @returns {number} Cosine similarity between the two.
     */
    scoreWithQuery(queryVector, vector) {
        return (0, sq8_1.cosineSimilarity)(queryVector, (0, sq8_1.dequantizeSQ8)(vector));
    }

    /**
     * @param {object} a - Quantized vector.
     * @param {object} b - Quantized vector.
     * @returns {number} Cosine similarity of the dequantized vectors.
     */
    scoreBetweenVectors(a, b) {
        return (0, sq8_1.cosineSimilarity)((0, sq8_1.dequantizeSQ8)(a), (0, sq8_1.dequantizeSQ8)(b));
    }

    /**
     * @param {object} a - Quantized vector.
     * @param {object} b - Quantized vector.
     * @returns {number} Cosine distance, `1 - similarity`.
     */
    distanceBetweenVectors(a, b) {
        return 1 - this.scoreBetweenVectors(a, b);
    }
}
|
|
550
|
+
exports.HNSWIndex = HNSWIndex;
|
|
551
|
+
/**
 * Normalises HNSW tuning parameters to integers within supported ranges.
 *
 * Each value is rounded and then bounded: `M >= 2`, `efConstruction >= 10`,
 * `efSearch >= 10`, `4 <= quantizationBits <= 8`. A value that is missing or
 * not numeric (it rounds to NaN) falls back to the lower bound for `M`,
 * `efConstruction` and `efSearch`, and to 8 for `quantizationBits`, which is
 * the default bit width of `quantize` in this module. Without the fallback NaN
 * would pass through `Math.max`/`Math.min` unchanged.
 *
 * @param {{M?: number, efConstruction?: number, efSearch?: number, quantizationBits?: number}} config
 *   Raw parameters; other properties are ignored.
 * @returns {{M: number, efConstruction: number, efSearch: number, quantizationBits: number}}
 *   The normalised parameters.
 */
function clampHnswParameters(config) {
    const roundOr = (value, fallback) => {
        const rounded = Math.round(value);
        return Number.isNaN(rounded) ? fallback : rounded;
    };
    return {
        M: Math.max(2, roundOr(config.M, 2)),
        efConstruction: Math.max(10, roundOr(config.efConstruction, 10)),
        efSearch: Math.max(10, roundOr(config.efSearch, 10)),
        quantizationBits: Math.max(4, Math.min(8, roundOr(config.quantizationBits, 8))),
    };
}
|
|
559
|
+
/**
 * Quantizes a vector with `quantizeSQ8` and tags the result with an id.
 *
 * @param {*} vector - Vector passed through to `quantizeSQ8`.
 * @param {number} [bits=8] - Bit width passed through to `quantizeSQ8`.
 * @param {string} [id=''] - Identifier stored on the returned object.
 * @returns {object} A new object with the quantizer's properties plus `id`.
 */
function quantize(vector, bits = 8, id = '') {
    const { quantizeSQ8 } = sq8_1;
    const quantized = quantizeSQ8(vector, bits);
    // `id` is assigned last so it wins over any `id` the quantizer returned.
    return Object.assign({}, quantized, { id });
}
|
|
563
|
+
/**
 * Restores a quantized vector by delegating to `dequantizeSQ8`.
 *
 * @param {object} q - Quantized vector.
 * @returns {*} Whatever `dequantizeSQ8` returns for `q`.
 */
function dequantize(q) {
    const { dequantizeSQ8 } = sq8_1;
    return dequantizeSQ8(q);
}
|
|
566
|
+
/**
 * Cosine similarity between two quantized vectors.
 *
 * Both inputs are dequantized with `dequantizeSQ8` before being compared.
 *
 * @param {object} a - Quantized vector.
 * @param {object} b - Quantized vector.
 * @returns {number} Similarity reported by the SQ8 module's `cosineSimilarity`.
 */
function cosineSimilarity(a, b) {
    const { cosineSimilarity: compare, dequantizeSQ8 } = sq8_1;
    const left = dequantizeSQ8(a);
    const right = dequantizeSQ8(b);
    return compare(left, right);
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runParallelIndexing = exports.cosineSimilarity = exports.dequantize = exports.quantize = exports.clampHnswParameters = exports.HNSWIndex = exports.MemoryMonitor = exports.defaultIndexingRuntimeConfig = exports.defaultErrorHandlingConfig = exports.defaultIndexingConfig = void 0;
|
|
4
|
+
var config_1 = require("./config");
|
|
5
|
+
Object.defineProperty(exports, "defaultIndexingConfig", { enumerable: true, get: function () { return config_1.defaultIndexingConfig; } });
|
|
6
|
+
Object.defineProperty(exports, "defaultErrorHandlingConfig", { enumerable: true, get: function () { return config_1.defaultErrorHandlingConfig; } });
|
|
7
|
+
Object.defineProperty(exports, "defaultIndexingRuntimeConfig", { enumerable: true, get: function () { return config_1.defaultIndexingRuntimeConfig; } });
|
|
8
|
+
var monitor_1 = require("./monitor");
|
|
9
|
+
Object.defineProperty(exports, "MemoryMonitor", { enumerable: true, get: function () { return monitor_1.MemoryMonitor; } });
|
|
10
|
+
var hnsw_1 = require("./hnsw");
|
|
11
|
+
Object.defineProperty(exports, "HNSWIndex", { enumerable: true, get: function () { return hnsw_1.HNSWIndex; } });
|
|
12
|
+
Object.defineProperty(exports, "clampHnswParameters", { enumerable: true, get: function () { return hnsw_1.clampHnswParameters; } });
|
|
13
|
+
Object.defineProperty(exports, "quantize", { enumerable: true, get: function () { return hnsw_1.quantize; } });
|
|
14
|
+
Object.defineProperty(exports, "dequantize", { enumerable: true, get: function () { return hnsw_1.dequantize; } });
|
|
15
|
+
Object.defineProperty(exports, "cosineSimilarity", { enumerable: true, get: function () { return hnsw_1.cosineSimilarity; } });
|
|
16
|
+
var parallel_1 = require("./parallel");
|
|
17
|
+
Object.defineProperty(exports, "runParallelIndexing", { enumerable: true, get: function () { return parallel_1.runParallelIndexing; } });
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.MemoryMonitor = void 0;
|
|
7
|
+
exports.getSystemMemoryBudgetMb = getSystemMemoryBudgetMb;
|
|
8
|
+
const os_1 = __importDefault(require("os"));
|
|
9
|
+
/**
 * Tracks the process's resident memory against a budget and suggests
 * back-pressure (short delays, fewer workers) when thresholds are crossed.
 *
 * Decisions are always based on the most recent `sample()`; nothing is
 * measured until `sample()` has been called at least once.
 */
class MemoryMonitor {
    /**
     * @param {{budgetMb: number, warningThreshold: number, criticalThreshold: number}} config
     *   Budget in megabytes (raised to at least 1) and the usage ratios,
     *   clamped to the range 0..1, at which warning and critical states begin.
     */
    constructor(config) {
        const { budgetMb, warningThreshold, criticalThreshold } = config;
        this.budgetMb = Math.max(1, budgetMb);
        this.warnThreshold = clamp(warningThreshold, 0, 1);
        this.criticalThreshold = clamp(criticalThreshold, 0, 1);
        this.lastSnapshot = null;
    }

    /**
     * Builds a monitor from an error-handling config object.
     *
     * @param {{memoryWarningThreshold: number, memoryCriticalThreshold: number}} config
     *   Source of the two thresholds.
     * @param {number} budgetMb - Memory budget in megabytes.
     * @returns {MemoryMonitor} The configured monitor.
     */
    static fromErrorConfig(config, budgetMb) {
        const { memoryWarningThreshold, memoryCriticalThreshold } = config;
        return new MemoryMonitor({
            budgetMb,
            warningThreshold: memoryWarningThreshold,
            criticalThreshold: memoryCriticalThreshold,
        });
    }

    /**
     * Measures current memory usage and remembers the result.
     *
     * @returns {object} Snapshot with `rssMb`, `heapUsedMb`, `heapTotalMb`,
     *   `externalMb`, `budgetMb`, `usageRatio` (RSS divided by the budget) and
     *   the `warning` / `critical` flags.
     */
    sample() {
        const { rss, heapUsed, heapTotal, external } = process.memoryUsage();
        const rssMb = bytesToMb(rss);
        const heapUsedMb = bytesToMb(heapUsed);
        const heapTotalMb = bytesToMb(heapTotal);
        const externalMb = bytesToMb(external ?? 0);
        let usageRatio = 0;
        if (this.budgetMb > 0) {
            usageRatio = rssMb / this.budgetMb;
        }
        this.lastSnapshot = {
            rssMb,
            heapUsedMb,
            heapTotalMb,
            externalMb,
            budgetMb: this.budgetMb,
            usageRatio,
            warning: usageRatio >= this.warnThreshold,
            critical: usageRatio >= this.criticalThreshold,
        };
        return this.lastSnapshot;
    }

    /** @returns {object|null} The snapshot from the latest `sample()`, or null. */
    getLastSnapshot() {
        return this.lastSnapshot;
    }

    /** @returns {boolean} True when the latest snapshot is in the critical state. */
    shouldThrottle() {
        return this.lastSnapshot?.critical ? true : false;
    }

    /**
     * Pauses briefly when the latest snapshot is critical.
     *
     * The delay is 50 ms per unit of usage ratio, kept between 25 and 250 ms.
     *
     * @returns {Promise<void>}
     */
    async throttleIfNeeded() {
        if (this.shouldThrottle()) {
            const ratio = this.lastSnapshot?.usageRatio ?? 1;
            const scaled = Math.round(ratio * 50);
            const delayMs = Math.min(250, Math.max(25, scaled));
            await sleep(delayMs);
        }
    }

    /**
     * Suggests a worker count based on the latest snapshot.
     *
     * @param {number} current - Current number of workers.
     * @returns {number} Half of `current` when critical, one fewer when in the
     *   warning state (never below 1 in either case), otherwise `current`.
     */
    adaptWorkerCount(current) {
        const snapshot = this.lastSnapshot;
        if (snapshot?.critical) {
            return Math.max(1, Math.floor(current / 2));
        }
        if (snapshot?.warning) {
            return Math.max(1, current - 1);
        }
        return current;
    }
}
|
|
67
|
+
exports.MemoryMonitor = MemoryMonitor;
|
|
68
|
+
/**
 * Derives a memory budget from the machine's total memory.
 *
 * @returns {number} Half of the total system memory in whole megabytes,
 *   but never less than 256.
 */
function getSystemMemoryBudgetMb() {
    const totalMb = bytesToMb(os_1.default.totalmem());
    const halfOfTotal = Math.floor(totalMb * 0.5);
    return Math.max(256, halfOfTotal);
}
|
|
72
|
+
/**
 * Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes).
 *
 * @param {number} bytes - Size in bytes.
 * @returns {number} Size in megabytes, rounded to the nearest integer.
 */
function bytesToMb(bytes) {
    const bytesPerMb = 1024 * 1024;
    const megabytes = bytes / bytesPerMb;
    return Math.round(megabytes);
}
|
|
75
|
+
/**
 * Restricts a value to the range `[min, max]`.
 *
 * The value is converted with `Number()` before the NaN check. `Number.isNaN`
 * does not coerce, so testing the raw value would let `undefined` or a
 * non-numeric string through and the result would be NaN.
 *
 * @param {*} value - Value to clamp; numeric strings and `null` are coerced
 *   the same way `Math.min` would coerce them.
 * @param {number} min - Lower bound, also returned when `value` is not a number.
 * @param {number} max - Upper bound.
 * @returns {number} The clamped value, or `min` when `value` converts to NaN.
 */
function clamp(value, min, max) {
    const numeric = Number(value);
    if (Number.isNaN(numeric)) {
        return min;
    }
    return Math.max(min, Math.min(max, numeric));
}
|
|
80
|
+
/**
 * Waits for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|