@mars167/git-ai 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +364 -0
- package/README.zh-CN.md +361 -0
- package/assets/hooks/post-checkout +28 -0
- package/assets/hooks/post-merge +28 -0
- package/assets/hooks/pre-commit +17 -0
- package/assets/hooks/pre-push +29 -0
- package/dist/bin/git-ai.js +62 -0
- package/dist/src/commands/ai.js +30 -0
- package/dist/src/commands/checkIndex.js +19 -0
- package/dist/src/commands/dsr.js +156 -0
- package/dist/src/commands/graph.js +203 -0
- package/dist/src/commands/hooks.js +125 -0
- package/dist/src/commands/index.js +92 -0
- package/dist/src/commands/pack.js +31 -0
- package/dist/src/commands/query.js +139 -0
- package/dist/src/commands/semantic.js +134 -0
- package/dist/src/commands/serve.js +14 -0
- package/dist/src/commands/status.js +78 -0
- package/dist/src/commands/trae.js +75 -0
- package/dist/src/commands/unpack.js +28 -0
- package/dist/src/core/archive.js +91 -0
- package/dist/src/core/astGraph.js +127 -0
- package/dist/src/core/astGraphQuery.js +142 -0
- package/dist/src/core/cozo.js +266 -0
- package/dist/src/core/cpg/astLayer.js +56 -0
- package/dist/src/core/cpg/callGraph.js +483 -0
- package/dist/src/core/cpg/cfgLayer.js +490 -0
- package/dist/src/core/cpg/dfgLayer.js +237 -0
- package/dist/src/core/cpg/index.js +80 -0
- package/dist/src/core/cpg/types.js +108 -0
- package/dist/src/core/crypto.js +10 -0
- package/dist/src/core/dsr/generate.js +308 -0
- package/dist/src/core/dsr/gitContext.js +74 -0
- package/dist/src/core/dsr/indexMaterialize.js +106 -0
- package/dist/src/core/dsr/paths.js +26 -0
- package/dist/src/core/dsr/query.js +73 -0
- package/dist/src/core/dsr/snapshotParser.js +73 -0
- package/dist/src/core/dsr/state.js +27 -0
- package/dist/src/core/dsr/types.js +2 -0
- package/dist/src/core/embedding/fusion.js +52 -0
- package/dist/src/core/embedding/index.js +43 -0
- package/dist/src/core/embedding/parser.js +14 -0
- package/dist/src/core/embedding/semantic.js +254 -0
- package/dist/src/core/embedding/structural.js +97 -0
- package/dist/src/core/embedding/symbolic.js +117 -0
- package/dist/src/core/embedding/tokenizer.js +91 -0
- package/dist/src/core/embedding/types.js +2 -0
- package/dist/src/core/embedding.js +36 -0
- package/dist/src/core/git.js +49 -0
- package/dist/src/core/gitDiff.js +73 -0
- package/dist/src/core/indexCheck.js +131 -0
- package/dist/src/core/indexer.js +185 -0
- package/dist/src/core/indexerIncremental.js +303 -0
- package/dist/src/core/indexing/config.js +51 -0
- package/dist/src/core/indexing/hnsw.js +568 -0
- package/dist/src/core/indexing/index.js +17 -0
- package/dist/src/core/indexing/monitor.js +82 -0
- package/dist/src/core/indexing/parallel.js +252 -0
- package/dist/src/core/lancedb.js +111 -0
- package/dist/src/core/lfs.js +27 -0
- package/dist/src/core/log.js +62 -0
- package/dist/src/core/manifest.js +88 -0
- package/dist/src/core/parser/adapter.js +2 -0
- package/dist/src/core/parser/c.js +93 -0
- package/dist/src/core/parser/chunkRelations.js +178 -0
- package/dist/src/core/parser/chunker.js +274 -0
- package/dist/src/core/parser/go.js +98 -0
- package/dist/src/core/parser/java.js +80 -0
- package/dist/src/core/parser/markdown.js +76 -0
- package/dist/src/core/parser/python.js +81 -0
- package/dist/src/core/parser/rust.js +103 -0
- package/dist/src/core/parser/typescript.js +98 -0
- package/dist/src/core/parser/utils.js +62 -0
- package/dist/src/core/parser/yaml.js +53 -0
- package/dist/src/core/parser.js +75 -0
- package/dist/src/core/paths.js +10 -0
- package/dist/src/core/repoMap.js +164 -0
- package/dist/src/core/retrieval/cache.js +31 -0
- package/dist/src/core/retrieval/classifier.js +74 -0
- package/dist/src/core/retrieval/expander.js +80 -0
- package/dist/src/core/retrieval/fuser.js +40 -0
- package/dist/src/core/retrieval/index.js +32 -0
- package/dist/src/core/retrieval/reranker.js +304 -0
- package/dist/src/core/retrieval/types.js +2 -0
- package/dist/src/core/retrieval/weights.js +42 -0
- package/dist/src/core/search.js +41 -0
- package/dist/src/core/sq8.js +65 -0
- package/dist/src/core/symbolSearch.js +143 -0
- package/dist/src/core/types.js +2 -0
- package/dist/src/core/workspace.js +116 -0
- package/dist/src/mcp/server.js +794 -0
- package/docs/README.md +44 -0
- package/docs/cross-encoder.md +157 -0
- package/docs/embedding.md +158 -0
- package/docs/logo.png +0 -0
- package/docs/windows-setup.md +67 -0
- package/docs/zh-CN/DESIGN.md +102 -0
- package/docs/zh-CN/README.md +46 -0
- package/docs/zh-CN/advanced.md +26 -0
- package/docs/zh-CN/architecture_explained.md +116 -0
- package/docs/zh-CN/cli.md +109 -0
- package/docs/zh-CN/dsr.md +91 -0
- package/docs/zh-CN/graph_scenarios.md +173 -0
- package/docs/zh-CN/hooks.md +14 -0
- package/docs/zh-CN/manifests.md +136 -0
- package/docs/zh-CN/mcp.md +205 -0
- package/docs/zh-CN/quickstart.md +35 -0
- package/docs/zh-CN/rules.md +7 -0
- package/docs/zh-CN/technical-details.md +454 -0
- package/docs/zh-CN/troubleshooting.md +19 -0
- package/docs/zh-CN/windows-setup.md +67 -0
- package/install.sh +183 -0
- package/package.json +97 -0
- package/skills/git-ai-mcp/SKILL.md +86 -0
- package/skills/git-ai-mcp/references/constraints.md +143 -0
- package/skills/git-ai-mcp/references/tools.md +263 -0
- package/templates/agents/common/documents/Fix EISDIR error and enable multi-language indexing.md +14 -0
- package/templates/agents/common/documents/Fix git-ai index error in CodaGraph directory.md +13 -0
- package/templates/agents/common/skills/git-ai-mcp/SKILL.md +86 -0
- package/templates/agents/common/skills/git-ai-mcp/references/constraints.md +143 -0
- package/templates/agents/common/skills/git-ai-mcp/references/tools.md +263 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.CrossEncoderReranker = void 0;
|
|
7
|
+
exports.fuseScores = fuseScores;
|
|
8
|
+
exports.rerank = rerank;
|
|
9
|
+
const path_1 = __importDefault(require("path"));
|
|
10
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
11
|
+
const crypto_1 = require("../crypto");
|
|
12
|
+
const embedding_1 = require("../embedding");
|
|
13
|
+
const log_1 = require("../log");
|
|
14
|
+
const cache_1 = require("./cache");
|
|
15
|
+
const log = (0, log_1.createLogger)({ component: 'retrieval', kind: 'reranker' });
|
|
16
|
+
/**
 * Min-max normalizes a list of scores into the [0, 1] range.
 *
 * Entries that are not finite numbers (NaN, +/-Infinity, undefined) are ignored when the
 * range is computed and map to 0, so one bad score cannot turn every result into NaN.
 * When all finite scores are equal (or none are finite) every entry maps to 0.
 *
 * @param {number[]} values raw scores
 * @returns {number[]} normalized scores, same length and order as `values`
 */
function normalizeScores(values) {
    if (values.length === 0)
        return [];
    const nums = Array.from(values, Number);
    let min = Infinity;
    let max = -Infinity;
    // Explicit loop instead of Math.min(...values): spreading a very large array
    // can exceed the engine's argument-count limit.
    for (const v of nums) {
        if (!Number.isFinite(v))
            continue;
        if (v < min)
            min = v;
        if (v > max)
            max = v;
    }
    const denom = max - min;
    // denom is 0 when all finite scores are equal and -Infinity when none are finite.
    if (!(denom > 0))
        return nums.map(() => 0);
    return nums.map((v) => (Number.isFinite(v) ? (v - min) / denom : 0));
}
|
|
26
|
+
/**
 * Maps a single raw score into [0, 1].
 *
 * Non-finite input yields 0, a value already inside [0, 1] is returned unchanged,
 * and anything else is squashed with `sigmoid`.
 *
 * @param {number} value raw score
 * @returns {number}
 */
function normalizeScore(value) {
    if (Number.isFinite(value)) {
        const withinUnitRange = value >= 0 && value <= 1;
        return withinUnitRange ? value : sigmoid(value);
    }
    return 0;
}
|
|
33
|
+
/**
 * Blends a retrieval score with a cross-encoder score.
 *
 * @param {number} originalScore raw retrieval score; passed through `normalizeScore`
 * @param {number} crossEncoderScore cross-encoder score; clamped to [0, 1]
 * @param {{original: number, crossEncoder: number}} weights multipliers for the two parts
 * @returns {number} weights.original * normalized + weights.crossEncoder * clamped
 */
function fuseScores(originalScore, crossEncoderScore, weights) {
    const originalPart = normalizeScore(originalScore);
    const crossPart = clamp(crossEncoderScore, 0, 1);
    const { original: originalWeight, crossEncoder: crossWeight } = weights;
    return originalWeight * originalPart + crossWeight * crossPart;
}
|
|
38
|
+
/**
 * Restricts `value` to the closed interval [min, max].
 *
 * Any non-finite value (NaN as well as +/-Infinity) collapses to `min`.
 *
 * @param {number} value
 * @param {number} [min=0]
 * @param {number} [max=1]
 * @returns {number}
 */
function clamp(value, min = 0, max = 1) {
    const upperBounded = Math.min(max, value);
    return Number.isFinite(value) ? Math.max(min, upperBounded) : min;
}
|
|
43
|
+
/**
 * Returns a copy of `values` that is truncated or right-padded to `target` entries.
 *
 * @param {bigint[]} values source sequence (never mutated)
 * @param {number} target desired length
 * @param {bigint} [pad=0n] filler appended when `values` is too short
 * @returns {bigint[]}
 */
function padBigInt(values, target, pad = 0n) {
    const longEnough = values.length >= target;
    if (longEnough)
        return values.slice(0, target);
    const padded = values.slice();
    for (let size = padded.length; size < target; size++)
        padded.push(pad);
    return padded;
}
|
|
51
|
+
/**
 * Resolves the ONNX model file for `modelName`.
 *
 * `modelName` may be an absolute path or a path relative to the current working
 * directory, and may point either at the model file itself or at a directory that
 * contains `model.onnx` or `onnx/model.onnx`.
 *
 * Only regular files are accepted as a match. Previously the first *existing* path won,
 * so an existing model directory was returned as-is and `model.onnx` inside it was
 * never selected.
 *
 * @param {string} modelName model file or model directory
 * @returns {string} the first candidate that is a file, otherwise the resolved `modelName`
 */
function findModelPath(modelName) {
    const resolved = path_1.default.isAbsolute(modelName) ? modelName : path_1.default.join(process.cwd(), modelName);
    const candidates = [
        resolved,
        path_1.default.join(resolved, 'model.onnx'),
        path_1.default.join(resolved, 'onnx', 'model.onnx'),
    ];
    const isFile = (candidate) => {
        try {
            return fs_extra_1.default.statSync(candidate).isFile();
        }
        catch {
            // Missing or unreadable path: treat as "not a model file".
            return false;
        }
    };
    return candidates.find(isFile) ?? resolved;
}
|
|
60
|
+
/**
 * Logistic function 1 / (1 + e^-x), saturated to exactly 1 above 20 and exactly 0 below -20.
 *
 * @param {number} x
 * @returns {number}
 */
function sigmoid(x) {
    const saturation = 20;
    if (x > saturation)
        return 1;
    return x < -saturation ? 0 : 1 / (1 + Math.exp(-x));
}
|
|
67
|
+
/**
 * Coerces rerank arguments into a safe shape.
 *
 * @param {*} query stringified and trimmed; null/undefined become ''
 * @param {*} candidates kept when it is an array, otherwise replaced by []
 * @returns {{query: string, candidates: Array}}
 */
function validateRerankInput(query, candidates) {
    return {
        query: String(query ?? '').trim(),
        candidates: Array.isArray(candidates) ? candidates : [],
    };
}
|
|
72
|
+
/**
 * Scores (query, content) pairs with an ONNX cross-encoder model.
 *
 * The ONNX runtime, the inference session and the tokenizer are loaded lazily and
 * memoized as promises. When the model file is missing or inference throws, a
 * deterministic hash-based score is produced instead, so callers always get one
 * score per pair.
 */
class CrossEncoderModel {
    /**
     * @param config reads `modelName`, `batchSize` and `device`
     * @param cache score cache exposing `get`, `set` and `clear`
     */
    constructor(config, cache) {
        this.config = config;
        this.cache = cache;
        this.onnxPromise = null;
        this.sessionPromise = null;
        this.tokenizerPromise = null;
    }
    /**
     * Returns one score in [0, 1] per pair, in input order.
     *
     * Results (including fallback scores) are cached under a hash of all pairs.
     *
     * @param {{query: string, content: string}[]} pairs
     * @returns {Promise<number[]>}
     */
    async scorePairs(pairs) {
        if (pairs.length === 0)
            return [];
        const cacheKey = (0, crypto_1.sha256Hex)(JSON.stringify(pairs.map((p) => [p.query, p.content])));
        const cached = this.cache.get(cacheKey);
        if (cached)
            return cached.slice();
        const modelPath = findModelPath(this.config.modelName);
        if (!fs_extra_1.default.pathExistsSync(modelPath)) {
            log.info('cross_encoder_model_missing', { model: modelPath });
            return this.storeScores(cacheKey, this.fallbackScores(pairs));
        }
        try {
            const session = await this.getSession();
            const tokenizer = await this.getTokenizer();
            const batchSize = this.resolveBatchSize(pairs.length);
            const scores = new Array(pairs.length).fill(0);
            for (let i = 0; i < pairs.length; i += batchSize) {
                const slice = pairs.slice(i, i + batchSize);
                const encoded = slice.map((pair) => tokenizer.encode(`${pair.query} ${pair.content}`, { maxLength: 256 }));
                // Pad every row of the batch to the longest sequence (at least 2, at most 256 tokens).
                const maxLen = Math.max(2, Math.min(256, Math.max(...encoded.map((e) => e.input_ids.length))));
                const inputIds = encoded.map((e) => padBigInt(e.input_ids, maxLen, 0n));
                const attentionMask = encoded.map((e) => padBigInt(e.attention_mask, maxLen, 0n));
                const feeds = await this.buildFeeds(inputIds, attentionMask, maxLen);
                const outputs = await session.run(feeds);
                const outputName = Object.keys(outputs)[0];
                const output = outputs[outputName];
                if (!output)
                    throw new Error('ONNX output missing');
                const data = output.data;
                const dims = output.dims ?? [slice.length, 1];
                // The first value of each output row is used as the logit for that pair.
                const perRow = Math.max(1, dims[dims.length - 1] ?? 1);
                for (let j = 0; j < slice.length; j++) {
                    const raw = data[j * perRow] ?? 0;
                    scores[i + j] = sigmoid(Number(raw));
                }
            }
            return this.storeScores(cacheKey, scores);
        }
        catch (err) {
            log.warn('cross_encoder_fallback', { err: String(err?.message ?? err) });
            return this.storeScores(cacheKey, this.fallbackScores(pairs));
        }
    }
    /** Drops the memoized runtime/session/tokenizer promises and clears the score cache. */
    dispose() {
        this.onnxPromise = null;
        this.sessionPromise = null;
        this.tokenizerPromise = null;
        this.cache.clear();
    }
    /**
     * Normalizes `config.batchSize` to a positive integer.
     *
     * A missing or NaN batch size used to make the batch loop slice nothing (all scores
     * stayed 0), and a fractional one wrote scores to non-index keys. NaN now means
     * "one batch containing everything"; fractions are floored.
     */
    resolveBatchSize(total) {
        const requested = Number(this.config.batchSize);
        if (Number.isNaN(requested))
            return Math.max(1, total);
        return Math.max(1, Math.floor(requested));
    }
    /** Hash-based stand-in scores used when the model cannot be run. */
    fallbackScores(pairs) {
        return pairs.map((p) => this.hashScore(p.query, p.content));
    }
    /**
     * Caches a private copy of `scores` and returns the original array, so a caller
     * mutating its result cannot corrupt later cache hits.
     */
    storeScores(cacheKey, scores) {
        this.cache.set(cacheKey, scores.slice());
        return scores;
    }
    /** Lazily creates the ONNX inference session (CUDA first when `device` is 'gpu'). */
    async getSession() {
        if (!this.sessionPromise) {
            this.sessionPromise = (async () => {
                const onnx = await this.getOnnx();
                const modelPath = findModelPath(this.config.modelName);
                const providers = this.config.device === 'gpu' ? ['cuda', 'cpu'] : ['cpu'];
                const opts = { executionProviders: providers };
                const session = await onnx.InferenceSession.create(modelPath, opts);
                log.info('cross_encoder_session_ready', { model: modelPath, device: this.config.device });
                return session;
            })();
        }
        return this.sessionPromise;
    }
    /** Lazily loads the tokenizer for `config.modelName`. */
    async getTokenizer() {
        if (!this.tokenizerPromise) {
            this.tokenizerPromise = (async () => {
                const mod = await this.loadTokenizerModule();
                return mod.loadTokenizer(this.config.modelName);
            })();
        }
        return this.tokenizerPromise;
    }
    /** Memoized access to the `onnxruntime-node` module. */
    async getOnnx() {
        if (!this.onnxPromise)
            this.onnxPromise = this.loadOnnx();
        return this.onnxPromise;
    }
    async loadOnnx() {
        // The module name is kept in a variable so the import stays dynamic.
        const moduleName = 'onnxruntime-node';
        const mod = await import(moduleName);
        return mod;
    }
    async loadTokenizerModule() {
        const moduleName = '../embedding/tokenizer.js';
        const mod = await import(moduleName);
        return mod;
    }
    /**
     * Builds the int64 input tensors for one batch.
     *
     * @param {bigint[][]} inputIds padded token ids, one row per pair
     * @param {bigint[][]} attentionMask padded attention mask, same shape as `inputIds`
     * @param {number} maxLen row length
     * @returns feeds keyed `input_ids`, `attention_mask` and `token_type_ids` (all zeros)
     */
    async buildFeeds(inputIds, attentionMask, maxLen) {
        const onnx = await this.getOnnx();
        const batch = inputIds.length;
        return {
            input_ids: new onnx.Tensor('int64', BigInt64Array.from(inputIds.flat()), [batch, maxLen]),
            attention_mask: new onnx.Tensor('int64', BigInt64Array.from(attentionMask.flat()), [batch, maxLen]),
            token_type_ids: new onnx.Tensor('int64', new BigInt64Array(batch * maxLen), [batch, maxLen]),
        };
    }
    /** Deterministic fallback score: sigmoid of the summed 64-dim hash embedding of "query content". */
    hashScore(query, content) {
        const vec = (0, embedding_1.hashEmbedding)(`${query} ${content}`, { dim: 64 });
        const sum = vec.reduce((acc, v) => acc + v, 0);
        return sigmoid(sum);
    }
}
|
|
200
|
+
/**
 * Reranks retrieval candidates by combining their min-max normalized original score
 * with a cross-encoder score.
 */
class CrossEncoderReranker {
    /**
     * @param config reads `topK` and `scoreWeights.{original,crossEncoder}`; also passed to the model
     * @param cache score cache shared with the underlying model (defaults to a 256-entry LRU)
     */
    constructor(config, cache = new cache_1.LruCache(256)) {
        this.config = config;
        this.cache = cache;
        this.model = new CrossEncoderModel(config, cache);
    }
    /**
     * Scores at most `config.topK` leading candidates against `query` and returns them
     * sorted by final score (ties broken by the cross-encoder score).
     *
     * A missing or NaN `topK` used to discard every candidate (`slice(0, NaN)`); it now
     * means "rerank all candidates".
     *
     * @param {string} query
     * @param {{id: *, content: string, filePath: string, score: number}[]} candidates
     * @returns {Promise<Array>} entries with id, content, filePath, originalScore, rerankScore, finalScore
     */
    async rerank(query, candidates) {
        const { query: q, candidates: items } = validateRerankInput(query, candidates);
        if (!q || items.length === 0)
            return [];
        const requestedTopK = Number(this.config.topK);
        const topK = Number.isNaN(requestedTopK) ? items.length : Math.max(1, Math.floor(requestedTopK));
        const limited = items.slice(0, topK);
        const pairs = limited.map((item) => ({ query: q, content: item.content }));
        const scores = await this.model.scorePairs(pairs);
        const normalizedOriginal = normalizeScores(limited.map((c) => c.score));
        const { original: originalWeight, crossEncoder: crossWeight } = this.config.scoreWeights;
        const results = limited.map((item, idx) => {
            const rerankScore = clamp(scores[idx] ?? 0, 0, 1);
            const originalScore = normalizedOriginal[idx] ?? 0;
            return {
                id: item.id,
                content: item.content,
                filePath: item.filePath,
                // The raw (un-normalized) retrieval score is reported back to the caller.
                originalScore: item.score,
                rerankScore,
                finalScore: originalWeight * originalScore + crossWeight * rerankScore,
            };
        });
        results.sort((a, b) => b.finalScore - a.finalScore || b.rerankScore - a.rerankScore);
        return results;
    }
    /**
     * Reranks `candidates[i]` against `queries[i]` for every index both arrays share.
     * Queries are processed one after another.
     */
    async rerankBatch(queries, candidates) {
        const batchSize = Math.min(queries.length, candidates.length);
        const results = new Array(batchSize);
        for (let i = 0; i < batchSize; i++) {
            results[i] = await this.rerank(queries[i] ?? '', candidates[i] ?? []);
        }
        return results;
    }
    /** Releases the model's memoized resources and clears the score cache. */
    dispose() {
        this.model.dispose();
        this.cache.clear();
    }
}
|
|
245
|
+
exports.CrossEncoderReranker = CrossEncoderReranker;
|
|
246
|
+
/**
 * Splits text into lowercase word tokens.
 *
 * A token is a run of Unicode letters, combining marks, digits or underscores. The
 * previous ASCII-only split (`[^a-z0-9_]`) discarded every non-ASCII character, so
 * accented or CJK text produced no tokens and could never contribute to overlap
 * scoring. ASCII input tokenizes exactly as before.
 *
 * @param {*} text stringified; null/undefined become ''
 * @returns {string[]} non-empty tokens in order of appearance
 */
function tokenize(text) {
    return String(text ?? '')
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter(Boolean);
}
|
|
253
|
+
/**
 * Fraction of query tokens that also occur among the candidate tokens.
 *
 * Repeated query tokens are counted once per occurrence; the result is 0 when either
 * token list is empty.
 *
 * @param {string[]} queryTokens
 * @param {string[]} candidateTokens
 * @returns {number} value in [0, 1]
 */
function overlapScore(queryTokens, candidateTokens) {
    const total = queryTokens.length;
    if (total === 0 || candidateTokens.length === 0)
        return 0;
    const vocabulary = new Set(candidateTokens);
    const matched = queryTokens.reduce((count, token) => (vocabulary.has(token) ? count + 1 : count), 0);
    return matched / total;
}
|
|
263
|
+
/**
 * Computes a small agreement bonus for results from *different* sources whose texts
 * share tokens.
 *
 * For every pair (a, b) with a before b and a.source !== b.source, the share of a's
 * distinct tokens that also occur in b is measured; when it exceeds 0.2 both results
 * gain 0.05.
 *
 * Each result is tokenized once up front. The earlier version re-tokenized both texts
 * for every pair; the returned boosts are the same.
 *
 * @param {Array} results items with `source`, `id` and `text` (or `metadata.text`)
 * @returns {Map<string, number>} boost keyed by `${source}:${id}`
 */
function pairwiseBoost(results) {
    const boost = new Map();
    if (results.length < 2)
        return boost;
    const prepared = results.map((r) => ({
        source: r.source,
        key: `${r.source}:${r.id}`,
        tokens: new Set(tokenize(String(r.text ?? r.metadata?.text ?? ''))),
    }));
    const bump = (key) => boost.set(key, (boost.get(key) ?? 0) + 0.05);
    for (let i = 0; i < prepared.length; i++) {
        const a = prepared[i];
        for (let j = i + 1; j < prepared.length; j++) {
            const b = prepared[j];
            if (a.source === b.source)
                continue;
            // No tokens on either side means zero overlap.
            if (a.tokens.size === 0 || b.tokens.size === 0)
                continue;
            let hits = 0;
            for (const token of a.tokens)
                if (b.tokens.has(token))
                    hits += 1;
            if (hits / a.tokens.size > 0.2) {
                bump(a.key);
                bump(b.key);
            }
        }
    }
    return boost;
}
|
|
285
|
+
/**
 * Synchronous heuristic reranker.
 *
 * Each candidate's fused score (or plain `score` when no `fusedScore` is present) gains
 * up to 0.2 for lexical overlap with the query, plus the cross-source agreement bonus
 * from `pairwiseBoost`. Results are sorted by the adjusted score (ties by `score`),
 * cut to `options.limit` and given 1-based ranks.
 *
 * A `limit` that is not a number now falls back to the default of 50 instead of
 * producing an empty result, and a non-array `candidates` is treated as empty.
 *
 * @param {string} query
 * @param {Array} candidates items with `score`, optional `fusedScore`/`normalizedScore`, and `text` or `metadata.text`
 * @param {{limit?: number}} [options]
 * @returns {Array} reranked copies of the candidates with `fusedScore` and `rank` set
 */
function rerank(query, candidates, options = {}) {
    const qTokens = tokenize(query);
    const requestedLimit = Number(options?.limit ?? 50);
    const limit = Number.isNaN(requestedLimit) ? 50 : Math.max(1, requestedLimit);
    const items = Array.isArray(candidates) ? candidates : [];
    const ranked = items.map((c, idx) => {
        const normalizedScore = 'normalizedScore' in c ? c.normalizedScore : 0;
        const fusedScore = 'fusedScore' in c ? c.fusedScore : c.score;
        const text = String(c.text ?? c.metadata?.text ?? '');
        const overlap = overlapScore(qTokens, tokenize(text));
        const rerankScore = fusedScore + overlap * 0.2;
        return { ...c, normalizedScore, fusedScore: rerankScore, rank: idx + 1 };
    });
    const boosts = pairwiseBoost(ranked);
    for (const r of ranked) {
        r.fusedScore += boosts.get(`${r.source}:${r.id}`) ?? 0;
    }
    ranked.sort((a, b) => b.fusedScore - a.fusedScore || b.score - a.score);
    // Ranks are reassigned after sorting and truncation.
    return ranked.slice(0, limit).map((r, idx) => ({ ...r, rank: idx + 1 }));
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.computeWeights = computeWeights;
|
|
4
|
+
/**
 * Default per-source fusion weights for each primary query type.
 * Every profile lists the vector, graph, DSR and symbol weights.
 */
const BASE_WEIGHTS = {
    semantic: {
        vectorWeight: 0.55,
        graphWeight: 0.2,
        dsrWeight: 0.15,
        symbolWeight: 0.1,
    },
    structural: {
        vectorWeight: 0.25,
        graphWeight: 0.45,
        dsrWeight: 0.15,
        symbolWeight: 0.15,
    },
    historical: {
        vectorWeight: 0.2,
        graphWeight: 0.15,
        dsrWeight: 0.5,
        symbolWeight: 0.15,
    },
    hybrid: {
        vectorWeight: 0.4,
        graphWeight: 0.3,
        dsrWeight: 0.2,
        symbolWeight: 0.1,
    },
};
|
|
10
|
+
/**
 * Rescales the four source weights so they sum to 1.
 *
 * Negative or non-finite weights are treated as 0 before rescaling: a negative fusion
 * weight would invert a source's ranking and NaN would poison every weight. When no
 * positive weight remains, a *copy* of the semantic defaults is returned (the shared
 * `BASE_WEIGHTS.semantic` object used to be returned by reference, so callers could
 * mutate the defaults).
 *
 * @param {{vectorWeight: number, graphWeight: number, dsrWeight: number, symbolWeight: number}} weights
 * @returns {{vectorWeight: number, graphWeight: number, dsrWeight: number, symbolWeight: number}}
 */
function normalize(weights) {
    const sanitize = (raw) => {
        const n = Number(raw);
        return Number.isFinite(n) && n > 0 ? n : 0;
    };
    const vectorWeight = sanitize(weights.vectorWeight);
    const graphWeight = sanitize(weights.graphWeight);
    const dsrWeight = sanitize(weights.dsrWeight);
    const symbolWeight = sanitize(weights.symbolWeight);
    const total = vectorWeight + graphWeight + dsrWeight + symbolWeight;
    if (!(total > 0))
        return { ...BASE_WEIGHTS.semantic };
    return {
        vectorWeight: vectorWeight / total,
        graphWeight: graphWeight / total,
        dsrWeight: dsrWeight / total,
        symbolWeight: symbolWeight / total,
    };
}
|
|
21
|
+
/**
 * Derives normalized fusion weights for a classified query.
 *
 * Starts from the `BASE_WEIGHTS` profile for `queryType.primary`, adds any per-source
 * `feedback.weightBias`, adds 0.05 to the source named by `feedback.acceptedSource`,
 * and normalizes the result.
 *
 * An unknown or missing primary type now falls back to the semantic profile (the same
 * fallback `normalize` uses) instead of producing NaN weights, and bias entries that
 * are not finite numbers are ignored.
 *
 * @param {{primary: string}} queryType
 * @param {{weightBias?: object, acceptedSource?: string}} [feedback]
 * @returns normalized `{vectorWeight, graphWeight, dsrWeight, symbolWeight}`
 */
function computeWeights(queryType, feedback) {
    const primary = queryType?.primary;
    const known = Object.prototype.hasOwnProperty.call(BASE_WEIGHTS, primary);
    const base = { ...(known ? BASE_WEIGHTS[primary] : BASE_WEIGHTS.semantic) };
    const bias = feedback?.weightBias;
    if (bias) {
        for (const key of ['vectorWeight', 'graphWeight', 'dsrWeight', 'symbolWeight']) {
            const delta = Number(bias[key] ?? 0);
            if (Number.isFinite(delta))
                base[key] += delta;
        }
    }
    const boostTargets = new Map([
        ['vector', 'vectorWeight'],
        ['graph', 'graphWeight'],
        ['dsr', 'dsrWeight'],
        ['symbol', 'symbolWeight'],
    ]);
    const boostedKey = boostTargets.get(feedback?.acceptedSource);
    if (boostedKey)
        base[boostedKey] += 0.05;
    return normalize(base);
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.buildQueryVector = buildQueryVector;
|
|
4
|
+
exports.scoreAgainst = scoreAgainst;
|
|
5
|
+
exports.buildAdaptiveQueryPlan = buildAdaptiveQueryPlan;
|
|
6
|
+
exports.runAdaptiveRetrieval = runAdaptiveRetrieval;
|
|
7
|
+
const sq8_1 = require("./sq8");
|
|
8
|
+
const embedding_1 = require("./embedding");
|
|
9
|
+
const classifier_1 = require("./retrieval/classifier");
|
|
10
|
+
const expander_1 = require("./retrieval/expander");
|
|
11
|
+
const fuser_1 = require("./retrieval/fuser");
|
|
12
|
+
const reranker_1 = require("./retrieval/reranker");
|
|
13
|
+
const weights_1 = require("./retrieval/weights");
|
|
14
|
+
/**
 * Builds the quantized query vector for `text`: a hash embedding of dimension `dim`
 * passed through SQ8 quantization.
 *
 * @param {string} text
 * @param {number} dim embedding dimension
 * @returns the `quantizeSQ8` result for the embedding
 */
function buildQueryVector(text, dim) {
    const embedding = (0, embedding_1.hashEmbedding)(text, { dim });
    const quantized = (0, sq8_1.quantizeSQ8)(embedding);
    return quantized;
}
|
|
18
|
+
/**
 * Cosine similarity between a quantized query vector and a stored quantized item.
 *
 * @param q quantized query (`{dim, scale, q}`)
 * @param item stored row exposing `dim`, `scale` and `qvec`
 * @returns {number}
 */
function scoreAgainst(q, item) {
    const queryFloats = (0, sq8_1.dequantizeSQ8)(q);
    const stored = { dim: item.dim, scale: item.scale, q: item.qvec };
    const itemFloats = (0, sq8_1.dequantizeSQ8)(stored);
    return (0, sq8_1.cosineSimilarity)(queryFloats, itemFloats);
}
|
|
23
|
+
/**
 * Prepares everything the adaptive pipeline needs for a query: the trimmed query text,
 * its classification, its expansion and the fusion weights.
 *
 * @param {*} query stringified and trimmed; null/undefined become ''
 * @param [feedback] optional feedback forwarded to `computeWeights`
 * @returns {{query: string, expanded: *, queryType: *, weights: *}}
 */
function buildAdaptiveQueryPlan(query, feedback) {
    const trimmed = String(query ?? '').trim();
    const queryType = (0, classifier_1.classifyQuery)(trimmed);
    const expanded = (0, expander_1.expandQuery)(trimmed, queryType);
    const weights = (0, weights_1.computeWeights)(queryType, feedback);
    return {
        query: trimmed,
        expanded,
        queryType,
        weights,
    };
}
|
|
30
|
+
/**
 * Executes the adaptive retrieval pipeline end to end:
 * classification -> expansion -> weighting -> fusion -> heuristic reranking.
 *
 * The reranking step here is the synchronous heuristic one. For higher-quality (but
 * slower, asynchronous) reranking with the ONNX cross-encoder, use the
 * `CrossEncoderReranker` class directly.
 *
 * @param query raw query text
 * @param candidates retrieval results handed to `fuseResults`
 * @param [options] optional `feedback` and `limit`
 * @returns the query plan fields plus `results`
 */
function runAdaptiveRetrieval(query, candidates, options = {}) {
    const plan = buildAdaptiveQueryPlan(query, options.feedback);
    const fusedCandidates = (0, fuser_1.fuseResults)(candidates, plan.weights, options.limit);
    const rerankOptions = { limit: options.limit };
    const results = (0, reranker_1.rerank)(plan.query, fusedCandidates, rerankOptions);
    return { ...plan, results };
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.quantizeSQ8 = quantizeSQ8;
|
|
4
|
+
exports.quantizeToBits = quantizeToBits;
|
|
5
|
+
exports.dequantizeSQ8 = dequantizeSQ8;
|
|
6
|
+
exports.hnswQuantize = hnswQuantize;
|
|
7
|
+
exports.cosineSimilarity = cosineSimilarity;
|
|
8
|
+
/**
 * Scalar-quantizes a float vector.
 *
 * Finds the largest absolute component and delegates to `quantizeToBits`.
 *
 * @param {ArrayLike<number>} vector
 * @param {number} [bits=8] bit width forwarded to `quantizeToBits`
 * @returns {{dim: number, scale: number, q: Int8Array}}
 */
function quantizeSQ8(vector, bits = 8) {
    const dim = vector.length;
    let peak = 0;
    let idx = 0;
    while (idx < dim) {
        const magnitude = Math.abs(vector[idx] ?? 0);
        peak = magnitude > peak ? magnitude : peak;
        idx += 1;
    }
    const quantized = quantizeToBits(vector, bits, peak);
    return { dim, scale: quantized.scale, q: quantized.q };
}
|
|
19
|
+
/**
 * Symmetric scalar quantization of a float vector into signed integers.
 *
 * Components are divided by `scale = maxAbs / range`, rounded and clamped to
 * [-range, range], where `range = 2^(bits-1) - 1` and `bits` is clamped to 4..8.
 *
 * Robustness fixes:
 * - a missing or NaN `bits` (e.g. `hnswQuantize(vec)` without a bit width) used to
 *   yield a NaN scale; it now defaults to 8 bits;
 * - a non-finite `maxAbs` is ignored and recomputed from the vector;
 * - non-finite components are skipped when the maximum is recomputed, so they can no
 *   longer make the scale infinite.
 *
 * @param {ArrayLike<number>} vector
 * @param {number} bits requested bit width
 * @param {number} [maxAbs] largest absolute component, when already known
 * @returns {{dim: number, scale: number, q: Int8Array}}
 */
function quantizeToBits(vector, bits, maxAbs) {
    const dim = vector.length;
    const requestedBits = Number(bits);
    const clampedBits = Number.isNaN(requestedBits) ? 8 : Math.max(4, Math.min(8, Math.round(requestedBits)));
    const range = Math.pow(2, clampedBits - 1) - 1;
    let maxAbsLocal = Number.isFinite(maxAbs) ? maxAbs : 0;
    if (maxAbsLocal === 0) {
        for (let i = 0; i < dim; i++) {
            const a = Math.abs(vector[i] ?? 0);
            if (Number.isFinite(a) && a > maxAbsLocal)
                maxAbsLocal = a;
        }
    }
    // An all-zero vector keeps scale 1 so dequantization stays well-defined.
    const scale = maxAbsLocal === 0 ? 1 : maxAbsLocal / range;
    const q = new Int8Array(dim);
    for (let i = 0; i < dim; i++) {
        const r = Math.round((vector[i] ?? 0) / scale);
        q[i] = Math.max(-range, Math.min(range, r));
    }
    return { dim, scale, q };
}
|
|
41
|
+
/**
 * Reconstructs a float vector from its quantized form by multiplying each stored
 * integer with the scale.
 *
 * @param {{dim: number, scale: number, q: ArrayLike<number>}} sq8
 * @returns {Float32Array} vector of length `sq8.dim`
 */
function dequantizeSQ8(sq8) {
    const restored = new Float32Array(sq8.dim);
    let idx = 0;
    while (idx < sq8.dim) {
        restored[idx] = sq8.q[idx] * sq8.scale;
        idx += 1;
    }
    return restored;
}
|
|
47
|
+
/**
 * Quantizes a vector at the given bit width by delegating to `quantizeToBits`
 * without a precomputed maximum.
 *
 * @param {ArrayLike<number>} vector
 * @param {number} bits
 * @returns the `quantizeToBits` result
 */
function hnswQuantize(vector, bits) {
    const quantized = quantizeToBits(vector, bits);
    return quantized;
}
|
|
50
|
+
/**
 * Cosine similarity of two vectors, compared over their common prefix length.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} 0 when either compared prefix has zero magnitude
 */
function cosineSimilarity(a, b) {
    const shared = Math.min(a.length, b.length);
    let dotProduct = 0;
    let squaresA = 0;
    let squaresB = 0;
    let idx = 0;
    while (idx < shared) {
        const x = Number(a[idx] ?? 0);
        const y = Number(b[idx] ?? 0);
        dotProduct += x * y;
        squaresA += x * x;
        squaresB += y * y;
        idx += 1;
    }
    const degenerate = squaresA === 0 || squaresB === 0;
    return degenerate ? 0 : dotProduct / (Math.sqrt(squaresA) * Math.sqrt(squaresB));
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.inferSymbolSearchMode = inferSymbolSearchMode;
|
|
4
|
+
exports.pickCoarseToken = pickCoarseToken;
|
|
5
|
+
exports.buildCoarseWhere = buildCoarseWhere;
|
|
6
|
+
exports.filterAndRankSymbolRows = filterAndRankSymbolRows;
|
|
7
|
+
/**
 * Chooses the symbol search mode.
 *
 * An explicit `mode` always wins; otherwise a query containing `*` or `?` is a
 * wildcard search and anything else is a substring search.
 *
 * @param {string} query
 * @param {string} [mode] explicit mode, returned unchanged when truthy
 * @returns {string}
 */
function inferSymbolSearchMode(query, mode) {
    if (mode)
        return mode;
    const hasGlobChar = query.includes('*') || query.includes('?');
    return hasGlobChar ? 'wildcard' : 'substring';
}
|
|
14
|
+
/**
 * Doubles every single quote so the text can sit inside a single-quoted SQL string literal.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeQuotes(s) {
    const pieces = s.split("'");
    return pieces.join("''");
}
|
|
17
|
+
/**
 * Extracts identifier-like tokens: maximal runs of ASCII letters, digits, `_`, `$` and `.`.
 *
 * @param {string} s
 * @returns {string[]} tokens in order of appearance
 */
function extractTokens(s) {
    const found = s.match(/[A-Za-z0-9_$.]+/g);
    if (found === null)
        return [];
    const tokens = [];
    for (const raw of found) {
        const token = raw.trim();
        if (token)
            tokens.push(token);
    }
    return tokens;
}
|
|
21
|
+
/**
 * Picks the longest identifier-like token of the query; the earliest one wins a tie.
 *
 * @param {string} query
 * @returns {string} the chosen token, or '' when the query has no tokens
 */
function pickCoarseToken(query) {
    const tokens = extractTokens(query);
    if (tokens.length === 0)
        return '';
    return tokens.reduce((longest, token) => (token.length > longest.length ? token : longest), tokens[0]);
}
|
|
31
|
+
/**
 * Builds a coarse SQL `WHERE` fragment on the `symbol` column that pre-filters rows
 * before the exact matching done by `filterAndRankSymbolRows`.
 *
 * - prefix / substring: the whole (quote-escaped) query is used in a LIKE pattern;
 * - every other mode: the longest identifier-like token of the query is used as a
 *   substring pattern.
 *
 * Fix: in regex mode `.` is a metacharacter, not a literal, so it no longer counts as
 * part of a token. Previously `get.*Name` produced `LIKE '%get.%'`, which excluded
 * valid matches such as `getUserName`. Dropping the dot only broadens the filter.
 *
 * NOTE(review): for regex and fuzzy modes a contiguous-token filter can still exclude
 * rows the fine filter would accept (alternations, escapes such as `\d`, fuzzy
 * subsequences). Confirm whether callers can afford an unfiltered scan (`null`) there.
 * NOTE(review): LIKE wildcards (`%`, `_`) in the query are not escaped; that only
 * widens the coarse filter.
 *
 * @param {{query?: string, mode?: string, caseInsensitive?: boolean}} params
 * @returns {string|null} the WHERE fragment, or null when no usable pattern exists
 */
function buildCoarseWhere(params) {
    const q = String(params.query ?? '');
    const mode = inferSymbolSearchMode(q, params.mode);
    const likeOp = params.caseInsensitive ? 'ILIKE' : 'LIKE';
    if (mode === 'prefix' || mode === 'substring') {
        const safe = escapeQuotes(q);
        if (!safe)
            return null;
        const leading = mode === 'prefix' ? '' : '%';
        return `symbol ${likeOp} '${leading}${safe}%'`;
    }
    // Replace regex dots with spaces so tokens break at them.
    const tokenSource = mode === 'regex' ? q.replace(/\./g, ' ') : q;
    const token = pickCoarseToken(tokenSource);
    if (!token)
        return null;
    return `symbol ${likeOp} '%${escapeQuotes(token)}%'`;
}
|
|
52
|
+
/**
 * Backslash-escapes every regular-expression metacharacter in `s`.
 *
 * @param {string} s
 * @returns {string} text that matches `s` literally when used inside a RegExp
 */
function escapeRegex(s) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    return s.replace(metaChars, (hit) => `\\${hit}`);
}
|
|
55
|
+
/**
 * Converts a glob pattern into an anchored RegExp: `*` matches any run of characters,
 * `?` matches a single character, everything else matches literally.
 *
 * @param {string} pattern
 * @param {boolean} caseInsensitive adds the `i` flag when truthy
 * @returns {RegExp|null} null when the expression cannot be built
 */
function globToRegex(pattern, caseInsensitive) {
    try {
        let body = '';
        for (const ch of pattern.split('')) {
            if (ch === '*')
                body += '.*';
            else if (ch === '?')
                body += '.';
            else
                body += escapeRegex(ch);
        }
        return new RegExp(`^${body}$`, caseInsensitive ? 'i' : '');
    }
    catch {
        return null;
    }
}
|
|
74
|
+
/**
 * Compiles a user-supplied regular expression, returning null instead of throwing
 * when the pattern is invalid.
 *
 * @param {string} pattern
 * @param {boolean} caseInsensitive adds the `i` flag when truthy
 * @returns {RegExp|null}
 */
function buildRegex(pattern, caseInsensitive) {
    let compiled = null;
    try {
        compiled = new RegExp(pattern, caseInsensitive ? 'i' : '');
    }
    catch {
        compiled = null;
    }
    return compiled;
}
|
|
83
|
+
/**
 * Prepares text for fuzzy matching: lowercases it and removes every character that is
 * not an ASCII letter, digit, `_`, `$` or `.`.
 *
 * @param {string} s
 * @returns {string}
 */
function normalizeForFuzzy(s) {
    const lowered = s.toLowerCase();
    return lowered.replace(/[^a-z0-9_$.]+/g, '');
}
|
|
86
|
+
/**
 * Scores how well `needle` occurs as a subsequence of `haystack`, matching greedily
 * from left to right.
 *
 * Each matched character scores 1, or 2 when it directly follows the previous match.
 *
 * @param {string} needle
 * @param {string} haystack
 * @returns {number} 0 for an empty needle, -1 when the needle is not a subsequence, otherwise the score
 */
function fuzzySubsequenceScore(needle, haystack) {
    if (!needle)
        return 0;
    let matched = 0;
    let total = 0;
    let previousHit = -2;
    for (let pos = 0; pos < haystack.length; pos++) {
        if (matched >= needle.length)
            break;
        if (haystack[pos] !== needle[matched])
            continue;
        total += pos === previousHit + 1 ? 2 : 1;
        previousHit = pos;
        matched += 1;
    }
    return matched < needle.length ? -1 : total;
}
|
|
103
|
+
/**
 * Applies the exact symbol match for the requested mode to already-fetched rows and
 * returns at most `limit` of them.
 *
 * - substring / prefix: plain string containment or prefix test;
 * - wildcard: glob converted to an anchored RegExp;
 * - regex: the query compiled as a RegExp (an invalid pattern yields []);
 * - any other mode: fuzzy subsequence match, best score first.
 *
 * The symbol of a row is `row.symbol`, falling back to `row.name`.
 *
 * A `limit` that is not a number now falls back to the default of 50 (it used to
 * produce an empty result), and a non-array `rows` is treated as empty.
 *
 * @param {Array} rows
 * @param {{query?: string, mode?: string, limit?: number, caseInsensitive?: boolean}} params
 * @returns {Array} matching rows (the original row objects)
 */
function filterAndRankSymbolRows(rows, params) {
    const qRaw = String(params.query ?? '');
    const mode = inferSymbolSearchMode(qRaw, params.mode);
    const requestedLimit = Number(params.limit ?? 50);
    const limit = Number.isNaN(requestedLimit) ? 50 : Math.max(1, requestedLimit);
    const caseInsensitive = Boolean(params.caseInsensitive);
    const list = Array.isArray(rows) ? rows : [];
    const getSymbol = (r) => String(r?.symbol ?? r?.name ?? '');
    const takeMatching = (predicate) => list.filter((r) => predicate(getSymbol(r))).slice(0, limit);
    if (mode === 'substring' || mode === 'prefix') {
        const fold = (s) => (caseInsensitive ? s.toLowerCase() : s);
        const q = fold(qRaw);
        return takeMatching((s) => (mode === 'prefix' ? fold(s).startsWith(q) : fold(s).includes(q)));
    }
    if (mode === 'wildcard' || mode === 'regex') {
        const re = mode === 'wildcard' ? globToRegex(qRaw, caseInsensitive) : buildRegex(qRaw, caseInsensitive);
        return re ? takeMatching((s) => re.test(s)) : [];
    }
    // Fuzzy: keep rows whose symbol contains the query as a subsequence (score >= 0).
    const q = normalizeForFuzzy(qRaw);
    return list
        .map((r) => ({ r, score: fuzzySubsequenceScore(q, normalizeForFuzzy(getSymbol(r))) }))
        .filter((x) => x.score >= 0)
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((x) => x.r);
}
|