vemora 0.1.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +851 -0
- package/dist/cli.d.ts +16 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +682 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/ask.d.ts +14 -0
- package/dist/commands/ask.d.ts.map +1 -0
- package/dist/commands/ask.js +137 -0
- package/dist/commands/ask.js.map +1 -0
- package/dist/commands/audit.d.ts +17 -0
- package/dist/commands/audit.d.ts.map +1 -0
- package/dist/commands/audit.js +398 -0
- package/dist/commands/audit.js.map +1 -0
- package/dist/commands/brief.d.ts +16 -0
- package/dist/commands/brief.d.ts.map +1 -0
- package/dist/commands/brief.js +84 -0
- package/dist/commands/brief.js.map +1 -0
- package/dist/commands/chat.d.ts +7 -0
- package/dist/commands/chat.d.ts.map +1 -0
- package/dist/commands/chat.js +155 -0
- package/dist/commands/chat.js.map +1 -0
- package/dist/commands/context.d.ts +63 -0
- package/dist/commands/context.d.ts.map +1 -0
- package/dist/commands/context.js +794 -0
- package/dist/commands/context.js.map +1 -0
- package/dist/commands/dead-code.d.ts +15 -0
- package/dist/commands/dead-code.d.ts.map +1 -0
- package/dist/commands/dead-code.js +206 -0
- package/dist/commands/dead-code.js.map +1 -0
- package/dist/commands/deps.d.ts +20 -0
- package/dist/commands/deps.d.ts.map +1 -0
- package/dist/commands/deps.js +138 -0
- package/dist/commands/deps.js.map +1 -0
- package/dist/commands/focus.d.ts +8 -0
- package/dist/commands/focus.d.ts.map +1 -0
- package/dist/commands/focus.js +310 -0
- package/dist/commands/focus.js.map +1 -0
- package/dist/commands/index.d.ts +10 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +366 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/init-agent.d.ts +23 -0
- package/dist/commands/init-agent.d.ts.map +1 -0
- package/dist/commands/init-agent.js +384 -0
- package/dist/commands/init-agent.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +122 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/knowledge.d.ts +14 -0
- package/dist/commands/knowledge.d.ts.map +1 -0
- package/dist/commands/knowledge.js +115 -0
- package/dist/commands/knowledge.js.map +1 -0
- package/dist/commands/plan.d.ts +24 -0
- package/dist/commands/plan.d.ts.map +1 -0
- package/dist/commands/plan.js +867 -0
- package/dist/commands/plan.js.map +1 -0
- package/dist/commands/query.d.ts +39 -0
- package/dist/commands/query.d.ts.map +1 -0
- package/dist/commands/query.js +392 -0
- package/dist/commands/query.js.map +1 -0
- package/dist/commands/remember.d.ts +11 -0
- package/dist/commands/remember.d.ts.map +1 -0
- package/dist/commands/remember.js +267 -0
- package/dist/commands/remember.js.map +1 -0
- package/dist/commands/report.d.ts +10 -0
- package/dist/commands/report.d.ts.map +1 -0
- package/dist/commands/report.js +243 -0
- package/dist/commands/report.js.map +1 -0
- package/dist/commands/status.d.ts +2 -0
- package/dist/commands/status.d.ts.map +1 -0
- package/dist/commands/status.js +127 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/summarize.d.ts +14 -0
- package/dist/commands/summarize.d.ts.map +1 -0
- package/dist/commands/summarize.js +170 -0
- package/dist/commands/summarize.js.map +1 -0
- package/dist/commands/triage.d.ts +33 -0
- package/dist/commands/triage.d.ts.map +1 -0
- package/dist/commands/triage.js +419 -0
- package/dist/commands/triage.js.map +1 -0
- package/dist/commands/usages.d.ts +14 -0
- package/dist/commands/usages.d.ts.map +1 -0
- package/dist/commands/usages.js +236 -0
- package/dist/commands/usages.js.map +1 -0
- package/dist/core/config.d.ts +35 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +159 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/types.d.ts +287 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +4 -0
- package/dist/core/types.js.map +1 -0
- package/dist/embeddings/factory.d.ts +9 -0
- package/dist/embeddings/factory.d.ts.map +1 -0
- package/dist/embeddings/factory.js +26 -0
- package/dist/embeddings/factory.js.map +1 -0
- package/dist/embeddings/noop.d.ts +17 -0
- package/dist/embeddings/noop.d.ts.map +1 -0
- package/dist/embeddings/noop.js +22 -0
- package/dist/embeddings/noop.js.map +1 -0
- package/dist/embeddings/ollama.d.ts +11 -0
- package/dist/embeddings/ollama.d.ts.map +1 -0
- package/dist/embeddings/ollama.js +49 -0
- package/dist/embeddings/ollama.js.map +1 -0
- package/dist/embeddings/openai.d.ts +10 -0
- package/dist/embeddings/openai.d.ts.map +1 -0
- package/dist/embeddings/openai.js +67 -0
- package/dist/embeddings/openai.js.map +1 -0
- package/dist/embeddings/provider.d.ts +19 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +3 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/indexer/callgraph.d.ts +16 -0
- package/dist/indexer/callgraph.d.ts.map +1 -0
- package/dist/indexer/callgraph.js +154 -0
- package/dist/indexer/callgraph.js.map +1 -0
- package/dist/indexer/chunkBySlidingWindow.d.ts +6 -0
- package/dist/indexer/chunkBySlidingWindow.d.ts.map +1 -0
- package/dist/indexer/chunkBySlidingWindow.js +30 -0
- package/dist/indexer/chunkBySlidingWindow.js.map +1 -0
- package/dist/indexer/chunkBySymbols.d.ts +7 -0
- package/dist/indexer/chunkBySymbols.d.ts.map +1 -0
- package/dist/indexer/chunkBySymbols.js +57 -0
- package/dist/indexer/chunkBySymbols.js.map +1 -0
- package/dist/indexer/chunker.d.ts +15 -0
- package/dist/indexer/chunker.d.ts.map +1 -0
- package/dist/indexer/chunker.js +26 -0
- package/dist/indexer/chunker.js.map +1 -0
- package/dist/indexer/classHeader.d.ts +7 -0
- package/dist/indexer/classHeader.d.ts.map +1 -0
- package/dist/indexer/classHeader.js +37 -0
- package/dist/indexer/classHeader.js.map +1 -0
- package/dist/indexer/deps.d.ts +66 -0
- package/dist/indexer/deps.d.ts.map +1 -0
- package/dist/indexer/deps.js +412 -0
- package/dist/indexer/deps.js.map +1 -0
- package/dist/indexer/hasher.d.ts +17 -0
- package/dist/indexer/hasher.d.ts.map +1 -0
- package/dist/indexer/hasher.js +38 -0
- package/dist/indexer/hasher.js.map +1 -0
- package/dist/indexer/parser.d.ts +18 -0
- package/dist/indexer/parser.d.ts.map +1 -0
- package/dist/indexer/parser.js +355 -0
- package/dist/indexer/parser.js.map +1 -0
- package/dist/indexer/scanner.d.ts +18 -0
- package/dist/indexer/scanner.d.ts.map +1 -0
- package/dist/indexer/scanner.js +37 -0
- package/dist/indexer/scanner.js.map +1 -0
- package/dist/indexer/strategy.d.ts +11 -0
- package/dist/indexer/strategy.d.ts.map +1 -0
- package/dist/indexer/strategy.js +15 -0
- package/dist/indexer/strategy.js.map +1 -0
- package/dist/indexer/tests.d.ts +15 -0
- package/dist/indexer/tests.d.ts.map +1 -0
- package/dist/indexer/tests.js +68 -0
- package/dist/indexer/tests.js.map +1 -0
- package/dist/indexer/todos.d.ts +9 -0
- package/dist/indexer/todos.d.ts.map +1 -0
- package/dist/indexer/todos.js +29 -0
- package/dist/indexer/todos.js.map +1 -0
- package/dist/llm/anthropic.d.ts +8 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +76 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/claude-code.d.ts +37 -0
- package/dist/llm/claude-code.d.ts.map +1 -0
- package/dist/llm/claude-code.js +97 -0
- package/dist/llm/claude-code.js.map +1 -0
- package/dist/llm/factory.d.ts +7 -0
- package/dist/llm/factory.d.ts.map +1 -0
- package/dist/llm/factory.js +47 -0
- package/dist/llm/factory.js.map +1 -0
- package/dist/llm/ollama.d.ts +8 -0
- package/dist/llm/ollama.d.ts.map +1 -0
- package/dist/llm/ollama.js +83 -0
- package/dist/llm/ollama.js.map +1 -0
- package/dist/llm/openai.d.ts +8 -0
- package/dist/llm/openai.d.ts.map +1 -0
- package/dist/llm/openai.js +68 -0
- package/dist/llm/openai.js.map +1 -0
- package/dist/llm/provider.d.ts +35 -0
- package/dist/llm/provider.d.ts.map +1 -0
- package/dist/llm/provider.js +3 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/search/bm25.d.ts +3 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +104 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/formatter.d.ts +43 -0
- package/dist/search/formatter.d.ts.map +1 -0
- package/dist/search/formatter.js +208 -0
- package/dist/search/formatter.js.map +1 -0
- package/dist/search/hybrid.d.ts +10 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +53 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/merge.d.ts +33 -0
- package/dist/search/merge.d.ts.map +1 -0
- package/dist/search/merge.js +158 -0
- package/dist/search/merge.js.map +1 -0
- package/dist/search/mmr.d.ts +23 -0
- package/dist/search/mmr.d.ts.map +1 -0
- package/dist/search/mmr.js +95 -0
- package/dist/search/mmr.js.map +1 -0
- package/dist/search/rerank.d.ts +12 -0
- package/dist/search/rerank.d.ts.map +1 -0
- package/dist/search/rerank.js +113 -0
- package/dist/search/rerank.js.map +1 -0
- package/dist/search/signature.d.ts +42 -0
- package/dist/search/signature.d.ts.map +1 -0
- package/dist/search/signature.js +112 -0
- package/dist/search/signature.js.map +1 -0
- package/dist/search/vector.d.ts +41 -0
- package/dist/search/vector.d.ts.map +1 -0
- package/dist/search/vector.js +185 -0
- package/dist/search/vector.js.map +1 -0
- package/dist/storage/cache.d.ts +30 -0
- package/dist/storage/cache.d.ts.map +1 -0
- package/dist/storage/cache.js +160 -0
- package/dist/storage/cache.js.map +1 -0
- package/dist/storage/knowledge.d.ts +23 -0
- package/dist/storage/knowledge.d.ts.map +1 -0
- package/dist/storage/knowledge.js +81 -0
- package/dist/storage/knowledge.js.map +1 -0
- package/dist/storage/planSession.d.ts +39 -0
- package/dist/storage/planSession.d.ts.map +1 -0
- package/dist/storage/planSession.js +78 -0
- package/dist/storage/planSession.js.map +1 -0
- package/dist/storage/repository.d.ts +27 -0
- package/dist/storage/repository.d.ts.map +1 -0
- package/dist/storage/repository.js +95 -0
- package/dist/storage/repository.js.map +1 -0
- package/dist/storage/session.d.ts +38 -0
- package/dist/storage/session.d.ts.map +1 -0
- package/dist/storage/session.js +100 -0
- package/dist/storage/session.js.map +1 -0
- package/dist/storage/summaries.d.ts +19 -0
- package/dist/storage/summaries.d.ts.map +1 -0
- package/dist/storage/summaries.js +66 -0
- package/dist/storage/summaries.js.map +1 -0
- package/dist/storage/usage.d.ts +39 -0
- package/dist/storage/usage.d.ts.map +1 -0
- package/dist/storage/usage.js +55 -0
- package/dist/storage/usage.js.map +1 -0
- package/dist/utils/git.d.ts +20 -0
- package/dist/utils/git.d.ts.map +1 -0
- package/dist/utils/git.js +49 -0
- package/dist/utils/git.js.map +1 -0
- package/dist/utils/tokenizer.d.ts +32 -0
- package/dist/utils/tokenizer.d.ts.map +1 -0
- package/dist/utils/tokenizer.js +66 -0
- package/dist/utils/tokenizer.js.map +1 -0
- package/package.json +71 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.applyMMR = applyMMR;
|
|
4
|
+
const vector_1 = require("./vector");
|
|
5
|
+
/**
 * Maximal Marginal Relevance (MMR) reranking.
 *
 * Selects up to `topK` results from `candidates` by iteratively picking
 * the candidate that maximises:
 *
 *   MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
 *
 * where:
 *   - relevance(d) is the original retrieval score, min-max normalised to [0, 1]
 *   - max_sim(d, selected) is the maximum cosine similarity between d and
 *     any already-selected result (requires embeddings in the cache);
 *     it is never lower than 0, and is 0 for candidates without a cached vector
 *
 * lambda=1.0 → pure relevance ordering
 * lambda=0.5 → balanced relevance / diversity (default)
 * lambda=0.0 → maximum diversity, ignores relevance
 *
 * Falls back to returning `candidates.slice(0, topK)` when there is at most
 * one candidate or when embeddings are unavailable (keyword-only mode).
 *
 * Non-finite scores never abort the selection: NaN and -Infinity are treated
 * as relevance 0, +Infinity as relevance 1, and they are ignored when the
 * min/max used for normalisation are computed.
 *
 * @param candidates Search results; each entry is read as `{ score, chunk: { id } }`.
 * @param cache      Embedding cache read as `{ vectors, chunkIds, dimensions }`, where
 *                   `vectors` is a flat typed array holding `dimensions` values per
 *                   entry of `chunkIds` (same order). May be null/undefined.
 * @param topK       Maximum number of results to return.
 * @param lambda     Relevance/diversity trade-off in [0, 1] (default 0.5).
 * @returns A new array containing the selected candidates in selection order.
 */
function applyMMR(candidates, cache, topK, lambda = 0.5) {
    if (candidates.length <= 1)
        return candidates.slice(0, topK);
    // Without vector data MMR cannot compute inter-result similarity.
    if (!cache?.vectors || !cache.chunkIds || !cache.dimensions) {
        return candidates.slice(0, topK);
    }
    const { vectors, chunkIds, dimensions } = cache;
    // Build a fast chunk-id → flat-buffer-index map.
    const idToIdx = new Map();
    chunkIds.forEach((id, i) => idToIdx.set(id, i));
    // Normalise relevance scores to [0, 1] so lambda is meaningful regardless
    // of whether scores come from cosine similarity (already ~[0,1]) or BM25.
    // A plain loop is used instead of Math.max(...spread) to avoid call-stack
    // overflow on large arrays. Non-finite scores are skipped so a single
    // NaN/Infinity cannot poison the range.
    let maxScore = -Infinity;
    let minScore = Infinity;
    for (const r of candidates) {
        if (!Number.isFinite(r.score))
            continue;
        if (r.score > maxScore)
            maxScore = r.score;
        if (r.score < minScore)
            minScore = r.score;
    }
    // All-equal (or no finite) scores → range 1, so every relevance becomes 0.
    const range = maxScore > minScore ? maxScore - minScore : 1;
    const relNorm = candidates.map((r) => {
        if (!Number.isFinite(r.score))
            return r.score === Infinity ? 1 : 0;
        return (r.score - minScore) / range;
    });
    // Pre-extract each candidate's vector as number[] so the inner loop avoids
    // repeated Array.from() allocations (O(n² * d) → O(n * d) allocations).
    const candidateVecs = candidates.map((r) => {
        const idx = idToIdx.get(r.chunk.id);
        if (idx === undefined)
            return null;
        const offset = idx * dimensions;
        return Array.from(vectors.subarray(offset, offset + dimensions));
    });
    const selected = [];
    // Flat-buffer indices of already-selected results (for similarity queries).
    const selectedVecIdx = [];
    // Track remaining candidates as index positions into `candidates`.
    const remaining = candidates.map((_, i) => i);
    while (selected.length < topK && remaining.length > 0) {
        // Position in `remaining`. Defaults to the first remaining candidate so
        // that a round in which no score beats -Infinity (e.g. NaN lambda or
        // NaN similarities) still selects something instead of indexing with -1.
        let bestPos = 0;
        let bestMMR = -Infinity;
        for (let pos = 0; pos < remaining.length; pos++) {
            const candIdx = remaining[pos];
            const relevance = relNorm[candIdx];
            let maxSim = 0;
            if (selectedVecIdx.length > 0) {
                const vecA = candidateVecs[candIdx];
                if (vecA !== null) {
                    for (const selIdx of selectedVecIdx) {
                        const sim = (0, vector_1.cosineSimilarityBinary)(vecA, vectors, selIdx * dimensions, dimensions);
                        if (sim > maxSim)
                            maxSim = sim;
                    }
                }
            }
            const mmrScore = lambda * relevance - (1 - lambda) * maxSim;
            // Strict comparison: on ties the earlier (originally higher-ranked) candidate wins.
            if (mmrScore > bestMMR) {
                bestMMR = mmrScore;
                bestPos = pos;
            }
        }
        const chosenIdx = remaining[bestPos];
        selected.push(candidates[chosenIdx]);
        // Only results that have a cached vector take part in later similarity checks.
        const vecIdx = idToIdx.get(candidates[chosenIdx].chunk.id);
        if (vecIdx !== undefined)
            selectedVecIdx.push(vecIdx);
        remaining.splice(bestPos, 1);
    }
    return selected;
}
|
|
95
|
+
//# sourceMappingURL=mmr.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mmr.js","sourceRoot":"","sources":["../../src/search/mmr.ts"],"names":[],"mappings":";;AAuBA,4BAwFC;AA9GD,qCAAkD;AAElD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,SAAgB,QAAQ,CACtB,UAA0B,EAC1B,KAA4B,EAC5B,IAAY,EACZ,MAAM,GAAG,GAAG;IAEZ,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAE7D,kEAAkE;IAClE,IAAI,CAAC,KAAK,EAAE,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;QAC5D,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC;IAEhD,iDAAiD;IACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC1C,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAEhD,0EAA0E;IAC1E,0EAA0E;IAC1E,0FAA0F;IAC1F,IAAI,QAAQ,GAAG,CAAC,QAAQ,CAAC;IACzB,IAAI,QAAQ,GAAG,QAAQ,CAAC;IACxB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;QAC3C,IAAI,CAAC,CAAC,KAAK,GAAG,QAAQ;YAAE,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC;IAC7C,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,GAAG,QAAQ,IAAI,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG,KAAK,CAAC,CAAC;IAEpE,2EAA2E;IAC3E,wEAAwE;IACxE,MAAM,aAAa,GAA2B,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACjE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;QACnC,MAAM,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC;QAChC,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,4EAA4E;IAC5E,MAAM,cAAc,GAAa,EAAE,CAAC;IAEpC,mEAAmE;IACnE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9C,OAAO,QAAQ,CAAC,MAAM,GAAG,IAAI,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,0BAA0B;QAC5C,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAC;QAExB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,SAAS,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAA
G,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;YAEnC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;gBACpC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBAClB,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;wBACpC,MAAM,GAAG,GAAG,IAAA,+BAAsB,EAChC,IAAI,EACJ,OAAO,EACP,MAAM,GAAG,UAAU,EACnB,UAAU,CACX,CAAC;wBACF,IAAI,GAAG,GAAG,MAAM;4BAAE,MAAM,GAAG,GAAG,CAAC;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;YAC5D,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO,GAAG,QAAQ,CAAC;gBACnB,OAAO,GAAG,GAAG,CAAC;YAChB,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;QAErC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC3D,IAAI,MAAM,KAAK,SAAS;YAAE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEtD,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { RerankConfig, SearchResult } from "../core/types";
|
|
2
|
+
/**
|
|
3
|
+
* Re-scores search results using the configured reranker.
|
|
4
|
+
*
|
|
5
|
+
* @param query The user's natural language query
|
|
6
|
+
* @param results Initial search results (from vector or keyword search)
|
|
7
|
+
* @param topK Number of results to return after reranking
|
|
8
|
+
* @param config Reranker config (default: xenova cross-encoder)
|
|
9
|
+
* @param fallbackModel Model name to use when config.model is not set (ollama only)
|
|
10
|
+
*/
|
|
11
|
+
export declare function rerankResults(query: string, results: SearchResult[], topK?: number, config?: RerankConfig, fallbackModel?: string): Promise<SearchResult[]>;
|
|
12
|
+
//# sourceMappingURL=rerank.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rerank.d.ts","sourceRoot":"","sources":["../../src/search/rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AA0HhE;;;;;;;;GAQG;AACH,wBAAsB,aAAa,CACjC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,YAAY,EAAE,EACvB,IAAI,SAAK,EACT,MAAM,CAAC,EAAE,YAAY,EACrB,aAAa,CAAC,EAAE,MAAM,GACrB,OAAO,CAAC,YAAY,EAAE,CAAC,CAezB"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.rerankResults = rerankResults;
|
|
4
|
+
const ollama_1 = require("../llm/ollama");
|
|
5
|
+
// ─── Xenova cross-encoder (original implementation) ──────────────────────────
|
|
6
|
+
// biome-ignore lint/suspicious/noExplicitAny: @xenova/transformers has no exported TS types
let xenovaModel = null;
// biome-ignore lint/suspicious/noExplicitAny: @xenova/transformers has no exported TS types
let xenovaTokenizer = null;
/**
 * Lazily loads the cross-encoder model and its tokenizer into the
 * module-level `xenovaModel` / `xenovaTokenizer` cache.
 *
 * The two cached values are only assigned after BOTH have loaded, so a
 * failure while loading the tokenizer cannot leave a half-initialised cache
 * (model set, tokenizer null) that later calls would mistake for "ready".
 *
 * @throws {Error} when the optional dependency `@xenova/transformers` cannot
 *   be required (the underlying error is attached as `cause`), or when either
 *   `from_pretrained` call rejects.
 */
async function initXenovaReranker() {
    if (xenovaModel && xenovaTokenizer)
        return;
    // biome-ignore lint/suspicious/noExplicitAny: dynamic optional dependency
    let transformers;
    try {
        transformers = require("@xenova/transformers");
    }
    catch (err) {
        throw new Error("The xenova reranker requires @xenova/transformers.\n" +
            "Install it with: npm install @xenova/transformers\n" +
            'Or switch to the ollama reranker: set reranker.provider = "ollama" in .vemora/config.json', { cause: err });
    }
    const { AutoModelForSequenceClassification, AutoTokenizer } = transformers;
    // Load into locals first; commit to the shared cache only once both succeeded.
    const model = await AutoModelForSequenceClassification.from_pretrained("Xenova/ms-marco-MiniLM-L-6-v2", { quantized: false });
    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/ms-marco-MiniLM-L-6-v2");
    xenovaModel = model;
    xenovaTokenizer = tokenizer;
}
|
|
27
|
+
/**
 * Re-scores the first 25 results with the cached cross-encoder and returns
 * the best `topK` of them, highest score first.
 *
 * Each candidate is tokenised together with the query and run through the
 * model one at a time; the first logit becomes the candidate's new score.
 * If tokenising or scoring a candidate throws, that candidate is kept
 * unchanged (with its original score) rather than dropped.
 */
async function rerankXenova(query, results, topK) {
    await initXenovaReranker();
    const pool = results.slice(0, 25);
    const scored = [];
    for (let i = 0; i < pool.length; i++) {
        const hit = pool[i];
        let entry;
        try {
            const encoded = await xenovaTokenizer(query, {
                text_pair: hit.chunk.content,
                truncation: true,
                padding: true,
            });
            const output = await xenovaModel(encoded);
            entry = { ...hit, score: output.logits.data[0] };
        }
        catch {
            // Best effort: keep the candidate as-is when it cannot be scored.
            entry = hit;
        }
        scored.push(entry);
    }
    scored.sort((left, right) => right.score - left.score);
    return scored.slice(0, topK);
}
|
|
48
|
+
// ─── Ollama LLM reranker ──────────────────────────────────────────────────────
|
|
49
|
+
/**
 * Reranks the first 25 results by asking an Ollama chat model to order them.
 *
 * The model is prompted to answer with a JSON array of 0-based indices, most
 * relevant first. The first bracketed run of digits/commas/whitespace in the
 * reply is parsed; out-of-range and repeated indices are ignored, and any
 * candidate the model did not mention is appended in its original order, so
 * every candidate appears at most once in the output.
 *
 * Falls back to `results.slice(0, topK)` when the reply contains no index
 * array, cannot be parsed, or the chat call throws.
 *
 * @param query         The user's query, embedded verbatim in the prompt.
 * @param results       Search results; `chunk.content`, `chunk.file` and `chunk.start` are read.
 * @param topK          Maximum number of results to return.
 * @param config        Reranker config; `model` and `baseUrl` are read.
 * @param fallbackModel Model name used when `config.model` is not set.
 * @returns The reranked results (scores are left untouched).
 */
async function rerankOllama(query, results, topK, config, fallbackModel) {
    const candidates = results.slice(0, 25);
    if (candidates.length === 0)
        return [];
    const model = config.model ?? fallbackModel ?? "gemma4:e4b";
    const ollama = new ollama_1.OllamaProvider(config.baseUrl ?? "http://localhost:11434");
    // One entry per candidate: "[index] file:start" followed by a single-line snippet.
    const chunkList = candidates
        .map((r, i) => {
        const snippet = r.chunk.content.slice(0, 300).replace(/\n/g, " ");
        return `[${i}] ${r.chunk.file}:${r.chunk.start}\n${snippet}`;
    })
        .join("\n\n");
    const prompt = `Rank the following code chunks by relevance to the query. ` +
        `Return ONLY a JSON array of 0-based indices, most relevant first. ` +
        `Example for 3 chunks: [2, 0, 1]\n\n` +
        `Query: "${query}"\n\n` +
        `Chunks:\n${chunkList}`;
    try {
        const response = await ollama.chat([{ role: "user", content: prompt }], { model, temperature: 0, maxTokens: 100 });
        const raw = response.content.trim();
        // Extract JSON array from response (model may wrap it in prose)
        const match = raw.match(/\[[\d,\s]+\]/);
        if (!match)
            return results.slice(0, topK);
        const indices = JSON.parse(match[0]);
        // Keep the model's order, but take each valid index only once so a
        // repeated index cannot duplicate a result.
        const seen = new Set();
        const reranked = [];
        for (const i of indices) {
            if (!Number.isInteger(i) || i < 0 || i >= candidates.length || seen.has(i))
                continue;
            seen.add(i);
            reranked.push(candidates[i]);
        }
        // Append any candidates not mentioned by the model, preserving original order
        for (let i = 0; i < candidates.length; i++) {
            if (!seen.has(i))
                reranked.push(candidates[i]);
        }
        return reranked.slice(0, topK);
    }
    catch {
        // Graceful fallback: return original order
        return results.slice(0, topK);
    }
}
|
|
90
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
91
|
+
/**
 * Entry point for reranking: dispatches to the reranker named by
 * `config.provider` and resolves to at most `topK` results.
 *
 * Provider selection:
 *   - "none"         → no reranking, the first `topK` results are returned as-is
 *   - "ollama"       → LLM-based ordering via `rerankOllama`
 *   - anything else  → xenova cross-encoder (also used when no config is given)
 *
 * @param query         The user's natural language query
 * @param results       Initial search results (from vector or keyword search)
 * @param topK          Number of results to return after reranking (default 10)
 * @param config        Reranker config (default: xenova cross-encoder)
 * @param fallbackModel Model name to use when config.model is not set (ollama only)
 */
async function rerankResults(query, results, topK = 10, config, fallbackModel) {
    // Nothing to rank.
    if (results.length === 0)
        return [];
    switch (config?.provider ?? "xenova") {
        case "none":
            return results.slice(0, topK);
        case "ollama":
            return rerankOllama(query, results, topK, config, fallbackModel);
        default:
            // Default: xenova
            return rerankXenova(query, results, topK);
    }
}
|
|
113
|
+
//# sourceMappingURL=rerank.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rerank.js","sourceRoot":"","sources":["../../src/search/rerank.ts"],"names":[],"mappings":";;AAmIA,sCAqBC;AAvJD,0CAA+C;AAE/C,gFAAgF;AAEhF,4FAA4F;AAC5F,IAAI,WAAW,GAAQ,IAAI,CAAC;AAC5B,4FAA4F;AAC5F,IAAI,eAAe,GAAQ,IAAI,CAAC;AAEhC,KAAK,UAAU,kBAAkB;IAC/B,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,0EAA0E;QAC1E,IAAI,YAAiB,CAAC;QACtB,IAAI,CAAC;YACH,YAAY,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;QACjD,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,sDAAsD;gBACpD,qDAAqD;gBACrD,2FAA2F,CAC9F,CAAC;QACJ,CAAC;QACD,MAAM,EAAE,kCAAkC,EAAE,aAAa,EAAE,GAAG,YAAY,CAAC;QAC3E,WAAW,GAAG,MAAM,kCAAkC,CAAC,eAAe,CACpE,+BAA+B,EAC/B,EAAE,SAAS,EAAE,KAAK,EAAE,CACrB,CAAC;QACF,eAAe,GAAG,MAAM,aAAa,CAAC,eAAe,CACnD,+BAA+B,CAChC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,KAAa,EACb,OAAuB,EACvB,IAAY;IAEZ,MAAM,kBAAkB,EAAE,CAAC;IAE3B,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAExC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,KAAK,EAAE;gBAC1C,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,OAAO;gBAC5B,UAAU,EAAE,IAAI;gBAChB,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;YACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC,EAAE,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC3C,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,iFAAiF;AAEjF,KAAK,UAAU,YAAY,CACzB,KAAa,EACb,OAAuB,EACvB,IAAY,EACZ,MAAoB,EACpB,aAAsB;IAEtB,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACxC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,aAAa,IAAI,YAAY,CAAC;IAC5D,MAAM,MAAM,GAAG,IAAI,uBAAc,CAAC,MAAM,CAAC,OAAO,IAAI,wBAAwB,CAAC,CAAC;IAE9E,MAAM,SAAS,GAAG,UAAU;SACzB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CA
AC,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAClE,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,KAAK,KAAK,OAAO,EAAE,CAAC;IAC/D,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GACV,4DAA4D;QAC5D,oEAAoE;QACpE,qCAAqC;QACrC,WAAW,KAAK,OAAO;QACvB,YAAY,SAAS,EAAE,CAAC;IAE1B,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAChC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,CAC1C,CAAC;QAEF,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACpC,gEAAgE;QAChE,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QACxC,IAAI,CAAC,KAAK;YAAE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAa,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,OAAO;aACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;aAC9C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAE7B,8EAA8E;QAC9E,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,2CAA2C;QAC3C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,CAAC;AACH,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;GAQG;AACI,KAAK,UAAU,aAAa,CACjC,KAAa,EACb,OAAuB,EACvB,IAAI,GAAG,EAAE,EACT,MAAqB,EACrB,aAAsB;IAEtB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,MAAM,QAAQ,GAAG,MAAM,EAAE,QAAQ,IAAI,QAAQ,CAAC;IAE9C,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC1B,OAAO,YAAY,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,MAAO,EAAE,aAAa,CAAC,CAAC;IACpE,CAAC;IAED,kBAAkB;IAClB,OAAO,YAAY,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;AAC5C,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Signature extraction for code chunks.
|
|
3
|
+
*
|
|
4
|
+
* Given the raw content of a chunk, returns the declaration signature —
|
|
5
|
+
* the part that describes *what* exists without the implementation body.
|
|
6
|
+
* This is used in the query output for medium-relevance results.
|
|
7
|
+
*
|
|
8
|
+
* Design philosophy:
|
|
9
|
+
* - Interfaces and type aliases ARE their signature → show them in full (compact)
|
|
10
|
+
* - Functions and classes → extract up to the opening brace, replace body with { … }
|
|
11
|
+
* - Arrow functions with expression bodies → show up to =>
|
|
12
|
+
* - Everything else → show first meaningful lines
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* Extracts the declaration signature from a chunk's content.
|
|
16
|
+
*
|
|
17
|
+
* Examples:
|
|
18
|
+
*
|
|
19
|
+
* Input: "export async function connect(\n host: string,\n): Promise<void> {\n ..."
|
|
20
|
+
* Output: "export async function connect(\n host: string,\n): Promise<void> { … }"
|
|
21
|
+
*
|
|
22
|
+
* Input: "export interface ImapConfig {\n host: string;\n}"
|
|
23
|
+
* Output: (same — interfaces are returned as-is, they are compact)
|
|
24
|
+
*
|
|
25
|
+
* Input: "export const send = async (msg: Email): Promise<void> =>\n smtp.send(msg);"
|
|
26
|
+
* Output: "export const send = async (msg: Email): Promise<void> => …"
|
|
27
|
+
*/
|
|
28
|
+
export declare function extractSignature(content: string): string;
|
|
29
|
+
export type DisplayTier = "high" | "med" | "low";
|
|
30
|
+
/**
|
|
31
|
+
* Determines how much of a chunk to show based on its rank in results.
|
|
32
|
+
*
|
|
33
|
+
* high (rank 1-3) → full code block (capped at MAX_HIGH_LINES automatically)
|
|
34
|
+
* med (rank 4-7) → signature only (declaration without body)
|
|
35
|
+
* low (rank 8+) → file + symbol + score only (no code)
|
|
36
|
+
*
|
|
37
|
+
* The --show-code flag overrides all tiers to show full code.
|
|
38
|
+
*/
|
|
39
|
+
export declare function getDisplayTier(rank: number): DisplayTier;
|
|
40
|
+
/** Lines shown automatically for high-tier results (without --show-code) */
|
|
41
|
+
export declare const HIGH_CODE_LINES = 30;
|
|
42
|
+
//# sourceMappingURL=signature.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"signature.d.ts","sourceRoot":"","sources":["../../src/search/signature.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAMH;;;;;;;;;;;;;GAaG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAmExD;AAID,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC;AAEjD;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,CAIxD;AAED,4EAA4E;AAC5E,eAAO,MAAM,eAAe,KAAK,CAAC"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Signature extraction for code chunks.
|
|
4
|
+
*
|
|
5
|
+
* Given the raw content of a chunk, returns the declaration signature —
|
|
6
|
+
* the part that describes *what* exists without the implementation body.
|
|
7
|
+
* This is used in the query output for medium-relevance results.
|
|
8
|
+
*
|
|
9
|
+
* Design philosophy:
|
|
10
|
+
* - Interfaces and type aliases ARE their signature → show them in full (compact)
|
|
11
|
+
* - Functions and classes → extract up to the opening brace, replace body with { … }
|
|
12
|
+
* - Arrow functions with expression bodies → show up to =>
|
|
13
|
+
* - Everything else → show first meaningful lines
|
|
14
|
+
*/
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.HIGH_CODE_LINES = void 0;
|
|
17
|
+
exports.extractSignature = extractSignature;
|
|
18
|
+
exports.getDisplayTier = getDisplayTier;
|
|
19
|
+
/** Maximum number of lines collected for a function/class signature before it is cut off. */
const MAX_SIG_LINES = 10;
// ─── Public API ───────────────────────────────────────────────────────────────
/**
 * Extracts the declaration signature from a chunk's content.
 *
 * The first non-blank line decides how the chunk is handled:
 *   - `type` / `interface` declarations → the first 20 lines are returned as-is
 *   - `import` statements → the first 5 lines plus a "more lines" marker
 *   - anything else → lines are collected from the first non-blank line until
 *     one ends with `{` (body replaced by `{ … }`), one ends with `=>`
 *     (suffixed with ` …`), the first line ends with `;`, or MAX_SIG_LINES
 *     lines have been collected.
 *
 * Examples:
 *
 *   Input:  "export async function connect(\n  host: string,\n): Promise<void> {\n  ..."
 *   Output: "export async function connect(\n  host: string,\n): Promise<void> { … }"
 *
 *   Input:  "export interface ImapConfig {\n  host: string;\n}"
 *   Output: (same — interfaces are returned as-is, they are compact)
 *
 *   Input:  "export const send = async (msg: Email): Promise<void> =>\n  smtp.send(msg);"
 *   Output: "export const send = async (msg: Email): Promise<void> => …"
 *
 * @param {string} content Raw text of the chunk.
 * @returns {string} The signature, with surrounding whitespace trimmed.
 */
function extractSignature(content) {
    const lines = content.split("\n");
    const firstMeaningfulIndex = lines.findIndex((l) => l.trim().length > 0);
    const firstMeaningful = firstMeaningfulIndex === -1 ? "" : lines[firstMeaningfulIndex];
    // ── Interfaces and type aliases ──────────────────────────────────────────
    // These are inherently compact — the full declaration IS the signature.
    // Show up to 20 lines (rare for them to be longer).
    if (/^\s*(export\s+)?(type\s+\w|interface\s+\w)/.test(firstMeaningful)) {
        const slice = lines.slice(0, 20);
        if (lines.length > 20) {
            slice.push(" …");
        }
        return slice.join("\n").trim();
    }
    // ── File header / import blocks ──────────────────────────────────────────
    // Chunks that start with an import statement — show the first 5 lines.
    if (/^\s*import\s/.test(firstMeaningful)) {
        const slice = lines.slice(0, 5);
        if (lines.length > 5) {
            slice.push(` … (${lines.length - 5} more lines)`);
        }
        return slice.join("\n").trim();
    }
    // ── Function / class / method declarations ───────────────────────────────
    const result = [];
    // Start at the first non-blank line: leading blank lines are removed by the
    // final trim() anyway, and must neither defeat the single-line `;` check
    // nor count against the MAX_SIG_LINES budget.
    const start = firstMeaningfulIndex === -1 ? 0 : firstMeaningfulIndex;
    for (let i = start; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trimEnd();
        // Line ends with an opening brace (optionally followed by a `//` comment)
        // → the body starts here. Captures patterns like:
        //   ): Promise<void> {
        //   export class Foo extends Bar {
        // The loop stops at the first such line, so braces deeper inside the
        // body are never reached.
        if (/\{\s*(\/\/[^\n]*)?\s*$/.test(trimmed)) {
            // Strip the brace (and any trailing comment) and append { … }
            const withoutBrace = trimmed
                .replace(/\s*\{\s*(\/\/[^\n]*)?\s*$/, "")
                .trimEnd();
            const head = withoutBrace !== ""
                ? withoutBrace
                : trimmed.replace(/\{.*$/, "").trim();
            result.push(`${head} { … }`);
            break;
        }
        // Arrow function whose expression body starts on the next line:
        //   const f = (x: number) =>
        //     x * 2
        if (trimmed.endsWith("=>")) {
            result.push(`${trimmed} …`);
            break;
        }
        result.push(line);
        // Single-line declarations ending with `;` (e.g. `declare function f(): void;`)
        if (result.length === 1 && trimmed.endsWith(";")) {
            break;
        }
        // Safety cap
        if (result.length >= MAX_SIG_LINES) {
            result.push(" …");
            break;
        }
    }
    return result.join("\n").trim();
}
|
|
94
|
+
/**
 * Maps a result's rank to the amount of detail shown for it.
 *
 *   high (rank 1-3) → full code block (capped at HIGH_CODE_LINES lines)
 *   med  (rank 4-7) → signature only (declaration without body)
 *   low  (rank 8+)  → file + symbol + score only (no code)
 *
 * The --show-code flag overrides all tiers to show full code.
 *
 * @param {number} rank Position of the result in the ranked list.
 * @returns {"high" | "med" | "low"} Tier for that rank; anything that is not
 *   `<= 7` (including NaN) falls through to "low".
 */
function getDisplayTier(rank) {
    return rank <= 3 ? "high" : rank <= 7 ? "med" : "low";
}
|
|
110
|
+
/** Lines shown automatically for high-tier results (without --show-code) */
|
|
111
|
+
exports.HIGH_CODE_LINES = 30;
|
|
112
|
+
//# sourceMappingURL=signature.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"signature.js","sourceRoot":"","sources":["../../src/search/signature.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;GAYG;;;AAoBH,4CAmEC;AAeD,wCAIC;AAxGD,MAAM,aAAa,GAAG,EAAE,CAAC;AAEzB,iFAAiF;AAEjF;;;;;;;;;;;;;GAaG;AACH,SAAgB,gBAAgB,CAAC,OAAe;IAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAErE,4EAA4E;IAC5E,wEAAwE;IACxE,oDAAoD;IACpD,IAAI,4CAA4C,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;QACvE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACjC,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE;YAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IACjC,CAAC;IAED,4EAA4E;IAC5E,+DAA+D;IAC/D,IAAI,cAAc,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAChC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,KAAK,CAAC,MAAM,GAAG,CAAC,cAAc,CAAC,CAAC;QACzE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IACjC,CAAC;IAED,4EAA4E;IAC5E,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;QAE/B,mDAAmD;QACnD,0BAA0B;QAC1B,uBAAuB;QACvB,mCAAmC;QACnC,4EAA4E;QAC5E,IAAI,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,8DAA8D;YAC9D,MAAM,YAAY,GAAG,OAAO;iBACzB,OAAO,CAAC,2BAA2B,EAAE,EAAE,CAAC;iBACxC,OAAO,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CACT,CAAC,YAAY,KAAK,EAAE;gBAClB,CAAC,CAAC,YAAY;gBACd,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,GAAG,QAAQ,CACpD,CAAC;YACF,MAAM;QACR,CAAC;QAED,uCAAuC;QACvC,6BAA6B;QAC7B,YAAY;QACZ,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACvD,MAAM,CAAC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;YAC5B,MAAM;QACR,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElB,gFAAgF;QAChF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,C
AAC,GAAG,CAAC;YAAE,MAAM;QAExD,aAAa;QACb,IAAI,MAAM,CAAC,MAAM,IAAI,aAAa,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAClC,CAAC;AAMD;;;;;;;;GAQG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAC7B,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC5B,OAAO,KAAK,CAAC;AACf,CAAC;AAED,4EAA4E;AAC/D,QAAA,eAAe,GAAG,EAAE,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { Chunk, EmbeddingCache, SearchResult, SymbolIndex } from "../core/types";
|
|
2
|
+
/**
|
|
3
|
+
* Computes cosine similarity between two vectors.
|
|
4
|
+
* Returns a value in [-1, 1]. For normalized embeddings (OpenAI, Ollama)
|
|
5
|
+
* this is effectively in [0, 1] for semantically related pairs.
|
|
6
|
+
*/
|
|
7
|
+
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
8
|
+
/**
|
|
9
|
+
* Fast cosine similarity between a number[] and a Float32Array slice.
|
|
10
|
+
*/
|
|
11
|
+
export declare function cosineSimilarityBinary(query: number[], vectors: Float32Array, offset: number, dims: number): number;
|
|
12
|
+
/**
|
|
13
|
+
* Performs a nearest-neighbor search over the cached embeddings.
|
|
14
|
+
*
|
|
15
|
+
* Uses HNSW (O(log N)) when the index is available in the cache.
|
|
16
|
+
* Falls back to exhaustive O(n * d) cosine search on the contiguous
|
|
17
|
+
* Float32Array buffer if the HNSW index is missing or fails to load.
|
|
18
|
+
*/
|
|
19
|
+
export declare function vectorSearch(queryEmbedding: number[], chunks: Chunk[], cache: EmbeddingCache, symbols: SymbolIndex, topK?: number): SearchResult[];
|
|
20
|
+
/**
|
|
21
|
+
* Direct symbol lookup — bypasses embedding when the query matches a known
|
|
22
|
+
* symbol name. Returns chunks for matched symbols scored by match quality.
|
|
23
|
+
*
|
|
24
|
+
* Match tiers (in order of precedence):
|
|
25
|
+
* 1.0 — exact match: query === symbolName (case-insensitive)
|
|
26
|
+
* 0.95 — single-word match: one word in query === symbolName
|
|
27
|
+
* 0.80 — prefix match: symbolName starts with a query word (or vice versa)
|
|
28
|
+
*
|
|
29
|
+
* Returns [] if no symbol matches (caller should fall through to vector/BM25).
|
|
30
|
+
* Only activates for narrow queries (≤5 matches) to avoid false positives on
|
|
31
|
+
* generic names like "connect" or "init".
|
|
32
|
+
*/
|
|
33
|
+
export declare function symbolLookup(query: string, chunks: Chunk[], symbols: SymbolIndex): SearchResult[];
|
|
34
|
+
/**
|
|
35
|
+
* TF-based keyword search — used as fallback when embeddings are not available.
|
|
36
|
+
*
|
|
37
|
+
* Scores each chunk by term frequency normalized by content length.
|
|
38
|
+
* Not as powerful as semantic search but requires no embedding service.
|
|
39
|
+
*/
|
|
40
|
+
export declare function keywordSearch(query: string, chunks: Chunk[], symbols: SymbolIndex, topK?: number): SearchResult[];
|
|
41
|
+
//# sourceMappingURL=vector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,KAAK,EACL,cAAc,EACd,YAAY,EACZ,WAAW,EACZ,MAAM,eAAe,CAAC;AAIvB;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAejE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,EAAE,YAAY,EACrB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,GACX,MAAM,CAcR;AAID;;;;;;GAMG;AACH,wBAAgB,YAAY,CAC1B,cAAc,EAAE,MAAM,EAAE,EACxB,MAAM,EAAE,KAAK,EAAE,EACf,KAAK,EAAE,cAAc,EACrB,OAAO,EAAE,WAAW,EACpB,IAAI,SAAK,GACR,YAAY,EAAE,CA4DhB;AAID;;;;;;;;;;;;GAYG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,KAAK,EAAE,EACf,OAAO,EAAE,WAAW,GACnB,YAAY,EAAE,CA+BhB;AAID;;;;;GAKG;AACH,wBAAgB,aAAa,CAC3B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,KAAK,EAAE,EACf,OAAO,EAAE,WAAW,EACpB,IAAI,SAAK,GACR,YAAY,EAAE,CAyChB"}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.cosineSimilarity = cosineSimilarity;
|
|
4
|
+
exports.cosineSimilarityBinary = cosineSimilarityBinary;
|
|
5
|
+
exports.vectorSearch = vectorSearch;
|
|
6
|
+
exports.symbolLookup = symbolLookup;
|
|
7
|
+
exports.keywordSearch = keywordSearch;
|
|
8
|
+
// ─── Cosine Similarity ────────────────────────────────────────────────────────
|
|
9
|
+
/**
 * Cosine similarity of two equally sized numeric vectors.
 *
 * @param {number[]} a First vector.
 * @param {number[]} b Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), a value in [-1, 1]; 0 when the
 *   vectors are empty, differ in length, or either has zero magnitude.
 */
function cosineSimilarity(a, b) {
    const size = a.length;
    if (size === 0 || b.length !== size) {
        return 0;
    }
    let dotProduct = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    let k = 0;
    while (k < size) {
        const x = a[k];
        const y = b[k];
        dotProduct += x * y;
        sumSquaresA += x * x;
        sumSquaresB += y * y;
        k += 1;
    }
    const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    if (magnitude === 0) {
        return 0;
    }
    return dotProduct / magnitude;
}
|
|
28
|
+
/**
 * Cosine similarity between a query vector and one row of a flat vector buffer.
 *
 * The row is the `dims` consecutive values of `vectors` starting at `offset`.
 *
 * @param {number[]} query Query vector; its first `dims` values are used.
 * @param {Float32Array} vectors Flat buffer holding the stored vectors.
 * @param {number} offset Index in `vectors` where the row starts.
 * @param {number} dims Number of components to compare.
 * @returns {number} Cosine similarity, or 0 when it cannot be computed: the
 *   query is shorter than `dims`, the row falls outside the buffer, or either
 *   vector has zero magnitude.
 */
function cosineSimilarityBinary(query, vectors, offset, dims) {
    // Reading past either input yields `undefined`, which would turn the whole
    // result into NaN and corrupt any sort on the score. Report "no similarity"
    // instead, as cosineSimilarity does for mismatched lengths.
    if (dims <= 0 || offset < 0 || query.length < dims || offset + dims > vectors.length) {
        return 0;
    }
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < dims; i++) {
        const valA = query[i];
        const valB = vectors[offset + i];
        dot += valA * valB;
        normA += valA * valA;
        normB += valB * valB;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom === 0 ? 0 : dot / denom;
}
|
|
44
|
+
// ─── Vector Search ────────────────────────────────────────────────────────────
|
|
45
|
+
/**
 * Nearest-neighbor search of a query embedding against the cached embeddings.
 *
 * When `cache.hnswIndex` is present, the `hnsw` package is loaded, the index
 * is restored from it and queried for max(topK * 2, 20) neighbours with
 * efSearch 64. If anything in that path throws, a warning is logged and the
 * search falls back to scoring every chunk that has a row in `cache.vectors`
 * with cosineSimilarityBinary.
 *
 * @param {number[]} queryEmbedding Embedding of the query; empty → no results.
 * @param {Chunk[]} chunks Chunks eligible to be returned.
 * @param {EmbeddingCache} cache Provides `vectors`, `chunkIds`, `dimensions`
 *   and optionally `hnswIndex`; no results if `vectors` or `chunkIds` is missing.
 * @param {SymbolIndex} symbols Lookup from symbol name to symbol record.
 * @param {number} [topK=10] Maximum number of results.
 * @returns {SearchResult[]} `{ chunk, score, symbol }` entries, best score first.
 */
function vectorSearch(queryEmbedding, chunks, cache, symbols, topK = 10) {
    if (queryEmbedding.length === 0) {
        return [];
    }
    const { vectors, chunkIds, dimensions, hnswIndex } = cache;
    if (!vectors || !chunkIds) {
        return [];
    }
    const byScoreDescending = (left, right) => right.score - left.score;
    const symbolFor = (chunk) => (chunk.symbol ? symbols[chunk.symbol] : undefined);
    if (hnswIndex) {
        try {
            const { HNSW } = require("hnsw");
            const graph = HNSW.fromJSON(hnswIndex);
            // Ask for more neighbours than needed, with a raised efSearch, for better accuracy
            const neighbours = graph.searchKNN(queryEmbedding, Math.max(topK * 2, 20), { efSearch: 64 });
            const chunkById = new Map(chunks.map((c) => [c.id, c]));
            const hits = [];
            for (const neighbour of neighbours) {
                // Neighbour ids index into chunkIds; skip ids with no chunk in `chunks`.
                const chunk = chunkById.get(chunkIds[neighbour.id]);
                if (!chunk) {
                    continue;
                }
                hits.push({ chunk, score: neighbour.score, symbol: symbolFor(chunk) });
            }
            hits.sort(byScoreDescending);
            return hits.slice(0, topK);
        }
        catch (e) {
            console.warn("HNSW search failed, falling back to exhaustive search:", e);
        }
    }
    // Fallback: score each chunk against its row in the contiguous buffer.
    // Map chunkId → row number for fast lookup.
    const rowOf = new Map();
    chunkIds.forEach((id, row) => {
        rowOf.set(id, row);
    });
    const scored = [];
    for (const chunk of chunks) {
        const row = rowOf.get(chunk.id);
        if (row !== undefined) {
            const score = cosineSimilarityBinary(queryEmbedding, vectors, row * dimensions, dimensions);
            scored.push({ chunk, score, symbol: symbolFor(chunk) });
        }
    }
    scored.sort(byScoreDescending);
    return scored.slice(0, topK);
}
|
|
98
|
+
// ─── Symbol Lookup ────────────────────────────────────────────────────────────
|
|
99
|
+
/**
 * Looks the query up directly in the symbol index, without embeddings, and
 * returns the chunks of the symbols whose names match.
 *
 * Each symbol name is compared case-insensitively and gets the score of the
 * first tier that applies:
 *   1.0  — the whole query equals the symbol name
 *   0.95 — one query word (2+ characters) equals the symbol name
 *   0.80 — the symbol name starts with a query word, or a query word starts
 *          with the symbol name
 *
 * Returns [] when nothing matches or when more than 5 symbols match (the
 * latter to avoid false positives on generic names like "connect" or "init"),
 * so the caller can fall through to another search strategy. A matched symbol
 * is only returned if a chunk exists with the same symbol name and file.
 *
 * @param {string} query Free-text query.
 * @param {Chunk[]} chunks Chunks to resolve matched symbols against.
 * @param {SymbolIndex} symbols Symbol records keyed by symbol name.
 * @returns {SearchResult[]} `{ chunk, score, symbol }` entries, best score first.
 */
function symbolLookup(query, chunks, symbols) {
    const q = query.trim().toLowerCase();
    const words = q.split(/[\s\W]+/).filter((w) => w.length >= 2);
    // Score for one lower-cased symbol name, or null when no tier applies.
    const scoreOf = (lower) => {
        if (lower === q) {
            return 1.0;
        }
        if (words.includes(lower)) {
            return 0.95;
        }
        if (words.some((w) => lower.startsWith(w) || w.startsWith(lower))) {
            return 0.8;
        }
        return null;
    };
    const candidates = [];
    for (const name of Object.keys(symbols)) {
        const score = scoreOf(name.toLowerCase());
        if (score !== null) {
            candidates.push({ name, score });
        }
    }
    const tooBroad = candidates.length > 5;
    if (candidates.length === 0 || tooBroad) {
        return [];
    }
    candidates.sort((a, b) => b.score - a.score);
    return candidates.flatMap(({ name, score }) => {
        const sym = symbols[name];
        const chunk = chunks.find((c) => c.symbol === name && c.file === sym.file);
        return chunk ? [{ chunk, score, symbol: sym }] : [];
    });
}
|
|
141
|
+
// ─── Keyword Search ───────────────────────────────────────────────────────────
|
|
142
|
+
/**
 * Term-frequency keyword search; needs no embeddings.
 *
 * The query is lower-cased and split on non-word characters, keeping terms of
 * 3+ characters. For every chunk, each term found in the lower-cased content
 * adds `occurrences / log(content.length + 2)` to the score; the score is
 * doubled when the chunk's symbol name contains any of the terms.
 *
 * @param {string} query Free-text query.
 * @param {Chunk[]} chunks Chunks to score; `chunk.content` is searched.
 * @param {SymbolIndex} symbols Symbol records keyed by symbol name.
 * @param {number} [topK=10] Maximum number of results.
 * @returns {SearchResult[]} `{ chunk, score, symbol }` entries with a positive
 *   score, best first; [] when the query yields no usable terms.
 */
function keywordSearch(query, chunks, symbols, topK = 10) {
    // Keep only meaningful terms (anything shorter than 3 chars is dropped)
    const terms = query
        .toLowerCase()
        .split(/[\s\W]+/)
        .filter((t) => t.length >= 3);
    if (terms.length === 0) {
        return [];
    }
    // Number of non-overlapping occurrences of `needle`, scanning left to right.
    const countOccurrences = (haystack, needle) => {
        let hits = 0;
        for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + needle.length)) {
            hits += 1;
        }
        return hits;
    };
    const scoreChunk = (chunk) => {
        const text = chunk.content.toLowerCase();
        let score = 0;
        for (const term of terms) {
            const hits = countOccurrences(text, term);
            if (hits > 0) {
                // TF divided by log of the content length, to dampen large chunks
                score += hits / Math.log(text.length + 2);
            }
        }
        // Bonus: the chunk's symbol name contains one of the query terms
        if (chunk.symbol) {
            const symbolName = chunk.symbol.toLowerCase();
            if (terms.some((t) => symbolName.includes(t))) {
                score *= 2;
            }
        }
        return { chunk, score, symbol: chunk.symbol ? symbols[chunk.symbol] : undefined };
    };
    return chunks
        .map(scoreChunk)
        .sort((a, b) => b.score - a.score)
        .filter((r) => r.score > 0)
        .slice(0, topK);
}
|
|
185
|
+
//# sourceMappingURL=vector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector.js","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":";;AAcA,4CAeC;AAKD,wDAmBC;AAWD,oCAkEC;AAiBD,oCAmCC;AAUD,sCA8CC;AAvOD,iFAAiF;AAEjF;;;;GAIG;AACH,SAAgB,gBAAgB,CAAC,CAAW,EAAE,CAAW;IACvD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IAEtD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB,CACpC,KAAe,EACf,OAAqB,EACrB,MAAc,EACd,IAAY;IAEZ,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACjC,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QACvB,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC7B,KAAK,IAAI,IAAI,GAAG,IAAI,CAAC;IACvB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC;AACvC,CAAC;AAED,iFAAiF;AAEjF;;;;;;GAMG;AACH,SAAgB,YAAY,CAC1B,cAAwB,EACxB,MAAe,EACf,KAAqB,EACrB,OAAoB,EACpB,IAAI,GAAG,EAAE;IAET,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE3C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IAE3D,IAAI,CAAC,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;YACjC,MAAM,KAAK,GA
AG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACvC,yCAAyC;YACzC,MAAM,WAAW,GAAG,KAAK,CAAC,SAAS,CACjC,cAAc,EACd,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC,EACtB,EAAE,QAAQ,EAAE,EAAE,EAAE,CACjB,CAAC;YAEA,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YACtD,MAAM,YAAY,GAAmB,EAAE,CAAC;YACxC,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACjC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;gBACrC,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;oBAChE,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;YAEH,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAC/C,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,wDAAwD,EAAE,CAAC,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IAED,iEAAiE;IACjE,uCAAuC;IACvC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC5C,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAElD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,SAAS;YAAE,SAAS;QAEhC,MAAM,KAAK,GAAG,sBAAsB,CAClC,cAAc,EACd,OAAQ,EACR,GAAG,GAAG,UAAU,EAChB,UAAU,CACX,CAAC;QACF,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAChE,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC/B,CAAC;AAED,iFAAiF;AAEjF;;;;;;;;;;;;GAYG;AACH,SAAgB,YAAY,CAC1B,KAAa,EACb,MAAe,EACf,OAAoB;IAEpB,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CA
AC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAE9D,MAAM,UAAU,GAA2C,EAAE,CAAC;IAE9D,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACxC,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;YAC1C,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;YACzE,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAEhE,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,UAAU,EAAE,CAAC;QACzC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC1B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,CAAC;QAC3E,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,iFAAiF;AAEjF;;;;;GAKG;AACH,SAAgB,aAAa,CAC3B,KAAa,EACb,MAAe,EACf,OAAoB,EACpB,IAAI,GAAG,EAAE;IAET,2EAA2E;IAC3E,MAAM,KAAK,GAAG,KAAK;SAChB,WAAW,EAAE;SACb,KAAK,CAAC,SAAS,CAAC;SAChB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAEhC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,oBAAoB;YACpB,IAAI,GAAG,GAAG,CAAC,CAAC;YACZ,IAAI,KAAK,GAAG,CAAC,CAAC;YACd,OAAO,CAAC,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CA
AC,EAAE,CAAC;gBACjD,KAAK,EAAE,CAAC;gBACR,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC;YACrB,CAAC;YACD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,qEAAqE;gBACrE,KAAK,IAAI,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC5C,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC5C,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAChE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAC1D,CAAC"}
|