@unlimiting/qsc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunker/ast.d.ts +7 -0
- package/dist/chunker/ast.d.ts.map +1 -0
- package/dist/chunker/ast.js +302 -0
- package/dist/chunker/ast.js.map +1 -0
- package/dist/chunker/index.d.ts +15 -0
- package/dist/chunker/index.d.ts.map +1 -0
- package/dist/chunker/index.js +26 -0
- package/dist/chunker/index.js.map +1 -0
- package/dist/chunker/languages/dart.d.ts +3 -0
- package/dist/chunker/languages/dart.d.ts.map +1 -0
- package/dist/chunker/languages/dart.js +22 -0
- package/dist/chunker/languages/dart.js.map +1 -0
- package/dist/chunker/languages/go.d.ts +3 -0
- package/dist/chunker/languages/go.d.ts.map +1 -0
- package/dist/chunker/languages/go.js +20 -0
- package/dist/chunker/languages/go.js.map +1 -0
- package/dist/chunker/languages/index.d.ts +12 -0
- package/dist/chunker/languages/index.d.ts.map +1 -0
- package/dist/chunker/languages/index.js +35 -0
- package/dist/chunker/languages/index.js.map +1 -0
- package/dist/chunker/languages/kotlin.d.ts +3 -0
- package/dist/chunker/languages/kotlin.d.ts.map +1 -0
- package/dist/chunker/languages/kotlin.js +23 -0
- package/dist/chunker/languages/kotlin.js.map +1 -0
- package/dist/chunker/languages/python.d.ts +3 -0
- package/dist/chunker/languages/python.d.ts.map +1 -0
- package/dist/chunker/languages/python.js +21 -0
- package/dist/chunker/languages/python.js.map +1 -0
- package/dist/chunker/languages/swift.d.ts +3 -0
- package/dist/chunker/languages/swift.d.ts.map +1 -0
- package/dist/chunker/languages/swift.js +24 -0
- package/dist/chunker/languages/swift.js.map +1 -0
- package/dist/chunker/languages/typescript.d.ts +4 -0
- package/dist/chunker/languages/typescript.d.ts.map +1 -0
- package/dist/chunker/languages/typescript.js +34 -0
- package/dist/chunker/languages/typescript.js.map +1 -0
- package/dist/chunker/token.d.ts +6 -0
- package/dist/chunker/token.d.ts.map +1 -0
- package/dist/chunker/token.js +107 -0
- package/dist/chunker/token.js.map +1 -0
- package/dist/collection.d.ts +22 -0
- package/dist/collection.d.ts.map +1 -0
- package/dist/collection.js +154 -0
- package/dist/collection.js.map +1 -0
- package/dist/config/index.d.ts +95 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +103 -0
- package/dist/config/index.js.map +1 -0
- package/dist/embedder/index.d.ts +14 -0
- package/dist/embedder/index.d.ts.map +1 -0
- package/dist/embedder/index.js +18 -0
- package/dist/embedder/index.js.map +1 -0
- package/dist/embedder/local.d.ts +11 -0
- package/dist/embedder/local.d.ts.map +1 -0
- package/dist/embedder/local.js +60 -0
- package/dist/embedder/local.js.map +1 -0
- package/dist/embedder/openai.d.ts +10 -0
- package/dist/embedder/openai.d.ts.map +1 -0
- package/dist/embedder/openai.js +69 -0
- package/dist/embedder/openai.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +824 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/index.d.ts +17 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +18 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/local.d.ts +10 -0
- package/dist/llm/local.d.ts.map +1 -0
- package/dist/llm/local.js +76 -0
- package/dist/llm/local.js.map +1 -0
- package/dist/llm/openai.d.ts +10 -0
- package/dist/llm/openai.d.ts.map +1 -0
- package/dist/llm/openai.js +76 -0
- package/dist/llm/openai.js.map +1 -0
- package/dist/mcp.d.ts +3 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +393 -0
- package/dist/mcp.js.map +1 -0
- package/dist/scanner/git.d.ts +26 -0
- package/dist/scanner/git.d.ts.map +1 -0
- package/dist/scanner/git.js +134 -0
- package/dist/scanner/git.js.map +1 -0
- package/dist/scanner/index.d.ts +17 -0
- package/dist/scanner/index.d.ts.map +1 -0
- package/dist/scanner/index.js +174 -0
- package/dist/scanner/index.js.map +1 -0
- package/dist/search/bm25.d.ts +17 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +27 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/expander.d.ts +12 -0
- package/dist/search/expander.d.ts.map +1 -0
- package/dist/search/expander.js +60 -0
- package/dist/search/expander.js.map +1 -0
- package/dist/search/fusion.d.ts +32 -0
- package/dist/search/fusion.d.ts.map +1 -0
- package/dist/search/fusion.js +80 -0
- package/dist/search/fusion.js.map +1 -0
- package/dist/search/index.d.ts +61 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +137 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/reranker.d.ts +18 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +56 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/vector.d.ts +23 -0
- package/dist/search/vector.d.ts.map +1 -0
- package/dist/search/vector.js +47 -0
- package/dist/search/vector.js.map +1 -0
- package/dist/store.d.ts +119 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +500 -0
- package/dist/store.js.map +1 -0
- package/package.json +48 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFileSync, statSync } from "node:fs";
|
|
3
|
+
import { resolve, relative, extname } from "node:path";
|
|
4
|
+
import fg from "fast-glob";
|
|
5
|
+
// --- Language detection ---
|
|
6
|
+
/**
 * Maps a file extension (lowercase, including the leading dot) to a language id.
 *
 * detectLanguage lowercases the extension before the lookup, so only lowercase
 * keys can ever match; uppercase variants such as ".R" resolve through ".r".
 */
const EXTENSION_LANGUAGE_MAP = {
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".py": "python",
    ".pyi": "python",
    ".go": "go",
    ".dart": "dart",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".swift": "swift",
    ".java": "java",
    ".rs": "rust",
    ".rb": "ruby",
    ".php": "php",
    ".c": "c",
    ".h": "c",
    ".cpp": "cpp",
    ".hpp": "cpp",
    ".cc": "cpp",
    ".cs": "csharp",
    ".scala": "scala",
    ".lua": "lua",
    ".sh": "shell",
    ".bash": "shell",
    ".zsh": "shell",
    ".sql": "sql",
    ".html": "html",
    ".htm": "html",
    ".css": "css",
    ".scss": "scss",
    ".less": "less",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
    ".xml": "xml",
    ".md": "markdown",
    ".mdx": "markdown",
    ".vue": "vue",
    ".svelte": "svelte",
    ".astro": "astro",
    ".graphql": "graphql",
    ".gql": "graphql",
    ".proto": "protobuf",
    ".r": "r",
    ".ex": "elixir",
    ".exs": "elixir",
    ".erl": "erlang",
    ".hs": "haskell",
    ".ml": "ocaml",
    ".mli": "ocaml",
    ".clj": "clojure",
    ".tf": "terraform",
    ".dockerfile": "dockerfile",
};
/**
 * Detect the language of a file from its path.
 *
 * The (case-insensitive) extension is checked first; if it is unknown, a few
 * well-known extension-less file names (Dockerfile, Makefile) are recognized.
 *
 * @param {string} filePath - Relative or absolute path; both "/" and "\" are
 *   accepted as directory separators.
 * @returns {string | undefined} The language id, or undefined when unknown.
 */
export function detectLanguage(filePath) {
    const ext = extname(filePath).toLowerCase();
    if (ext && EXTENSION_LANGUAGE_MAP[ext]) {
        return EXTENSION_LANGUAGE_MAP[ext];
    }
    // Check for special filenames. Split on both separators: path.relative()
    // yields backslash-separated paths on Windows, and splitting on "/" alone
    // would leave the directory part glued to the base name.
    const base = filePath.split(/[\\/]/).pop() ?? "";
    if (base === "Dockerfile" || base.startsWith("Dockerfile.")) {
        return "dockerfile";
    }
    if (base === "Makefile" || base === "GNUmakefile") {
        return "makefile";
    }
    return undefined;
}
|
|
79
|
+
// --- Binary detection ---
|
|
80
|
+
const BINARY_EXTENSIONS = new Set([
|
|
81
|
+
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".avif",
|
|
82
|
+
".mp3", ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", ".wav", ".ogg", ".flac",
|
|
83
|
+
".zip", ".gz", ".tar", ".bz2", ".7z", ".rar", ".xz", ".zst",
|
|
84
|
+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
|
|
85
|
+
".exe", ".dll", ".so", ".dylib", ".a", ".o", ".obj", ".lib",
|
|
86
|
+
".wasm", ".class", ".pyc", ".pyo",
|
|
87
|
+
".ttf", ".otf", ".woff", ".woff2", ".eot",
|
|
88
|
+
".sqlite", ".db", ".sqlite3",
|
|
89
|
+
".bin", ".dat", ".iso", ".img",
|
|
90
|
+
".node", ".map",
|
|
91
|
+
]);
|
|
92
|
+
function isBinaryExtension(filePath) {
|
|
93
|
+
const ext = extname(filePath).toLowerCase();
|
|
94
|
+
return BINARY_EXTENSIONS.has(ext);
|
|
95
|
+
}
|
|
96
|
+
/**
 * Heuristic binary sniffing: a file counts as binary when a zero byte occurs
 * within its first 8 KiB. Bytes past that window are never inspected.
 *
 * @param buffer - Indexable byte container with a `length` (e.g. a Buffer).
 * @returns {boolean} true if a null byte was found in the inspected prefix.
 */
function isBinaryContent(buffer) {
    const sniffEnd = Math.min(buffer.length, 8192);
    let offset = 0;
    while (offset < sniffEnd) {
        if (buffer[offset] === 0) {
            return true;
        }
        offset += 1;
    }
    return false;
}
|
|
105
|
+
// --- Hashing ---
|
|
106
|
+
/**
 * Compute the SHA-256 digest of the given content.
 *
 * @param content - Data accepted by `Hash.update` (string or Buffer).
 * @returns {string} The digest as a lowercase hex string.
 */
export function hashContent(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
|
|
109
|
+
// --- Scanner ---
|
|
110
|
+
/**
 * Walk a repository and collect metadata for every indexable text file.
 *
 * A file is skipped when its extension is on the binary list, when it cannot
 * be stat'ed or read, when it is empty or larger than `config.max_file_size`,
 * or when its leading bytes contain a null byte.
 *
 * @param {string} repoPath - Repository root; resolved to an absolute path.
 * @param config - Reads `max_file_size` (bytes) and `exclude` (glob ignore patterns).
 * @returns An object with `files` (sorted by relative path), `repoRoot`
 *   (the absolute root) and `totalSize` (sum of the kept files' sizes).
 */
export async function scanRepository(repoPath, config) {
    const absoluteRoot = resolve(repoPath);
    const maxFileSize = config.max_file_size;
    const excludePatterns = config.exclude;
    // Dotfiles are not matched and symlinks are not followed.
    // NOTE(review): no .gitignore-specific option is passed here; exclusions
    // come only from config.exclude — confirm whether .gitignore is handled elsewhere.
    const globOptions = {
        cwd: absoluteRoot,
        dot: false,
        onlyFiles: true,
        followSymbolicLinks: false,
        ignore: excludePatterns,
        absolute: true,
        suppressErrors: true,
        concurrency: 64,
    };
    const entries = await fg("**/*", globOptions);
    // Runs a filesystem call and turns any thrown error into `undefined`,
    // so unreadable files are silently skipped.
    const attempt = (fn) => {
        try {
            return fn();
        }
        catch {
            return undefined;
        }
    };
    const files = [];
    let totalSize = 0;
    for (const absolutePath of entries) {
        // Cheap rejection by extension before touching the filesystem
        if (isBinaryExtension(absolutePath)) {
            continue;
        }
        const stat = attempt(() => statSync(absolutePath));
        if (stat === undefined) {
            continue;
        }
        // Empty and oversized files are not indexed
        if (stat.size === 0 || stat.size > maxFileSize) {
            continue;
        }
        const content = attempt(() => readFileSync(absolutePath));
        if (content === undefined) {
            continue;
        }
        // Null-byte sniffing catches binaries with unlisted extensions
        if (isBinaryContent(content)) {
            continue;
        }
        const relPath = relative(absoluteRoot, absolutePath);
        files.push({
            path: relPath,
            absolutePath,
            hash: hashContent(content),
            size: stat.size,
            language: detectLanguage(relPath),
        });
        totalSize += stat.size;
    }
    // Stable ordering by relative path
    files.sort((left, right) => left.path.localeCompare(right.path));
    return { files, repoRoot: absoluteRoot, totalSize };
}
|
|
174
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/scanner/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACvD,OAAO,EAAE,MAAM,WAAW,CAAC;AAmB3B,6BAA6B;AAE7B,MAAM,sBAAsB,GAA2B;IACrD,KAAK,EAAE,YAAY;IACnB,MAAM,EAAE,YAAY;IACpB,KAAK,EAAE,YAAY;IACnB,MAAM,EAAE,YAAY;IACpB,MAAM,EAAE,YAAY;IACpB,MAAM,EAAE,YAAY;IACpB,KAAK,EAAE,QAAQ;IACf,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,IAAI;IACX,OAAO,EAAE,MAAM;IACf,KAAK,EAAE,QAAQ;IACf,MAAM,EAAE,QAAQ;IAChB,QAAQ,EAAE,OAAO;IACjB,OAAO,EAAE,MAAM;IACf,KAAK,EAAE,MAAM;IACb,KAAK,EAAE,MAAM;IACb,MAAM,EAAE,KAAK;IACb,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,KAAK,EAAE,KAAK;IACZ,KAAK,EAAE,QAAQ;IACf,QAAQ,EAAE,OAAO;IACjB,MAAM,EAAE,KAAK;IACb,KAAK,EAAE,OAAO;IACd,OAAO,EAAE,OAAO;IAChB,MAAM,EAAE,OAAO;IACf,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,MAAM;IACd,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,MAAM;IACd,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,KAAK,EAAE,UAAU;IACjB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,KAAK;IACb,SAAS,EAAE,QAAQ;IACnB,QAAQ,EAAE,OAAO;IACjB,UAAU,EAAE,SAAS;IACrB,MAAM,EAAE,SAAS;IACjB,QAAQ,EAAE,UAAU;IACpB,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;IACT,KAAK,EAAE,QAAQ;IACf,MAAM,EAAE,QAAQ;IAChB,MAAM,EAAE,QAAQ;IAChB,KAAK,EAAE,SAAS;IAChB,KAAK,EAAE,OAAO;IACd,MAAM,EAAE,OAAO;IACf,MAAM,EAAE,SAAS;IACjB,KAAK,EAAE,WAAW;IAClB,aAAa,EAAE,YAAY;CAC5B,CAAC;AAEF,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,IAAI,GAAG,IAAI,sBAAsB,CAAC,GAAG,CAAC,EAAE,CAAC;QACvC,OAAO,sBAAsB,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC;IACD,8BAA8B;IAC9B,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;IAC7C,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,YAAY,CAAC;IACjF,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,aAAa;QAAE,OAAO,UAAU,CAAC;IACrE,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,2B
AA2B;AAE3B,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC;IAChC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IACzE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAC/E,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAC3D,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO;IACzD,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM;IAC3D,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM;IACjC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM;IACzC,SAAS,EAAE,KAAK,EAAE,UAAU;IAC5B,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IAC9B,OAAO,EAAE,MAAM;CAChB,CAAC,CAAC;AAEH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,2DAA2D;IAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;IACnC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,kBAAkB;AAElB,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED,kBAAkB;AAElB,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAAgB,EAChB,MAAqB;IAErB,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,WAAW,GAAG,MAAM,CAAC,aAAa,CAAC;IACzC,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC;IAEvC,yDAAyD;IACzD,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE;QAC/B,GAAG,EAAE,YAAY;QACjB,GAAG,EAAE,KAAK;QACV,SAAS,EAAE,IAAI;QACf,mBAAmB,EAAE,KAAK;QAC1B,MAAM,EAAE,eAAe;QACvB,QAAQ,EAAE,IAAI;QACd,cAAc,EAAE,IAAI;QACpB,WAAW,EAAE,EAAE;KAChB,CAAC,CAAC;IAEH,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,YAAY,IAAI,OAAO,EAAE,CAAC;QACnC,iCAAiC;QACjC,IAAI,iBAAiB,CAAC,YAAY,CAAC;YAAE,SAAS;QAE9C,kBAAkB;QAClB,IAAI,IAAI,CAAC;QACT,IAAI,CAAC;YACH,IAAI,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,2BAA2B;QACvC,CAAC;QAED,IA
AI,IAAI,CAAC,IAAI,GAAG,WAAW;YAAE,SAAS;QACtC,IAAI,IAAI,CAAC,IAAI,KAAK,CAAC;YAAE,SAAS;QAE9B,oBAAoB;QACpB,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACH,OAAO,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,SAAS,CAAC,2BAA2B;QACvC,CAAC;QAED,yCAAyC;QACzC,IAAI,eAAe,CAAC,OAAO,CAAC;YAAE,SAAS;QAEvC,MAAM,OAAO,GAAG,QAAQ,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;QACrD,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QAEzC,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,OAAO;YACb,YAAY;YACZ,IAAI;YACJ,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,QAAQ;SACT,CAAC,CAAC;QAEH,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC;IACzB,CAAC;IAED,wCAAwC;IACxC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAEnD,OAAO;QACL,KAAK;QACL,QAAQ,EAAE,YAAY;QACtB,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Store } from "../store.js";
|
|
2
|
+
/**
 * One hit returned by searchBM25: the store's snake_case row fields
 * (chunk_id, file_id, file_path, ...) re-exposed under camelCase names.
 */
export interface BM25SearchResult {
    chunkId: number;
    fileId: number;
    filePath: string;
    content: string;
    startLine: number | null;
    endLine: number | null;
    chunkType: string | null;
    name: string | null;
    /** Normalized FTS5 BM25 rank, |rank| / (1 + |rank|); higher is better. */
    score: number;
}
|
|
13
|
+
/**
 * Perform BM25 full-text search via Store.searchBM25.
 *
 * @param store - Store whose `searchBM25(query, limit)` supplies the raw rows.
 * @param query - Search text, passed through to the store unchanged.
 * @param limit - Maximum number of rows requested from the store (default 20).
 * @returns The rows mapped to camelCase fields, with a normalized `score`.
 */
export declare function searchBM25(store: Store, query: string, limit?: number): BM25SearchResult[];
|
|
17
|
+
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAc,MAAM,aAAa,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAYD;;GAEG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,KAAK,EACZ,KAAK,EAAE,MAAM,EACb,KAAK,SAAK,GACT,gBAAgB,EAAE,CAapB"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Map an FTS5 BM25 rank onto a bounded relevance score.
 *
 * FTS5 reports BM25 ranks as negative numbers where a more negative value is
 * a stronger match, so the magnitude is used: score = |rank| / (1 + |rank|).
 * The result grows with the magnitude and stays below 1; a rank of 0 gives 0.
 *
 * @param {number} rank - Raw rank from the store.
 * @returns {number} Score where higher is better.
 */
function normalizeBM25Rank(rank) {
    const magnitude = Math.abs(rank);
    return magnitude / (magnitude + 1);
}
|
|
10
|
+
/**
 * Run a BM25 full-text query against the store and reshape the rows.
 *
 * Delegates to `store.searchBM25(query, limit)`, renames the snake_case row
 * fields to camelCase, and replaces the raw rank with a normalized score.
 *
 * @param store - Object exposing `searchBM25(query, limit)`.
 * @param {string} query - Search text, forwarded unchanged.
 * @param {number} [limit=20] - Maximum number of rows to request.
 * @returns {Array} One result object per row, in the store's order.
 */
export function searchBM25(store, query, limit = 20) {
    const rows = store.searchBM25(query, limit);
    return rows.map(({ chunk_id, file_id, file_path, content, start_line, end_line, chunk_type, name, rank }) => ({
        chunkId: chunk_id,
        fileId: file_id,
        filePath: file_path,
        content,
        startLine: start_line,
        endLine: end_line,
        chunkType: chunk_type,
        name,
        score: normalizeBM25Rank(rank),
    }));
}
|
|
27
|
+
//# sourceMappingURL=bm25.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAcA;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,OAAO,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CACxB,KAAY,EACZ,KAAa,EACb,KAAK,GAAG,EAAE;IAEV,MAAM,GAAG,GAAiB,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACzD,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrB,OAAO,EAAE,CAAC,CAAC,QAAQ;QACnB,MAAM,EAAE,CAAC,CAAC,OAAO;QACjB,QAAQ,EAAE,CAAC,CAAC,SAAS;QACrB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,SAAS,EAAE,CAAC,CAAC,UAAU;QACvB,OAAO,EAAE,CAAC,CAAC,QAAQ;QACnB,SAAS,EAAE,CAAC,CAAC,UAAU;QACvB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,KAAK,EAAE,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC;KACjC,CAAC,CAAC,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { LLMProvider } from "../llm/index.js";
|
|
2
|
+
/**
 * Kind of expanded query: "lex" (keyword terms), "vec" (semantic rephrasing)
 * or "hyde" (hypothetical code snippet), as requested in the expansion prompt.
 */
export type ExpandedQueryType = "lex" | "vec" | "hyde";
/** A single query variant produced by expandQuery. */
export interface ExpandedQuery {
    type: ExpandedQueryType;
    /** Variant text; the parser trims it and drops empty entries. */
    text: string;
}
|
|
7
|
+
/**
 * Expand a search query using an LLM to generate lexical, semantic, and HyDE variants.
 * If no LLM is provided, returns an empty array (caller uses original query only).
 *
 * @param query - The user's original search text.
 * @param llm - Optional provider; its `generate` method is called with the prompt.
 * @returns The validated variants, excluding any whose text equals the trimmed
 *   original query; resolves to an empty array if the LLM call or parsing fails.
 */
export declare function expandQuery(query: string, llm?: LLMProvider): Promise<ExpandedQuery[]>;
|
|
12
|
+
//# sourceMappingURL=expander.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"expander.d.ts","sourceRoot":"","sources":["../../src/search/expander.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,MAAM,MAAM,iBAAiB,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,CAAC;AAEvD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;CACd;AAcD;;;GAGG;AACH,wBAAsB,WAAW,CAC/B,KAAK,EAAE,MAAM,EACb,GAAG,CAAC,EAAE,WAAW,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAgB1B"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
const EXPANSION_PROMPT = `You are a code search query expander. Given a user query about source code, generate expanded queries for better search coverage.
|
|
2
|
+
|
|
3
|
+
For each query, produce exactly 3 variations:
|
|
4
|
+
1. "lex": Extract key technical terms, function/class names, and synonyms for keyword search.
|
|
5
|
+
2. "vec": Rephrase semantically to capture the intent, suitable for embedding-based search.
|
|
6
|
+
3. "hyde": Write a short hypothetical code snippet (2-5 lines) that would match the query.
|
|
7
|
+
|
|
8
|
+
Respond ONLY with valid JSON array, no markdown fencing:
|
|
9
|
+
[{"type":"lex","text":"..."},{"type":"vec","text":"..."},{"type":"hyde","text":"..."}]
|
|
10
|
+
|
|
11
|
+
User query: `;
|
|
12
|
+
/**
 * Ask an LLM for alternative phrasings of a search query.
 *
 * The prompt requests three variants ("lex", "vec", "hyde"). Variants whose
 * text equals the trimmed original query are dropped. Expansion is
 * best-effort: without an LLM, or when the call or parsing throws, the result
 * is an empty array and the caller falls back to the original query.
 *
 * @param {string} query - Original search text.
 * @param [llm] - Provider exposing `generate(prompt, options)`.
 * @returns {Promise<Array<{type: string, text: string}>>} Validated variants.
 */
export async function expandQuery(query, llm) {
    if (!llm) {
        return [];
    }
    const generationOptions = { temperature: 0.3, maxTokens: 512 };
    try {
        const rawReply = await llm.generate(EXPANSION_PROMPT + query, generationOptions);
        const variants = parseExpansionResponse(rawReply);
        // An expansion identical to the input adds nothing to the search
        const original = query.trim();
        return variants.filter(({ text }) => text.trim() !== original);
    }
    catch {
        // Deliberate best-effort: any LLM failure means "no expansion"
        return [];
    }
}
|
|
33
|
+
/**
 * Locate and parse a JSON array embedded in free-form text.
 *
 * The first candidate is the span from the first "[" to the last "]" (the
 * same span a greedy match would pick). If that is not valid JSON — for
 * example because the surrounding prose contains brackets of its own —
 * narrower spans are tried: start positions left to right, end positions
 * right to left. The first span that parses to an array wins.
 *
 * @param {string} text - Raw LLM output.
 * @returns {Array | undefined} The parsed array, or undefined if none is found.
 */
function extractJsonArray(text) {
    // LLM output is untrusted; bound the work on bracket-heavy replies.
    const maxAttempts = 64;
    const starts = [];
    const ends = [];
    for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        if (ch === "[") {
            starts.push(i);
        }
        else if (ch === "]") {
            ends.push(i);
        }
    }
    let attempts = 0;
    for (const start of starts) {
        for (let e = ends.length - 1; e >= 0 && ends[e] > start; e--) {
            if (attempts >= maxAttempts) {
                return undefined;
            }
            attempts += 1;
            try {
                const value = JSON.parse(text.slice(start, ends[e] + 1));
                if (Array.isArray(value)) {
                    return value;
                }
            }
            catch {
                // This span is not valid JSON; try the next candidate.
            }
        }
    }
    return undefined;
}
/**
 * Turn the LLM's reply into validated expansion entries.
 *
 * Tolerates markdown fencing and prose around the JSON array. Entries are kept
 * only if `type` is "lex", "vec" or "hyde" and `text` is a non-blank string;
 * the text is trimmed. Never throws: unusable input yields an empty array.
 *
 * @param {string} response - Raw text returned by the LLM.
 * @returns {Array<{type: string, text: string}>} Valid entries in reply order.
 */
function parseExpansionResponse(response) {
    if (typeof response !== "string") {
        return [];
    }
    const arr = extractJsonArray(response);
    if (arr === undefined) {
        return [];
    }
    const results = [];
    for (const item of arr) {
        if (typeof item !== "object" || item === null) {
            continue;
        }
        const { type, text } = item;
        if (type !== "lex" && type !== "vec" && type !== "hyde") {
            continue;
        }
        if (typeof text !== "string") {
            continue;
        }
        const trimmed = text.trim();
        if (trimmed.length > 0) {
            results.push({ type, text: trimmed });
        }
    }
    return results;
}
|
|
60
|
+
//# sourceMappingURL=expander.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"expander.js","sourceRoot":"","sources":["../../src/search/expander.ts"],"names":[],"mappings":"AASA,MAAM,gBAAgB,GAAG;;;;;;;;;;aAUZ,CAAC;AAEd;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAa,EACb,GAAiB;IAEjB,IAAI,CAAC,GAAG;QAAE,OAAO,EAAE,CAAC;IAEpB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,gBAAgB,GAAG,KAAK,EAAE;YAC5D,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;QAChD,uDAAuD;QACvD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,iDAAiD;QACjD,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,sBAAsB,CAAC,QAAgB;IAC9C,IAAI,CAAC;QACH,8EAA8E;QAC9E,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAChD,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAE1B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAc,CAAC;QAClD,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;YACvB,IACE,OAAO,IAAI,KAAK,QAAQ;gBACxB,IAAI,KAAK,IAAI;gBACb,MAAM,IAAI,IAAI;gBACd,MAAM,IAAI,IAAI,EACd,CAAC;gBACD,MAAM,KAAK,GAAG,IAAsC,CAAC;gBACrD,IACE,CAAC,KAAK,CAAC,IAAI,KAAK,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;oBACvE,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ;oBAC9B,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAC5B,CAAC;oBACD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBAC9D,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { BM25SearchResult } from "./bm25.js";
|
|
2
|
+
import type { VectorSearchResult } from "./vector.js";
|
|
3
|
+
/**
 * A chunk after Reciprocal Rank Fusion. The descriptive fields are copied from
 * the first result seen for the chunk id.
 */
export interface FusedResult {
    chunkId: number;
    fileId: number;
    filePath: string;
    content: string;
    startLine: number | null;
    endLine: number | null;
    chunkType: string | null;
    name: string | null;
    /** Accumulated RRF score; same value as `scores.rrf`. */
    score: number;
    scores: {
        /** Best BM25 score across all BM25 lists containing the chunk, if any. */
        bm25?: number;
        /** Best vector score across all vector lists containing the chunk, if any. */
        vector?: number;
        rrf: number;
    };
}
|
|
19
|
+
/**
 * Reciprocal Rank Fusion (RRF) combining BM25 and vector search results.
 *
 * RRF score = sum of 1/(k + rank_i) across all ranked lists the item appears in.
 * Default k=60 (standard in literature).
 *
 * Items from the original query get 2x weight. Duplicates are merged by chunkId.
 *
 * @param bm25Results - Ranked BM25 lists; the first list is weighted by `originalWeight`.
 * @param vectorResults - Ranked vector lists; the first list is weighted by `originalWeight`.
 * @param options - `k` (default 60), `limit` on returned items (default 20),
 *   `originalWeight` for the first list of each kind (default 2).
 * @returns Fused results sorted by RRF score, highest first.
 */
export declare function reciprocalRankFusion(bm25Results: BM25SearchResult[][], vectorResults: VectorSearchResult[][], options?: {
    k?: number;
    limit?: number;
    originalWeight?: number;
}): FusedResult[];
|
|
32
|
+
//# sourceMappingURL=fusion.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fusion.d.ts","sourceRoot":"","sources":["../../src/search/fusion.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAClD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEtD,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE;QACN,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,GAAG,EAAE,MAAM,CAAC;KACb,CAAC;CACH;AAeD;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,WAAW,EAAE,gBAAgB,EAAE,EAAE,EACjC,aAAa,EAAE,kBAAkB,EAAE,EAAE,EACrC,OAAO,GAAE;IAAE,CAAC,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAA;CAAO,GACpE,WAAW,EAAE,CA+Ef"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
 * Merge several ranked result lists with Reciprocal Rank Fusion.
 *
 * Every appearance of a chunk at zero-based position `rank` in a list adds
 * weight / (k + rank + 1) to that chunk's fused score. The first list of each
 * kind (index 0) is weighted by `originalWeight`; all later lists use weight 1.
 * Results are merged by chunkId, keeping the descriptive fields of the first
 * occurrence and the highest BM25 / vector score seen.
 *
 * @param {Array<Array>} bm25Results - Ranked BM25 lists.
 * @param {Array<Array>} vectorResults - Ranked vector lists.
 * @param {{k?: number, limit?: number, originalWeight?: number}} [options] -
 *   Defaults: k = 60, limit = 20, originalWeight = 2.
 * @returns {Array} Up to `limit` fused results, highest fused score first.
 */
export function reciprocalRankFusion(bm25Results, vectorResults, options = {}) {
    const smoothing = options.k ?? 60;
    const maxResults = options.limit ?? 20;
    const primaryWeight = options.originalWeight ?? 2;
    // chunkId -> accumulator holding the chunk's fields and running scores
    const byChunk = new Map();
    // Folds one family of ranked lists into the accumulators; `scoreKey` names
    // the accumulator field that tracks the best raw score for that family.
    const accumulate = (rankedLists, scoreKey) => {
        for (const [listIdx, ranked] of rankedLists.entries()) {
            const weight = listIdx === 0 ? primaryWeight : 1;
            for (const [rank, hit] of ranked.entries()) {
                let entry = byChunk.get(hit.chunkId);
                if (entry === undefined) {
                    entry = {
                        chunkId: hit.chunkId,
                        fileId: hit.fileId,
                        filePath: hit.filePath,
                        content: hit.content,
                        startLine: hit.startLine,
                        endLine: hit.endLine,
                        chunkType: hit.chunkType,
                        name: hit.name,
                        rrfScore: 0,
                    };
                    byChunk.set(hit.chunkId, entry);
                }
                entry.rrfScore += weight / (smoothing + rank + 1);
                const best = entry[scoreKey];
                if (best === undefined || hit.score > best) {
                    entry[scoreKey] = hit.score;
                }
            }
        }
    };
    // BM25 lists are processed before vector lists; this fixes which
    // occurrence supplies a chunk's descriptive fields.
    accumulate(bm25Results, "bm25Score");
    accumulate(vectorResults, "vectorScore");
    const ranked = [...byChunk.values()];
    ranked.sort((left, right) => right.rrfScore - left.rrfScore);
    const top = ranked.slice(0, maxResults);
    return top.map(({ rrfScore, bm25Score, vectorScore, ...fields }) => ({
        ...fields,
        score: rrfScore,
        scores: {
            bm25: bm25Score,
            vector: vectorScore,
            rrf: rrfScore,
        },
    }));
}
|
|
80
|
+
//# sourceMappingURL=fusion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fusion.js","sourceRoot":"","sources":["../../src/search/fusion.ts"],"names":[],"mappings":"AAiCA;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAClC,WAAiC,EACjC,aAAqC,EACrC,UAAmE,EAAE;IAErE,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC1B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,CAAC,CAAC;IAEnD,kCAAkC;IAClC,MAAM,KAAK,GAAG,IAAI,GAAG,EAA6C,CAAC;IAEnE,SAAS,UAAU,CACjB,OAAe,EACf,MAA6C;QAE7C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACxB,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE;gBACjB,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,QAAQ,EAAE,CAAC;aACZ,CAAC,CAAC;QACL,CAAC;QACD,OAAO,KAAK,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC;IAC7B,CAAC;IAED,4BAA4B;IAC5B,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC;QAC9D,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;YAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YACtC,IAAI,CAAC,QAAQ,IAAI,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;YACzC,uBAAuB;YACvB,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS,IAAI,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;gBAC7D,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC;YAC3B,CAAC;QACH,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,aAAa,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC;QAChE,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,MAAM,GAAG,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;YAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YACtC,IAAI,C
AAC,QAAQ,IAAI,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;YACzC,yBAAyB;YACzB,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,IAAI,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;gBACjE,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC;YAC7B,CAAC;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;SACtC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;SACvC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAEnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC3B,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,KAAK,EAAE,IAAI,CAAC,QAAQ;QACpB,MAAM,EAAE;YACN,IAAI,EAAE,IAAI,CAAC,SAAS;YACpB,MAAM,EAAE,IAAI,CAAC,WAAW;YACxB,GAAG,EAAE,IAAI,CAAC,QAAQ;SACnB;KACF,CAAC,CAAC,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type { Store } from "../store.js";
|
|
2
|
+
import type { Embedder } from "../embedder/index.js";
|
|
3
|
+
import type { LLMProvider } from "../llm/index.js";
|
|
4
|
+
/**
 * Per-stage timings attached to a search response when
 * `SearchOptions.benchmark` is enabled. Each value is the difference of two
 * `performance.now()` readings (milliseconds).
 */
export interface SearchTiming {
    /** Duration of the whole `search()` call, measured up to result conversion. */
    total: number;
    /** Query-expansion stage; set only when expansion actually ran. */
    expand?: number;
    /** All BM25 lookups (original query plus expanded lexical queries). */
    bm25?: number;
    /** Batch embedding plus all vector lookups. */
    vector?: number;
    /** Reciprocal-rank-fusion stage. */
    fusion?: number;
    /** LLM reranking stage; set only when reranking actually ran. */
    rerank?: number;
}
|
|
12
|
+
/** Per-call options accepted by `SearchPipeline.search`. */
export interface SearchOptions {
    /** Which retrievers run: BM25 only, vector only, or both ("hybrid"). */
    mode: "bm25" | "vector" | "hybrid";
    /** Maximum number of results returned; the implementation falls back to 20. */
    limit?: number;
    /** Request LLM query expansion; only honoured when the pipeline was created with an `llm`. */
    expand?: boolean;
    /** Request LLM reranking; only honoured when the pipeline was created with an `llm`. */
    rerank?: boolean;
    /** Forwarded as the `k` option of the reciprocal-rank-fusion step. */
    rrfK?: number;
    /** When true, the response carries `timing` and `counts`. */
    benchmark?: boolean;
}
|
|
20
|
+
/** One ranked chunk returned by `SearchPipeline.search`. */
export interface SearchResult {
    /** Identifier of the matched chunk. */
    chunkId: number;
    /** Identifier of the file the chunk belongs to. */
    fileId: number;
    /** Path of the file the chunk belongs to. */
    filePath: string;
    /** Text content of the chunk. */
    content: string;
    /** First line of the chunk, or null. NOTE(review): line-number base is not visible here — confirm. */
    startLine: number | null;
    /** Last line of the chunk, or null. */
    endLine: number | null;
    /** Kind of chunk, or null. NOTE(review): the set of possible values is defined by the chunker — confirm. */
    chunkType: string | null;
    /** Name associated with the chunk, or null. */
    name: string | null;
    /** Final ranking score, copied from the fused (or reranked) result. */
    score: number;
    /** Component scores copied from the fusion/rerank stages; each may be undefined. */
    scores: {
        /** BM25 component as reported by the fusion step. */
        bm25?: number;
        /** Vector component as reported by the fusion step. */
        vector?: number;
        /** Reciprocal-rank-fusion score. */
        rrf?: number;
        /** Reranker score; undefined when the result did not go through reranking. */
        rerank?: number;
    };
}
|
|
37
|
+
/** Value resolved by `SearchPipeline.search`. */
export interface SearchResponse {
    /** Ranked results, at most `limit` entries. */
    results: SearchResult[];
    /** Stage timings; present only when `benchmark` was requested. */
    timing?: SearchTiming;
    /** Raw hit counts before fusion; present only when `benchmark` was requested. */
    counts?: {
        /** Total hits summed over every BM25 result list. */
        bm25?: number;
        /** Total hits summed over every vector result list. */
        vector?: number;
    };
}
|
|
45
|
+
/** Search facade returned by `createSearchPipeline`. */
export interface SearchPipeline {
    /**
     * Run a search for `query`.
     *
     * @param query - Query text.
     * @param options - Search options; when omitted the implementation uses `{ mode: "hybrid" }`.
     * @returns The ranked results, plus timing/count data when benchmarking is enabled.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResponse>;
}
|
|
48
|
+
/**
 * Create a hybrid search pipeline.
 *
 * @param store - SQLite store with searchBM25 and searchVector
 * @param embedder - (optional) embedding provider for vector search
 * @param llm - (optional) LLM provider for query expansion and reranking
 * @returns A pipeline object exposing a single `search(query, options)` method.
 */
export declare function createSearchPipeline(store: Store, embedder?: Embedder, llm?: LLMProvider): SearchPipeline;
|
|
56
|
+
export type { BM25SearchResult } from "./bm25.js";
|
|
57
|
+
export type { VectorSearchResult } from "./vector.js";
|
|
58
|
+
export type { ExpandedQuery, ExpandedQueryType } from "./expander.js";
|
|
59
|
+
export type { FusedResult } from "./fusion.js";
|
|
60
|
+
export type { RerankedResult } from "./reranker.js";
|
|
61
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAUnD,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE;QACN,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,MAAM,CAAC,EAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;CACzE;AAID;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,KAAK,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,GAAG,CAAC,EAAE,WAAW,GAChB,cAAc,CA6IhB;AAGD,YAAY,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAClD,YAAY,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AACtE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { searchBM25 } from "./bm25.js";
|
|
2
|
+
import { searchVectorWithEmbedding } from "./vector.js";
|
|
3
|
+
import { expandQuery } from "./expander.js";
|
|
4
|
+
import { reciprocalRankFusion } from "./fusion.js";
|
|
5
|
+
import { rerank } from "./reranker.js";
|
|
6
|
+
// --- Factory ---
|
|
7
|
+
/**
 * Create a hybrid search pipeline.
 *
 * The returned object's `search` method runs up to four stages: optional LLM
 * query expansion, BM25 and/or vector retrieval, reciprocal rank fusion, and
 * optional LLM reranking.
 *
 * @param store - SQLite store with searchBM25 and searchVector
 * @param embedder - (optional) embedding provider for vector search
 * @param llm - (optional) LLM provider for query expansion and reranking
 * @returns Pipeline object exposing `search(query, options)`.
 */
export function createSearchPipeline(store, embedder, llm) {
    return {
        /**
         * Run a search.
         *
         * @param query - Query text.
         * @param options - Search options; `mode` falls back to "hybrid" and
         *   `limit` to 20 when not supplied.
         * @returns `{ results, timing, counts }`; `timing` and `counts` are
         *   undefined unless `options.benchmark` is true.
         */
        async search(query, options = { mode: "hybrid" }) {
            // The parameter default only applies when `options` is omitted
            // entirely; a partial object such as `{ limit: 5 }` would otherwise
            // leave `mode` undefined and silently run no retriever at all.
            const mode = options.mode ?? "hybrid";
            const limit = options.limit ?? 20;
            const fetchLimit = limit * 3; // Fetch more for fusion/reranking
            const bench = options.benchmark ?? false;
            const timing = { total: 0 };
            const counts = {};
            // Only touch the clock when benchmarking was requested.
            const now = () => (bench ? performance.now() : 0);
            const totalHits = (lists) => lists.reduce((sum, list) => sum + list.length, 0);
            const totalStart = now();

            // Step 1: (Optional) Query expansion
            let expanded = [];
            if (options.expand && llm) {
                const t0 = now();
                expanded = await expandQuery(query, llm);
                if (bench) {
                    timing.expand = performance.now() - t0;
                }
            }
            // Separate expanded queries by target search type
            const lexQueries = expanded.filter((e) => e.type === "lex");
            const vecQueries = expanded.filter((e) => e.type === "vec" || e.type === "hyde");

            // Step 2: Execute searches based on mode
            const bm25Lists = [];
            const vectorLists = [];
            if (mode === "bm25" || mode === "hybrid") {
                const t0 = now();
                // The original query goes first: per the upstream comment, list
                // index 0 is the original query and gets the higher weight.
                bm25Lists.push(searchBM25(store, query, fetchLimit));
                for (const eq of lexQueries) {
                    bm25Lists.push(searchBM25(store, eq.text, fetchLimit));
                }
                if (bench) {
                    timing.bm25 = performance.now() - t0;
                    counts.bm25 = totalHits(bm25Lists);
                }
            }
            if ((mode === "vector" || mode === "hybrid") && embedder) {
                const t0 = now();
                // Batch embed all vector queries at once (original query at index 0)
                const vecQueryTexts = [query, ...vecQueries.map((e) => e.text)];
                const embeddings = await embedder.embed(vecQueryTexts);
                for (let i = 0; i < embeddings.length; i++) {
                    vectorLists.push(searchVectorWithEmbedding(store, embeddings[i], fetchLimit));
                }
                if (bench) {
                    timing.vector = performance.now() - t0;
                    counts.vector = totalHits(vectorLists);
                }
            }

            // Vector-only search without any vector results (e.g. no embedder
            // configured) has nothing to rank: return early, skipping fusion.
            if (mode === "vector" && vectorLists.length === 0) {
                if (bench) {
                    timing.total = performance.now() - totalStart;
                }
                return { results: [], timing: bench ? timing : undefined, counts: bench ? counts : undefined };
            }

            // Step 3: Combine results via RRF. The list for a stage that did not
            // run is simply empty, so one call covers every mode.
            const fusionStart = now();
            const fused = reciprocalRankFusion(bm25Lists, vectorLists, {
                k: options.rrfK,
                // Keep the wider candidate set when a reranker will trim it later.
                limit: options.rerank ? fetchLimit : limit,
            });
            if (bench) {
                timing.fusion = performance.now() - fusionStart;
            }

            // Step 4: (Optional) LLM reranking
            let finalResults;
            if (options.rerank && llm && fused.length > 0) {
                const t0 = now();
                const reranked = await rerank(query, fused, llm, {
                    topN: Math.min(fused.length, limit * 2),
                });
                finalResults = reranked.slice(0, limit);
                if (bench) {
                    timing.rerank = performance.now() - t0;
                }
            }
            else {
                finalResults = fused.slice(0, limit);
            }
            if (bench) {
                timing.total = performance.now() - totalStart;
            }

            // Convert to SearchResult
            const results = finalResults.map((r) => ({
                chunkId: r.chunkId,
                fileId: r.fileId,
                filePath: r.filePath,
                content: r.content,
                startLine: r.startLine,
                endLine: r.endLine,
                chunkType: r.chunkType,
                name: r.name,
                score: r.score,
                scores: {
                    bm25: r.scores.bm25,
                    vector: r.scores.vector,
                    rrf: r.scores.rrf,
                    // Only reranked results carry this field; otherwise undefined.
                    rerank: r.scores.rerank,
                },
            }));
            return {
                results,
                timing: bench ? timing : undefined,
                counts: bench ? counts : undefined,
            };
        },
    };
}
|
|
137
|
+
//# sourceMappingURL=index.js.map
|