march-cli 0.1.35 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/package.json +1 -1
  2. package/src/agent/code-search/cache.mjs +133 -0
  3. package/src/agent/code-search/chunk-rules.mjs +107 -0
  4. package/src/agent/code-search/chunker.mjs +125 -0
  5. package/src/agent/code-search/engine.mjs +109 -0
  6. package/src/agent/code-search/languages.mjs +25 -0
  7. package/src/agent/code-search/parser-pool.mjs +29 -0
  8. package/src/agent/code-search/rerank.mjs +43 -0
  9. package/src/agent/code-search/retrieval/bm25.mjs +47 -0
  10. package/src/agent/code-search/retrieval/fusion.mjs +18 -0
  11. package/src/agent/code-search/retrieval/model2vec.mjs +96 -0
  12. package/src/agent/code-search/retrieval/safetensors.mjs +49 -0
  13. package/src/agent/code-search/retrieval/vector.mjs +107 -0
  14. package/src/agent/code-search/retrieval/wordpiece.mjs +82 -0
  15. package/src/agent/code-search/scanner.mjs +84 -0
  16. package/src/agent/code-search/tokenize.mjs +16 -0
  17. package/src/agent/code-search/tool.mjs +75 -0
  18. package/src/agent/runner/provider-quota-runtime.mjs +38 -0
  19. package/src/agent/runner.mjs +5 -0
  20. package/src/agent/runtime/remote-runner-client.mjs +2 -0
  21. package/src/agent/runtime/runner-ipc-target.mjs +7 -0
  22. package/src/agent/runtime/state/runner-state.mjs +1 -0
  23. package/src/agent/runtime/ui-event-bridge.mjs +2 -0
  24. package/src/agent/tools.mjs +3 -0
  25. package/src/cli/commands/registry/slash-command-registry.mjs +10 -7
  26. package/src/cli/commands/status-command.mjs +61 -35
  27. package/src/context/system-core/base.md +5 -0
  28. package/src/provider/quota/codex.mjs +278 -0
  29. package/src/provider/quota/index.mjs +46 -0
  30. package/src/provider/quota/transport-observer.mjs +99 -0
  31. package/src/web-ui/runtime-host.mjs +3 -0
  32. package/src/web-ui/server.mjs +1 -0
  33. package/src/web-ui/session-manager.mjs +2 -0
  34. package/src/web-ui/src/components/AppShell.tsx +1 -0
  35. package/src/web-ui/src/components/RightSidebar.tsx +47 -2
  36. package/src/web-ui/src/model.ts +20 -0
  37. package/src/web-ui/src/runtime/client.ts +8 -1
  38. package/src/web-ui/src/runtime/useWebRuntime.ts +13 -1
  39. package/src/web-ui/src/styles/shell.css +10 -0
  40. package/src/web-ui/dist/assets/index-BUmhnID4.css +0 -1
  41. package/src/web-ui/dist/assets/index-CtuqTjcB.js +0 -1845
  42. package/src/web-ui/dist/index.html +0 -13
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "march-cli",
3
- "version": "0.1.35",
3
+ "version": "0.1.36",
4
4
  "description": "March CLI — terminal-native coding agent with context reconstruction",
5
5
  "type": "module",
6
6
  "main": "./src/main.mjs",
@@ -0,0 +1,133 @@
1
+ import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
2
+ import { dirname } from "node:path";
3
+ import { chunkFile } from "./chunker.mjs";
4
+ import { Bm25Index } from "./retrieval/bm25.mjs";
5
+ import { LocalVectorIndex, defaultVectorizer } from "./retrieval/vector.mjs";
6
+
7
+ const DEFAULT_MAX_FILE_ENTRIES = 8_000;
8
+ const DEFAULT_MAX_INDEX_ENTRIES = 24;
9
+
10
+ export class CodeSearchIndexCache {
11
+ constructor({
12
+ maxFileEntries = DEFAULT_MAX_FILE_ENTRIES,
13
+ maxIndexEntries = DEFAULT_MAX_INDEX_ENTRIES,
14
+ storagePath = null,
15
+ vectorizer = defaultVectorizer,
16
+ } = {}) {
17
+ this.maxFileEntries = maxFileEntries;
18
+ this.maxIndexEntries = maxIndexEntries;
19
+ this.storagePath = storagePath;
20
+ this.vectorizer = vectorizer;
21
+ this.fileChunks = new Map();
22
+ this.indices = new Map();
23
+ this.loaded = false;
24
+ this.dirty = false;
25
+ }
26
+
27
+ async build(files) {
28
+ await this.load();
29
+ const chunks = [];
30
+ let reusedFiles = 0;
31
+ let indexedFiles = 0;
32
+ for (const file of files) {
33
+ const signature = fileSignature(file);
34
+ const key = fileCacheKey(file);
35
+ const cached = this.fileChunks.get(key);
36
+ if (cached?.signature === signature) {
37
+ chunks.push(...cached.chunks);
38
+ reusedFiles += 1;
39
+ continue;
40
+ }
41
+ const fileChunks = await chunkFile(file);
42
+ this.fileChunks.set(key, { signature, chunks: fileChunks });
43
+ this.dirty = true;
44
+ chunks.push(...fileChunks);
45
+ indexedFiles += 1;
46
+ }
47
+
48
+ this.pruneFileCache();
49
+ await this.persist();
50
+
51
+ const indexSignature = [this.vectorizer.id, ...files.map(fileSignature)].join("\n");
52
+ const cachedIndex = this.indices.get(indexSignature);
53
+ if (cachedIndex) {
54
+ this.indices.delete(indexSignature);
55
+ this.indices.set(indexSignature, cachedIndex);
56
+ return { chunks, index: cachedIndex, reusedFiles, indexedFiles, reusedIndex: true, vectorizer: this.vectorizer.id };
57
+ }
58
+
59
+ const index = {
60
+ lexical: new Bm25Index(chunks),
61
+ vector: await LocalVectorIndex.create(chunks, { vectorizer: this.vectorizer }),
62
+ };
63
+ this.indices.set(indexSignature, index);
64
+ this.pruneIndexCache();
65
+ return { chunks, index, reusedFiles, indexedFiles, reusedIndex: false, vectorizer: this.vectorizer.id };
66
+ }
67
+
68
+ clear() {
69
+ this.fileChunks.clear();
70
+ this.indices.clear();
71
+ this.loaded = true;
72
+ this.dirty = true;
73
+ }
74
+
75
+ async load() {
76
+ if (this.loaded) return;
77
+ this.loaded = true;
78
+ if (!this.storagePath) return;
79
+ try {
80
+ const raw = await readFile(this.storagePath, "utf8");
81
+ const parsed = JSON.parse(raw);
82
+ for (const entry of parsed.files ?? []) {
83
+ if (!entry?.key || !entry.signature || !Array.isArray(entry.chunks)) continue;
84
+ this.fileChunks.set(entry.key, { signature: entry.signature, chunks: entry.chunks });
85
+ }
86
+ this.pruneFileCache();
87
+ } catch (err) {
88
+ if (err?.code !== "ENOENT") this.fileChunks.clear();
89
+ }
90
+ }
91
+
92
+ async persist() {
93
+ if (!this.storagePath || !this.dirty) return;
94
+ const payload = {
95
+ version: 1,
96
+ files: [...this.fileChunks.entries()].map(([key, value]) => ({
97
+ key,
98
+ signature: value.signature,
99
+ chunks: value.chunks,
100
+ })),
101
+ };
102
+ await mkdir(dirname(this.storagePath), { recursive: true });
103
+ const tmpPath = `${this.storagePath}.${process.pid}.tmp`;
104
+ await writeFile(tmpPath, JSON.stringify(payload), "utf8");
105
+ await rename(tmpPath, this.storagePath);
106
+ this.dirty = false;
107
+ }
108
+
109
+ pruneFileCache() {
110
+ while (this.fileChunks.size > this.maxFileEntries) {
111
+ const oldestKey = this.fileChunks.keys().next().value;
112
+ this.fileChunks.delete(oldestKey);
113
+ this.dirty = true;
114
+ }
115
+ }
116
+
117
+ pruneIndexCache() {
118
+ while (this.indices.size > this.maxIndexEntries) {
119
+ const oldestKey = this.indices.keys().next().value;
120
+ this.indices.delete(oldestKey);
121
+ }
122
+ }
123
+ }
124
+
125
+ export const defaultCodeSearchIndexCache = new CodeSearchIndexCache();
126
+
127
/**
 * Builds the change-detection signature for a scanned file.
 *
 * The signature combines the cache key, the root-relative path, the size and
 * the modification time truncated to whole milliseconds. The relative path is
 * included because cached chunks embed it (as `file_path` and in their ids):
 * when the same absolute file is scanned under a different root, its chunks
 * must be rebuilt rather than reused with a stale path.
 *
 * @param {{absPath?: string, relPath?: string, size?: number, mtimeMs?: number}} file
 * @returns {string}
 */
function fileSignature(file) {
  const sizeBytes = file.size ?? 0;
  const modifiedMs = Math.trunc(file.mtimeMs ?? 0);
  return `${fileCacheKey(file)}:${file.relPath ?? ""}:${sizeBytes}:${modifiedMs}`;
}

/**
 * Returns the key under which a file's chunks are cached: the absolute path
 * when available, otherwise the relative path.
 *
 * @param {{absPath?: string, relPath?: string}} file
 * @returns {string|undefined}
 */
function fileCacheKey(file) {
  return file.absPath ?? file.relPath;
}
@@ -0,0 +1,107 @@
1
+ const FUNCTION_TYPES = new Set([
2
+ "function_declaration",
3
+ "function_definition",
4
+ "function_item",
5
+ "method_definition",
6
+ "method_declaration",
7
+ ]);
8
+ const CLASS_TYPES = new Set([
9
+ "class_declaration",
10
+ "class_definition",
11
+ "enum_declaration",
12
+ "enum_item",
13
+ "interface_declaration",
14
+ "struct_item",
15
+ "type_alias_declaration",
16
+ ]);
17
+
18
+ const LANGUAGE_RULES = {
19
+ javascript: jsTsRules(),
20
+ typescript: jsTsRules(),
21
+ tsx: jsTsRules(),
22
+ python: rules({
23
+ function: ["function_definition"],
24
+ class: ["class_definition"],
25
+ }),
26
+ rust: rules({
27
+ function: ["function_item"],
28
+ class: ["enum_item", "impl_item", "struct_item", "trait_item", "type_item"],
29
+ }),
30
+ go: rules({
31
+ function: ["function_declaration", "method_declaration"],
32
+ class: ["type_declaration"],
33
+ }),
34
+ java: rules({
35
+ function: ["constructor_declaration", "method_declaration"],
36
+ class: ["class_declaration", "enum_declaration", "interface_declaration", "record_declaration"],
37
+ }),
38
+ };
39
+
40
/**
 * Decides whether a syntax node should become a chunk and of which kind.
 *
 * The language-specific rule table is consulted first; node types it does not
 * list fall back to the generic type sets and to substring checks on the type name.
 *
 * @param {string} language Language identifier used to pick the rule table.
 * @param {{type: string}} node Syntax node.
 * @returns {{kind: string}|null} The matching rule, or null when the node is not chunked.
 */
export function chunkRuleFor(language, node) {
  const nodeType = node.type;
  const explicitRule = LANGUAGE_RULES[language]?.get(nodeType);
  if (explicitRule) return explicitRule;

  const looksLikeFunction = FUNCTION_TYPES.has(nodeType) || /function|method/.test(nodeType);
  if (looksLikeFunction) return { kind: "function" };

  const looksLikeClass = CLASS_TYPES.has(nodeType) || /class|interface|struct|enum|type_alias/.test(nodeType);
  return looksLikeClass ? { kind: "class" } : null;
}
47
+
48
/**
 * Returns up to five symbol names for a syntax node.
 *
 * Names taken from the node's `name` field children win; only when there are
 * none is the node's source text scanned with the language's regex patterns.
 *
 * @param {string} language Language identifier.
 * @param {object} node Syntax node.
 * @returns {string[]}
 */
export function extractNodeSymbols(language, node) {
  const maxSymbols = 5;
  const fromFields = extractFieldSymbols(node);
  const symbols = fromFields.length > 0 ? fromFields : extractSymbolsFromText(language, node.text);
  return symbols.slice(0, maxSymbols);
}
53
+
54
/**
 * Rule table shared by JavaScript, TypeScript and TSX.
 *
 * @returns {Map<string, {kind: string}>}
 */
function jsTsRules() {
  const groups = {
    function: ["function_declaration", "generator_function_declaration", "method_definition"],
    class: ["abstract_class_declaration", "class_declaration", "interface_declaration", "type_alias_declaration"],
    block: ["lexical_declaration"],
  };
  return rules(groups);
}

/**
 * Inverts a `{ kind: [nodeType, ...] }` object into a Map from node type to `{ kind }`.
 *
 * @param {Record<string, string[]>} groups
 * @returns {Map<string, {kind: string}>}
 */
function rules(groups) {
  const entries = Object.entries(groups).flatMap(
    ([kind, nodeTypes]) => nodeTypes.map((type) => [type, { kind }]),
  );
  return new Map(entries);
}
69
+
70
/**
 * Collects the text of every direct child that sits in the node's `name` field
 * and looks like a plain identifier.
 *
 * @param {{children?: Array<{text: string}>, fieldNameForChild: (index: number) => (string|null)}} node
 * @returns {string[]}
 */
function extractFieldSymbols(node) {
  const found = [];
  for (const [position, child] of (node.children ?? []).entries()) {
    const fieldName = node.fieldNameForChild(position) ?? "";
    if (fieldName !== "name") continue;
    const text = child.text;
    if (text && isIdentifier(text)) found.push(text);
  }
  return found;
}

/**
 * Tells whether `text` is a single identifier: a letter, `_` or `$` followed by
 * word characters or `$`.
 *
 * @param {string} text
 * @returns {boolean}
 */
function isIdentifier(text) {
  const identifierPattern = /^[A-Za-z_$][\w$]*$/;
  return identifierPattern.test(text);
}
85
+
86
/**
 * Extracts symbol names from raw source text using the language's regex patterns.
 * Each pattern contributes at most its first match; duplicates are removed while
 * keeping first-seen order.
 *
 * @param {string} language Language identifier.
 * @param {string|null|undefined} text Source text of a node.
 * @returns {string[]}
 */
function extractSymbolsFromText(language, text) {
  const source = String(text ?? "");
  const symbols = [];
  for (const pattern of symbolPatterns(language)) {
    const match = source.match(pattern);
    if (match?.[1]) symbols.push(match[1]);
  }
  return [...new Set(symbols)];
}

/**
 * Returns the declaration patterns for a language; capture group 1 is the symbol name.
 * Unknown languages get the JavaScript/TypeScript patterns.
 *
 * @param {string} language
 * @returns {RegExp[]}
 */
function symbolPatterns(language) {
  if (language === "python") return [/^\s*(?:async\s+)?def\s+([A-Za-z_][\w]*)/m, /^\s*class\s+([A-Za-z_][\w]*)/m];
  if (language === "rust") {
    // Visibility may be restricted, e.g. `pub(crate) fn` or `pub(super) struct`.
    return [
      /^\s*(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z_][\w]*)/m,
      /^\s*(?:pub(?:\([^)]*\))?\s+)?(?:struct|enum|trait|impl)\s+([A-Za-z_][\w]*)/m,
    ];
  }
  if (language === "go") return [/^\s*func\s+(?:\([^)]*\)\s*)?([A-Za-z_][\w]*)/m, /^\s*type\s+([A-Za-z_][\w]*)/m];
  if (language === "java") {
    return [
      /\b(?:class|interface|enum|record)\s+([A-Za-z_][\w]*)/m,
      // A method header may carry a `throws` clause before the body. Control
      // statements such as `if (...) {` have the same shape as a header and
      // must not be reported as symbols.
      /\b(?!(?:if|for|while|switch|catch|try|synchronized)\b)([A-Za-z_][\w]*)\s*\([^)]*\)\s*(?:throws\s+[\w.,\s]+)?\{/m,
    ];
  }
  return [
    /\b(?:function|class|interface|type)\s+([A-Za-z_$][\w$]*)/m,
    /\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=/m,
    /\b([A-Za-z_$][\w$]*)\s*[:=]\s*(?:async\s*)?\([^)]*\)\s*=>/m,
  ];
}
@@ -0,0 +1,125 @@
1
+ import { chunkRuleFor, extractNodeSymbols } from "./chunk-rules.mjs";
2
+ import { getParser } from "./parser-pool.mjs";
3
+ import { uniqueTokens } from "./tokenize.mjs";
4
+
5
+ const MAX_CHUNK_LINES = 80;
6
+ const FALLBACK_WINDOW = 60;
7
+
8
+
9
/**
 * Splits a file into search chunks.
 *
 * When a parser is available for the file's language, chunks follow syntax
 * nodes and any remaining non-blank lines are covered by line-window chunks.
 * Without a parser, when parsing fails, or when no chunk results, the whole
 * file is split into fixed line windows.
 *
 * @param {{content: string, language: string, relPath: string, absPath?: string}} file
 * @returns {Promise<object[]>} Chunks for the file.
 */
export async function chunkFile(file) {
  const lines = file.content.split("\n");
  const parser = await getParser(file.language);
  if (!parser) return fallbackChunks(file, lines);
  let tree = null;
  try {
    tree = parser.parse(file.content);
    const chunks = collectAstChunks(file, lines, tree.rootNode);
    const completed = addUncoveredLineChunks(file, lines, chunks);
    return completed.length > 0 ? completed : fallbackChunks(file, lines);
  } catch {
    // Best effort: any parsing or traversal failure degrades to line windows.
    return fallbackChunks(file, lines);
  } finally {
    // web-tree-sitter trees live in WASM memory that is only released by an
    // explicit delete(); the chunks keep plain strings, so the tree is no
    // longer needed once they are built.
    tree?.delete?.();
  }
}
22
+
23
/**
 * Walks the syntax tree and turns every node that has a chunk rule into a chunk.
 * A node's chunk is cut off after MAX_CHUNK_LINES lines, chunks with only
 * whitespace are skipped, and the result is passed through dedupeContainedChunks.
 *
 * @param {object} file Scanned file descriptor.
 * @param {string[]} lines The file content split into lines.
 * @param {object} rootNode Root node of the parsed tree.
 * @returns {object[]}
 */
function collectAstChunks(file, lines, rootNode) {
  const collected = [];
  const considerNode = (node) => {
    const rule = chunkRuleFor(file.language, node);
    if (!rule) return;
    // Tree rows are zero-based; chunk lines are one-based and inclusive.
    const firstLine = node.startPosition.row + 1;
    const lastLine = Math.min(node.endPosition.row + 1, firstLine + MAX_CHUNK_LINES - 1);
    if (lastLine < firstLine) return;
    const text = lines.slice(firstLine - 1, lastLine).join("\n");
    if (!text.trim()) return;
    const symbols = extractNodeSymbols(file.language, node);
    collected.push(toChunk(file, text, firstLine, lastLine, rule.kind, symbols));
  };
  walk(rootNode, considerNode);
  return dedupeContainedChunks(collected);
}
37
+
38
/**
 * Visits `node` and all of its descendants in pre-order (parent first, then
 * children left to right).
 *
 * The traversal uses an explicit stack instead of recursion, so very deep
 * trees cannot overflow the call stack.
 *
 * @param {object} node Root of the subtree; children are read from `namedChildren`, falling back to `children`.
 * @param {(node: object) => void} visit Callback invoked once per node.
 */
function walk(node, visit) {
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    visit(current);
    const children = current.namedChildren ?? current.children ?? [];
    // Push in reverse so the leftmost child is popped, and therefore visited, first.
    for (let index = children.length - 1; index >= 0; index -= 1) pending.push(children[index]);
  }
}
42
+
43
+
44
/**
 * Removes chunks that add little over another chunk of the same file.
 *
 * A chunk is dropped when another chunk fully contains it and that container
 * is less than 10 lines longer. Chunks with exactly the same line range are
 * duplicates of each other: only the first one in sorted order is kept.
 * Without that tie-break each duplicate would count as contained in the other
 * and both would be removed.
 *
 * @param {Array<{file_path: string, start_line: number, end_line: number}>} chunks
 * @returns {object[]} Surviving chunks sorted by start line, then end line. The input array is not modified.
 */
function dedupeContainedChunks(chunks) {
  const sorted = [...chunks].sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);
  const makesRedundant = (chunk, index) => (other, otherIndex) => {
    if (otherIndex === index || other.file_path !== chunk.file_path) return false;
    const sameRange = other.start_line === chunk.start_line && other.end_line === chunk.end_line;
    // The sort is stable, so the earlier of two identical ranges is the one collected first.
    if (sameRange) return otherIndex < index;
    return other.start_line <= chunk.start_line
      && other.end_line >= chunk.end_line
      && span(other) < span(chunk) + 10;
  };
  return sorted.filter((chunk, index) => !sorted.some(makesRedundant(chunk, index)));
}

/**
 * Line distance between a chunk's first and last line (0 for a one-line chunk).
 *
 * @param {{start_line: number, end_line: number}} chunk
 * @returns {number}
 */
function span(chunk) {
  return chunk.end_line - chunk.start_line;
}
58
+
59
/**
 * Adds line-window chunks for every run of consecutive non-blank lines that no
 * existing chunk covers. A blank or already covered line ends the current run.
 *
 * @param {object} file Scanned file descriptor.
 * @param {string[]} lines The file content split into lines.
 * @param {Array<{start_line: number, end_line: number}>} chunks Chunks collected so far.
 * @returns {object[]} The original chunks plus the gap chunks, sorted by start line, then end line.
 */
function addUncoveredLineChunks(file, lines, chunks) {
  const covered = new Set();
  for (const { start_line: first, end_line: last } of chunks) {
    for (let lineNumber = first; lineNumber <= last; lineNumber += 1) covered.add(lineNumber);
  }

  const completed = chunks.slice();
  let runStart = null;
  // Emits the pending run, if any, as fallback chunks ending at `runEnd`.
  const flushRun = (runEnd) => {
    if (runStart === null) return;
    completed.push(...fallbackRangeChunks(file, lines, runStart, runEnd));
    runStart = null;
  };

  for (let lineNumber = 1; lineNumber <= lines.length; lineNumber += 1) {
    const isUncoveredText = !covered.has(lineNumber) && lines[lineNumber - 1]?.trim();
    if (isUncoveredText) {
      runStart ??= lineNumber;
    } else {
      flushRun(lineNumber - 1);
    }
  }
  flushRun(lines.length);

  return completed.sort((a, b) => a.start_line - b.start_line || a.end_line - b.end_line);
}
77
+
78
/**
 * Splits the inclusive line range [start, end] into windows of FALLBACK_WINDOW
 * lines and returns one chunk per window that is not entirely whitespace.
 *
 * @param {object} file Scanned file descriptor.
 * @param {string[]} lines The file content split into lines.
 * @param {number} start First line of the range (one-based).
 * @param {number} end Last line of the range (one-based, inclusive).
 * @returns {object[]}
 */
function fallbackRangeChunks(file, lines, start, end) {
  const windows = [];
  let windowStart = start;
  while (windowStart <= end) {
    const windowEnd = Math.min(end, windowStart + FALLBACK_WINDOW - 1);
    const content = lines.slice(windowStart - 1, windowEnd).join("\n");
    if (content.trim()) {
      windows.push(toChunk(file, content, windowStart, windowEnd, docsOrConfigKind(file), []));
    }
    windowStart += FALLBACK_WINDOW;
  }
  return windows;
}
87
+
88
/**
 * Chunks a whole file into fixed line windows, without using syntax.
 * This is the whole-file case of fallbackRangeChunks: the range runs from the
 * first to the last line.
 *
 * @param {object} file Scanned file descriptor.
 * @param {string[]} lines The file content split into lines.
 * @returns {object[]}
 */
function fallbackChunks(file, lines) {
  const firstLine = 1;
  const lastLine = lines.length;
  return fallbackRangeChunks(file, lines, firstLine, lastLine);
}
98
+
99
/**
 * Picks the chunk kind for line-window chunks from the file extension:
 * "docs" for md/mdx/txt, "config" for json/jsonc/yaml/yml/toml, otherwise "block".
 * The extension match is case-insensitive.
 *
 * @param {{relPath: string}} file
 * @returns {"docs"|"config"|"block"}
 */
function docsOrConfigKind(file) {
  const path = file.relPath;
  const isDocs = /\.(md|mdx|txt)$/i.test(path);
  if (isDocs) return "docs";
  const isConfig = /\.(json|jsonc|ya?ml|toml)$/i.test(path);
  return isConfig ? "config" : "block";
}
104
+
105
/**
 * Builds the chunk record stored in the cache and in the indices.
 *
 * @param {{relPath: string, absPath?: string, language: string}} file Scanned file descriptor.
 * @param {string} content Text of the chunk.
 * @param {number} start First line (one-based).
 * @param {number} end Last line (one-based, inclusive).
 * @param {string} kind Proposed kind; a "block" may be refined from the content.
 * @param {string[]} symbols Symbol names attached to the chunk.
 * @returns {object} Chunk with id `<relPath>:<start>-<end>` and up to 40 identifier tokens.
 */
function toChunk(file, content, start, end, kind, symbols) {
  const { relPath, absPath, language } = file;
  const chunk = {
    id: `${relPath}:${start}-${end}`,
    file_path: relPath,
    abs_path: absPath,
    start_line: start,
    end_line: end,
    language,
    kind: refineKind(kind, content),
    symbols,
    identifiers: uniqueTokens(`${symbols.join(" ")} ${content}`).slice(0, 40),
    content,
  };
  return chunk;
}
119
+
120
/**
 * Upgrades a generic "block" kind when the content mentions a declaration
 * keyword: type-like keywords give "class", function-like keywords or `=>`
 * give "function". Any other kind is returned unchanged.
 *
 * @param {string} kind
 * @param {string} content
 * @returns {string}
 */
function refineKind(kind, content) {
  if (kind === "block") {
    // The type-like check runs first, so content matching both is a "class".
    if (/\b(class|interface|struct|enum|type)\b/.test(content)) return "class";
    if (/\b(function|def|fn)\b|=>/.test(content)) return "function";
  }
  return kind;
}
@@ -0,0 +1,109 @@
1
+ import { defaultCodeSearchIndexCache } from "./cache.mjs";
2
+ import { rrfFuse } from "./retrieval/fusion.mjs";
3
+ import { rerankResults } from "./rerank.mjs";
4
+ import { scanCodeFiles } from "./scanner.mjs";
5
+
6
+ const DEFAULT_TOP_K = 5;
7
+ const RETRIEVAL_LIMIT = 80;
8
+
9
/**
 * Runs a code search over the files under `root`/`path`.
 *
 * The files are scanned, chunked and indexed through the cache, candidates are
 * retrieved according to `mode`, re-ranked, and the best `top_k` are returned.
 * With `related_to`, the query is expanded with the chunk at that location and
 * that chunk itself is removed from the results.
 *
 * @param {object} [options]
 * @param {string} [options.root] Root passed to the scanner.
 * @param {string} [options.query] Search text.
 * @param {string} [options.path] Path passed to the scanner (default ".").
 * @param {number} [options.top_k] Number of results; clamped by clampTopK.
 * @param {string} [options.mode] "lexical", "symbol", "semantic", or anything else for hybrid.
 * @param {boolean} [options.include_tests] Keep results from test paths.
 * @param {{file_path: string, line: number}} [options.related_to] Location whose chunk seeds a related search.
 * @param {object|null} [options.cache] Index cache; null or undefined selects the default cache.
 * @returns {Promise<{results: object[], stats: object}>}
 * @throws {Error} When `related_to` is malformed or points at no indexed chunk.
 */
export async function searchCode(options = {}) {
  const {
    root,
    query,
    path = ".",
    top_k = DEFAULT_TOP_K,
    mode = "auto",
    include_tests = false,
    related_to,
    cache = defaultCodeSearchIndexCache,
  } = options;

  const trimmedQuery = String(query ?? "").trim();
  const indexCache = cache ?? defaultCodeSearchIndexCache;
  const hasNothingToSearch = !trimmedQuery && !related_to;
  if (hasNothingToSearch) {
    return { results: [], stats: { files: 0, chunks: 0 } };
  }

  const files = await scanCodeFiles({ root, path });
  const built = await indexCache.build(files);

  let related = null;
  if (related_to) related = relatedQuery(built.chunks, related_to, trimmedQuery);
  const effectiveQuery = related?.query ?? trimmedQuery;

  let candidates = await retrieveChunks(built.index, effectiveQuery, mode);
  if (related) {
    // The seed chunk is trivially related to itself; leave it out.
    candidates = candidates.filter((result) => result.chunk.id !== related.targetId);
  }

  const ranked = rerankResults(candidates, effectiveQuery, { includeTests: include_tests });
  const limit = clampTopK(top_k);
  const results = ranked.slice(0, limit).map(formatResult);
  const stats = formatStats(files, built, resultMode({ related_to, mode }));
  return { results, stats };
}
37
+
38
/**
 * Retrieves candidate chunks for a query.
 *
 * - "lexical" / "symbol": BM25 results only.
 * - "semantic": vector results only. The lexical index is not queried, because
 *   its results would be discarded.
 * - anything else: both result lists fused by reciprocal rank, with the lexical
 *   list weighted slightly higher.
 *
 * @param {{lexical: {search: Function}, vector: {search: Function}}} index
 * @param {string} queryText
 * @param {string} mode
 * @returns {Promise<Array<{chunk: object, score: number}>>}
 */
async function retrieveChunks(index, queryText, mode) {
  const searchOptions = { limit: RETRIEVAL_LIMIT };
  if (mode === "lexical" || mode === "symbol") return index.lexical.search(queryText, searchOptions);
  if (mode === "semantic") return index.vector.search(queryText, searchOptions);

  const lexical = index.lexical.search(queryText, searchOptions);
  const semantic = await index.vector.search(queryText, searchOptions);
  return rrfFuse([
    { results: lexical, weight: 1.2 },
    { results: semantic, weight: 1 },
  ], { limit: RETRIEVAL_LIMIT });
}
48
+
49
/**
 * Names the retrieval mode reported in the stats: "related" whenever
 * `related_to` is set, the explicit mode for "symbol", "semantic" and
 * "lexical", and "hybrid" for everything else.
 *
 * @param {{related_to?: unknown, mode?: string}} params
 * @returns {string}
 */
function resultMode({ related_to, mode }) {
  if (related_to) return "related";
  switch (mode) {
    case "symbol":
    case "semantic":
    case "lexical":
      return mode;
    default:
      return "hybrid";
  }
}
56
+
57
/**
 * Builds the expanded query for a "related to this location" search.
 *
 * @param {object[]} chunks All indexed chunks.
 * @param {{file_path: string, line: number}} relatedTo Location of the seed chunk.
 * @param {string} query Caller's query text; may be empty.
 * @returns {{targetId: string, query: string}} The seed chunk id and the newline-joined
 *   non-empty parts: query, symbols, identifiers and content of the seed chunk.
 * @throws {Error} When no chunk covers the location.
 */
function relatedQuery(chunks, relatedTo, query) {
  const target = findRelatedTarget(chunks, relatedTo);
  if (!target) {
    throw new Error(`No indexed chunk found at ${relatedTo.file_path}:${relatedTo.line}`);
  }
  const parts = [
    query,
    target.symbols.join(" "),
    target.identifiers.join(" "),
    target.content,
  ];
  const expanded = parts.filter((part) => Boolean(part)).join("\n");
  return { targetId: target.id, query: expanded };
}
65
+
66
/**
 * Finds the chunk that covers a file location.
 *
 * When several chunks cover the line (for example a class chunk and a method
 * chunk inside it), the one spanning the fewest lines is returned, so the seed
 * is the most specific definition around the line. Ties keep the earliest
 * chunk in `chunks`.
 *
 * @param {Array<{file_path: string, start_line: number, end_line: number}>} chunks
 * @param {{file_path: string, line: number|string}} relatedTo Backslashes in `file_path` are treated as `/`.
 * @returns {object|undefined} The covering chunk, or undefined when none covers the line.
 * @throws {Error} When `file_path` is empty or `line` is not a finite number.
 */
function findRelatedTarget(chunks, relatedTo) {
  const filePath = String(relatedTo?.file_path ?? "").replace(/\\/g, "/");
  const line = Math.trunc(Number(relatedTo?.line));
  if (!filePath || !Number.isFinite(line)) throw new Error("related_to requires file_path and line");

  let narrowest;
  for (const chunk of chunks) {
    const coversLine = chunk.file_path === filePath && chunk.start_line <= line && chunk.end_line >= line;
    if (!coversLine) continue;
    const isNarrower = narrowest === undefined
      || chunk.end_line - chunk.start_line < narrowest.end_line - narrowest.start_line;
    if (isNarrower) narrowest = chunk;
  }
  return narrowest;
}
72
+
73
/**
 * Shapes the statistics object returned next to the search results.
 *
 * @param {unknown[]} files Scanned files.
 * @param {{chunks: unknown[], reusedFiles: number, indexedFiles: number, reusedIndex: boolean, vectorizer: string}} built Result of the cache build.
 * @param {string} mode Reported retrieval mode.
 * @returns {{files: number, chunks: number, mode: string, reused_files: number, indexed_files: number, reused_index: boolean, vectorizer: string}}
 */
function formatStats(files, built, mode) {
  const fileCount = files.length;
  const { chunks, reusedFiles, indexedFiles, reusedIndex, vectorizer } = built;
  return {
    files: fileCount,
    chunks: chunks.length,
    mode,
    reused_files: reusedFiles,
    indexed_files: indexedFiles,
    reused_index: reusedIndex,
    vectorizer,
  };
}
84
+
85
/**
 * Converts a ranked `{ chunk, score }` pair into the public result shape.
 * The score is rounded to three decimals and the content is shortened to a snippet.
 *
 * @param {{chunk: object, score: number}} result
 * @returns {{file_path: string, start_line: number, end_line: number, language: string, kind: string, symbols: string[], score: number, snippet: string}}
 */
function formatResult({ chunk, score }) {
  const { file_path, start_line, end_line, language, kind, symbols, content } = chunk;
  const roundedScore = Number(score.toFixed(3));
  return {
    file_path,
    start_line,
    end_line,
    language,
    kind,
    symbols,
    score: roundedScore,
    snippet: trimSnippet(content),
  };
}

/**
 * Keeps the first 40 lines of `content`; when lines were dropped, a final line
 * holding a single ellipsis character is appended.
 *
 * @param {string|null|undefined} content
 * @returns {string}
 */
function trimSnippet(content) {
  const maxLines = 40;
  const lines = String(content ?? "").split("\n");
  if (lines.length <= maxLines) return lines.join("\n");
  return `${lines.slice(0, maxLines).join("\n")}\n…`;
}
104
+
105
/**
 * Normalises the requested result count: the value is converted to a number
 * and truncated; anything non-finite or below 1 becomes the default, and the
 * result never exceeds 20.
 *
 * @param {unknown} value
 * @returns {number}
 */
function clampTopK(value) {
  const requested = Math.trunc(Number(value));
  const isUsable = Number.isFinite(requested) && requested >= 1;
  return isUsable ? Math.min(requested, 20) : DEFAULT_TOP_K;
}
@@ -0,0 +1,25 @@
1
+ import { extname } from "node:path";
2
+ import { LANG_ALIASES, LANGUAGES } from "../../cli/tui/syntax/languages.mjs";
3
+
4
+ export const SEARCHABLE_TEXT_EXTENSIONS = new Set([
5
+ ".md", ".mdx", ".txt", ".json", ".jsonc", ".yaml", ".yml", ".toml", ".xml", ".html", ".css",
6
+ ]);
7
+
8
/**
 * Maps a file path to a language identifier through its lower-cased extension.
 *
 * @param {string} path
 * @returns {string} The language from the alias table, or "" when the path has
 *   no extension or the extension is unknown.
 */
export function languageForPath(path) {
  const extension = extname(path).slice(1).toLowerCase();
  return extension ? (LANG_ALIASES.get(extension) ?? "") : "";
}
13
+
14
/**
 * Tells whether a grammar configuration exists for the language.
 *
 * @param {string} language
 * @returns {boolean} False for an empty or missing language.
 */
export function canParseLanguage(language) {
  if (!language) return false;
  return Boolean(LANGUAGES[language]);
}
17
+
18
/**
 * Tells whether a file should be indexed: either its extension is one of the
 * searchable text extensions, or its language has a parser.
 *
 * @param {string} path
 * @returns {boolean}
 */
export function isSearchableTextPath(path) {
  const extension = extname(path).toLowerCase();
  if (SEARCHABLE_TEXT_EXTENSIONS.has(extension)) return true;
  return canParseLanguage(languageForPath(path));
}
22
+
23
/**
 * Looks up the grammar configuration of a language.
 *
 * @param {string} language
 * @returns {object|null} The configuration, or null when the language is unknown.
 */
export function languageConfig(language) {
  const config = LANGUAGES[language];
  return config ?? null;
}
@@ -0,0 +1,29 @@
1
+ import { dirname, join } from "node:path";
2
+ import { fileURLToPath } from "node:url";
3
+ import { Language, Parser } from "web-tree-sitter";
4
+ import { canParseLanguage, languageConfig } from "./languages.mjs";
5
+
6
+ const RESOURCE_DIR = join(dirname(fileURLToPath(import.meta.url)), "../../cli/tui/syntax/tree-sitter");
7
+
8
+ let initPromise = null;
9
+ const parsers = new Map();
10
+
11
/**
 * Returns a parser configured for `language`, creating and caching it on first use.
 *
 * The parser runtime is initialised once per process. A language whose grammar
 * fails to load is cached as null, so the load is not attempted again.
 *
 * @param {string} language
 * @returns {Promise<object|null>} The parser, or null when the language cannot
 *   be parsed, the runtime failed to initialise, or the grammar failed to load.
 */
export async function getParser(language) {
  if (!canParseLanguage(language)) return null;

  // A failed initialisation resolves to `false` instead of rejecting.
  initPromise ??= Parser.init().catch(() => false);
  const runtimeReady = await initPromise;
  if (runtimeReady === false) return null;

  if (parsers.has(language)) return parsers.get(language);

  let parser;
  try {
    const { file: grammarFile } = languageConfig(language);
    const grammar = await Language.load(join(RESOURCE_DIR, grammarFile));
    parser = new Parser();
    parser.setLanguage(grammar);
  } catch {
    parser = null;
  }
  parsers.set(language, parser);
  return parser;
}
@@ -0,0 +1,43 @@
1
+ import { tokenize } from "./tokenize.mjs";
2
+
3
/**
 * Re-scores retrieved results with heuristic boosts and sorts them.
 *
 * @param {Array<{chunk: object, score: number}>} results Retrieved candidates.
 * @param {string} query Query text used for the token-overlap boosts.
 * @param {{includeTests?: boolean}} [options] When `includeTests` is false, results from test paths are removed.
 * @returns {Array<{chunk: object, score: number}>} New result objects, best score first; ties are ordered by file path.
 */
export function rerankResults(results, query, { includeTests = false } = {}) {
  const queryTokens = new Set(tokenize(query));
  const rescored = [];
  for (const result of results) {
    const score = applyBoosts(result.chunk, result.score, queryTokens, includeTests);
    const isExcludedTest = !includeTests && isTestPath(result.chunk.file_path);
    if (isExcludedTest) continue;
    rescored.push({ ...result, score });
  }
  return rescored.sort((a, b) => b.score - a.score || a.chunk.file_path.localeCompare(b.chunk.file_path));
}
10
+
11
/**
 * Adjusts a retrieval score with heuristics about the chunk.
 *
 * Query-token overlap with symbols (x3.0), identifiers (x1.5) and path tokens
 * (x1.0) is added; function and class chunks and paths under src/ or lib/ get
 * a bonus; test paths (when tests are not requested) and vendor/build paths
 * get a penalty.
 *
 * @param {{symbols: string[], identifiers?: string[], file_path: string, kind: string}} chunk
 * @param {number} baseScore Score from retrieval.
 * @param {Set<string>} queryTokens Tokens of the query.
 * @param {boolean} includeTests Whether test files were requested.
 * @returns {number}
 */
function applyBoosts(chunk, baseScore, queryTokens, includeTests) {
  const path = chunk.file_path;
  const symbolTokens = new Set(tokenize(chunk.symbols.join(" ")));
  const pathTokens = new Set(tokenize(path));
  const identifierTokens = new Set(chunk.identifiers ?? []);
  const isDefinition = chunk.kind === "function" || chunk.kind === "class";

  let boosted = baseScore;
  boosted += overlap(queryTokens, symbolTokens) * 3.0;
  boosted += overlap(queryTokens, identifierTokens) * 1.5;
  boosted += overlap(queryTokens, pathTokens) * 1.0;
  if (isDefinition) boosted += 0.75;
  if (isImplementationPath(path)) boosted += 0.5;
  if (!includeTests && isTestPath(path)) boosted -= 2.5;
  if (isVendorPath(path)) boosted -= 5;
  return boosted;
}
26
+
27
/**
 * Counts how many items of `a` are also present in `b`.
 *
 * @param {Iterable<string>} a
 * @param {{has: (token: string) => boolean}} b
 * @returns {number}
 */
function overlap(a, b) {
  return [...a].reduce((shared, token) => (b.has(token) ? shared + 1 : shared), 0);
}
32
+
33
/**
 * Tells whether the path has a `src/` or `lib/` directory segment.
 *
 * @param {string} path Slash-separated path.
 * @returns {boolean}
 */
function isImplementationPath(path) {
  const sourceDirectories = [/(^|\/)src\//, /(^|\/)lib\//];
  return sourceDirectories.some((pattern) => pattern.test(path));
}
36
+
37
/**
 * Tells whether a path belongs to test code.
 *
 * A path counts as test code when it has a `__tests__`, `test`, `tests` or
 * `spec` directory segment, or when the file name follows a test naming
 * convention: `*.test.*` / `*.spec.*` for JS/TS, `*_test.go` for Go, and
 * `test_*.py` / `*_test.py` for Python. Go in particular keeps tests next to
 * the sources, so the directory check alone never catches them.
 *
 * @param {string} path Slash-separated path.
 * @returns {boolean}
 */
function isTestPath(path) {
  const testPatterns = [
    /(^|\/)(__tests__|test|tests|spec)(\/|$)/,
    /\.(test|spec)\.[cm]?[jt]sx?$/,
    /_test\.go$/,
    /(^|\/)(test_[^/]*|[^/]*_test)\.py$/,
  ];
  return testPatterns.some((pattern) => pattern.test(path));
}
40
+
41
/**
 * Tells whether the path has a dependency or build-output directory segment:
 * node_modules, vendor, dist, build or coverage.
 *
 * @param {string} path Slash-separated path.
 * @returns {boolean}
 */
function isVendorPath(path) {
  const vendorSegment = /(^|\/)(node_modules|vendor|dist|build|coverage)(\/|$)/;
  return vendorSegment.test(path);
}
@@ -0,0 +1,47 @@
1
+ import { tokenize } from "../tokenize.mjs";
2
+
3
// BM25 parameters: K1 scales term-frequency saturation, B scales length normalisation.
const K1 = 1.2;
const B = 0.75;

/**
 * In-memory BM25 index over chunks. Each chunk becomes one document built from
 * its path, symbols and content.
 */
export class Bm25Index {
  /**
   * @param {object[]} chunks Chunks to index; their order is kept.
   */
  constructor(chunks) {
    this.chunks = chunks;
    this.documents = chunks.map((chunk) => buildDocument(chunk));

    let totalLength = 0;
    for (const doc of this.documents) totalLength += doc.length;
    this.averageLength = totalLength / Math.max(1, this.documents.length);

    // Number of documents each token occurs in.
    this.documentFrequency = new Map();
    for (const { uniqueTokens } of this.documents) {
      for (const token of uniqueTokens) {
        const seenIn = this.documentFrequency.get(token) ?? 0;
        this.documentFrequency.set(token, seenIn + 1);
      }
    }
  }

  /**
   * Scores every document against the query and returns the best matches.
   *
   * @param {string} query
   * @param {{limit?: number}} [options] Maximum number of results (default 50).
   * @returns {Array<{chunk: object, score: number}>} Documents with a positive score, best first; ties ordered by file path.
   */
  search(query, { limit = 50 } = {}) {
    const queryTokens = [...new Set(tokenize(query))];
    if (queryTokens.length === 0) return [];
    const scored = this.documents
      .map((doc, position) => ({ chunk: this.chunks[position], score: this.scoreDocument(doc, queryTokens) }))
      .filter(({ score }) => score > 0);
    scored.sort((a, b) => b.score - a.score || a.chunk.file_path.localeCompare(b.chunk.file_path));
    return scored.slice(0, limit);
  }

  /**
   * Computes the BM25 score of one document for the given query tokens.
   *
   * @param {{termFrequency: Map<string, number>, length: number}} doc
   * @param {string[]} queryTokens Distinct query tokens.
   * @returns {number} 0 when no query token occurs in the document.
   */
  scoreDocument(doc, queryTokens) {
    const documentCount = this.documents.length;
    // The length normalisation depends only on the document, not on the token.
    const lengthNorm = K1 * (1 - B + B * doc.length / Math.max(1, this.averageLength));
    let score = 0;
    for (const token of queryTokens) {
      const frequency = doc.termFrequency.get(token) ?? 0;
      if (frequency === 0) continue;
      const containing = this.documentFrequency.get(token) ?? 0;
      const idf = Math.log(1 + (documentCount - containing + 0.5) / (containing + 0.5));
      const denominator = frequency + lengthNorm;
      score += idf * (frequency * (K1 + 1)) / denominator;
    }
    return score;
  }
}
41
+
42
/**
 * Tokenises a chunk's path, symbols and content into a BM25 document.
 *
 * @param {{file_path: string, symbols: string[], content: string}} chunk
 * @returns {{termFrequency: Map<string, number>, uniqueTokens: Set<string>, length: number}}
 *   Token counts, the distinct tokens, and the token count (at least 1).
 */
function buildDocument(chunk) {
  const tokens = tokenize(`${chunk.file_path} ${chunk.symbols.join(" ")} ${chunk.content}`);
  const termFrequency = tokens.reduce(
    (counts, token) => counts.set(token, (counts.get(token) ?? 0) + 1),
    new Map(),
  );
  const length = Math.max(1, tokens.length);
  return { termFrequency, uniqueTokens: new Set(tokens), length };
}
@@ -0,0 +1,18 @@
1
// Constant added to every rank before taking the reciprocal.
const RRF_K = 60;

/**
 * Merges several ranked result lists with weighted reciprocal-rank fusion.
 *
 * A chunk at 1-based rank r in a list with weight w receives w / (RRF_K + r);
 * contributions from all lists are summed per chunk id.
 *
 * @param {Array<{results: Array<{chunk: {id: string, file_path: string}, score: number}>, weight?: number}>} resultSets
 *   Ranked lists, best first; `weight` defaults to 1.
 * @param {{limit?: number}} [options] Maximum number of fused results (default 80).
 * @returns {Array<{chunk: object, score: number, sources: Array<{rank: number, score: number}>}>}
 *   Fused results, best first; ties are ordered by file path.
 */
export function rrfFuse(resultSets, { limit = 80 } = {}) {
  const byChunkId = new Map();
  for (const { results, weight = 1 } of resultSets) {
    results.forEach((result, position) => {
      const rank = position + 1;
      const id = result.chunk.id;
      let entry = byChunkId.get(id);
      if (!entry) {
        entry = { chunk: result.chunk, score: 0, sources: [] };
        byChunkId.set(id, entry);
      }
      entry.score += weight / (RRF_K + rank);
      entry.sources.push({ rank, score: result.score });
    });
  }
  const fused = [...byChunkId.values()];
  fused.sort((a, b) => b.score - a.score || a.chunk.file_path.localeCompare(b.chunk.file_path));
  return fused.slice(0, limit);
}