codebasesearch 0.1.24 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp.js +6 -47
- package/package.json +1 -1
- package/src/ignore-parser.js +2 -0
- package/src/search-worker.js +20 -81
- package/src/text-search.js +80 -27
- package/.prd +0 -78
package/mcp.js
CHANGED
|
@@ -22,20 +22,9 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
22
22
|
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
23
23
|
import { cwd } from 'process';
|
|
24
24
|
import { join } from 'path';
|
|
25
|
-
import { existsSync, readFileSync, appendFileSync, writeFileSync
|
|
26
|
-
import { homedir } from 'os';
|
|
25
|
+
import { existsSync, readFileSync, appendFileSync, writeFileSync } from 'fs';
|
|
27
26
|
import { supervisor } from './src/supervisor.js';
|
|
28
27
|
|
|
29
|
-
const WORKSPACE_PATH = join(homedir(), 'workspace');
|
|
30
|
-
|
|
31
|
-
function getWorkspaceFolders() {
|
|
32
|
-
try {
|
|
33
|
-
return readdirSync(WORKSPACE_PATH, { withFileTypes: true })
|
|
34
|
-
.filter(e => e.isDirectory() && !e.name.startsWith('.'))
|
|
35
|
-
.map(e => join(WORKSPACE_PATH, e.name));
|
|
36
|
-
} catch { return []; }
|
|
37
|
-
}
|
|
38
|
-
|
|
39
28
|
function ensureIgnoreEntry(rootPath) {
|
|
40
29
|
const gitignorePath = join(rootPath, '.gitignore');
|
|
41
30
|
const entry = '.code-search/';
|
|
@@ -49,10 +38,10 @@ function ensureIgnoreEntry(rootPath) {
|
|
|
49
38
|
} catch (e) {}
|
|
50
39
|
}
|
|
51
40
|
|
|
52
|
-
function formatResults(result, query
|
|
53
|
-
if (result.resultsCount === 0) return `No results found
|
|
41
|
+
function formatResults(result, query) {
|
|
42
|
+
if (result.resultsCount === 0) return `No results found for: "${query}"`;
|
|
54
43
|
const plural = result.resultsCount !== 1 ? 's' : '';
|
|
55
|
-
const header = `Found ${result.resultsCount} result${plural}
|
|
44
|
+
const header = `Found ${result.resultsCount} result${plural} for: "${query}"\n\n`;
|
|
56
45
|
const body = result.results.map((r) => {
|
|
57
46
|
const pathPart = r.relativePath || r.absolutePath;
|
|
58
47
|
const lineCount = r.totalLines ? ` [${r.totalLines}L]` : '';
|
|
@@ -88,18 +77,6 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
88
77
|
required: ['query'],
|
|
89
78
|
},
|
|
90
79
|
},
|
|
91
|
-
{
|
|
92
|
-
name: 'search_workspace',
|
|
93
|
-
description: 'Search across ALL repositories in ~/workspace simultaneously. Returns ranked results with repo name prefix.',
|
|
94
|
-
inputSchema: {
|
|
95
|
-
type: 'object',
|
|
96
|
-
properties: {
|
|
97
|
-
query: { type: 'string', description: 'Natural language search query' },
|
|
98
|
-
limit: { type: 'number', description: 'Max results to return (default: 10)' },
|
|
99
|
-
},
|
|
100
|
-
required: ['query'],
|
|
101
|
-
},
|
|
102
|
-
},
|
|
103
80
|
],
|
|
104
81
|
}));
|
|
105
82
|
|
|
@@ -107,26 +84,15 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
107
84
|
const { name, arguments: args } = request.params;
|
|
108
85
|
const query = args?.query;
|
|
109
86
|
|
|
110
|
-
if (
|
|
87
|
+
if (name !== 'search') return errResponse(`Unknown tool: ${name}`);
|
|
111
88
|
if (!query || typeof query !== 'string') return errResponse('Error: query is required and must be a string');
|
|
112
89
|
|
|
113
90
|
try {
|
|
114
|
-
if (name === 'search_workspace') {
|
|
115
|
-
const result = await supervisor.sendRequest({
|
|
116
|
-
type: 'search-all',
|
|
117
|
-
query,
|
|
118
|
-
workspacePaths: getWorkspaceFolders(),
|
|
119
|
-
limit: args?.limit || 10,
|
|
120
|
-
});
|
|
121
|
-
if (result.error) return errResponse(`Error: ${result.error}`);
|
|
122
|
-
return okResponse(formatResults(result, query, 'workspace'));
|
|
123
|
-
}
|
|
124
|
-
|
|
125
91
|
const repositoryPath = args?.repository_path || cwd();
|
|
126
92
|
ensureIgnoreEntry(repositoryPath);
|
|
127
93
|
const result = await supervisor.sendRequest({ type: 'search', query, repositoryPath });
|
|
128
94
|
if (result.error) return errResponse(`Error: ${result.error}`);
|
|
129
|
-
return okResponse(formatResults(result, query
|
|
95
|
+
return okResponse(formatResults(result, query));
|
|
130
96
|
} catch (error) {
|
|
131
97
|
return errResponse(`Error: ${error.message}`);
|
|
132
98
|
}
|
|
@@ -135,13 +101,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
135
101
|
export async function startMcpServer() {
|
|
136
102
|
const transport = new StdioServerTransport();
|
|
137
103
|
await server.connect(transport);
|
|
138
|
-
|
|
139
|
-
const workspacePaths = getWorkspaceFolders();
|
|
140
|
-
if (workspacePaths.length > 0) {
|
|
141
|
-
supervisor.sendRequest({ type: 'index-all', workspacePaths })
|
|
142
|
-
.then(r => console.error(`[MCP] Pre-indexed workspace: ${r.message || JSON.stringify(r)}`))
|
|
143
|
-
.catch(e => console.error(`[MCP] Pre-index warning: ${e.message}`));
|
|
144
|
-
}
|
|
145
104
|
}
|
|
146
105
|
|
|
147
106
|
const isMain = process.argv[1] && (
|
package/package.json
CHANGED
package/src/ignore-parser.js
CHANGED
|
@@ -197,6 +197,8 @@ const IGNORED_DIRECTORIES = new Set([
|
|
|
197
197
|
'node_modules', 'bower_components', 'jspm_packages', 'web_modules',
|
|
198
198
|
// Version control
|
|
199
199
|
'.git', '.svn', '.hg', '.bzr',
|
|
200
|
+
// Tool config (AI assistants, editors)
|
|
201
|
+
'.claude', '.cursor', '.aider',
|
|
200
202
|
// IDE
|
|
201
203
|
'.vscode', '.idea', '.vs', '.atom',
|
|
202
204
|
// Build outputs (unambiguous names only)
|
package/src/search-worker.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { parentPort } from 'worker_threads';
|
|
2
2
|
import { resolve, relative } from 'path';
|
|
3
|
-
import { existsSync, readFileSync,
|
|
3
|
+
import { existsSync, readFileSync, statSync } from 'fs';
|
|
4
4
|
import { loadIgnorePatterns } from './ignore-parser.js';
|
|
5
5
|
import { scanRepository } from './scanner.js';
|
|
6
6
|
import { buildTextIndex, searchText } from './text-search.js';
|
|
@@ -21,26 +21,22 @@ function findEnclosingContext(content, lineStart) {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
function getFileTotalLines(absoluteFilePath) {
|
|
24
|
+
if (fileLineCountCache.has(absoluteFilePath)) {
|
|
25
|
+
return fileLineCountCache.get(absoluteFilePath);
|
|
26
|
+
}
|
|
24
27
|
try {
|
|
25
28
|
const content = readFileSync(absoluteFilePath, 'utf8');
|
|
26
|
-
|
|
29
|
+
const count = content.split('\n').length;
|
|
30
|
+
fileLineCountCache.set(absoluteFilePath, count);
|
|
31
|
+
return count;
|
|
27
32
|
} catch {
|
|
28
33
|
return null;
|
|
29
34
|
}
|
|
30
35
|
}
|
|
31
36
|
|
|
32
37
|
let indexCache = new Map();
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
try {
|
|
36
|
-
const entries = readdirSync(workspacePath, { withFileTypes: true });
|
|
37
|
-
return entries
|
|
38
|
-
.filter(e => e.isDirectory() && !e.name.startsWith('.'))
|
|
39
|
-
.map(e => resolve(workspacePath, e.name));
|
|
40
|
-
} catch {
|
|
41
|
-
return [];
|
|
42
|
-
}
|
|
43
|
-
}
|
|
38
|
+
// Cache file line counts to avoid repeated disk reads on every search
|
|
39
|
+
const fileLineCountCache = new Map();
|
|
44
40
|
|
|
45
41
|
async function initializeIndex(repositoryPath) {
|
|
46
42
|
const absolutePath = resolve(repositoryPath);
|
|
@@ -87,7 +83,17 @@ async function performSearch(repositoryPath, query) {
|
|
|
87
83
|
return { error: indexData.error, results: [] };
|
|
88
84
|
}
|
|
89
85
|
|
|
90
|
-
const
|
|
86
|
+
const rawResults = searchText(query, indexData.chunks, indexData.indexData);
|
|
87
|
+
|
|
88
|
+
// Deduplicate: keep best-scoring chunk per file, then take top results
|
|
89
|
+
const bestPerFile = new Map();
|
|
90
|
+
for (const r of rawResults) {
|
|
91
|
+
const existing = bestPerFile.get(r.file_path);
|
|
92
|
+
if (!existing || r.score > existing.score) {
|
|
93
|
+
bestPerFile.set(r.file_path, r);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
const results = Array.from(bestPerFile.values()).sort((a, b) => b.score - a.score);
|
|
91
97
|
|
|
92
98
|
return {
|
|
93
99
|
query,
|
|
@@ -115,53 +121,6 @@ async function performSearch(repositoryPath, query) {
|
|
|
115
121
|
}
|
|
116
122
|
}
|
|
117
123
|
|
|
118
|
-
async function performSearchAll(workspacePaths, query, limit = 10) {
|
|
119
|
-
const allResults = [];
|
|
120
|
-
|
|
121
|
-
for (const repoPath of workspacePaths) {
|
|
122
|
-
const absolutePath = resolve(repoPath);
|
|
123
|
-
if (!existsSync(absolutePath)) continue;
|
|
124
|
-
|
|
125
|
-
const indexData = await initializeIndex(absolutePath);
|
|
126
|
-
if (indexData.error || !indexData.chunks) continue;
|
|
127
|
-
|
|
128
|
-
const results = searchText(query, indexData.chunks, indexData.indexData);
|
|
129
|
-
const repoName = absolutePath.split('/').pop();
|
|
130
|
-
|
|
131
|
-
const seenFiles = new Set();
|
|
132
|
-
for (const r of results) {
|
|
133
|
-
if (!seenFiles.has(r.file_path)) {
|
|
134
|
-
seenFiles.add(r.file_path);
|
|
135
|
-
allResults.push({ ...r, repoName, repoPath: absolutePath });
|
|
136
|
-
}
|
|
137
|
-
if (seenFiles.size >= limit) break;
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
allResults.sort((a, b) => b.score - a.score);
|
|
142
|
-
const top = allResults.slice(0, limit);
|
|
143
|
-
|
|
144
|
-
return {
|
|
145
|
-
query,
|
|
146
|
-
resultsCount: top.length,
|
|
147
|
-
results: top.map((r, idx) => {
|
|
148
|
-
const absoluteFilePath = resolve(r.repoPath, r.file_path);
|
|
149
|
-
const totalLines = getFileTotalLines(absoluteFilePath);
|
|
150
|
-
const enclosingContext = findEnclosingContext(r.content, r.line_start);
|
|
151
|
-
return {
|
|
152
|
-
rank: idx + 1,
|
|
153
|
-
absolutePath: absoluteFilePath,
|
|
154
|
-
relativePath: `${r.repoName}/${r.file_path}`,
|
|
155
|
-
lines: `${r.line_start}-${r.line_end}`,
|
|
156
|
-
totalLines,
|
|
157
|
-
enclosingContext,
|
|
158
|
-
score: (r.score * 100).toFixed(1),
|
|
159
|
-
snippet: r.content.split('\n').slice(0, 30).join('\n'),
|
|
160
|
-
};
|
|
161
|
-
}),
|
|
162
|
-
};
|
|
163
|
-
}
|
|
164
|
-
|
|
165
124
|
if (parentPort) {
|
|
166
125
|
parentPort.on('message', async (msg) => {
|
|
167
126
|
try {
|
|
@@ -170,26 +129,6 @@ if (parentPort) {
|
|
|
170
129
|
return;
|
|
171
130
|
}
|
|
172
131
|
|
|
173
|
-
if (msg.type === 'index-all') {
|
|
174
|
-
const folders = msg.workspacePaths || getWorkspaceFolders(msg.workspacePath || '');
|
|
175
|
-
let indexed = 0;
|
|
176
|
-
for (const folder of folders) {
|
|
177
|
-
if (existsSync(folder)) {
|
|
178
|
-
await initializeIndex(folder);
|
|
179
|
-
indexed++;
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
parentPort.postMessage({ id: msg.id, result: { indexed, message: `Indexed ${indexed} repositories` } });
|
|
183
|
-
return;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
if (msg.type === 'search-all') {
|
|
187
|
-
const folders = msg.workspacePaths || getWorkspaceFolders(msg.workspacePath || '');
|
|
188
|
-
const result = await performSearchAll(folders, msg.query, msg.limit || 10);
|
|
189
|
-
parentPort.postMessage({ id: msg.id, result });
|
|
190
|
-
return;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
132
|
if (msg.type === 'search') {
|
|
194
133
|
const result = await performSearch(msg.repositoryPath || process.cwd(), msg.query);
|
|
195
134
|
parentPort.postMessage({ id: msg.id, result });
|
package/src/text-search.js
CHANGED
|
@@ -1,35 +1,71 @@
|
|
|
1
1
|
export function buildTextIndex(chunks) {
|
|
2
2
|
const index = new Map();
|
|
3
|
-
const chunkMetadata =
|
|
4
|
-
|
|
5
|
-
chunks.
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
10
|
-
|
|
11
|
-
tokens.forEach(token => {
|
|
12
|
-
frequency.set(token, (frequency.get(token) || 0) + 1);
|
|
13
|
-
if (!index.has(token)) {
|
|
14
|
-
index.set(token, new Set());
|
|
15
|
-
}
|
|
16
|
-
index.get(token).add(idx);
|
|
17
|
-
});
|
|
3
|
+
const chunkMetadata = new Array(chunks.length);
|
|
4
|
+
|
|
5
|
+
for (let idx = 0; idx < chunks.length; idx++) {
|
|
6
|
+
const chunk = chunks[idx];
|
|
7
|
+
const frequency = tokenizeToFrequency(chunk.content, index, idx);
|
|
8
|
+
const fileNameTokens = new Set(tokenize(chunk.file_path));
|
|
9
|
+
const symbols = new Set(extractSymbols(chunk.content));
|
|
18
10
|
|
|
19
11
|
chunkMetadata[idx] = {
|
|
20
|
-
tokens,
|
|
21
12
|
fileNameTokens,
|
|
22
13
|
symbols,
|
|
23
14
|
frequency,
|
|
24
15
|
isCode: isCodeFile(chunk.file_path),
|
|
16
|
+
contentLower: chunk.content.toLowerCase(),
|
|
25
17
|
};
|
|
26
|
-
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Precompute IDF for each token: log((N+1)/(df+1))
|
|
21
|
+
const N = chunks.length;
|
|
22
|
+
const idf = new Map();
|
|
23
|
+
for (const [token, docSet] of index) {
|
|
24
|
+
idf.set(token, Math.log((N + 1) / (docSet.size + 1)) + 1);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return { index, chunkMetadata, idf };
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function tokenizeToFrequency(text, index, chunkIdx) {
|
|
31
|
+
const frequency = new Map();
|
|
32
|
+
|
|
33
|
+
for (const word of text.split(/\s+/)) {
|
|
34
|
+
if (word.length === 0) continue;
|
|
35
|
+
|
|
36
|
+
const hasUpperCase = word !== word.toLowerCase();
|
|
37
|
+
if (hasUpperCase) {
|
|
38
|
+
const camelTokens = word.match(/[A-Z]?[a-z]+|[A-Z]+(?=[A-Z][a-z]|\d|\W|$)|[0-9]+/g);
|
|
39
|
+
if (camelTokens) {
|
|
40
|
+
for (const t of camelTokens) {
|
|
41
|
+
if (t.length > 1) frequency.set(t.toLowerCase(), (frequency.get(t.toLowerCase()) || 0) + 1);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const cleaned = word.replace(/[^\w]/g, '').toLowerCase();
|
|
47
|
+
if (cleaned.length > 1) {
|
|
48
|
+
frequency.set(cleaned, (frequency.get(cleaned) || 0) + 1);
|
|
49
|
+
if (word.includes('-') || word.includes('_') || word.includes('.')) {
|
|
50
|
+
for (const part of word.split(/[-_.]/)) {
|
|
51
|
+
const partCleaned = part.replace(/[^\w]/g, '').toLowerCase();
|
|
52
|
+
if (partCleaned.length > 1 && partCleaned !== cleaned) frequency.set(partCleaned, (frequency.get(partCleaned) || 0) + 1);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
27
57
|
|
|
28
|
-
|
|
58
|
+
for (const token of frequency.keys()) {
|
|
59
|
+
let docSet = index.get(token);
|
|
60
|
+
if (!docSet) { docSet = new Set(); index.set(token, docSet); }
|
|
61
|
+
docSet.add(chunkIdx);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return frequency;
|
|
29
65
|
}
|
|
30
66
|
|
|
31
67
|
export function searchText(query, chunks, indexData) {
|
|
32
|
-
const { index, chunkMetadata } = indexData;
|
|
68
|
+
const { index, chunkMetadata, idf } = indexData;
|
|
33
69
|
const queryTokens = tokenize(query);
|
|
34
70
|
const querySymbols = extractSymbols(query);
|
|
35
71
|
const chunkScores = new Map();
|
|
@@ -47,36 +83,53 @@ export function searchText(query, chunks, indexData) {
|
|
|
47
83
|
}
|
|
48
84
|
});
|
|
49
85
|
|
|
50
|
-
|
|
86
|
+
const queryLower = query.toLowerCase();
|
|
87
|
+
|
|
88
|
+
let scoringCandidates = candidates;
|
|
89
|
+
if (candidates.size > 500) {
|
|
90
|
+
const ranked = Array.from(candidates).sort((a, b) => {
|
|
91
|
+
let aSum = 0, bSum = 0;
|
|
92
|
+
for (const token of queryTokens) {
|
|
93
|
+
if (index.has(token)) {
|
|
94
|
+
if (index.get(token).has(a)) aSum += idf.get(token) || 1;
|
|
95
|
+
if (index.get(token).has(b)) bSum += idf.get(token) || 1;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
return bSum - aSum;
|
|
99
|
+
});
|
|
100
|
+
scoringCandidates = new Set(ranked.slice(0, 500));
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
for (const idx of scoringCandidates) {
|
|
51
104
|
const chunk = chunks[idx];
|
|
52
105
|
const meta = chunkMetadata[idx];
|
|
53
106
|
let score = 0;
|
|
54
107
|
|
|
55
|
-
|
|
56
|
-
if (chunk.content.toLowerCase().includes(query.toLowerCase())) {
|
|
108
|
+
if (queryTokens.length > 1 && meta.contentLower.includes(queryLower)) {
|
|
57
109
|
score += 30;
|
|
58
110
|
}
|
|
59
111
|
|
|
60
112
|
// Symbol match in content - function/class named after query terms
|
|
61
113
|
querySymbols.forEach(symbol => {
|
|
62
|
-
if (meta.symbols.
|
|
114
|
+
if (meta.symbols.has(symbol)) score += 10;
|
|
63
115
|
});
|
|
64
116
|
|
|
65
117
|
// Filename token match - strong signal that this file is about the query topic
|
|
66
118
|
let fileNameMatches = 0;
|
|
67
119
|
queryTokens.forEach(token => {
|
|
68
|
-
if (meta.fileNameTokens.
|
|
120
|
+
if (meta.fileNameTokens.has(token)) fileNameMatches++;
|
|
69
121
|
});
|
|
70
122
|
if (fileNameMatches > 0) {
|
|
71
123
|
score += fileNameMatches * 10;
|
|
72
124
|
}
|
|
73
125
|
|
|
74
|
-
//
|
|
126
|
+
// TF-IDF scoring: reward rare tokens that appear in this chunk
|
|
75
127
|
queryTokens.forEach(token => {
|
|
76
128
|
if (index.has(token) && index.get(token).has(idx)) {
|
|
77
|
-
const
|
|
129
|
+
const tf = Math.min(meta.frequency.get(token) || 1, 5);
|
|
130
|
+
const tokenIdf = idf ? (idf.get(token) || 1) : 1;
|
|
78
131
|
const lengthBoost = token.length > 4 ? 1.5 : 1;
|
|
79
|
-
score += lengthBoost *
|
|
132
|
+
score += lengthBoost * tf * tokenIdf;
|
|
80
133
|
}
|
|
81
134
|
});
|
|
82
135
|
|
package/.prd
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"project": "code-search",
|
|
3
|
-
"created": "2026-03-12",
|
|
4
|
-
"objective": "Profile and improve code-search speed and result quality",
|
|
5
|
-
"items": [
|
|
6
|
-
{
|
|
7
|
-
"id": "fix-dedup-buildtextindex",
|
|
8
|
-
"subject": "Remove duplicate buildTextIndex from search.js",
|
|
9
|
-
"status": "pending",
|
|
10
|
-
"description": "search.js has a private copy of buildTextIndex and tokenize/extractSymbols/isCodeFile that duplicates text-search.js. Import the exported buildTextIndex from text-search.js instead.",
|
|
11
|
-
"category": "refactor",
|
|
12
|
-
"effort": "small",
|
|
13
|
-
"blocking": ["fix-score-normalization", "fix-hybrid-weights"],
|
|
14
|
-
"blockedBy": []
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"id": "fix-chunk-size",
|
|
18
|
-
"subject": "Reduce chunk size from 300 to 60 lines for better semantic granularity",
|
|
19
|
-
"status": "pending",
|
|
20
|
-
"description": "scanner.js uses 300-line chunks. Embeddings work best on 50-100 line chunks. Reduce to 60-line chunks with 15-line overlap for better vector search quality.",
|
|
21
|
-
"category": "feature",
|
|
22
|
-
"effort": "small",
|
|
23
|
-
"blocking": [],
|
|
24
|
-
"blockedBy": []
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
"id": "fix-score-normalization",
|
|
28
|
-
"subject": "Fix text search score normalization so top result is always 1.0",
|
|
29
|
-
"status": "pending",
|
|
30
|
-
"description": "Text scores divide raw by 100 but scores can exceed 100. Use dynamic max-score scaling. Lower hasGoodTextResults threshold from 0.5 to 0.3.",
|
|
31
|
-
"category": "bug",
|
|
32
|
-
"effort": "small",
|
|
33
|
-
"blocking": [],
|
|
34
|
-
"blockedBy": ["fix-dedup-buildtextindex"]
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
"id": "fix-hybrid-weights",
|
|
38
|
-
"subject": "Boost text-only exact-match results in hybrid merge",
|
|
39
|
-
"status": "pending",
|
|
40
|
-
"description": "Text-only results are capped at 20% weight. Give high-scoring text-only results a floor finalScore of 0.4.",
|
|
41
|
-
"category": "feature",
|
|
42
|
-
"effort": "small",
|
|
43
|
-
"blocking": [],
|
|
44
|
-
"blockedBy": ["fix-dedup-buildtextindex"]
|
|
45
|
-
},
|
|
46
|
-
{
|
|
47
|
-
"id": "fix-vector-cache-key",
|
|
48
|
-
"subject": "Strengthen vector search cache key to 20 dimensions",
|
|
49
|
-
"status": "pending",
|
|
50
|
-
"description": "Cache key uses only first 5 embedding dims. Use 20 dims for near-zero collision rate.",
|
|
51
|
-
"category": "bug",
|
|
52
|
-
"effort": "small",
|
|
53
|
-
"blocking": [],
|
|
54
|
-
"blockedBy": []
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"id": "remove-dead-meanpooling",
|
|
58
|
-
"subject": "Remove dead meanPooling function from embeddings.js",
|
|
59
|
-
"status": "pending",
|
|
60
|
-
"description": "meanPooling is defined but never called. Remove dead code.",
|
|
61
|
-
"category": "refactor",
|
|
62
|
-
"effort": "small",
|
|
63
|
-
"blocking": [],
|
|
64
|
-
"blockedBy": []
|
|
65
|
-
},
|
|
66
|
-
{
|
|
67
|
-
"id": "verify-and-commit",
|
|
68
|
-
"subject": "Verify improvements and commit all changes",
|
|
69
|
-
"status": "pending",
|
|
70
|
-
"description": "Run end-to-end search logic test inline. Commit and push all changes.",
|
|
71
|
-
"category": "infra",
|
|
72
|
-
"effort": "small",
|
|
73
|
-
"blocking": [],
|
|
74
|
-
"blockedBy": ["fix-dedup-buildtextindex", "fix-chunk-size", "fix-score-normalization", "fix-hybrid-weights", "fix-vector-cache-key", "remove-dead-meanpooling"]
|
|
75
|
-
}
|
|
76
|
-
],
|
|
77
|
-
"completed": []
|
|
78
|
-
}
|