bluera-knowledge 0.12.11 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/rules/code-quality.md +12 -0
- package/.claude/rules/git.md +5 -0
- package/.claude/rules/versioning.md +7 -0
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +2 -0
- package/CLAUDE.md +5 -13
- package/README.md +11 -2
- package/commands/crawl.md +2 -1
- package/commands/test-plugin.md +197 -72
- package/dist/{chunk-7DZZHYDU.js → chunk-6ZVW2P2F.js} +66 -38
- package/dist/chunk-6ZVW2P2F.js.map +1 -0
- package/dist/{chunk-S5VW7NPH.js → chunk-GCUKVV33.js} +2 -2
- package/dist/{chunk-XVVMSRLO.js → chunk-H5AKKHY7.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/docs/claude-code-best-practices.md +458 -0
- package/eslint.config.js +1 -1
- package/hooks/check-dependencies.sh +18 -1
- package/hooks/hooks.json +2 -2
- package/hooks/posttooluse-bk-reminder.py +30 -2
- package/package.json +1 -1
- package/scripts/test-mcp-dev.js +260 -0
- package/src/services/index.service.test.ts +347 -0
- package/src/services/index.service.ts +93 -44
- package/tests/integration/cli-consistency.test.ts +3 -2
- package/dist/chunk-7DZZHYDU.js.map +0 -1
- package/docs/plans/2024-12-17-ai-search-quality-implementation.md +0 -752
- package/docs/plans/2024-12-17-ai-search-quality-testing-design.md +0 -201
- package/docs/plans/2025-12-16-bluera-knowledge-cli.md +0 -2951
- package/docs/plans/2025-12-16-phase2-features.md +0 -1518
- package/docs/plans/2025-12-17-hil-implementation.md +0 -926
- package/docs/plans/2025-12-17-hil-quality-testing.md +0 -224
- package/docs/plans/2025-12-17-search-quality-phase1-implementation.md +0 -1416
- package/docs/plans/2025-12-17-search-quality-testing-v2-design.md +0 -212
- package/docs/plans/2025-12-28-ai-agent-optimization.md +0 -1630
- /package/dist/{chunk-S5VW7NPH.js.map → chunk-GCUKVV33.js.map} +0 -0
- /package/dist/{chunk-XVVMSRLO.js.map → chunk-H5AKKHY7.js.map} +0 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP Development Test Helper
|
|
4
|
+
*
|
|
5
|
+
* Spawns the MCP server and communicates with it directly via JSON-RPC over stdio.
|
|
6
|
+
* Used by test-plugin --dev to test MCP functionality without needing the plugin installed.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* ./scripts/test-mcp-dev.js call <tool-name> '<json-args>'
|
|
10
|
+
* ./scripts/test-mcp-dev.js call execute '{"command":"help"}'
|
|
11
|
+
* ./scripts/test-mcp-dev.js call search '{"query":"test"}'
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { spawn } from 'node:child_process';
|
|
15
|
+
import { createInterface } from 'node:readline';
|
|
16
|
+
import { dirname, join } from 'node:path';
|
|
17
|
+
import { fileURLToPath } from 'node:url';
|
|
18
|
+
|
|
19
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
20
|
+
const PROJECT_ROOT = join(__dirname, '..');
|
|
21
|
+
|
|
22
|
+
/**
 * Minimal JSON-RPC 2.0 client that spawns the built MCP server
 * (`dist/mcp/server.js`) and talks to it over newline-delimited stdio.
 *
 * Every in-flight request is tracked in `pendingRequests` together with its
 * timeout timer, so the timer can be cancelled as soon as the request settles
 * (otherwise a finished one-shot call would keep the process alive until the
 * timer fires).
 */
class MCPTestClient {
  /**
   * @param {{ requestTimeoutMs?: number }} [options]
   *   `requestTimeoutMs` - how long to wait for a response before rejecting
   *   the request (defaults to 30000).
   */
  constructor({ requestTimeoutMs = 30000 } = {}) {
    this.server = null;
    this.messageId = 0;
    // id -> { resolve, reject, timer }
    this.pendingRequests = new Map();
    this.initialized = false;
    this.requestTimeoutMs = requestTimeoutMs;
  }

  /**
   * Spawns the server process and wires up its stdio streams.
   * Resolves shortly after the process has spawned; rejects if spawning fails.
   */
  async start() {
    return new Promise((resolve, reject) => {
      const serverPath = join(PROJECT_ROOT, 'dist', 'mcp', 'server.js');

      const child = spawn('node', [serverPath], {
        cwd: PROJECT_ROOT,
        env: {
          ...process.env,
          PROJECT_ROOT: PROJECT_ROOT,
          DATA_DIR: '.bluera/bluera-knowledge/data',
          CONFIG_PATH: '.bluera/bluera-knowledge/config.json',
        },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      this.server = child;

      // Server logs arrive on stderr; only surface them when DEBUG is set.
      child.stderr.on('data', (data) => {
        if (process.env.DEBUG) {
          process.stderr.write(`[server] ${data}`);
        }
      });

      // JSON-RPC responses arrive on stdout, one message per line.
      const rl = createInterface({ input: child.stdout });
      rl.on('line', (line) => this.handleLine(line));

      child.on('error', reject);
      child.on('exit', () => {
        // Only react if this is still the active server (stop() clears it and
        // settles pending requests itself).
        if (this.server === child) {
          this.rejectAllPending(new Error('MCP server exited before responding'));
        }
      });
      child.on('spawn', () => {
        // Give server a moment to initialize
        setTimeout(() => resolve(), 100);
      });
    });
  }

  /**
   * Handles one line of server stdout. Lines that are not JSON, or that do not
   * answer a pending request, are ignored.
   * @param {string} line
   */
  handleLine(line) {
    let msg;
    try {
      msg = JSON.parse(line);
    } catch {
      // Not JSON - ignore
      return;
    }
    if (msg === null || typeof msg !== 'object') return;
    if (msg.id === undefined || !this.pendingRequests.has(msg.id)) return;

    const { resolve, reject, timer } = this.pendingRequests.get(msg.id);
    clearTimeout(timer);
    this.pendingRequests.delete(msg.id);

    if (msg.error) {
      reject(new Error(msg.error.message || JSON.stringify(msg.error)));
    } else {
      resolve(msg.result);
    }
  }

  /**
   * Rejects every in-flight request with `error` and cancels its timer.
   * @param {Error} error
   */
  rejectAllPending(error) {
    for (const { reject, timer } of this.pendingRequests.values()) {
      clearTimeout(timer);
      reject(error);
    }
    this.pendingRequests.clear();
  }

  /**
   * Performs the MCP handshake (initialize request + initialized notification).
   * Does nothing if the handshake was already done.
   * @returns the server's `initialize` result on the first call, otherwise undefined.
   */
  async initialize() {
    if (this.initialized) return;

    const initResult = await this.sendRequest('initialize', {
      protocolVersion: '2024-11-05',
      capabilities: {},
      clientInfo: { name: 'test-mcp-dev', version: '1.0.0' },
    });

    this.sendNotification('notifications/initialized', {});

    this.initialized = true;
    return initResult;
  }

  /**
   * Sends a JSON-RPC request and resolves with its `result`.
   * Rejects on an error response, on timeout, when the server exits or the
   * client is stopped, or when the server is not running.
   * @param {string} method
   * @param {object} params
   */
  sendRequest(method, params) {
    return new Promise((resolve, reject) => {
      if (!this.server) {
        reject(new Error(`Cannot send ${method}: server is not running`));
        return;
      }

      const id = ++this.messageId;
      const request = { jsonrpc: '2.0', id, method, params };

      const timer = setTimeout(() => {
        // delete() returns false if the request was already settled.
        if (this.pendingRequests.delete(id)) {
          reject(new Error(`Request timeout: ${method}`));
        }
      }, this.requestTimeoutMs);

      this.pendingRequests.set(id, { resolve, reject, timer });

      this.server.stdin.write(JSON.stringify(request) + '\n');
    });
  }

  /**
   * Sends a JSON-RPC notification (no id, no response expected).
   * @param {string} method
   * @param {object} params
   * @throws {Error} if the server is not running.
   */
  sendNotification(method, params) {
    if (!this.server) {
      throw new Error(`Cannot send ${method}: server is not running`);
    }
    const notification = { jsonrpc: '2.0', method, params };
    this.server.stdin.write(JSON.stringify(notification) + '\n');
  }

  /**
   * Calls an MCP tool via `tools/call`, performing the handshake first if needed.
   * @param {string} name tool name
   * @param {object} args tool arguments
   */
  async callTool(name, args) {
    if (!this.initialized) {
      await this.initialize();
    }

    return this.sendRequest('tools/call', {
      name,
      arguments: args,
    });
  }

  /** Lists the server's tools via `tools/list`, performing the handshake first if needed. */
  async listTools() {
    if (!this.initialized) {
      await this.initialize();
    }

    return this.sendRequest('tools/list', {});
  }

  /**
   * Terminates the server (SIGTERM), rejects any in-flight requests and resets
   * the handshake state so the client can be started again.
   */
  stop() {
    if (this.server) {
      this.server.kill('SIGTERM');
      this.server = null;
    }
    this.initialized = false;
    this.rejectAllPending(new Error('MCP client stopped'));
  }
}
|
|
149
|
+
|
|
150
|
+
/**
 * Session mode: reads one command per line from stdin and runs them all
 * against a single server instance, then prints the collected results as a
 * JSON array on stdout.
 *
 * Line format: `<tool-name> [<json-args>]`. Blank lines and lines starting
 * with `#` are skipped; missing arguments default to `{}`.
 *
 * The literal `$LAST_ID` inside the arguments is replaced with the id of the
 * first result of the most recent call whose text payload contained
 * `results[0].id` (if any such call has happened yet).
 *
 * The server is always stopped, even when a line fails; the failure is then
 * propagated to the caller.
 */
async function sessionMode() {
  const client = new MCPTestClient();
  const results = [];
  let lastResultId = null;

  try {
    await client.start();

    const rl = createInterface({ input: process.stdin });

    for await (const line of rl) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;

      const spaceIndex = trimmed.indexOf(' ');
      const toolName = spaceIndex > 0 ? trimmed.slice(0, spaceIndex) : trimmed;
      let argsStr = spaceIndex > 0 ? trimmed.slice(spaceIndex + 1) : '{}';

      // Substitute $LAST_ID with actual result ID from previous search
      if (lastResultId && argsStr.includes('$LAST_ID')) {
        // JSON-escape the id (minus the surrounding quotes) so it is safe inside
        // a JSON string, and use a replacer function so `$` sequences in the id
        // are not interpreted as replacement patterns.
        const escapedId = JSON.stringify(String(lastResultId)).slice(1, -1);
        argsStr = argsStr.replace(/\$LAST_ID/g, () => escapedId);
      }

      let args;
      try {
        args = JSON.parse(argsStr);
      } catch (error) {
        throw new Error(`Invalid JSON arguments for "${toolName}": ${error.message}`);
      }

      const result = await client.callTool(toolName, args);
      results.push(result);

      // Extract result ID for next call (search results have results[0].id)
      // Search results have a header line before JSON, so find first '{'
      const text = result?.content?.[0]?.text;
      if (text) {
        const jsonStart = text.indexOf('{');
        if (jsonStart >= 0) {
          try {
            const parsed = JSON.parse(text.slice(jsonStart));
            if (parsed.results?.[0]?.id) {
              lastResultId = parsed.results[0].id;
            }
          } catch {
            // Not valid JSON or no results - ignore
          }
        }
      }
    }
  } finally {
    // Never leave the spawned server running, whatever happened above.
    client.stop();
  }

  console.log(JSON.stringify(results, null, 2));
}
|
|
197
|
+
|
|
198
|
+
/**
 * CLI entry point.
 *
 * Commands:
 *   call <tool-name> <json-args>  - call one MCP tool and print its result
 *   session                       - run commands read from stdin (see sessionMode)
 *   list                          - list the server's tools
 *
 * Arguments are validated before the server is spawned. On failure the error
 * is printed to stderr as `Error: <message>`, the server is stopped, and the
 * process exits with code 1.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length < 1) {
    console.error('Usage: test-mcp-dev.js <command> [args...]');
    console.error('Commands:');
    console.error(' call <tool-name> <json-args> - Call an MCP tool (one-shot)');
    console.error(' session - Read commands from stdin (persistent server)');
    console.error(' list - List available tools');
    console.error('');
    console.error('Examples:');
    console.error(' ./scripts/test-mcp-dev.js call execute \'{"command":"help"}\'');
    console.error(' ./scripts/test-mcp-dev.js call search \'{"query":"test"}\'');
    console.error(' ./scripts/test-mcp-dev.js list');
    console.error('');
    console.error('Session mode (maintains cache across calls):');
    console.error(' echo -e \'search {"query":"test"}\\nget_full_context {"resultId":"$LAST_ID"}\' | ./scripts/test-mcp-dev.js session');
    process.exit(1);
  }

  const command = args[0];
  const describe = (error) => (error instanceof Error ? error.message : String(error));

  // Session mode handles its own client lifecycle
  if (command === 'session') {
    try {
      await sessionMode();
    } catch (error) {
      console.error(`Error: ${describe(error)}`);
      process.exit(1);
    }
    return;
  }

  // Validate everything up front so no server is spawned for a bad invocation.
  let toolName;
  let toolArgs;
  if (command === 'call') {
    if (args.length < 3) {
      console.error('Usage: test-mcp-dev.js call <tool-name> <json-args>');
      process.exit(1);
    }
    toolName = args[1];
    try {
      toolArgs = JSON.parse(args[2]);
    } catch (error) {
      console.error(`Error: ${describe(error)}`);
      process.exit(1);
    }
  } else if (command !== 'list') {
    console.error(`Unknown command: ${command}`);
    process.exit(1);
  }

  const client = new MCPTestClient();
  let failed = false;

  try {
    await client.start();

    const result =
      command === 'call' ? await client.callTool(toolName, toolArgs) : await client.listTools();

    // Output result as JSON for parsing by test-plugin
    console.log(JSON.stringify(result, null, 2));
  } catch (error) {
    console.error(`Error: ${describe(error)}`);
    failed = true;
  } finally {
    client.stop();
  }

  // Exit only after the finally block has stopped the server; calling
  // process.exit() inside the try/catch would skip it.
  if (failed) {
    process.exit(1);
  }
}

main().catch((error) => {
  console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
  process.exit(1);
});
|
|
@@ -1952,3 +1952,350 @@ describe('IndexService - Symlink Handling', () => {
|
|
|
1952
1952
|
// (on most systems, readdir with withFileTypes shows symlinks as isFile() if target is file)
|
|
1953
1953
|
});
|
|
1954
1954
|
});
|
|
1955
|
+
|
|
1956
|
+
/**
 * Verifies that indexing goes through `EmbeddingEngine.embedBatch`:
 * multi-chunk files, chunk ordering, single-chunk files and multiple files.
 *
 * All tests share one embedding engine, so every spy is restored in a
 * `finally` block; otherwise a failed assertion would leak the spy (and its
 * recorded calls) into the following tests.
 */
describe('IndexService - Batch Embedding', () => {
  let indexService: IndexService;
  let lanceStore: LanceStore;
  let embeddingEngine: EmbeddingEngine;
  let tempDir: string;
  let testFilesDir: string;
  const storeId = createStoreId('batch-embed-test');

  /** Builds a file store rooted at the (per-test) files directory. */
  const makeStore = (id: FileStore['id'], name: string): FileStore => ({
    type: 'file',
    id,
    name,
    path: testFilesDir,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  beforeAll(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'index-batch-embed-test-'));
    testFilesDir = join(tempDir, 'files');
    await mkdir(testFilesDir, { recursive: true });

    lanceStore = new LanceStore(tempDir);
    embeddingEngine = new EmbeddingEngine();

    await embeddingEngine.initialize();
    await lanceStore.initialize(storeId);

    indexService = new IndexService(lanceStore, embeddingEngine);
  }, 120000);

  afterAll(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  beforeEach(async () => {
    // Clear test directory for fresh state
    await rm(testFilesDir, { recursive: true, force: true });
    await mkdir(testFilesDir, { recursive: true });
  });

  it('calls embedBatch instead of sequential embed for multiple chunks', async () => {
    // Create a file large enough to produce multiple chunks (>1500 chars)
    const largeContent = Array(50)
      .fill('This is a paragraph of text that will be chunked. ')
      .join('\n\n');
    await writeFile(join(testFilesDir, 'large.md'), largeContent);

    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
    try {
      const result = await indexService.indexStore(makeStore(storeId, 'Batch Embed Test Store'));

      expect(result.success).toBe(true);
      if (result.success) {
        // Should have created multiple chunks
        expect(result.data.chunksCreated).toBeGreaterThan(1);
      }

      // embedBatch should be called (it internally uses embed via Promise.all)
      expect(embedBatchSpy).toHaveBeenCalled();
      // Verify batch was called with multiple items
      const callArgs = embedBatchSpy.mock.calls[0];
      expect(callArgs).toBeDefined();
      expect(callArgs[0].length).toBeGreaterThan(1);
    } finally {
      embedBatchSpy.mockRestore();
    }
  });

  it('preserves chunk order when using batch embedding', async () => {
    // Create file with distinct, ordered sections that will produce multiple chunks
    const sections = Array(10)
      .fill(null)
      .map((_, i) => `# Section ${String(i + 1)}\n\n${'Content for section. '.repeat(50)}`)
      .join('\n\n');

    await writeFile(join(testFilesDir, 'ordered.md'), sections);

    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
    try {
      const result = await indexService.indexStore(makeStore(storeId, 'Order Test Store'));

      expect(result.success).toBe(true);
      if (result.success) {
        expect(result.data.chunksCreated).toBeGreaterThan(1);
      }

      // embedBatch should be called with chunks in order
      expect(embedBatchSpy).toHaveBeenCalled();
      const callArgs = embedBatchSpy.mock.calls[0];
      expect(callArgs).toBeDefined();

      // Verify that if content has "Section 1", it comes before "Section 2" in the array
      const batchedTexts = callArgs[0];
      const section1Index = batchedTexts.findIndex((t: string) => t.includes('Section 1'));
      const section2Index = batchedTexts.findIndex((t: string) => t.includes('Section 2'));

      // The check only applies when both markers were found in this batch.
      if (section1Index !== -1 && section2Index !== -1) {
        expect(section1Index).toBeLessThan(section2Index);
      }
    } finally {
      embedBatchSpy.mockRestore();
    }
  });

  it('handles single-chunk files correctly', async () => {
    // Create a small file that won't be chunked
    await writeFile(join(testFilesDir, 'small.ts'), 'export const x = 42;');

    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
    try {
      const result = await indexService.indexStore(makeStore(storeId, 'Single Chunk Test Store'));

      expect(result.success).toBe(true);
      if (result.success) {
        // Should have exactly one chunk
        expect(result.data.chunksCreated).toBe(1);
      }

      // embedBatch should still be called (with a single item)
      expect(embedBatchSpy).toHaveBeenCalled();
    } finally {
      embedBatchSpy.mockRestore();
    }
  });

  it('handles multiple files with batch embedding', async () => {
    // Create multiple files to verify batch embedding works across files
    await writeFile(join(testFilesDir, 'file1.ts'), 'export const a = 1;');
    await writeFile(join(testFilesDir, 'file2.ts'), 'export const b = 2;');
    await writeFile(join(testFilesDir, 'file3.ts'), 'export const c = 3;');

    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
    try {
      const multiStoreId = createStoreId('multi-file-test');
      await lanceStore.initialize(multiStoreId);

      const multiIndexService = new IndexService(lanceStore, embeddingEngine);

      const result = await multiIndexService.indexStore(
        makeStore(multiStoreId, 'Multi File Test Store')
      );

      expect(result.success).toBe(true);
      if (result.success) {
        // Should index all 3 files
        expect(result.data.documentsIndexed).toBe(3);
      }

      // embedBatch should be called once per file (3 times)
      expect(embedBatchSpy).toHaveBeenCalledTimes(3);
    } finally {
      embedBatchSpy.mockRestore();
    }
  });
});
|
|
2135
|
+
|
|
2136
|
+
/**
 * Exercises `IndexService` constructed with a `concurrency` option:
 * bounded parallelism, progress reporting, and behavior when one file
 * cannot be read.
 *
 * Mocks and file permissions are restored in `finally` blocks so a failing
 * assertion (or a throwing `indexStore`) cannot leak state into later tests.
 */
describe('IndexService - Parallel File Processing', () => {
  let lanceStore: LanceStore;
  let embeddingEngine: EmbeddingEngine;
  let tempDir: string;
  let testFilesDir: string;
  const storeId = createStoreId('parallel-test');

  /** Builds a file store rooted at the (per-test) files directory. */
  const makeStore = (id: FileStore['id'], name: string): FileStore => ({
    type: 'file',
    id,
    name,
    path: testFilesDir,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  beforeAll(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'index-parallel-test-'));
    testFilesDir = join(tempDir, 'files');
    await mkdir(testFilesDir, { recursive: true });

    lanceStore = new LanceStore(tempDir);
    embeddingEngine = new EmbeddingEngine();

    await embeddingEngine.initialize();
    await lanceStore.initialize(storeId);
  }, 120000);

  afterAll(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  beforeEach(async () => {
    // Clear test directory for fresh state
    await rm(testFilesDir, { recursive: true, force: true });
    await mkdir(testFilesDir, { recursive: true });
  });

  it('uses concurrency option from IndexService constructor', async () => {
    // Create 10 test files
    for (let i = 0; i < 10; i++) {
      await writeFile(
        join(testFilesDir, `file${String(i)}.ts`),
        `export const x${String(i)} = ${String(i)};`
      );
    }

    // Track how many embedBatch calls overlap so the concurrency bound can be
    // asserted. NOTE(review): assumes each file triggers its embedBatch call
    // inside the concurrency-limited section of indexStore - confirm against
    // the IndexService implementation.
    let inFlight = 0;
    let peakInFlight = 0;
    const originalEmbedBatch = embeddingEngine.embedBatch.bind(embeddingEngine);

    vi.spyOn(embeddingEngine, 'embedBatch').mockImplementation(async (texts: string[]) => {
      inFlight += 1;
      peakInFlight = Math.max(peakInFlight, inFlight);
      try {
        // Small delay to simulate processing time
        await new Promise((resolve) => setTimeout(resolve, 50));
        return await originalEmbedBatch(texts);
      } finally {
        inFlight -= 1;
      }
    });

    try {
      const concurrency = 4;
      const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });

      const parallelStoreId = createStoreId('parallel-concurrency-test');
      await lanceStore.initialize(parallelStoreId);

      const result = await indexService.indexStore(
        makeStore(parallelStoreId, 'Parallel Test Store')
      );

      expect(result.success).toBe(true);
      if (result.success) {
        expect(result.data.documentsIndexed).toBe(10);
      }

      // Embedding must have run, and never with more overlap than configured.
      expect(peakInFlight).toBeGreaterThan(0);
      expect(peakInFlight).toBeLessThanOrEqual(concurrency);
    } finally {
      vi.restoreAllMocks();
    }
  });

  it('reports progress correctly with parallel processing', async () => {
    // Create test files
    for (let i = 0; i < 5; i++) {
      await writeFile(
        join(testFilesDir, `progress${String(i)}.ts`),
        `export const p${String(i)} = ${String(i)};`
      );
    }

    const concurrency = 2;
    const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });

    const progressStoreId = createStoreId('progress-test');
    await lanceStore.initialize(progressStoreId);

    const progressEvents: Array<{ type: string; current: number; total: number }> = [];
    const onProgress = (event: { type: string; current: number; total: number }): void => {
      progressEvents.push(event);
    };

    const result = await indexService.indexStore(
      makeStore(progressStoreId, 'Progress Test Store'),
      onProgress
    );

    expect(result.success).toBe(true);

    // Should have start event
    expect(progressEvents.some((e) => e.type === 'start')).toBe(true);

    // Should have progress events
    const progressOnly = progressEvents.filter((e) => e.type === 'progress');
    expect(progressOnly.length).toBeGreaterThan(0);

    // Should have complete event
    expect(progressEvents.some((e) => e.type === 'complete')).toBe(true);

    // Current should never exceed total
    for (const event of progressEvents) {
      expect(event.current).toBeLessThanOrEqual(event.total);
    }
  });

  it('continues processing remaining files if one file fails to read', async () => {
    // Create valid test files
    await writeFile(join(testFilesDir, 'valid1.ts'), 'export const a = 1;');
    await writeFile(join(testFilesDir, 'valid2.ts'), 'export const b = 2;');
    await writeFile(join(testFilesDir, 'valid3.ts'), 'export const c = 3;');

    // Create a file that will fail to read (remove read permission)
    const unreadablePath = join(testFilesDir, 'unreadable.ts');
    await writeFile(unreadablePath, 'export const x = 999;');
    await chmod(unreadablePath, 0o000);

    try {
      const concurrency = 2;
      const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });

      const errorStoreId = createStoreId('error-handling-test');
      await lanceStore.initialize(errorStoreId);

      // The indexing should either succeed with partial results or fail gracefully
      const result = await indexService.indexStore(
        makeStore(errorStoreId, 'Error Handling Test Store')
      );

      // With current implementation, it may fail completely on the first error
      // This test documents the current behavior
      if (result.success) {
        // Documents current behavior only: any non-negative count is accepted.
        expect(result.data.documentsIndexed).toBeGreaterThanOrEqual(0);
      } else {
        // If it fails, it should have an error
        expect(result.error).toBeDefined();
      }
    } finally {
      // Restore permissions for cleanup, even if indexing threw or an
      // assertion failed.
      await chmod(unreadablePath, 0o644);
    }
  });
});
|