bluera-knowledge 0.12.11 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/rules/code-quality.md +12 -0
- package/.claude/rules/git.md +5 -0
- package/.claude/rules/versioning.md +7 -0
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +2 -0
- package/CLAUDE.md +5 -13
- package/README.md +11 -2
- package/commands/crawl.md +2 -1
- package/commands/test-plugin.md +197 -72
- package/dist/{chunk-7DZZHYDU.js → chunk-6ZVW2P2F.js} +66 -38
- package/dist/chunk-6ZVW2P2F.js.map +1 -0
- package/dist/{chunk-S5VW7NPH.js → chunk-GCUKVV33.js} +2 -2
- package/dist/{chunk-XVVMSRLO.js → chunk-H5AKKHY7.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/docs/claude-code-best-practices.md +458 -0
- package/eslint.config.js +1 -1
- package/hooks/check-dependencies.sh +18 -1
- package/hooks/hooks.json +2 -2
- package/hooks/posttooluse-bk-reminder.py +30 -2
- package/package.json +1 -1
- package/scripts/test-mcp-dev.js +260 -0
- package/src/services/index.service.test.ts +347 -0
- package/src/services/index.service.ts +93 -44
- package/tests/integration/cli-consistency.test.ts +3 -2
- package/dist/chunk-7DZZHYDU.js.map +0 -1
- package/docs/plans/2024-12-17-ai-search-quality-implementation.md +0 -752
- package/docs/plans/2024-12-17-ai-search-quality-testing-design.md +0 -201
- package/docs/plans/2025-12-16-bluera-knowledge-cli.md +0 -2951
- package/docs/plans/2025-12-16-phase2-features.md +0 -1518
- package/docs/plans/2025-12-17-hil-implementation.md +0 -926
- package/docs/plans/2025-12-17-hil-quality-testing.md +0 -224
- package/docs/plans/2025-12-17-search-quality-phase1-implementation.md +0 -1416
- package/docs/plans/2025-12-17-search-quality-testing-v2-design.md +0 -212
- package/docs/plans/2025-12-28-ai-agent-optimization.md +0 -1630
- /package/dist/{chunk-S5VW7NPH.js.map → chunk-GCUKVV33.js.map} +0 -0
- /package/dist/{chunk-XVVMSRLO.js.map → chunk-H5AKKHY7.js.map} +0 -0
|
@@ -25,6 +25,7 @@ interface IndexOptions {
|
|
|
25
25
|
chunkSize?: number;
|
|
26
26
|
chunkOverlap?: number;
|
|
27
27
|
codeGraphService?: CodeGraphService;
|
|
28
|
+
concurrency?: number;
|
|
28
29
|
}
|
|
29
30
|
|
|
30
31
|
const TEXT_EXTENSIONS = new Set([
|
|
@@ -62,6 +63,7 @@ export class IndexService {
|
|
|
62
63
|
private readonly embeddingEngine: EmbeddingEngine;
|
|
63
64
|
private readonly chunker: ChunkingService;
|
|
64
65
|
private readonly codeGraphService: CodeGraphService | undefined;
|
|
66
|
+
private readonly concurrency: number;
|
|
65
67
|
|
|
66
68
|
constructor(
|
|
67
69
|
lanceStore: LanceStore,
|
|
@@ -75,6 +77,7 @@ export class IndexService {
|
|
|
75
77
|
chunkOverlap: options.chunkOverlap ?? 100,
|
|
76
78
|
});
|
|
77
79
|
this.codeGraphService = options.codeGraphService;
|
|
80
|
+
this.concurrency = options.concurrency ?? 4;
|
|
78
81
|
}
|
|
79
82
|
|
|
80
83
|
async indexStore(store: Store, onProgress?: ProgressCallback): Promise<Result<IndexResult>> {
|
|
@@ -123,6 +126,7 @@ export class IndexService {
|
|
|
123
126
|
storeId: store.id,
|
|
124
127
|
path: store.path,
|
|
125
128
|
fileCount: files.length,
|
|
129
|
+
concurrency: this.concurrency,
|
|
126
130
|
},
|
|
127
131
|
'Files scanned for indexing'
|
|
128
132
|
);
|
|
@@ -138,59 +142,30 @@ export class IndexService {
|
|
|
138
142
|
message: 'Starting index',
|
|
139
143
|
});
|
|
140
144
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
const
|
|
144
|
-
// Pass file path for semantic Markdown chunking
|
|
145
|
-
const chunks = this.chunker.chunk(content, filePath);
|
|
145
|
+
// Process files in parallel batches
|
|
146
|
+
for (let i = 0; i < files.length; i += this.concurrency) {
|
|
147
|
+
const batch = files.slice(i, i + this.concurrency);
|
|
146
148
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
const fileType = this.classifyFileType(ext, fileName, filePath);
|
|
149
|
+
const batchResults = await Promise.all(
|
|
150
|
+
batch.map((filePath) => this.processFile(filePath, store))
|
|
151
|
+
);
|
|
151
152
|
|
|
152
|
-
// Collect
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
// Collect results from batch
|
|
154
|
+
for (const result of batchResults) {
|
|
155
|
+
documents.push(...result.documents);
|
|
156
|
+
if (result.sourceFile !== undefined) {
|
|
157
|
+
sourceFiles.push(result.sourceFile);
|
|
158
|
+
}
|
|
155
159
|
}
|
|
156
160
|
|
|
157
|
-
|
|
158
|
-
const vector = await this.embeddingEngine.embed(chunk.content);
|
|
159
|
-
const chunkId =
|
|
160
|
-
chunks.length > 1
|
|
161
|
-
? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}`
|
|
162
|
-
: `${store.id}-${fileHash}`;
|
|
163
|
-
|
|
164
|
-
const doc: Document = {
|
|
165
|
-
id: createDocumentId(chunkId),
|
|
166
|
-
content: chunk.content,
|
|
167
|
-
vector,
|
|
168
|
-
metadata: {
|
|
169
|
-
type: chunks.length > 1 ? 'chunk' : 'file',
|
|
170
|
-
storeId: store.id,
|
|
171
|
-
path: filePath,
|
|
172
|
-
indexedAt: new Date(),
|
|
173
|
-
fileHash,
|
|
174
|
-
chunkIndex: chunk.chunkIndex,
|
|
175
|
-
totalChunks: chunk.totalChunks,
|
|
176
|
-
// New metadata for ranking
|
|
177
|
-
fileType,
|
|
178
|
-
sectionHeader: chunk.sectionHeader,
|
|
179
|
-
functionName: chunk.functionName,
|
|
180
|
-
hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
|
|
181
|
-
docSummary: chunk.docSummary,
|
|
182
|
-
},
|
|
183
|
-
};
|
|
184
|
-
documents.push(doc);
|
|
185
|
-
}
|
|
186
|
-
filesProcessed++;
|
|
161
|
+
filesProcessed += batch.length;
|
|
187
162
|
|
|
188
|
-
// Emit progress event
|
|
163
|
+
// Emit progress event after each batch
|
|
189
164
|
onProgress?.({
|
|
190
165
|
type: 'progress',
|
|
191
166
|
current: filesProcessed,
|
|
192
167
|
total: files.length,
|
|
193
|
-
message: `
|
|
168
|
+
message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`,
|
|
194
169
|
});
|
|
195
170
|
}
|
|
196
171
|
|
|
@@ -235,6 +210,80 @@ export class IndexService {
|
|
|
235
210
|
});
|
|
236
211
|
}
|
|
237
212
|
|
|
213
|
+
/**
|
|
214
|
+
* Process a single file: read, chunk, embed, and return documents.
|
|
215
|
+
* Extracted for parallel processing.
|
|
216
|
+
*/
|
|
217
|
+
private async processFile(
|
|
218
|
+
filePath: string,
|
|
219
|
+
store: FileStore | RepoStore
|
|
220
|
+
): Promise<{
|
|
221
|
+
documents: Document[];
|
|
222
|
+
sourceFile: { path: string; content: string } | undefined;
|
|
223
|
+
}> {
|
|
224
|
+
const content = await readFile(filePath, 'utf-8');
|
|
225
|
+
const fileHash = createHash('md5').update(content).digest('hex');
|
|
226
|
+
const chunks = this.chunker.chunk(content, filePath);
|
|
227
|
+
|
|
228
|
+
const ext = extname(filePath).toLowerCase();
|
|
229
|
+
const fileName = basename(filePath).toLowerCase();
|
|
230
|
+
const fileType = this.classifyFileType(ext, fileName, filePath);
|
|
231
|
+
|
|
232
|
+
// Track source file for code graph
|
|
233
|
+
const sourceFile = ['.ts', '.tsx', '.js', '.jsx'].includes(ext)
|
|
234
|
+
? { path: filePath, content }
|
|
235
|
+
: undefined;
|
|
236
|
+
|
|
237
|
+
// Skip files with no chunks (empty files)
|
|
238
|
+
if (chunks.length === 0) {
|
|
239
|
+
return { documents: [], sourceFile };
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// Batch embed all chunks from this file
|
|
243
|
+
const chunkContents = chunks.map((c) => c.content);
|
|
244
|
+
const vectors = await this.embeddingEngine.embedBatch(chunkContents);
|
|
245
|
+
|
|
246
|
+
const documents: Document[] = [];
|
|
247
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
248
|
+
const chunk = chunks[i];
|
|
249
|
+
const vector = vectors[i];
|
|
250
|
+
|
|
251
|
+
// Fail fast if chunk/vector mismatch (should never happen)
|
|
252
|
+
if (chunk === undefined || vector === undefined) {
|
|
253
|
+
throw new Error(
|
|
254
|
+
`Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== undefined)}, vector=${String(vector !== undefined)}`
|
|
255
|
+
);
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
const chunkId =
|
|
259
|
+
chunks.length > 1
|
|
260
|
+
? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}`
|
|
261
|
+
: `${store.id}-${fileHash}`;
|
|
262
|
+
|
|
263
|
+
documents.push({
|
|
264
|
+
id: createDocumentId(chunkId),
|
|
265
|
+
content: chunk.content,
|
|
266
|
+
vector,
|
|
267
|
+
metadata: {
|
|
268
|
+
type: chunks.length > 1 ? 'chunk' : 'file',
|
|
269
|
+
storeId: store.id,
|
|
270
|
+
path: filePath,
|
|
271
|
+
indexedAt: new Date(),
|
|
272
|
+
fileHash,
|
|
273
|
+
chunkIndex: chunk.chunkIndex,
|
|
274
|
+
totalChunks: chunk.totalChunks,
|
|
275
|
+
fileType,
|
|
276
|
+
sectionHeader: chunk.sectionHeader,
|
|
277
|
+
functionName: chunk.functionName,
|
|
278
|
+
hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
|
|
279
|
+
docSummary: chunk.docSummary,
|
|
280
|
+
},
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
return { documents, sourceFile };
|
|
285
|
+
}
|
|
286
|
+
|
|
238
287
|
private async scanDirectory(dir: string): Promise<string[]> {
|
|
239
288
|
const files: string[] = [];
|
|
240
289
|
const entries = await readdir(dir, { withFileTypes: true });
|
|
@@ -182,10 +182,11 @@ describe('CLI Consistency', () => {
|
|
|
182
182
|
beforeAll(async () => {
|
|
183
183
|
try {
|
|
184
184
|
cli(`store create quiet-test-store --type file --source "${testFilesDir}"`);
|
|
185
|
+
cli('index quiet-test-store');
|
|
185
186
|
} catch {
|
|
186
187
|
// Store may already exist
|
|
187
188
|
}
|
|
188
|
-
});
|
|
189
|
+
}, 120000);
|
|
189
190
|
|
|
190
191
|
it('--quiet suppresses all output for index on success', () => {
|
|
191
192
|
const result = runCli('index quiet-test-store --quiet');
|
|
@@ -204,7 +205,7 @@ describe('CLI Consistency', () => {
|
|
|
204
205
|
});
|
|
205
206
|
|
|
206
207
|
it('--quiet outputs only paths for search', () => {
|
|
207
|
-
const result = runCli('search "test" --quiet');
|
|
208
|
+
const result = runCli('search "test" --stores quiet-test-store --quiet');
|
|
208
209
|
expect(result.exitCode).toBe(0);
|
|
209
210
|
// Should not contain verbose headers
|
|
210
211
|
expect(result.stdout).not.toContain('Search:');
|