bluera-knowledge 0.12.11 → 0.13.0
This diff shows the changes between these two publicly released package versions as they appear in their public registry.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +2 -0
- package/README.md +3 -2
- package/dist/{chunk-7DZZHYDU.js → chunk-6ZVW2P2F.js} +66 -38
- package/dist/chunk-6ZVW2P2F.js.map +1 -0
- package/dist/{chunk-S5VW7NPH.js → chunk-GCUKVV33.js} +2 -2
- package/dist/{chunk-XVVMSRLO.js → chunk-H5AKKHY7.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/package.json +1 -1
- package/src/services/index.service.test.ts +347 -0
- package/src/services/index.service.ts +93 -44
- package/dist/chunk-7DZZHYDU.js.map +0 -1
- package/dist/{chunk-S5VW7NPH.js.map → chunk-GCUKVV33.js.map} +0 -0
- package/dist/{chunk-XVVMSRLO.js.map → chunk-H5AKKHY7.js.map} +0 -0
package/CHANGELOG.md
CHANGED

@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.13.0](https://github.com/blueraai/bluera-knowledge/compare/v0.12.11...v0.13.0) (2026-01-15)
+
 ## [0.12.10](https://github.com/blueraai/bluera-knowledge/compare/v0.11.21...v0.12.10) (2026-01-15)
 
 
package/README.md
CHANGED

@@ -479,10 +479,11 @@ When you add a repository or index content:
 
 Background jobs include significant performance optimizations:
 
-- **⚡ Parallel Embedding** -
+- **⚡ Parallel Embedding** - Batch processes up to 32 chunks simultaneously
+- **📂 Parallel File I/O** - Processes multiple files concurrently (configurable, default: 4)
 - **🔓 Non-Blocking** - Continue working while indexing completes
 - **📊 Progress Tracking** - Real-time updates on files processed and progress percentage
-- **🧹 Auto-Cleanup** - Completed jobs are cleaned up
+- **🧹 Auto-Cleanup** - Completed/stale jobs are cleaned up automatically
 
 ---
 
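The two new bullets map directly onto the compiled chunk diff below (and the src/services/index.service.ts changes it reflects): embeddings are computed in batches rather than one chunk at a time, and files are read and embedded in fixed-size concurrent groups. As a reading aid, here is a minimal TypeScript sketch of that bounded-concurrency pattern; the helper name is illustrative, not a package export:

async function processInBatches<T, R>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<R>,
): Promise<R[]> {
  // Walk the list in groups of `concurrency` so that at most that many
  // promises are in flight at once (the diff below defaults this to 4).
  const results: R[] = [];
  for (let i = 0; i < items.length; i += concurrency) {
    const batch = items.slice(i, i + concurrency);
    results.push(...(await Promise.all(batch.map(fn))));
  }
  return results;
}
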
package/dist/{chunk-7DZZHYDU.js → chunk-6ZVW2P2F.js}
CHANGED

@@ -2416,6 +2416,7 @@ var IndexService = class {
   embeddingEngine;
   chunker;
   codeGraphService;
+  concurrency;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
@@ -2424,6 +2425,7 @@ var IndexService = class {
       chunkOverlap: options.chunkOverlap ?? 100
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
   async indexStore(store, onProgress) {
     logger.info(
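Both new defaults use nullish coalescing, so only `null` or `undefined` fall back. The options shape implied by this constructor is roughly the following (an assumed partial sketch; the authoritative type lives in package/src/services/index.service.ts):

interface IndexServiceOptions {
  chunkOverlap?: number;      // falls back to 100 above
  concurrency?: number;       // falls back to 4 above; batch width for file processing
  codeGraphService?: unknown; // optional collaborator, stored as-is
}

One caveat worth noting: `options.concurrency ?? 4` preserves an explicit `0`, and a zero step would keep the batch loop further down from ever advancing (`i += 0`). Callers presumably never pass that, but it is the kind of edge a validator could reject.
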
@@ -2463,7 +2465,8 @@
       {
         storeId: store.id,
         path: store.path,
-        fileCount: files.length
+        fileCount: files.length,
+        concurrency: this.concurrency
       },
       "Files scanned for indexing"
     );
@@ -2474,47 +2477,23 @@
       total: files.length,
       message: "Starting index"
     });
-    for (
-      const
-      const
-
-
-      const
-
-
-
-
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
-        const doc = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? "chunk" : "file",
-            storeId: store.id,
-            path: filePath,
-            indexedAt: /* @__PURE__ */ new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary
-          }
-        };
-        documents.push(doc);
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== void 0) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
-      filesProcessed
+      filesProcessed += batch.length;
       onProgress?.({
         type: "progress",
         current: filesProcessed,
         total: files.length,
-        message: `
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`
       });
     }
     if (documents.length > 0) {
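The rewritten loop trades file-by-file awaits for batches of `this.concurrency` files run through `Promise.all`, and progress becomes batch-granular (`filesProcessed += batch.length`). One property of fixed batches is that each round waits for its slowest file before starting the next; a worker pool keeps every slot busy instead. A sketch of that alternative, for contrast only (illustrative, not what the package does):

async function workerPool<T>(
  items: T[],
  concurrency: number,
  fn: (item: T) => Promise<void>,
): Promise<void> {
  // Each of the `concurrency` workers repeatedly claims the next index.
  // The claim (check plus increment) needs no lock because it runs
  // synchronously within a single event-loop turn.
  let next = 0;
  const workers = Array.from({ length: concurrency }, async () => {
    while (next < items.length) {
      const item = items[next++] as T;
      await fn(item);
    }
  });
  await Promise.all(workers);
}

The batch form is simpler and pairs naturally with per-batch progress callbacks, which is likely why the diff chose it.
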
@@ -2549,6 +2528,55 @@
       timeMs
     });
   }
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  async processFile(filePath, store) {
+    const content = await readFile3(filePath, "utf-8");
+    const fileHash = createHash2("md5").update(content).digest("hex");
+    const chunks = this.chunker.chunk(content, filePath);
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+    const documents = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+      if (chunk === void 0 || vector === void 0) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
+        );
+      }
+      const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? "chunk" : "file",
+          storeId: store.id,
+          path: filePath,
+          indexedAt: /* @__PURE__ */ new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary
+        }
+      });
+    }
+    return { documents, sourceFile };
+  }
   async scanDirectory(dir) {
     const files = [];
     const entries = await readdir(dir, { withFileTypes: true });
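Inside `processFile`, the old per-chunk `embed` calls collapse into one `embedBatch` call per file, which is what allows the engine to batch up to 32 chunks at once as the README bullet claims. The engine surface this assumes, inferred from the calls in this diff rather than from a published type:

interface EmbeddingEngine {
  embed(text: string): Promise<number[]>;           // old path: one call per chunk
  embedBatch(texts: string[]): Promise<number[][]>; // new path: one call per file
}

The `chunk === void 0 || vector === void 0` guard then catches a backend that returns fewer vectors than inputs, and it reads like the index-bounds check that strict TypeScript settings such as noUncheckedIndexedAccess require.
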
@@ -4651,4 +4679,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-7DZZHYDU.js.map
+//# sourceMappingURL=chunk-6ZVW2P2F.js.map