bluera-knowledge 0.12.10 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +2 -0
- package/README.md +3 -2
- package/dist/{chunk-VTATT3IR.js → chunk-6ZVW2P2F.js} +100 -38
- package/dist/chunk-6ZVW2P2F.js.map +1 -0
- package/dist/{chunk-6777ULXC.js → chunk-GCUKVV33.js} +2 -2
- package/dist/{chunk-JET33NMA.js → chunk-H5AKKHY7.js} +3 -2
- package/dist/chunk-H5AKKHY7.js.map +1 -0
- package/dist/index.js +3 -3
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/package.json +1 -1
- package/src/mcp/handlers/job.handler.ts +5 -0
- package/src/services/index.service.test.ts +347 -0
- package/src/services/index.service.ts +93 -44
- package/src/services/job.service.test.ts +87 -0
- package/src/services/job.service.ts +43 -0
- package/dist/chunk-JET33NMA.js.map +0 -1
- package/dist/chunk-VTATT3IR.js.map +0 -1
- package/dist/{chunk-6777ULXC.js.map → chunk-GCUKVV33.js.map} +0 -0
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.13.0](https://github.com/blueraai/bluera-knowledge/compare/v0.12.11...v0.13.0) (2026-01-15)
+
 ## [0.12.10](https://github.com/blueraai/bluera-knowledge/compare/v0.11.21...v0.12.10) (2026-01-15)
 
 
package/README.md
CHANGED
@@ -479,10 +479,11 @@ When you add a repository or index content:
 
 Background jobs include significant performance optimizations:
 
-- **⚡ Parallel Embedding** -
+- **⚡ Parallel Embedding** - Batch processes up to 32 chunks simultaneously
+- **📂 Parallel File I/O** - Processes multiple files concurrently (configurable, default: 4)
 - **🔓 Non-Blocking** - Continue working while indexing completes
 - **📊 Progress Tracking** - Real-time updates on files processed and progress percentage
-- **🧹 Auto-Cleanup** - Completed jobs are cleaned up
+- **🧹 Auto-Cleanup** - Completed/stale jobs are cleaned up automatically
 
 ---
 
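The two new bullets describe batching at two levels: chunks are embedded in groups of up to 32, and whole files are read and chunked in parallel under a configurable limit. A minimal sketch of that bounded-concurrency pattern follows; processOne is a hypothetical stand-in for per-file work, and this is illustrative only, not the package's actual indexing code.

// Sketch only: a generic bounded-concurrency loop matching the bullets above.
// `processOne` is a hypothetical stand-in for per-file read/chunk/embed work.
async function indexInBatches<T>(
  files: string[],
  concurrency: number, // README default: 4 files at a time
  processOne: (file: string) => Promise<T>
): Promise<T[]> {
  const results: T[] = [];
  for (let i = 0; i < files.length; i += concurrency) {
    const batch = files.slice(i, i + concurrency);
    // Files within a batch run concurrently; batches run one after another.
    results.push(...(await Promise.all(batch.map(processOne))));
  }
  return results;
}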
package/dist/{chunk-VTATT3IR.js → chunk-6ZVW2P2F.js}
CHANGED
@@ -464,6 +464,40 @@ var JobService = class {
     }
     return cleaned;
   }
+  /**
+   * Clean up stale pending jobs that never started or got stuck
+   *
+   * @param olderThanHours - Consider pending jobs stale after this many hours (default 2)
+   * @param options - Options for cleanup behavior
+   * @param options.markAsFailed - If true, mark jobs as failed instead of deleting
+   * @returns Number of jobs cleaned up or marked as failed
+   */
+  cleanupStalePendingJobs(olderThanHours = 2, options = {}) {
+    const jobs = this.listJobs();
+    const cutoffTime = Date.now() - olderThanHours * 60 * 60 * 1e3;
+    let cleaned = 0;
+    for (const job of jobs) {
+      if (job.status === "pending" && new Date(job.updatedAt).getTime() < cutoffTime) {
+        const jobFile = path.join(this.jobsDir, `${job.id}.json`);
+        if (options.markAsFailed === true) {
+          this.updateJob(job.id, {
+            status: "failed",
+            message: `Job marked as stale - pending for over ${String(olderThanHours)} hours without progress`
+          });
+        } else {
+          try {
+            fs.unlinkSync(jobFile);
+          } catch (error) {
+            throw new Error(
+              `Failed to delete stale job ${job.id}: ${error instanceof Error ? error.message : String(error)}`
+            );
+          }
+        }
+        cleaned++;
+      }
+    }
+    return cleaned;
+  }
   /**
    * Delete a specific job
    */
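A hedged usage sketch of the new method, assuming an already-constructed JobService instance named jobService; only the signature, defaults, and return value shown in the hunk above come from the package.

// Sketch, not package documentation. `jobService` is assumed to exist.
// Default behavior: delete pending jobs untouched for more than 2 hours.
const removed = jobService.cleanupStalePendingJobs();

// Alternative: keep the job records but mark them as failed,
// using a 6-hour staleness window instead of the default.
const flagged = jobService.cleanupStalePendingJobs(6, { markAsFailed: true });

console.log(`stale jobs removed: ${removed}, marked failed: ${flagged}`);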
@@ -2382,6 +2416,7 @@ var IndexService = class {
   embeddingEngine;
   chunker;
   codeGraphService;
+  concurrency;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
@@ -2390,6 +2425,7 @@ var IndexService = class {
       chunkOverlap: options.chunkOverlap ?? 100
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
   async indexStore(store, onProgress) {
     logger.info(
@@ -2429,7 +2465,8 @@ var IndexService = class {
       {
         storeId: store.id,
         path: store.path,
-        fileCount: files.length
+        fileCount: files.length,
+        concurrency: this.concurrency
       },
       "Files scanned for indexing"
     );
@@ -2440,47 +2477,23 @@ var IndexService = class {
       total: files.length,
       message: "Starting index"
     });
-    for (
-      const
-      const
-
-
-      const
-
-
-
-
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
-        const doc = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? "chunk" : "file",
-            storeId: store.id,
-            path: filePath,
-            indexedAt: /* @__PURE__ */ new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary
-          }
-        };
-        documents.push(doc);
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== void 0) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
-      filesProcessed
+      filesProcessed += batch.length;
       onProgress?.({
         type: "progress",
         current: filesProcessed,
         total: files.length,
-        message: `
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`
       });
     }
     if (documents.length > 0) {
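For orientation, a sketch of how the new option might be passed when constructing the service directly; the constructor and indexStore signatures and the progress-event fields are taken from the hunks above, while lanceStore, embeddingEngine, and store are assumed placeholders.

// Sketch: raise file-level parallelism from the default of 4 to 8.
// `lanceStore`, `embeddingEngine`, and `store` are assumed to already exist.
const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency: 8 });

await indexService.indexStore(store, (event) => {
  // Progress events carry the counters emitted by the batch loop above.
  console.log(`${event.type}: ${event.current}/${event.total} - ${event.message}`);
});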
@@ -2515,6 +2528,55 @@ var IndexService = class {
       timeMs
     });
   }
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  async processFile(filePath, store) {
+    const content = await readFile3(filePath, "utf-8");
+    const fileHash = createHash2("md5").update(content).digest("hex");
+    const chunks = this.chunker.chunk(content, filePath);
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+    const documents = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+      if (chunk === void 0 || vector === void 0) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
+        );
+      }
+      const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? "chunk" : "file",
+          storeId: store.id,
+          path: filePath,
+          indexedAt: /* @__PURE__ */ new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary
+        }
+      });
+    }
+    return { documents, sourceFile };
+  }
   async scanDirectory(dir) {
     const files = [];
     const entries = await readdir(dir, { withFileTypes: true });
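processFile replaces the old per-chunk embed() calls with a single embedBatch() call over every chunk in a file. A minimal sketch of that call shape follows; the EmbeddingEngine interface here is an assumption for illustration, not the package's actual type.

// Assumed interface for illustration only.
interface EmbeddingEngine {
  embed(text: string): Promise<number[]>;
  embedBatch(texts: string[]): Promise<number[][]>;
}

// One call embeds all chunks of a file instead of one call per chunk.
async function embedChunks(engine: EmbeddingEngine, chunkContents: string[]): Promise<number[][]> {
  const vectors = await engine.embedBatch(chunkContents);
  if (vectors.length !== chunkContents.length) {
    // Mirrors the chunk/vector mismatch guard in processFile above.
    throw new Error(`expected ${chunkContents.length} vectors, got ${vectors.length}`);
  }
  return vectors;
}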
@@ -4617,4 +4679,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-6ZVW2P2F.js.map