bluera-knowledge 0.12.10 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.12.10",
+  "version": "0.13.0",
   "description": "Clone repos, crawl docs, search locally. Fast, authoritative answers for AI coding agents.",
   "mcpServers": {
     "bluera-knowledge": {
package/CHANGELOG.md CHANGED
@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.13.0](https://github.com/blueraai/bluera-knowledge/compare/v0.12.11...v0.13.0) (2026-01-15)
+
 ## [0.12.10](https://github.com/blueraai/bluera-knowledge/compare/v0.11.21...v0.12.10) (2026-01-15)
 
 
package/README.md CHANGED
@@ -479,10 +479,11 @@ When you add a repository or index content:
 
 Background jobs include significant performance optimizations:
 
-- **⚡ Parallel Embedding** - Processes 32 chunks simultaneously (~30x faster than sequential)
+- **⚡ Parallel Embedding** - Batch processes up to 32 chunks simultaneously
+- **📂 Parallel File I/O** - Processes multiple files concurrently (configurable, default: 4)
 - **🔓 Non-Blocking** - Continue working while indexing completes
 - **📊 Progress Tracking** - Real-time updates on files processed and progress percentage
-- **🧹 Auto-Cleanup** - Completed jobs are cleaned up after 24 hours
+- **🧹 Auto-Cleanup** - Completed/stale jobs are cleaned up automatically
 
 ---
 
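The remaining hunks come from the package's bundled JavaScript chunk (renamed chunk-VTATT3IR.js → chunk-6ZVW2P2F.js; see the sourceMappingURL change at the end of the diff). The new "Parallel File I/O" bullet maps to a bounded-batch loop in IndexService.indexStore: files are sliced into groups of `concurrency` and each group is awaited with Promise.all. Below is a minimal TypeScript sketch of that pattern, not the package's code; `processInBatches` and `worker` are illustrative names, not bluera-knowledge APIs.

```ts
// Sketch of the bounded-batch pattern used by the new indexStore loop.
// Illustrative only: processInBatches/worker are not bluera-knowledge APIs.
async function processInBatches<T, R>(
  items: readonly T[],
  concurrency: number,
  worker: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += concurrency) {
    const batch = items.slice(i, i + concurrency);
    // The whole batch is awaited before the next one starts, so one slow
    // item stalls its batch-mates; the barrier keeps per-batch progress
    // reporting trivial (filesProcessed += batch.length after each await).
    results.push(...(await Promise.all(batch.map(worker))));
  }
  return results;
}
```

With the default concurrency of 4, a 100-file store becomes 25 sequential batches of 4 concurrent read/chunk/embed passes; a work-stealing pool would avoid the batch barrier, but at the cost of more bookkeeping.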
@@ -464,6 +464,40 @@ var JobService = class {
     }
     return cleaned;
   }
+  /**
+   * Clean up stale pending jobs that never started or got stuck
+   *
+   * @param olderThanHours - Consider pending jobs stale after this many hours (default 2)
+   * @param options - Options for cleanup behavior
+   * @param options.markAsFailed - If true, mark jobs as failed instead of deleting
+   * @returns Number of jobs cleaned up or marked as failed
+   */
+  cleanupStalePendingJobs(olderThanHours = 2, options = {}) {
+    const jobs = this.listJobs();
+    const cutoffTime = Date.now() - olderThanHours * 60 * 60 * 1e3;
+    let cleaned = 0;
+    for (const job of jobs) {
+      if (job.status === "pending" && new Date(job.updatedAt).getTime() < cutoffTime) {
+        const jobFile = path.join(this.jobsDir, `${job.id}.json`);
+        if (options.markAsFailed === true) {
+          this.updateJob(job.id, {
+            status: "failed",
+            message: `Job marked as stale - pending for over ${String(olderThanHours)} hours without progress`
+          });
+        } else {
+          try {
+            fs.unlinkSync(jobFile);
+          } catch (error) {
+            throw new Error(
+              `Failed to delete stale job ${job.id}: ${error instanceof Error ? error.message : String(error)}`
+            );
+          }
+        }
+        cleaned++;
+      }
+    }
+    return cleaned;
+  }
   /**
    * Delete a specific job
    */
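cleanupStalePendingJobs complements the existing completed-job sweep: pending jobs whose updatedAt predates the cutoff are either deleted from the jobs directory or, with markAsFailed, preserved as failed records for auditing. A hedged usage sketch against the signature shown in the hunk; the diff does not show how a JobService instance is obtained, so one is declared abstractly here:

```ts
// `jobs` stands in for a JobService instance; its construction is not part
// of this diff, so only the method signature from the hunk is assumed.
declare const jobs: {
  cleanupStalePendingJobs(
    olderThanHours?: number,
    options?: { markAsFailed?: boolean }
  ): number;
};

// Delete pending jobs untouched for the default 2+ hours:
const removed = jobs.cleanupStalePendingJobs();

// Keep an audit trail instead: mark 6-hour-stale jobs as failed.
const failed = jobs.cleanupStalePendingJobs(6, { markAsFailed: true });
```

Note the asymmetry visible in the hunk: a failed unlink throws and aborts the sweep, while markAsFailed routes through updateJob and leaves the job file in place.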
@@ -2382,6 +2416,7 @@ var IndexService = class {
   embeddingEngine;
   chunker;
   codeGraphService;
+  concurrency;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
@@ -2390,6 +2425,7 @@ var IndexService = class {
       chunkOverlap: options.chunkOverlap ?? 100
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
   async indexStore(store, onProgress) {
     logger.info(
@@ -2429,7 +2465,8 @@
       {
         storeId: store.id,
         path: store.path,
-        fileCount: files.length
+        fileCount: files.length,
+        concurrency: this.concurrency
       },
       "Files scanned for indexing"
     );
@@ -2440,47 +2477,23 @@
       total: files.length,
       message: "Starting index"
     });
-    for (const filePath of files) {
-      const content = await readFile3(filePath, "utf-8");
-      const fileHash = createHash2("md5").update(content).digest("hex");
-      const chunks = this.chunker.chunk(content, filePath);
-      const ext = extname(filePath).toLowerCase();
-      const fileName = basename(filePath).toLowerCase();
-      const fileType = this.classifyFileType(ext, fileName, filePath);
-      if ([".ts", ".tsx", ".js", ".jsx"].includes(ext)) {
-        sourceFiles.push({ path: filePath, content });
-      }
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
-        const doc = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? "chunk" : "file",
-            storeId: store.id,
-            path: filePath,
-            indexedAt: /* @__PURE__ */ new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary
-          }
-        };
-        documents.push(doc);
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== void 0) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
-      filesProcessed++;
+      filesProcessed += batch.length;
       onProgress?.({
         type: "progress",
         current: filesProcessed,
         total: files.length,
-        message: `Indexing ${filePath}`
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`
       });
     }
     if (documents.length > 0) {
@@ -2515,6 +2528,55 @@
       timeMs
     });
   }
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  async processFile(filePath, store) {
+    const content = await readFile3(filePath, "utf-8");
+    const fileHash = createHash2("md5").update(content).digest("hex");
+    const chunks = this.chunker.chunk(content, filePath);
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+    const documents = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+      if (chunk === void 0 || vector === void 0) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
+        );
+      }
+      const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? "chunk" : "file",
+          storeId: store.id,
+          path: filePath,
+          indexedAt: /* @__PURE__ */ new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary
+        }
+      });
+    }
+    return { documents, sourceFile };
+  }
   async scanDirectory(dir) {
     const files = [];
     const entries = await readdir(dir, { withFileTypes: true });
@@ -4617,4 +4679,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-VTATT3IR.js.map
+//# sourceMappingURL=chunk-6ZVW2P2F.js.map
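Taken together, 0.13.0 parallelizes indexing at two levels: up to `concurrency` (default 4) files in flight, and one batched embedding call per file via embedBatch instead of one awaited embed per chunk. Only those two call sites appear in this diff, so the engine surface below is an inference, not the package's actual interface:

```ts
// Inferred embedding-engine surface; only embed() (old loop) and embedBatch()
// (new processFile) appear in this diff — the real interface may differ.
interface EmbeddingEngine {
  embed(text: string): Promise<number[]>;
  embedBatch(texts: string[]): Promise<number[][]>;
}

// A trivial stand-in showing why batching helps: one call amortizes
// per-call overhead (model invocation, IPC) across all chunks of a file.
const fakeEngine: EmbeddingEngine = {
  embed: async (text) => [text.length],
  embedBatch: async (texts) => texts.map((t) => [t.length]),
};
```

The chunk filename change itself carries no behavior: it is the expected result of content-hashed chunk naming once the bundled code changed.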