bluera-knowledge 0.12.11 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.12.11",
+  "version": "0.13.0",
   "description": "Clone repos, crawl docs, search locally. Fast, authoritative answers for AI coding agents.",
   "mcpServers": {
     "bluera-knowledge": {
package/CHANGELOG.md CHANGED
@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.13.0](https://github.com/blueraai/bluera-knowledge/compare/v0.12.11...v0.13.0) (2026-01-15)
+
 ## [0.12.10](https://github.com/blueraai/bluera-knowledge/compare/v0.11.21...v0.12.10) (2026-01-15)
 
 
package/README.md CHANGED
@@ -479,10 +479,11 @@ When you add a repository or index content:
 
 Background jobs include significant performance optimizations:
 
-- **⚡ Parallel Embedding** - Processes 32 chunks simultaneously (~30x faster than sequential)
+- **⚡ Parallel Embedding** - Batch processes up to 32 chunks simultaneously
+- **📂 Parallel File I/O** - Processes multiple files concurrently (configurable, default: 4)
 - **🔓 Non-Blocking** - Continue working while indexing completes
 - **📊 Progress Tracking** - Real-time updates on files processed and progress percentage
-- **🧹 Auto-Cleanup** - Completed jobs are cleaned up after 24 hours
+- **🧹 Auto-Cleanup** - Completed/stale jobs are cleaned up automatically
 
 ---
 
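The two new bullets above correspond to changes visible later in this diff: a `concurrency` constructor option (default 4) and a switch from per-chunk `embed` calls to a single `embedBatch` call per file. As a rough sketch of how those two levels of parallelism compose, assuming only the names `embedBatch` and `concurrency` from the diff (everything else here is illustrative, not the package's API):

```ts
// Sketch only: waves of `concurrency` files, one batched embed call per file.
async function indexFiles(
  files: string[],
  chunkFile: (path: string) => Promise<string[]>, // hypothetical helper
  embedBatch: (texts: string[]) => Promise<number[][]>, // name from the diff below
  concurrency = 4 // package default per this README
): Promise<number> {
  let embedded = 0;
  for (let i = 0; i < files.length; i += concurrency) {
    const wave = files.slice(i, i + concurrency);
    // Files in a wave are read and chunked concurrently...
    const counts = await Promise.all(
      wave.map(async (path) => {
        const chunks = await chunkFile(path);
        // ...and each file's chunks are embedded in one batched call.
        const vectors = chunks.length > 0 ? await embedBatch(chunks) : [];
        return vectors.length;
      })
    );
    embedded += counts.reduce((a, b) => a + b, 0);
  }
  return embedded;
}
```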
@@ -2416,6 +2416,7 @@ var IndexService = class {
   embeddingEngine;
   chunker;
   codeGraphService;
+  concurrency;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
@@ -2424,6 +2425,7 @@ var IndexService = class {
       chunkOverlap: options.chunkOverlap ?? 100
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
   async indexStore(store, onProgress) {
     logger.info(
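For consumers that construct the service themselves rather than going through the exported `createServices` factory, the new option slots into the existing options object. A hypothetical usage, assuming only the constructor signature shown above:

```ts
// Hypothetical direct construction, given existing `lanceStore` and
// `embeddingEngine` instances; `concurrency` falls back to 4 when omitted.
declare const lanceStore: unknown;
declare const embeddingEngine: unknown;
declare class IndexService {
  constructor(
    lanceStore: unknown,
    embeddingEngine: unknown,
    options?: { chunkOverlap?: number; concurrency?: number }
  );
}

const indexService = new IndexService(lanceStore, embeddingEngine, {
  concurrency: 8 // index up to 8 files per wave instead of the default 4
});
```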
@@ -2463,7 +2465,8 @@ var IndexService = class {
       {
         storeId: store.id,
         path: store.path,
-        fileCount: files.length
+        fileCount: files.length,
+        concurrency: this.concurrency
       },
       "Files scanned for indexing"
     );
@@ -2474,47 +2477,23 @@ var IndexService = class {
       total: files.length,
       message: "Starting index"
     });
-    for (const filePath of files) {
-      const content = await readFile3(filePath, "utf-8");
-      const fileHash = createHash2("md5").update(content).digest("hex");
-      const chunks = this.chunker.chunk(content, filePath);
-      const ext = extname(filePath).toLowerCase();
-      const fileName = basename(filePath).toLowerCase();
-      const fileType = this.classifyFileType(ext, fileName, filePath);
-      if ([".ts", ".tsx", ".js", ".jsx"].includes(ext)) {
-        sourceFiles.push({ path: filePath, content });
-      }
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
-        const doc = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? "chunk" : "file",
-            storeId: store.id,
-            path: filePath,
-            indexedAt: /* @__PURE__ */ new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary
-          }
-        };
-        documents.push(doc);
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== void 0) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
-      filesProcessed++;
+      filesProcessed += batch.length;
       onProgress?.({
         type: "progress",
         current: filesProcessed,
         total: files.length,
-        message: `Indexing ${filePath}`
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`
       });
     }
     if (documents.length > 0) {
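A note on the scheduling choice: slicing the file list and awaiting `Promise.all` per slice runs the work in fixed waves, so each wave waits for its slowest file before the next begins. An alternative is a continuously refilled pool that keeps `concurrency` files in flight at all times. The package ships the wave approach above; the pool below is purely an illustrative contrast:

```ts
// Illustrative worker pool: keeps `concurrency` tasks in flight continuously,
// unlike the fixed waves in the shipped code above.
async function mapPool<T, R>(
  items: T[],
  worker: (item: T) => Promise<R>,
  concurrency = 4
): Promise<R[]> {
  const results: R[] = new Array(items.length);
  let next = 0;
  const run = async (): Promise<void> => {
    while (next < items.length) {
      const i = next++; // safe: no await between the read and the increment
      results[i] = await worker(items[i] as T);
    }
  };
  await Promise.all(
    Array.from({ length: Math.min(concurrency, items.length) }, run)
  );
  return results;
}
```

Both bound memory the same way; the pool only wins when per-file cost varies a lot, at the price of slightly trickier code.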
@@ -2549,6 +2528,55 @@ var IndexService = class {
       timeMs
     });
   }
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  async processFile(filePath, store) {
+    const content = await readFile3(filePath, "utf-8");
+    const fileHash = createHash2("md5").update(content).digest("hex");
+    const chunks = this.chunker.chunk(content, filePath);
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+    const documents = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+      if (chunk === void 0 || vector === void 0) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
+        );
+      }
+      const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? "chunk" : "file",
+          storeId: store.id,
+          path: filePath,
+          indexedAt: /* @__PURE__ */ new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary
+        }
+      });
+    }
+    return { documents, sourceFile };
+  }
   async scanDirectory(dir) {
     const files = [];
     const entries = await readdir(dir, { withFileTypes: true });
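Two details worth noting in `processFile`: embedding moved from one `embed` call per chunk to a single `embedBatch` call per file, and the loop defensively verifies the batch contract (one vector per input, in input order) before pairing chunks with vectors. The `embedBatch` implementation itself is not part of this diff; a hypothetical wrapper honoring that contract, with the sub-batch size of 32 taken from the README above:

```ts
// Hypothetical sketch of a batching wrapper that preserves the
// one-vector-per-input, in-order contract the mismatch check relies on.
// The real embedBatch implementation is not shown in this diff.
async function embedBatch(
  texts: string[],
  embedMany: (batch: string[]) => Promise<number[][]>, // hypothetical backend call
  batchSize = 32 // "up to 32 chunks" per the README
): Promise<number[][]> {
  const out: number[][] = [];
  for (let i = 0; i < texts.length; i += batchSize) {
    const vectors = await embedMany(texts.slice(i, i + batchSize));
    out.push(...vectors); // order preserved across sub-batches
  }
  return out;
}
```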
@@ -4651,4 +4679,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-7DZZHYDU.js.map
+//# sourceMappingURL=chunk-6ZVW2P2F.js.map