bluera-knowledge 0.12.10 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.12.10",
+  "version": "0.13.0",
   "description": "Clone repos, crawl docs, search locally. Fast, authoritative answers for AI coding agents.",
   "mcpServers": {
     "bluera-knowledge": {
package/CHANGELOG.md CHANGED
@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.13.0](https://github.com/blueraai/bluera-knowledge/compare/v0.12.11...v0.13.0) (2026-01-15)
+
 ## [0.12.10](https://github.com/blueraai/bluera-knowledge/compare/v0.11.21...v0.12.10) (2026-01-15)
 
 
package/README.md CHANGED
@@ -479,10 +479,11 @@ When you add a repository or index content:
 
 Background jobs include significant performance optimizations:
 
-- **⚡ Parallel Embedding** - Processes 32 chunks simultaneously (~30x faster than sequential)
+- **⚡ Parallel Embedding** - Batch processes up to 32 chunks simultaneously
+- **📂 Parallel File I/O** - Processes multiple files concurrently (configurable, default: 4)
 - **🔓 Non-Blocking** - Continue working while indexing completes
 - **📊 Progress Tracking** - Real-time updates on files processed and progress percentage
-- **🧹 Auto-Cleanup** - Completed jobs are cleaned up after 24 hours
+- **🧹 Auto-Cleanup** - Completed/stale jobs are cleaned up automatically
 
 ---
 
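The remaining hunks come from the package's bundled JavaScript chunk (renamed chunk-VTATT3IR.js → chunk-6ZVW2P2F.js; see the sourceMappingURL change at the end of the diff). The new "Parallel File I/O" bullet maps to a bounded-batch loop in IndexService.indexStore: files are sliced into groups of `concurrency` and each group is awaited with Promise.all. Below is a minimal TypeScript sketch of that pattern, not the package's code; `processInBatches` and `worker` are illustrative names, not bluera-knowledge APIs.

```ts
// Sketch of the bounded-batch pattern used by the new indexStore loop.
// Illustrative only: processInBatches/worker are not bluera-knowledge APIs.
async function processInBatches<T, R>(
  items: readonly T[],
  concurrency: number,
  worker: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += concurrency) {
    const batch = items.slice(i, i + concurrency);
    // The whole batch is awaited before the next one starts, so one slow
    // item stalls its batch-mates; the barrier keeps per-batch progress
    // reporting trivial (filesProcessed += batch.length after each await).
    results.push(...(await Promise.all(batch.map(worker))));
  }
  return results;
}
```

With the default concurrency of 4, a 100-file store becomes 25 sequential batches of 4 concurrent read/chunk/embed passes; a work-stealing pool would avoid the batch barrier, but at the cost of more bookkeeping.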
@@ -464,6 +464,40 @@ var JobService = class {
     }
     return cleaned;
   }
+  /**
+   * Clean up stale pending jobs that never started or got stuck
+   *
+   * @param olderThanHours - Consider pending jobs stale after this many hours (default 2)
+   * @param options - Options for cleanup behavior
+   * @param options.markAsFailed - If true, mark jobs as failed instead of deleting
+   * @returns Number of jobs cleaned up or marked as failed
+   */
+  cleanupStalePendingJobs(olderThanHours = 2, options = {}) {
+    const jobs = this.listJobs();
+    const cutoffTime = Date.now() - olderThanHours * 60 * 60 * 1e3;
+    let cleaned = 0;
+    for (const job of jobs) {
+      if (job.status === "pending" && new Date(job.updatedAt).getTime() < cutoffTime) {
+        const jobFile = path.join(this.jobsDir, `${job.id}.json`);
+        if (options.markAsFailed === true) {
+          this.updateJob(job.id, {
+            status: "failed",
+            message: `Job marked as stale - pending for over ${String(olderThanHours)} hours without progress`
+          });
+        } else {
+          try {
+            fs.unlinkSync(jobFile);
+          } catch (error) {
+            throw new Error(
+              `Failed to delete stale job ${job.id}: ${error instanceof Error ? error.message : String(error)}`
+            );
+          }
+        }
+        cleaned++;
+      }
+    }
+    return cleaned;
+  }
   /**
    * Delete a specific job
    */
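cleanupStalePendingJobs complements the existing completed-job sweep: pending jobs whose updatedAt predates the cutoff are either deleted from the jobs directory or, with markAsFailed, preserved as failed records for auditing. A hedged usage sketch against the signature shown in the hunk; the diff does not show how a JobService instance is obtained, so one is declared abstractly here:

```ts
// `jobs` stands in for a JobService instance; its construction is not part
// of this diff, so only the method signature from the hunk is assumed.
declare const jobs: {
  cleanupStalePendingJobs(
    olderThanHours?: number,
    options?: { markAsFailed?: boolean }
  ): number;
};

// Delete pending jobs untouched for the default 2+ hours:
const removed = jobs.cleanupStalePendingJobs();

// Keep an audit trail instead: mark 6-hour-stale jobs as failed.
const failed = jobs.cleanupStalePendingJobs(6, { markAsFailed: true });
```

Note the asymmetry visible in the hunk: a failed unlink throws and aborts the sweep, while markAsFailed routes through updateJob and leaves the job file in place.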
@@ -2382,6 +2416,7 @@ var IndexService = class {
   embeddingEngine;
   chunker;
   codeGraphService;
+  concurrency;
   constructor(lanceStore, embeddingEngine, options = {}) {
     this.lanceStore = lanceStore;
     this.embeddingEngine = embeddingEngine;
@@ -2390,6 +2425,7 @@ var IndexService = class {
       chunkOverlap: options.chunkOverlap ?? 100
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
   async indexStore(store, onProgress) {
     logger.info(
@@ -2429,7 +2465,8 @@
       {
         storeId: store.id,
         path: store.path,
-        fileCount: files.length
+        fileCount: files.length,
+        concurrency: this.concurrency
       },
       "Files scanned for indexing"
     );
@@ -2440,47 +2477,23 @@
       total: files.length,
       message: "Starting index"
     });
-    for (const filePath of files) {
-      const content = await readFile3(filePath, "utf-8");
-      const fileHash = createHash2("md5").update(content).digest("hex");
-      const chunks = this.chunker.chunk(content, filePath);
-      const ext = extname(filePath).toLowerCase();
-      const fileName = basename(filePath).toLowerCase();
-      const fileType = this.classifyFileType(ext, fileName, filePath);
-      if ([".ts", ".tsx", ".js", ".jsx"].includes(ext)) {
-        sourceFiles.push({ path: filePath, content });
-      }
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
-        const doc = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? "chunk" : "file",
-            storeId: store.id,
-            path: filePath,
-            indexedAt: /* @__PURE__ */ new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary
-          }
-        };
-        documents.push(doc);
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== void 0) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
-      filesProcessed++;
+      filesProcessed += batch.length;
       onProgress?.({
         type: "progress",
         current: filesProcessed,
         total: files.length,
-        message: `Indexing ${filePath}`
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`
       });
     }
     if (documents.length > 0) {
@@ -2515,6 +2528,55 @@
       timeMs
     });
   }
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  async processFile(filePath, store) {
+    const content = await readFile3(filePath, "utf-8");
+    const fileHash = createHash2("md5").update(content).digest("hex");
+    const chunks = this.chunker.chunk(content, filePath);
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+    const sourceFile = [".ts", ".tsx", ".js", ".jsx"].includes(ext) ? { path: filePath, content } : void 0;
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+    const documents = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+      if (chunk === void 0 || vector === void 0) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== void 0)}, vector=${String(vector !== void 0)}`
+        );
+      }
+      const chunkId = chunks.length > 1 ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}` : `${store.id}-${fileHash}`;
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? "chunk" : "file",
+          storeId: store.id,
+          path: filePath,
+          indexedAt: /* @__PURE__ */ new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary
+        }
+      });
+    }
+    return { documents, sourceFile };
+  }
   async scanDirectory(dir) {
     const files = [];
     const entries = await readdir(dir, { withFileTypes: true });
@@ -4617,4 +4679,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-VTATT3IR.js.map
+//# sourceMappingURL=chunk-6ZVW2P2F.js.map
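Taken together, 0.13.0 parallelizes indexing at two levels: up to `concurrency` (default 4) files in flight, and one batched embedding call per file via embedBatch instead of one awaited embed per chunk. Only those two call sites appear in this diff, so the engine surface below is an inference, not the package's actual interface:

```ts
// Inferred embedding-engine surface; only embed() (old loop) and embedBatch()
// (new processFile) appear in this diff — the real interface may differ.
interface EmbeddingEngine {
  embed(text: string): Promise<number[]>;
  embedBatch(texts: string[]): Promise<number[][]>;
}

// A trivial stand-in showing why batching helps: one call amortizes
// per-call overhead (model invocation, IPC) across all chunks of a file.
const fakeEngine: EmbeddingEngine = {
  embed: async (text) => [text.length],
  embedBatch: async (texts) => texts.map((t) => [t.length]),
};
```

The chunk filename change itself carries no behavior: it is the expected result of content-hashed chunk naming once the bundled code changed.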