npm - bluera-knowledge - Versions diffs - 0.9.37 → 0.9.39 - Mend

bluera-knowledge 0.9.37 → 0.9.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/.env.example +12 -0
package/CHANGELOG.md +47 -0
package/README.md +27 -1
package/dist/{chunk-CGDEV2RC.js → chunk-HUEWT6U5.js} +92 -16
package/dist/chunk-HUEWT6U5.js.map +1 -0
package/dist/{chunk-AT6G626F.js → chunk-IZWOEBFM.js} +2 -2
package/dist/{chunk-6TKD5XE4.js → chunk-TIGPI3BE.js} +15 -6
package/dist/chunk-TIGPI3BE.js.map +1 -0
package/dist/index.js +23 -9
package/dist/index.js.map +1 -1
package/dist/mcp/server.js +2 -2
package/dist/workers/background-worker-cli.js +3 -2
package/dist/workers/background-worker-cli.js.map +1 -1
package/package.json +1 -1
package/plugin.json +1 -1
package/src/cli/commands/crawl.test.ts +1 -0
package/src/cli/commands/crawl.ts +2 -0
package/src/cli/commands/index-cmd.test.ts +1 -0
package/src/cli/commands/search.ts +22 -4
package/src/db/lance.ts +2 -2
package/src/mcp/handlers/search.handler.ts +7 -2
package/src/mcp/schemas/index.ts +5 -0
package/src/mcp/server.ts +5 -0
package/src/services/index.service.test.ts +1 -0
package/src/services/index.service.ts +2 -0
package/src/services/search.service.test.ts +191 -3
package/src/services/search.service.ts +121 -18
package/src/types/search.ts +8 -0
package/src/workers/background-worker.test.ts +1 -0
package/src/workers/background-worker.ts +2 -0
package/dist/chunk-6TKD5XE4.js.map +0 -1
package/dist/chunk-CGDEV2RC.js.map +0 -1
package/dist/{chunk-AT6G626F.js.map → chunk-IZWOEBFM.js.map} +0 -0

package/.env.example CHANGED

@@ -2,3 +2,15 @@
 # Valid values: trace, debug, info, warn, error, fatal
 # Default: info
 LOG_LEVEL=info
+# Search Quality Configuration
+# Test file boost multiplier (default: 0.5)
+# Lower values penalize test files more in search results
+# SEARCH_TEST_FILE_BOOST=0.5
+# Confidence thresholds for raw vector similarity scoring
+# Results with maxRawScore >= high threshold are "high" confidence
+# Results with maxRawScore >= medium threshold are "medium" confidence
+# Results below medium threshold are "low" confidence
+# SEARCH_CONFIDENCE_HIGH=0.5
+# SEARCH_CONFIDENCE_MEDIUM=0.3

package/CHANGELOG.md CHANGED

@@ -2,6 +2,53 @@
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
+## [0.9.39](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.39) (2026-01-08)
+### Features
+* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
+### Bug Fixes
+* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
+* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
+* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
+* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
+* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
+* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
+* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
+* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
+* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
+* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
+* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
+* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
+* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
+* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
+* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
+## [0.9.38](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.38) (2026-01-08)
+### Bug Fixes
+* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
+* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
+* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
+* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
+* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
+* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
+* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
+* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
+* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
+* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
+* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
+* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
+* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
+* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
+* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
 ## [0.9.37](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.37) (2026-01-08)

package/README.md CHANGED

@@ -563,9 +563,17 @@ Store is ready for searching!
 **Search across indexed knowledge stores**
 ```bash
-/bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>]
+/bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>] [--min-relevance=<0-1>]
 ```
+**Options:**
+- `--stores=<names>` - Comma-separated store names to search (default: all stores)
+- `--limit=<number>` - Maximum results to return (default: 10)
+- `--min-relevance=<0-1>` - Minimum raw cosine similarity; returns empty if no results meet threshold
+- `--threshold=<0-1>` - Minimum normalized score to include results
+- `--mode=<mode>` - Search mode: `hybrid` (default), `vector`, or `fts`
+- `--detail=<level>` - Context detail: `minimal` (default), `contextual`, or `full`
 **Examples:**
 ```bash
 # Search all stores
@@ -579,6 +587,9 @@ Store is ready for searching!
 # Limit results
 /bluera-knowledge:search "testing patterns" --limit=5
+# Filter irrelevant results (returns empty if nothing is truly relevant)
+/bluera-knowledge:search "kubernetes deployment" --min-relevance=0.4
 ```
 <details>
@@ -1213,8 +1224,23 @@ bluera-knowledge search "routing" --stores react,vue
 # Get more results with full content
 bluera-knowledge search "middleware" --limit 20 --include-content
+# Filter irrelevant results (returns empty if nothing is truly relevant)
+bluera-knowledge search "kubernetes deployment" --min-relevance 0.4
+# Get JSON output with confidence and raw scores
+bluera-knowledge search "express middleware" --format json
 ```
+**Search Options:**
+- `-s, --stores <stores>` - Comma-separated store names/IDs
+- `-m, --mode <mode>` - `hybrid` (default), `vector`, or `fts`
+- `-n, --limit <count>` - Max results (default: 10)
+- `-t, --threshold <score>` - Min normalized score (0-1)
+- `--min-relevance <score>` - Min raw cosine similarity (0-1)
+- `--include-content` - Show full content in results
+- `--detail <level>` - `minimal`, `contextual`, or `full`
 #### List Stores
 ```bash

package/dist/{chunk-CGDEV2RC.js → chunk-HUEWT6U5.js} RENAMED

@@ -2310,6 +2310,7 @@ var IndexService = class {
     }
     if (documents.length > 0) {
       await this.lanceStore.addDocuments(store.id, documents);
+      await this.lanceStore.createFtsIndex(store.id);
     }
     if (this.codeGraphService && sourceFiles.length > 0) {
       const graph = await this.codeGraphService.buildGraph(sourceFiles);
@@ -2745,6 +2746,17 @@ var SearchService = class {
     this.graphCache.set(storeId, result);
     return result;
   }
+  /**
+   * Calculate confidence level based on max raw vector similarity score.
+   * Configurable via environment variables.
+   */
+  calculateConfidence(maxRawScore) {
+    const highThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_HIGH"] ?? "0.5");
+    const mediumThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_MEDIUM"] ?? "0.3");
+    if (maxRawScore >= highThreshold) return "high";
+    if (maxRawScore >= mediumThreshold) return "medium";
+    return "low";
+  }
   async search(query) {
     const startTime = Date.now();
     const mode = query.mode ?? "hybrid";
@@ -2761,18 +2773,52 @@ var SearchService = class {
         stores,
         detail,
         intent: primaryIntent,
-        intents
+        intents,
+        minRelevance: query.minRelevance
       },
       "Search query received"
     );
     let allResults = [];
+    let maxRawScore = 0;
     const fetchLimit = limit * 3;
     if (mode === "vector") {
+      const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
+      maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
       allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
     } else if (mode === "fts") {
       allResults = await this.ftsSearch(query.query, stores, fetchLimit);
     } else {
-      allResults = await this.hybridSearch(query.query, stores, fetchLimit, query.threshold);
+      const hybridResult = await this.hybridSearchWithMetadata(
+        query.query,
+        stores,
+        fetchLimit,
+        query.threshold
+      );
+      allResults = hybridResult.results;
+      maxRawScore = hybridResult.maxRawScore;
+    }
+    if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
+      const timeMs2 = Date.now() - startTime;
+      logger2.info(
+        {
+          query: query.query,
+          mode,
+          maxRawScore,
+          minRelevance: query.minRelevance,
+          timeMs: timeMs2
+        },
+        "Search filtered by minRelevance - no sufficiently relevant results"
+      );
+      return {
+        query: query.query,
+        mode,
+        stores,
+        results: [],
+        totalResults: 0,
+        timeMs: timeMs2,
+        confidence: this.calculateConfidence(maxRawScore),
+        maxRawScore
+      };
     }
     const dedupedResults = this.deduplicateBySource(allResults, query.query);
     const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -2788,6 +2834,7 @@ var SearchService = class {
       return this.addProgressiveContext(r, query.query, detail, graph);
     });
     const timeMs = Date.now() - startTime;
+    const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
     logger2.info(
       {
         query: query.query,
@@ -2795,6 +2842,8 @@ var SearchService = class {
         resultCount: enhancedResults.length,
         dedupedFrom: allResults.length,
         intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
+        maxRawScore: mode !== "fts" ? maxRawScore : void 0,
+        confidence,
         timeMs
       },
       "Search complete"
@@ -2805,7 +2854,9 @@ var SearchService = class {
       stores,
       results: enhancedResults,
       totalResults: enhancedResults.length,
-      timeMs
+      timeMs,
+      confidence,
+      maxRawScore: mode !== "fts" ? maxRawScore : void 0
     };
   }
   /**
@@ -2866,20 +2917,29 @@ var SearchService = class {
     }
     return normalized;
   }
-  async vectorSearch(query, stores, limit, threshold) {
+  /**
+   * Fetch raw vector search results without normalization.
+   * Returns results with raw cosine similarity scores [0-1].
+   */
+  async vectorSearchRaw(query, stores, limit) {
     const queryVector = await this.embeddingEngine.embed(query);
     const results = [];
     for (const storeId of stores) {
-      const hits = await this.lanceStore.search(storeId, queryVector, limit, threshold);
+      const hits = await this.lanceStore.search(storeId, queryVector, limit);
       results.push(
         ...hits.map((r) => ({
           id: r.id,
           score: r.score,
+          // Raw cosine similarity (1 - distance)
           content: r.content,
           metadata: r.metadata
         }))
       );
     }
+    return results.sort((a, b) => b.score - a.score).slice(0, limit);
+  }
+  async vectorSearch(query, stores, limit, threshold) {
+    const results = await this.vectorSearchRaw(query, stores, limit);
     const normalized = this.normalizeAndFilterScores(results, threshold);
     return normalized.slice(0, limit);
   }
@@ -2898,12 +2958,19 @@ var SearchService = class {
     }
     return results.sort((a, b) => b.score - a.score).slice(0, limit);
   }
-  async hybridSearch(query, stores, limit, threshold) {
+  /**
+   * Internal hybrid search result with additional metadata for confidence calculation.
+   */
+  async hybridSearchWithMetadata(query, stores, limit, threshold) {
     const intents = classifyQueryIntents(query);
-    const [vectorResults, ftsResults] = await Promise.all([
-      this.vectorSearch(query, stores, limit * 2),
-      this.ftsSearch(query, stores, limit * 2)
-    ]);
+    const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * 2);
+    const rawVectorScores = /* @__PURE__ */ new Map();
+    rawVectorResults.forEach((r) => {
+      rawVectorScores.set(r.id, r.score);
+    });
+    const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
+    const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
+    const ftsResults = await this.ftsSearch(query, stores, limit * 2);
     const vectorRanks = /* @__PURE__ */ new Map();
     const ftsRanks = /* @__PURE__ */ new Map();
     const allDocs = /* @__PURE__ */ new Map();
@@ -2923,6 +2990,7 @@ var SearchService = class {
     for (const [id, result] of allDocs) {
       const vectorRank = vectorRanks.get(id) ?? Infinity;
       const ftsRank = ftsRanks.get(id) ?? Infinity;
+      const rawVectorScore = rawVectorScores.get(id);
       const vectorRRF = vectorRank !== Infinity ? vectorWeight / (k + vectorRank) : 0;
       const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
       const fileTypeBoost = this.getFileTypeBoost(
@@ -2947,10 +3015,14 @@ var SearchService = class {
       if (ftsRank !== Infinity) {
         metadata.ftsRank = ftsRank;
       }
+      if (rawVectorScore !== void 0) {
+        metadata.rawVectorScore = rawVectorScore;
+      }
       rrfScores.push({
         id,
         score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
         result,
+        rawVectorScore,
         metadata
       });
     }
@@ -2987,9 +3059,9 @@ var SearchService = class {
       normalizedResults = [];
     }
     if (threshold !== void 0) {
-      return normalizedResults.filter((r) => r.score >= threshold);
+      normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
     }
-    return normalizedResults;
+    return { results: normalizedResults, maxRawScore };
   }
   async searchAllStores(query, storeIds) {
     return this.search({
@@ -3022,7 +3094,7 @@ var SearchService = class {
         baseBoost = 0.75;
         break;
       case "test":
-        baseBoost = 0.7;
+        baseBoost = parseFloat(process.env["SEARCH_TEST_FILE_BOOST"] ?? "0.5");
         break;
       case "config":
         baseBoost = 0.5;
@@ -3039,7 +3111,11 @@ var SearchService = class {
       totalConfidence += confidence;
     }
     const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
-    return baseBoost * blendedMultiplier;
+    const finalBoost = baseBoost * blendedMultiplier;
+    if (fileType === "test") {
+      return Math.min(finalBoost, 0.6);
+    }
+    return finalBoost;
   }
   /**
    * Get a score multiplier based on URL keyword matching.
@@ -4078,7 +4154,7 @@ var LanceStore = class {
       return results.map((r) => ({
         id: createDocumentId(r.id),
         content: r.content,
-        score: r.score,
+        score: r._score,
         // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
         metadata: JSON.parse(r.metadata)
       }));
@@ -4190,4 +4266,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-CGDEV2RC.js.map
+//# sourceMappingURL=chunk-HUEWT6U5.js.map