bluera-knowledge 0.9.38 → 0.9.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -2,3 +2,15 @@
2
2
  # Valid values: trace, debug, info, warn, error, fatal
3
3
  # Default: info
4
4
  LOG_LEVEL=info
5
+
6
+ # Search Quality Configuration
7
+ # Test file boost multiplier (default: 0.5)
8
+ # Lower values penalize test files more in search results; the effective boost for test files is capped at 0.6
9
+ # SEARCH_TEST_FILE_BOOST=0.5
10
+
11
+ # Confidence thresholds for raw vector similarity scoring
12
+ # Results with maxRawScore >= high threshold are "high" confidence
13
+ # Results with maxRawScore >= medium threshold (but below the high threshold) are "medium" confidence
14
+ # Results below medium threshold are "low" confidence
15
+ # SEARCH_CONFIDENCE_HIGH=0.5
16
+ # SEARCH_CONFIDENCE_MEDIUM=0.3
package/CHANGELOG.md CHANGED
@@ -2,6 +2,32 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
4
4
 
5
+ ## [0.9.39](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.39) (2026-01-08)
6
+
7
+
8
+ ### Features
9
+
10
+ * **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
16
+ * **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
17
+ * **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
18
+ * **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
19
+ * **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
20
+ * **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
21
+ * **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
22
+ * **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
23
+ * **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
24
+ * **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
25
+ * **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
26
+ * **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
27
+ * **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
28
+ * **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
29
+ * **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
30
+
5
31
  ## [0.9.38](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.38) (2026-01-08)
6
32
 
7
33
 
package/README.md CHANGED
@@ -563,9 +563,17 @@ Store is ready for searching!
563
563
  **Search across indexed knowledge stores**
564
564
 
565
565
  ```bash
566
- /bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>]
566
+ /bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>] [--min-relevance=<0-1>]
567
567
  ```
568
568
 
569
+ **Options:**
570
+ - `--stores=<names>` - Comma-separated store names to search (default: all stores)
571
+ - `--limit=<number>` - Maximum results to return (default: 10)
572
+ - `--min-relevance=<0-1>` - Minimum raw cosine similarity required of the best match; returns no results at all if the top raw score is below this threshold
573
+ - `--threshold=<0-1>` - Minimum normalized score to include results
574
+ - `--mode=<mode>` - Search mode: `hybrid` (default), `vector`, or `fts`
575
+ - `--detail=<level>` - Context detail: `minimal` (default), `contextual`, or `full`
576
+
569
577
  **Examples:**
570
578
  ```bash
571
579
  # Search all stores
@@ -579,6 +587,9 @@ Store is ready for searching!
579
587
 
580
588
  # Limit results
581
589
  /bluera-knowledge:search "testing patterns" --limit=5
590
+
591
+ # Filter irrelevant results (returns empty if nothing is truly relevant)
592
+ /bluera-knowledge:search "kubernetes deployment" --min-relevance=0.4
582
593
  ```
583
594
 
584
595
  <details>
@@ -1213,8 +1224,23 @@ bluera-knowledge search "routing" --stores react,vue
1213
1224
 
1214
1225
  # Get more results with full content
1215
1226
  bluera-knowledge search "middleware" --limit 20 --include-content
1227
+
1228
+ # Filter irrelevant results (returns empty if nothing is truly relevant)
1229
+ bluera-knowledge search "kubernetes deployment" --min-relevance 0.4
1230
+
1231
+ # Get JSON output with confidence and raw scores
1232
+ bluera-knowledge search "express middleware" --format json
1216
1233
  ```
1217
1234
 
1235
+ **Search Options:**
1236
+ - `-s, --stores <stores>` - Comma-separated store names/IDs
1237
+ - `-m, --mode <mode>` - `hybrid` (default), `vector`, or `fts`
1238
+ - `-n, --limit <count>` - Max results (default: 10)
1239
+ - `-t, --threshold <score>` - Min normalized score (0-1)
1240
+ - `--min-relevance <score>` - Min raw cosine similarity (0-1)
1241
+ - `--include-content` - Show full content in results
1242
+ - `--detail <level>` - `minimal`, `contextual`, or `full`
1243
+
1218
1244
  #### List Stores
1219
1245
 
1220
1246
  ```bash
@@ -2746,6 +2746,17 @@ var SearchService = class {
2746
2746
  this.graphCache.set(storeId, result);
2747
2747
  return result;
2748
2748
  }
2749
+ /**
2750
+ * Calculate confidence level based on max raw vector similarity score.
2751
+ * Thresholds are configurable via the SEARCH_CONFIDENCE_HIGH (default 0.5) and SEARCH_CONFIDENCE_MEDIUM (default 0.3) environment variables.
2752
+ */
2753
+ calculateConfidence(maxRawScore) {
2754
+ const highThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_HIGH"] ?? "0.5");
2755
+ const mediumThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_MEDIUM"] ?? "0.3");
2756
+ if (maxRawScore >= highThreshold) return "high";
2757
+ if (maxRawScore >= mediumThreshold) return "medium";
2758
+ return "low";
2759
+ }
2749
2760
  async search(query) {
2750
2761
  const startTime = Date.now();
2751
2762
  const mode = query.mode ?? "hybrid";
@@ -2762,18 +2773,52 @@ var SearchService = class {
2762
2773
  stores,
2763
2774
  detail,
2764
2775
  intent: primaryIntent,
2765
- intents
2776
+ intents,
2777
+ minRelevance: query.minRelevance
2766
2778
  },
2767
2779
  "Search query received"
2768
2780
  );
2769
2781
  let allResults = [];
2782
+ let maxRawScore = 0;
2770
2783
  const fetchLimit = limit * 3;
2771
2784
  if (mode === "vector") {
2785
+ const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
2786
+ maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
2772
2787
  allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
2773
2788
  } else if (mode === "fts") {
2774
2789
  allResults = await this.ftsSearch(query.query, stores, fetchLimit);
2775
2790
  } else {
2776
- allResults = await this.hybridSearch(query.query, stores, fetchLimit, query.threshold);
2791
+ const hybridResult = await this.hybridSearchWithMetadata(
2792
+ query.query,
2793
+ stores,
2794
+ fetchLimit,
2795
+ query.threshold
2796
+ );
2797
+ allResults = hybridResult.results;
2798
+ maxRawScore = hybridResult.maxRawScore;
2799
+ }
2800
+ if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
2801
+ const timeMs2 = Date.now() - startTime;
2802
+ logger2.info(
2803
+ {
2804
+ query: query.query,
2805
+ mode,
2806
+ maxRawScore,
2807
+ minRelevance: query.minRelevance,
2808
+ timeMs: timeMs2
2809
+ },
2810
+ "Search filtered by minRelevance - no sufficiently relevant results"
2811
+ );
2812
+ return {
2813
+ query: query.query,
2814
+ mode,
2815
+ stores,
2816
+ results: [],
2817
+ totalResults: 0,
2818
+ timeMs: timeMs2,
2819
+ confidence: this.calculateConfidence(maxRawScore),
2820
+ maxRawScore
2821
+ };
2777
2822
  }
2778
2823
  const dedupedResults = this.deduplicateBySource(allResults, query.query);
2779
2824
  const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -2789,6 +2834,7 @@ var SearchService = class {
2789
2834
  return this.addProgressiveContext(r, query.query, detail, graph);
2790
2835
  });
2791
2836
  const timeMs = Date.now() - startTime;
2837
+ const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
2792
2838
  logger2.info(
2793
2839
  {
2794
2840
  query: query.query,
@@ -2796,6 +2842,8 @@ var SearchService = class {
2796
2842
  resultCount: enhancedResults.length,
2797
2843
  dedupedFrom: allResults.length,
2798
2844
  intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
2845
+ maxRawScore: mode !== "fts" ? maxRawScore : void 0,
2846
+ confidence,
2799
2847
  timeMs
2800
2848
  },
2801
2849
  "Search complete"
@@ -2806,7 +2854,9 @@ var SearchService = class {
2806
2854
  stores,
2807
2855
  results: enhancedResults,
2808
2856
  totalResults: enhancedResults.length,
2809
- timeMs
2857
+ timeMs,
2858
+ confidence,
2859
+ maxRawScore: mode !== "fts" ? maxRawScore : void 0
2810
2860
  };
2811
2861
  }
2812
2862
  /**
@@ -2867,20 +2917,29 @@ var SearchService = class {
2867
2917
  }
2868
2918
  return normalized;
2869
2919
  }
2870
- async vectorSearch(query, stores, limit, threshold) {
2920
+ /**
2921
+ * Fetch raw vector search results without normalization.
2922
+ * Returns results with raw cosine similarity scores [0-1].
2923
+ */
2924
+ async vectorSearchRaw(query, stores, limit) {
2871
2925
  const queryVector = await this.embeddingEngine.embed(query);
2872
2926
  const results = [];
2873
2927
  for (const storeId of stores) {
2874
- const hits = await this.lanceStore.search(storeId, queryVector, limit, threshold);
2928
+ const hits = await this.lanceStore.search(storeId, queryVector, limit);
2875
2929
  results.push(
2876
2930
  ...hits.map((r) => ({
2877
2931
  id: r.id,
2878
2932
  score: r.score,
2933
+ // Raw cosine similarity (1 - distance)
2879
2934
  content: r.content,
2880
2935
  metadata: r.metadata
2881
2936
  }))
2882
2937
  );
2883
2938
  }
2939
+ return results.sort((a, b) => b.score - a.score).slice(0, limit);
2940
+ }
2941
+ async vectorSearch(query, stores, limit, threshold) {
2942
+ const results = await this.vectorSearchRaw(query, stores, limit);
2884
2943
  const normalized = this.normalizeAndFilterScores(results, threshold);
2885
2944
  return normalized.slice(0, limit);
2886
2945
  }
@@ -2899,12 +2958,19 @@ var SearchService = class {
2899
2958
  }
2900
2959
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
2901
2960
  }
2902
- async hybridSearch(query, stores, limit, threshold) {
2961
+ /**
2962
+ * Internal hybrid search that returns the results together with the max raw vector score (`{ results, maxRawScore }`) for confidence calculation.
2963
+ */
2964
+ async hybridSearchWithMetadata(query, stores, limit, threshold) {
2903
2965
  const intents = classifyQueryIntents(query);
2904
- const [vectorResults, ftsResults] = await Promise.all([
2905
- this.vectorSearch(query, stores, limit * 2),
2906
- this.ftsSearch(query, stores, limit * 2)
2907
- ]);
2966
+ const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * 2);
2967
+ const rawVectorScores = /* @__PURE__ */ new Map();
2968
+ rawVectorResults.forEach((r) => {
2969
+ rawVectorScores.set(r.id, r.score);
2970
+ });
2971
+ const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
2972
+ const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
2973
+ const ftsResults = await this.ftsSearch(query, stores, limit * 2);
2908
2974
  const vectorRanks = /* @__PURE__ */ new Map();
2909
2975
  const ftsRanks = /* @__PURE__ */ new Map();
2910
2976
  const allDocs = /* @__PURE__ */ new Map();
@@ -2924,6 +2990,7 @@ var SearchService = class {
2924
2990
  for (const [id, result] of allDocs) {
2925
2991
  const vectorRank = vectorRanks.get(id) ?? Infinity;
2926
2992
  const ftsRank = ftsRanks.get(id) ?? Infinity;
2993
+ const rawVectorScore = rawVectorScores.get(id);
2927
2994
  const vectorRRF = vectorRank !== Infinity ? vectorWeight / (k + vectorRank) : 0;
2928
2995
  const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
2929
2996
  const fileTypeBoost = this.getFileTypeBoost(
@@ -2948,10 +3015,14 @@ var SearchService = class {
2948
3015
  if (ftsRank !== Infinity) {
2949
3016
  metadata.ftsRank = ftsRank;
2950
3017
  }
3018
+ if (rawVectorScore !== void 0) {
3019
+ metadata.rawVectorScore = rawVectorScore;
3020
+ }
2951
3021
  rrfScores.push({
2952
3022
  id,
2953
3023
  score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
2954
3024
  result,
3025
+ rawVectorScore,
2955
3026
  metadata
2956
3027
  });
2957
3028
  }
@@ -2988,9 +3059,9 @@ var SearchService = class {
2988
3059
  normalizedResults = [];
2989
3060
  }
2990
3061
  if (threshold !== void 0) {
2991
- return normalizedResults.filter((r) => r.score >= threshold);
3062
+ normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
2992
3063
  }
2993
- return normalizedResults;
3064
+ return { results: normalizedResults, maxRawScore };
2994
3065
  }
2995
3066
  async searchAllStores(query, storeIds) {
2996
3067
  return this.search({
@@ -3023,7 +3094,7 @@ var SearchService = class {
3023
3094
  baseBoost = 0.75;
3024
3095
  break;
3025
3096
  case "test":
3026
- baseBoost = 0.7;
3097
+ baseBoost = parseFloat(process.env["SEARCH_TEST_FILE_BOOST"] ?? "0.5");
3027
3098
  break;
3028
3099
  case "config":
3029
3100
  baseBoost = 0.5;
@@ -3040,7 +3111,11 @@ var SearchService = class {
3040
3111
  totalConfidence += confidence;
3041
3112
  }
3042
3113
  const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
3043
- return baseBoost * blendedMultiplier;
3114
+ const finalBoost = baseBoost * blendedMultiplier;
3115
+ if (fileType === "test") {
3116
+ return Math.min(finalBoost, 0.6);
3117
+ }
3118
+ return finalBoost;
3044
3119
  }
3045
3120
  /**
3046
3121
  * Get a score multiplier based on URL keyword matching.
@@ -4191,4 +4266,4 @@ export {
4191
4266
  createServices,
4192
4267
  destroyServices
4193
4268
  };
4194
- //# sourceMappingURL=chunk-XJFV7AJW.js.map
4269
+ //# sourceMappingURL=chunk-HUEWT6U5.js.map