bluera-knowledge 0.9.37 → 0.9.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -2,3 +2,15 @@
2
2
  # Valid values: trace, debug, info, warn, error, fatal
3
3
  # Default: info
4
4
  LOG_LEVEL=info
5
+
6
+ # Search Quality Configuration
7
+ # Test file boost multiplier (default: 0.5)
8
+ # Lower values penalize test files more in search results (the final boost applied to test files is capped at 0.6)
9
+ # SEARCH_TEST_FILE_BOOST=0.5
10
+
11
+ # Confidence thresholds for raw vector similarity scoring
12
+ # Results with maxRawScore >= high threshold are "high" confidence
13
+ # Results with maxRawScore >= medium threshold are "medium" confidence
14
+ # Results below medium threshold are "low" confidence
15
+ # SEARCH_CONFIDENCE_HIGH=0.5
16
+ # SEARCH_CONFIDENCE_MEDIUM=0.3
package/CHANGELOG.md CHANGED
@@ -2,6 +2,53 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
4
4
 
5
+ ## [0.9.39](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.39) (2026-01-08)
6
+
7
+
8
+ ### Features
9
+
10
+ * **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
16
+ * **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
17
+ * **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
18
+ * **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
19
+ * **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
20
+ * **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
21
+ * **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
22
+ * **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
23
+ * **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
24
+ * **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
25
+ * **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
26
+ * **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
27
+ * **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
28
+ * **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
29
+ * **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
30
+
31
+ ## [0.9.38](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.38) (2026-01-08)
32
+
33
+
34
+ ### Bug Fixes
35
+
36
+ * **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
37
+ * **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
38
+ * **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
39
+ * **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
40
+ * **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
41
+ * **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
42
+ * **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
43
+ * **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
44
+ * **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
45
+ * **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
46
+ * **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
47
+ * **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
48
+ * **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
49
+ * **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
50
+ * **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
51
+
5
52
  ## [0.9.37](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.37) (2026-01-08)
6
53
 
7
54
 
package/README.md CHANGED
@@ -563,9 +563,17 @@ Store is ready for searching!
563
563
  **Search across indexed knowledge stores**
564
564
 
565
565
  ```bash
566
- /bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>]
566
+ /bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>] [--min-relevance=<0-1>] [--threshold=<0-1>] [--mode=<mode>] [--detail=<level>]
567
567
  ```
568
568
 
569
+ **Options:**
570
+ - `--stores=<names>` - Comma-separated store names to search (default: all stores)
571
+ - `--limit=<number>` - Maximum results to return (default: 10)
572
+ - `--min-relevance=<0-1>` - Minimum raw cosine similarity required of the best match; if the top result scores below this value, no results are returned
573
+ - `--threshold=<0-1>` - Minimum normalized score to include results
574
+ - `--mode=<mode>` - Search mode: `hybrid` (default), `vector`, or `fts`
575
+ - `--detail=<level>` - Context detail: `minimal` (default), `contextual`, or `full`
576
+
569
577
  **Examples:**
570
578
  ```bash
571
579
  # Search all stores
@@ -579,6 +587,9 @@ Store is ready for searching!
579
587
 
580
588
  # Limit results
581
589
  /bluera-knowledge:search "testing patterns" --limit=5
590
+
591
+ # Filter irrelevant results (returns empty if nothing is truly relevant)
592
+ /bluera-knowledge:search "kubernetes deployment" --min-relevance=0.4
582
593
  ```
583
594
 
584
595
  <details>
@@ -1213,8 +1224,23 @@ bluera-knowledge search "routing" --stores react,vue
1213
1224
 
1214
1225
  # Get more results with full content
1215
1226
  bluera-knowledge search "middleware" --limit 20 --include-content
1227
+
1228
+ # Filter irrelevant results (returns empty if nothing is truly relevant)
1229
+ bluera-knowledge search "kubernetes deployment" --min-relevance 0.4
1230
+
1231
+ # Get JSON output with confidence and raw scores
1232
+ bluera-knowledge search "express middleware" --format json
1216
1233
  ```
1217
1234
 
1235
+ **Search Options:**
1236
+ - `-s, --stores <stores>` - Comma-separated store names/IDs
1237
+ - `-m, --mode <mode>` - `hybrid` (default), `vector`, or `fts`
1238
+ - `-n, --limit <count>` - Max results (default: 10)
1239
+ - `-t, --threshold <score>` - Min normalized score (0-1)
1240
+ - `--min-relevance <score>` - Min raw cosine similarity (0-1) required of the best match; returns no results if the top match scores lower
1241
+ - `--include-content` - Show full content in results
1242
+ - `--detail <level>` - `minimal`, `contextual`, or `full`
1243
+
1218
1244
  #### List Stores
1219
1245
 
1220
1246
  ```bash
@@ -2310,6 +2310,7 @@ var IndexService = class {
2310
2310
  }
2311
2311
  if (documents.length > 0) {
2312
2312
  await this.lanceStore.addDocuments(store.id, documents);
2313
+ await this.lanceStore.createFtsIndex(store.id);
2313
2314
  }
2314
2315
  if (this.codeGraphService && sourceFiles.length > 0) {
2315
2316
  const graph = await this.codeGraphService.buildGraph(sourceFiles);
@@ -2745,6 +2746,17 @@ var SearchService = class {
2745
2746
  this.graphCache.set(storeId, result);
2746
2747
  return result;
2747
2748
  }
2749
+ /**
2750
+ * Calculate confidence level based on max raw vector similarity score.
2751
+ * Configurable via environment variables.
2752
+ */
2753
+ calculateConfidence(maxRawScore) {
2754
+ const highThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_HIGH"] ?? "0.5");
2755
+ const mediumThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_MEDIUM"] ?? "0.3");
2756
+ if (maxRawScore >= highThreshold) return "high";
2757
+ if (maxRawScore >= mediumThreshold) return "medium";
2758
+ return "low";
2759
+ }
2748
2760
  async search(query) {
2749
2761
  const startTime = Date.now();
2750
2762
  const mode = query.mode ?? "hybrid";
@@ -2761,18 +2773,52 @@ var SearchService = class {
2761
2773
  stores,
2762
2774
  detail,
2763
2775
  intent: primaryIntent,
2764
- intents
2776
+ intents,
2777
+ minRelevance: query.minRelevance
2765
2778
  },
2766
2779
  "Search query received"
2767
2780
  );
2768
2781
  let allResults = [];
2782
+ let maxRawScore = 0;
2769
2783
  const fetchLimit = limit * 3;
2770
2784
  if (mode === "vector") {
2785
+ const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
2786
+ maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
2771
2787
  allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
2772
2788
  } else if (mode === "fts") {
2773
2789
  allResults = await this.ftsSearch(query.query, stores, fetchLimit);
2774
2790
  } else {
2775
- allResults = await this.hybridSearch(query.query, stores, fetchLimit, query.threshold);
2791
+ const hybridResult = await this.hybridSearchWithMetadata(
2792
+ query.query,
2793
+ stores,
2794
+ fetchLimit,
2795
+ query.threshold
2796
+ );
2797
+ allResults = hybridResult.results;
2798
+ maxRawScore = hybridResult.maxRawScore;
2799
+ }
2800
+ if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
2801
+ const timeMs2 = Date.now() - startTime;
2802
+ logger2.info(
2803
+ {
2804
+ query: query.query,
2805
+ mode,
2806
+ maxRawScore,
2807
+ minRelevance: query.minRelevance,
2808
+ timeMs: timeMs2
2809
+ },
2810
+ "Search filtered by minRelevance - no sufficiently relevant results"
2811
+ );
2812
+ return {
2813
+ query: query.query,
2814
+ mode,
2815
+ stores,
2816
+ results: [],
2817
+ totalResults: 0,
2818
+ timeMs: timeMs2,
2819
+ confidence: this.calculateConfidence(maxRawScore),
2820
+ maxRawScore
2821
+ };
2776
2822
  }
2777
2823
  const dedupedResults = this.deduplicateBySource(allResults, query.query);
2778
2824
  const resultsToEnhance = dedupedResults.slice(0, limit);
@@ -2788,6 +2834,7 @@ var SearchService = class {
2788
2834
  return this.addProgressiveContext(r, query.query, detail, graph);
2789
2835
  });
2790
2836
  const timeMs = Date.now() - startTime;
2837
+ const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
2791
2838
  logger2.info(
2792
2839
  {
2793
2840
  query: query.query,
@@ -2795,6 +2842,8 @@ var SearchService = class {
2795
2842
  resultCount: enhancedResults.length,
2796
2843
  dedupedFrom: allResults.length,
2797
2844
  intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
2845
+ maxRawScore: mode !== "fts" ? maxRawScore : void 0,
2846
+ confidence,
2798
2847
  timeMs
2799
2848
  },
2800
2849
  "Search complete"
@@ -2805,7 +2854,9 @@ var SearchService = class {
2805
2854
  stores,
2806
2855
  results: enhancedResults,
2807
2856
  totalResults: enhancedResults.length,
2808
- timeMs
2857
+ timeMs,
2858
+ confidence,
2859
+ maxRawScore: mode !== "fts" ? maxRawScore : void 0
2809
2860
  };
2810
2861
  }
2811
2862
  /**
@@ -2866,20 +2917,29 @@ var SearchService = class {
2866
2917
  }
2867
2918
  return normalized;
2868
2919
  }
2869
- async vectorSearch(query, stores, limit, threshold) {
2920
+ /**
2921
+ * Fetch raw vector search results without normalization.
2922
+ * Returns results with raw cosine similarity scores [0-1].
2923
+ */
2924
+ async vectorSearchRaw(query, stores, limit) {
2870
2925
  const queryVector = await this.embeddingEngine.embed(query);
2871
2926
  const results = [];
2872
2927
  for (const storeId of stores) {
2873
- const hits = await this.lanceStore.search(storeId, queryVector, limit, threshold);
2928
+ const hits = await this.lanceStore.search(storeId, queryVector, limit);
2874
2929
  results.push(
2875
2930
  ...hits.map((r) => ({
2876
2931
  id: r.id,
2877
2932
  score: r.score,
2933
+ // Raw cosine similarity (1 - distance)
2878
2934
  content: r.content,
2879
2935
  metadata: r.metadata
2880
2936
  }))
2881
2937
  );
2882
2938
  }
2939
+ return results.sort((a, b) => b.score - a.score).slice(0, limit);
2940
+ }
2941
+ async vectorSearch(query, stores, limit, threshold) {
2942
+ const results = await this.vectorSearchRaw(query, stores, limit);
2883
2943
  const normalized = this.normalizeAndFilterScores(results, threshold);
2884
2944
  return normalized.slice(0, limit);
2885
2945
  }
@@ -2898,12 +2958,19 @@ var SearchService = class {
2898
2958
  }
2899
2959
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
2900
2960
  }
2901
- async hybridSearch(query, stores, limit, threshold) {
2961
+ /**
2962
+ * Internal hybrid search result with additional metadata for confidence calculation.
2963
+ */
2964
+ async hybridSearchWithMetadata(query, stores, limit, threshold) {
2902
2965
  const intents = classifyQueryIntents(query);
2903
- const [vectorResults, ftsResults] = await Promise.all([
2904
- this.vectorSearch(query, stores, limit * 2),
2905
- this.ftsSearch(query, stores, limit * 2)
2906
- ]);
2966
+ const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * 2);
2967
+ const rawVectorScores = /* @__PURE__ */ new Map();
2968
+ rawVectorResults.forEach((r) => {
2969
+ rawVectorScores.set(r.id, r.score);
2970
+ });
2971
+ const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
2972
+ const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
2973
+ const ftsResults = await this.ftsSearch(query, stores, limit * 2);
2907
2974
  const vectorRanks = /* @__PURE__ */ new Map();
2908
2975
  const ftsRanks = /* @__PURE__ */ new Map();
2909
2976
  const allDocs = /* @__PURE__ */ new Map();
@@ -2923,6 +2990,7 @@ var SearchService = class {
2923
2990
  for (const [id, result] of allDocs) {
2924
2991
  const vectorRank = vectorRanks.get(id) ?? Infinity;
2925
2992
  const ftsRank = ftsRanks.get(id) ?? Infinity;
2993
+ const rawVectorScore = rawVectorScores.get(id);
2926
2994
  const vectorRRF = vectorRank !== Infinity ? vectorWeight / (k + vectorRank) : 0;
2927
2995
  const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
2928
2996
  const fileTypeBoost = this.getFileTypeBoost(
@@ -2947,10 +3015,14 @@ var SearchService = class {
2947
3015
  if (ftsRank !== Infinity) {
2948
3016
  metadata.ftsRank = ftsRank;
2949
3017
  }
3018
+ if (rawVectorScore !== void 0) {
3019
+ metadata.rawVectorScore = rawVectorScore;
3020
+ }
2950
3021
  rrfScores.push({
2951
3022
  id,
2952
3023
  score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
2953
3024
  result,
3025
+ rawVectorScore,
2954
3026
  metadata
2955
3027
  });
2956
3028
  }
@@ -2987,9 +3059,9 @@ var SearchService = class {
2987
3059
  normalizedResults = [];
2988
3060
  }
2989
3061
  if (threshold !== void 0) {
2990
- return normalizedResults.filter((r) => r.score >= threshold);
3062
+ normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
2991
3063
  }
2992
- return normalizedResults;
3064
+ return { results: normalizedResults, maxRawScore };
2993
3065
  }
2994
3066
  async searchAllStores(query, storeIds) {
2995
3067
  return this.search({
@@ -3022,7 +3094,7 @@ var SearchService = class {
3022
3094
  baseBoost = 0.75;
3023
3095
  break;
3024
3096
  case "test":
3025
- baseBoost = 0.7;
3097
+ baseBoost = parseFloat(process.env["SEARCH_TEST_FILE_BOOST"] ?? "0.5");
3026
3098
  break;
3027
3099
  case "config":
3028
3100
  baseBoost = 0.5;
@@ -3039,7 +3111,11 @@ var SearchService = class {
3039
3111
  totalConfidence += confidence;
3040
3112
  }
3041
3113
  const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
3042
- return baseBoost * blendedMultiplier;
3114
+ const finalBoost = baseBoost * blendedMultiplier;
3115
+ if (fileType === "test") {
3116
+ return Math.min(finalBoost, 0.6);
3117
+ }
3118
+ return finalBoost;
3043
3119
  }
3044
3120
  /**
3045
3121
  * Get a score multiplier based on URL keyword matching.
@@ -4078,7 +4154,7 @@ var LanceStore = class {
4078
4154
  return results.map((r) => ({
4079
4155
  id: createDocumentId(r.id),
4080
4156
  content: r.content,
4081
- score: r.score,
4157
+ score: r._score,
4082
4158
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
4083
4159
  metadata: JSON.parse(r.metadata)
4084
4160
  }));
@@ -4190,4 +4266,4 @@ export {
4190
4266
  createServices,
4191
4267
  destroyServices
4192
4268
  };
4193
- //# sourceMappingURL=chunk-CGDEV2RC.js.map
4269
+ //# sourceMappingURL=chunk-HUEWT6U5.js.map