bluera-knowledge 0.9.38 → 0.9.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -0
- package/CHANGELOG.md +26 -0
- package/README.md +27 -1
- package/dist/{chunk-XJFV7AJW.js → chunk-HUEWT6U5.js} +90 -15
- package/dist/chunk-HUEWT6U5.js.map +1 -0
- package/dist/{chunk-ZAWIPEYX.js → chunk-IZWOEBFM.js} +2 -2
- package/dist/{chunk-36IFANFI.js → chunk-TIGPI3BE.js} +15 -6
- package/dist/chunk-TIGPI3BE.js.map +1 -0
- package/dist/index.js +22 -9
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/package.json +1 -1
- package/plugin.json +1 -1
- package/src/cli/commands/search.ts +22 -4
- package/src/mcp/handlers/search.handler.ts +7 -2
- package/src/mcp/schemas/index.ts +5 -0
- package/src/mcp/server.ts +5 -0
- package/src/services/search.service.test.ts +191 -3
- package/src/services/search.service.ts +121 -18
- package/src/types/search.ts +8 -0
- package/dist/chunk-36IFANFI.js.map +0 -1
- package/dist/chunk-XJFV7AJW.js.map +0 -1
- /package/dist/{chunk-ZAWIPEYX.js.map → chunk-IZWOEBFM.js.map} +0 -0
package/.env.example
CHANGED
|
@@ -2,3 +2,15 @@
|
|
|
2
2
|
# Valid values: trace, debug, info, warn, error, fatal
|
|
3
3
|
# Default: info
|
|
4
4
|
LOG_LEVEL=info
|
|
5
|
+
|
|
6
|
+
# Search Quality Configuration
|
|
7
|
+
# Test file boost multiplier (default: 0.5)
|
|
8
|
+
# Lower values penalize test files more in search results
|
|
9
|
+
# SEARCH_TEST_FILE_BOOST=0.5
|
|
10
|
+
|
|
11
|
+
# Confidence thresholds for raw vector similarity scoring
|
|
12
|
+
# Results with maxRawScore >= high threshold are "high" confidence
|
|
13
|
+
# Results with maxRawScore >= medium threshold (and below the high threshold) are "medium" confidence
|
|
14
|
+
# Results below medium threshold are "low" confidence
|
|
15
|
+
# SEARCH_CONFIDENCE_HIGH=0.5
|
|
16
|
+
# SEARCH_CONFIDENCE_MEDIUM=0.3
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,32 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [0.9.39](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.39) (2026-01-08)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Bug Fixes
|
|
14
|
+
|
|
15
|
+
* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
|
|
16
|
+
* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
|
|
17
|
+
* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
|
|
18
|
+
* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
|
|
19
|
+
* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
|
|
20
|
+
* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
|
|
21
|
+
* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
|
|
22
|
+
* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
|
|
23
|
+
* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
|
|
24
|
+
* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
|
|
25
|
+
* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
|
|
26
|
+
* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
|
|
27
|
+
* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
|
|
28
|
+
* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
|
|
29
|
+
* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
|
|
30
|
+
|
|
5
31
|
## [0.9.38](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.38) (2026-01-08)
|
|
6
32
|
|
|
7
33
|
|
package/README.md
CHANGED
|
@@ -563,9 +563,17 @@ Store is ready for searching!
|
|
|
563
563
|
**Search across indexed knowledge stores**
|
|
564
564
|
|
|
565
565
|
```bash
|
|
566
|
-
/bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>]
|
|
566
|
+
/bluera-knowledge:search "<query>" [--stores=<names>] [--limit=<number>] [--min-relevance=<0-1>]
|
|
567
567
|
```
|
|
568
568
|
|
|
569
|
+
**Options:**
|
|
570
|
+
- `--stores=<names>` - Comma-separated store names to search (default: all stores)
|
|
571
|
+
- `--limit=<number>` - Maximum results to return (default: 10)
|
|
572
|
+
- `--min-relevance=<0-1>` - Minimum raw cosine similarity; returns no results if the best match scores below this threshold
|
|
573
|
+
- `--threshold=<0-1>` - Minimum normalized score to include results
|
|
574
|
+
- `--mode=<mode>` - Search mode: `hybrid` (default), `vector`, or `fts`
|
|
575
|
+
- `--detail=<level>` - Context detail: `minimal` (default), `contextual`, or `full`
|
|
576
|
+
|
|
569
577
|
**Examples:**
|
|
570
578
|
```bash
|
|
571
579
|
# Search all stores
|
|
@@ -579,6 +587,9 @@ Store is ready for searching!
|
|
|
579
587
|
|
|
580
588
|
# Limit results
|
|
581
589
|
/bluera-knowledge:search "testing patterns" --limit=5
|
|
590
|
+
|
|
591
|
+
# Filter irrelevant results (returns empty if nothing is truly relevant)
|
|
592
|
+
/bluera-knowledge:search "kubernetes deployment" --min-relevance=0.4
|
|
582
593
|
```
|
|
583
594
|
|
|
584
595
|
<details>
|
|
@@ -1213,8 +1224,23 @@ bluera-knowledge search "routing" --stores react,vue
|
|
|
1213
1224
|
|
|
1214
1225
|
# Get more results with full content
|
|
1215
1226
|
bluera-knowledge search "middleware" --limit 20 --include-content
|
|
1227
|
+
|
|
1228
|
+
# Filter irrelevant results (returns empty if nothing is truly relevant)
|
|
1229
|
+
bluera-knowledge search "kubernetes deployment" --min-relevance 0.4
|
|
1230
|
+
|
|
1231
|
+
# Get JSON output with confidence and raw scores
|
|
1232
|
+
bluera-knowledge search "express middleware" --format json
|
|
1216
1233
|
```
|
|
1217
1234
|
|
|
1235
|
+
**Search Options:**
|
|
1236
|
+
- `-s, --stores <stores>` - Comma-separated store names/IDs
|
|
1237
|
+
- `-m, --mode <mode>` - `hybrid` (default), `vector`, or `fts`
|
|
1238
|
+
- `-n, --limit <count>` - Max results (default: 10)
|
|
1239
|
+
- `-t, --threshold <score>` - Min normalized score (0-1)
|
|
1240
|
+
- `--min-relevance <score>` - Min raw cosine similarity (0-1)
|
|
1241
|
+
- `--include-content` - Show full content in results
|
|
1242
|
+
- `--detail <level>` - `minimal`, `contextual`, or `full`
|
|
1243
|
+
|
|
1218
1244
|
#### List Stores
|
|
1219
1245
|
|
|
1220
1246
|
```bash
|
|
@@ -2746,6 +2746,17 @@ var SearchService = class {
|
|
|
2746
2746
|
this.graphCache.set(storeId, result);
|
|
2747
2747
|
return result;
|
|
2748
2748
|
}
|
|
2749
|
+
/**
|
|
2750
|
+
* Calculate confidence level based on max raw vector similarity score.
|
|
2751
|
+
* Configurable via environment variables.
|
|
2752
|
+
*/
|
|
2753
|
+
calculateConfidence(maxRawScore) {
|
|
2754
|
+
const highThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_HIGH"] ?? "0.5");
|
|
2755
|
+
const mediumThreshold = parseFloat(process.env["SEARCH_CONFIDENCE_MEDIUM"] ?? "0.3");
|
|
2756
|
+
if (maxRawScore >= highThreshold) return "high";
|
|
2757
|
+
if (maxRawScore >= mediumThreshold) return "medium";
|
|
2758
|
+
return "low";
|
|
2759
|
+
}
|
|
2749
2760
|
async search(query) {
|
|
2750
2761
|
const startTime = Date.now();
|
|
2751
2762
|
const mode = query.mode ?? "hybrid";
|
|
@@ -2762,18 +2773,52 @@ var SearchService = class {
|
|
|
2762
2773
|
stores,
|
|
2763
2774
|
detail,
|
|
2764
2775
|
intent: primaryIntent,
|
|
2765
|
-
intents
|
|
2776
|
+
intents,
|
|
2777
|
+
minRelevance: query.minRelevance
|
|
2766
2778
|
},
|
|
2767
2779
|
"Search query received"
|
|
2768
2780
|
);
|
|
2769
2781
|
let allResults = [];
|
|
2782
|
+
let maxRawScore = 0;
|
|
2770
2783
|
const fetchLimit = limit * 3;
|
|
2771
2784
|
if (mode === "vector") {
|
|
2785
|
+
const rawResults = await this.vectorSearchRaw(query.query, stores, fetchLimit);
|
|
2786
|
+
maxRawScore = rawResults.length > 0 ? rawResults[0]?.score ?? 0 : 0;
|
|
2772
2787
|
allResults = await this.vectorSearch(query.query, stores, fetchLimit, query.threshold);
|
|
2773
2788
|
} else if (mode === "fts") {
|
|
2774
2789
|
allResults = await this.ftsSearch(query.query, stores, fetchLimit);
|
|
2775
2790
|
} else {
|
|
2776
|
-
|
|
2791
|
+
const hybridResult = await this.hybridSearchWithMetadata(
|
|
2792
|
+
query.query,
|
|
2793
|
+
stores,
|
|
2794
|
+
fetchLimit,
|
|
2795
|
+
query.threshold
|
|
2796
|
+
);
|
|
2797
|
+
allResults = hybridResult.results;
|
|
2798
|
+
maxRawScore = hybridResult.maxRawScore;
|
|
2799
|
+
}
|
|
2800
|
+
if (query.minRelevance !== void 0 && maxRawScore < query.minRelevance) {
|
|
2801
|
+
const timeMs2 = Date.now() - startTime;
|
|
2802
|
+
logger2.info(
|
|
2803
|
+
{
|
|
2804
|
+
query: query.query,
|
|
2805
|
+
mode,
|
|
2806
|
+
maxRawScore,
|
|
2807
|
+
minRelevance: query.minRelevance,
|
|
2808
|
+
timeMs: timeMs2
|
|
2809
|
+
},
|
|
2810
|
+
"Search filtered by minRelevance - no sufficiently relevant results"
|
|
2811
|
+
);
|
|
2812
|
+
return {
|
|
2813
|
+
query: query.query,
|
|
2814
|
+
mode,
|
|
2815
|
+
stores,
|
|
2816
|
+
results: [],
|
|
2817
|
+
totalResults: 0,
|
|
2818
|
+
timeMs: timeMs2,
|
|
2819
|
+
confidence: this.calculateConfidence(maxRawScore),
|
|
2820
|
+
maxRawScore
|
|
2821
|
+
};
|
|
2777
2822
|
}
|
|
2778
2823
|
const dedupedResults = this.deduplicateBySource(allResults, query.query);
|
|
2779
2824
|
const resultsToEnhance = dedupedResults.slice(0, limit);
|
|
@@ -2789,6 +2834,7 @@ var SearchService = class {
|
|
|
2789
2834
|
return this.addProgressiveContext(r, query.query, detail, graph);
|
|
2790
2835
|
});
|
|
2791
2836
|
const timeMs = Date.now() - startTime;
|
|
2837
|
+
const confidence = mode !== "fts" ? this.calculateConfidence(maxRawScore) : void 0;
|
|
2792
2838
|
logger2.info(
|
|
2793
2839
|
{
|
|
2794
2840
|
query: query.query,
|
|
@@ -2796,6 +2842,8 @@ var SearchService = class {
|
|
|
2796
2842
|
resultCount: enhancedResults.length,
|
|
2797
2843
|
dedupedFrom: allResults.length,
|
|
2798
2844
|
intents: intents.map((i) => `${i.intent}(${i.confidence.toFixed(2)})`),
|
|
2845
|
+
maxRawScore: mode !== "fts" ? maxRawScore : void 0,
|
|
2846
|
+
confidence,
|
|
2799
2847
|
timeMs
|
|
2800
2848
|
},
|
|
2801
2849
|
"Search complete"
|
|
@@ -2806,7 +2854,9 @@ var SearchService = class {
|
|
|
2806
2854
|
stores,
|
|
2807
2855
|
results: enhancedResults,
|
|
2808
2856
|
totalResults: enhancedResults.length,
|
|
2809
|
-
timeMs
|
|
2857
|
+
timeMs,
|
|
2858
|
+
confidence,
|
|
2859
|
+
maxRawScore: mode !== "fts" ? maxRawScore : void 0
|
|
2810
2860
|
};
|
|
2811
2861
|
}
|
|
2812
2862
|
/**
|
|
@@ -2867,20 +2917,29 @@ var SearchService = class {
|
|
|
2867
2917
|
}
|
|
2868
2918
|
return normalized;
|
|
2869
2919
|
}
|
|
2870
|
-
|
|
2920
|
+
/**
|
|
2921
|
+
* Fetch raw vector search results without normalization.
|
|
2922
|
+
* Returns results with raw cosine similarity scores [0-1].
|
|
2923
|
+
*/
|
|
2924
|
+
async vectorSearchRaw(query, stores, limit) {
|
|
2871
2925
|
const queryVector = await this.embeddingEngine.embed(query);
|
|
2872
2926
|
const results = [];
|
|
2873
2927
|
for (const storeId of stores) {
|
|
2874
|
-
const hits = await this.lanceStore.search(storeId, queryVector, limit
|
|
2928
|
+
const hits = await this.lanceStore.search(storeId, queryVector, limit);
|
|
2875
2929
|
results.push(
|
|
2876
2930
|
...hits.map((r) => ({
|
|
2877
2931
|
id: r.id,
|
|
2878
2932
|
score: r.score,
|
|
2933
|
+
// Raw cosine similarity (1 - distance)
|
|
2879
2934
|
content: r.content,
|
|
2880
2935
|
metadata: r.metadata
|
|
2881
2936
|
}))
|
|
2882
2937
|
);
|
|
2883
2938
|
}
|
|
2939
|
+
return results.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
2940
|
+
}
|
|
2941
|
+
async vectorSearch(query, stores, limit, threshold) {
|
|
2942
|
+
const results = await this.vectorSearchRaw(query, stores, limit);
|
|
2884
2943
|
const normalized = this.normalizeAndFilterScores(results, threshold);
|
|
2885
2944
|
return normalized.slice(0, limit);
|
|
2886
2945
|
}
|
|
@@ -2899,12 +2958,19 @@ var SearchService = class {
|
|
|
2899
2958
|
}
|
|
2900
2959
|
return results.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
2901
2960
|
}
|
|
2902
|
-
|
|
2961
|
+
/**
|
|
2962
|
+
* Internal hybrid search that returns the results together with the max raw vector score used for confidence calculation.
|
|
2963
|
+
*/
|
|
2964
|
+
async hybridSearchWithMetadata(query, stores, limit, threshold) {
|
|
2903
2965
|
const intents = classifyQueryIntents(query);
|
|
2904
|
-
const
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
2966
|
+
const rawVectorResults = await this.vectorSearchRaw(query, stores, limit * 2);
|
|
2967
|
+
const rawVectorScores = /* @__PURE__ */ new Map();
|
|
2968
|
+
rawVectorResults.forEach((r) => {
|
|
2969
|
+
rawVectorScores.set(r.id, r.score);
|
|
2970
|
+
});
|
|
2971
|
+
const maxRawScore = rawVectorResults.length > 0 ? rawVectorResults[0]?.score ?? 0 : 0;
|
|
2972
|
+
const vectorResults = this.normalizeAndFilterScores(rawVectorResults);
|
|
2973
|
+
const ftsResults = await this.ftsSearch(query, stores, limit * 2);
|
|
2908
2974
|
const vectorRanks = /* @__PURE__ */ new Map();
|
|
2909
2975
|
const ftsRanks = /* @__PURE__ */ new Map();
|
|
2910
2976
|
const allDocs = /* @__PURE__ */ new Map();
|
|
@@ -2924,6 +2990,7 @@ var SearchService = class {
|
|
|
2924
2990
|
for (const [id, result] of allDocs) {
|
|
2925
2991
|
const vectorRank = vectorRanks.get(id) ?? Infinity;
|
|
2926
2992
|
const ftsRank = ftsRanks.get(id) ?? Infinity;
|
|
2993
|
+
const rawVectorScore = rawVectorScores.get(id);
|
|
2927
2994
|
const vectorRRF = vectorRank !== Infinity ? vectorWeight / (k + vectorRank) : 0;
|
|
2928
2995
|
const ftsRRF = ftsRank !== Infinity ? ftsWeight / (k + ftsRank) : 0;
|
|
2929
2996
|
const fileTypeBoost = this.getFileTypeBoost(
|
|
@@ -2948,10 +3015,14 @@ var SearchService = class {
|
|
|
2948
3015
|
if (ftsRank !== Infinity) {
|
|
2949
3016
|
metadata.ftsRank = ftsRank;
|
|
2950
3017
|
}
|
|
3018
|
+
if (rawVectorScore !== void 0) {
|
|
3019
|
+
metadata.rawVectorScore = rawVectorScore;
|
|
3020
|
+
}
|
|
2951
3021
|
rrfScores.push({
|
|
2952
3022
|
id,
|
|
2953
3023
|
score: (vectorRRF + ftsRRF) * fileTypeBoost * frameworkBoost * urlKeywordBoost * pathKeywordBoost,
|
|
2954
3024
|
result,
|
|
3025
|
+
rawVectorScore,
|
|
2955
3026
|
metadata
|
|
2956
3027
|
});
|
|
2957
3028
|
}
|
|
@@ -2988,9 +3059,9 @@ var SearchService = class {
|
|
|
2988
3059
|
normalizedResults = [];
|
|
2989
3060
|
}
|
|
2990
3061
|
if (threshold !== void 0) {
|
|
2991
|
-
|
|
3062
|
+
normalizedResults = normalizedResults.filter((r) => r.score >= threshold);
|
|
2992
3063
|
}
|
|
2993
|
-
return normalizedResults;
|
|
3064
|
+
return { results: normalizedResults, maxRawScore };
|
|
2994
3065
|
}
|
|
2995
3066
|
async searchAllStores(query, storeIds) {
|
|
2996
3067
|
return this.search({
|
|
@@ -3023,7 +3094,7 @@ var SearchService = class {
|
|
|
3023
3094
|
baseBoost = 0.75;
|
|
3024
3095
|
break;
|
|
3025
3096
|
case "test":
|
|
3026
|
-
baseBoost = 0.
|
|
3097
|
+
baseBoost = parseFloat(process.env["SEARCH_TEST_FILE_BOOST"] ?? "0.5");
|
|
3027
3098
|
break;
|
|
3028
3099
|
case "config":
|
|
3029
3100
|
baseBoost = 0.5;
|
|
@@ -3040,7 +3111,11 @@ var SearchService = class {
|
|
|
3040
3111
|
totalConfidence += confidence;
|
|
3041
3112
|
}
|
|
3042
3113
|
const blendedMultiplier = totalConfidence > 0 ? weightedMultiplier / totalConfidence : 1;
|
|
3043
|
-
|
|
3114
|
+
const finalBoost = baseBoost * blendedMultiplier;
|
|
3115
|
+
if (fileType === "test") {
|
|
3116
|
+
return Math.min(finalBoost, 0.6);
|
|
3117
|
+
}
|
|
3118
|
+
return finalBoost;
|
|
3044
3119
|
}
|
|
3045
3120
|
/**
|
|
3046
3121
|
* Get a score multiplier based on URL keyword matching.
|
|
@@ -4191,4 +4266,4 @@ export {
|
|
|
4191
4266
|
createServices,
|
|
4192
4267
|
destroyServices
|
|
4193
4268
|
};
|
|
4194
|
-
//# sourceMappingURL=chunk-XJFV7AJW.js.map
|
|
4269
|
+
//# sourceMappingURL=chunk-HUEWT6U5.js.map
|