raggrep 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -40
- package/dist/app/search/index.d.ts +2 -1
- package/dist/cli/main.js +445 -79
- package/dist/cli/main.js.map +20 -17
- package/dist/domain/entities/index.d.ts +3 -1
- package/dist/domain/entities/rankingWeights.d.ts +84 -0
- package/dist/domain/entities/searchResult.d.ts +28 -1
- package/dist/domain/services/bm25.d.ts +5 -0
- package/dist/domain/services/discriminativeTerms.d.ts +28 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/literalScorer.d.ts +9 -23
- package/dist/domain/services/matchScales.d.ts +19 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +433 -75
- package/dist/index.js.map +20 -17
- package/dist/types.d.ts +1 -1
- package/package.json +7 -4
package/dist/index.js
CHANGED
|
@@ -23,7 +23,107 @@ var init_searchResult = __esm(() => {
|
|
|
23
23
|
minScore: 0.15,
|
|
24
24
|
filePatterns: [],
|
|
25
25
|
pathFilter: [],
|
|
26
|
-
ensureFresh: true
|
|
26
|
+
ensureFresh: true,
|
|
27
|
+
rankingWeights: {},
|
|
28
|
+
quiet: false,
|
|
29
|
+
rankBy: "structured"
|
|
30
|
+
};
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// src/domain/entities/rankingWeights.ts
|
|
34
|
+
function mergeLiteralWeights(def, partial) {
|
|
35
|
+
if (!partial) {
|
|
36
|
+
return def;
|
|
37
|
+
}
|
|
38
|
+
return {
|
|
39
|
+
baseScore: partial.baseScore ?? def.baseScore,
|
|
40
|
+
multipliers: {
|
|
41
|
+
definition: {
|
|
42
|
+
...def.multipliers.definition,
|
|
43
|
+
...partial.multipliers?.definition
|
|
44
|
+
},
|
|
45
|
+
reference: {
|
|
46
|
+
...def.multipliers.reference,
|
|
47
|
+
...partial.multipliers?.reference
|
|
48
|
+
},
|
|
49
|
+
import: { ...def.multipliers.import, ...partial.multipliers?.import }
|
|
50
|
+
},
|
|
51
|
+
vocabulary: { ...def.vocabulary, ...partial.vocabulary }
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
function mergeRankingWeights(partial) {
|
|
55
|
+
if (!partial) {
|
|
56
|
+
return DEFAULT_RANKING_WEIGHTS;
|
|
57
|
+
}
|
|
58
|
+
return {
|
|
59
|
+
discriminative: {
|
|
60
|
+
...DEFAULT_RANKING_WEIGHTS.discriminative,
|
|
61
|
+
...partial.discriminative
|
|
62
|
+
},
|
|
63
|
+
typescript: {
|
|
64
|
+
...DEFAULT_RANKING_WEIGHTS.typescript,
|
|
65
|
+
...partial.typescript
|
|
66
|
+
},
|
|
67
|
+
language: {
|
|
68
|
+
...DEFAULT_RANKING_WEIGHTS.language,
|
|
69
|
+
...partial.language
|
|
70
|
+
},
|
|
71
|
+
markdown: {
|
|
72
|
+
...DEFAULT_RANKING_WEIGHTS.markdown,
|
|
73
|
+
...partial.markdown
|
|
74
|
+
},
|
|
75
|
+
json: {
|
|
76
|
+
...DEFAULT_RANKING_WEIGHTS.json,
|
|
77
|
+
...partial.json
|
|
78
|
+
},
|
|
79
|
+
literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
var DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, DEFAULT_RANKING_WEIGHTS;
|
|
83
|
+
var init_rankingWeights = __esm(() => {
|
|
84
|
+
DEFAULT_DISCRIMINATIVE_WEIGHTS = {
|
|
85
|
+
boostCap: 0.1,
|
|
86
|
+
penaltyMax: 0.16,
|
|
87
|
+
penaltyFloor: 0.72
|
|
88
|
+
};
|
|
89
|
+
DEFAULT_LITERAL_BOOST_WEIGHTS = {
|
|
90
|
+
baseScore: 0.5,
|
|
91
|
+
multipliers: {
|
|
92
|
+
definition: { high: 2.5, medium: 2, low: 1.5 },
|
|
93
|
+
reference: { high: 2, medium: 1.5, low: 1.3 },
|
|
94
|
+
import: { high: 1.5, medium: 1.3, low: 1.1 }
|
|
95
|
+
},
|
|
96
|
+
vocabulary: {
|
|
97
|
+
baseMultiplier: 1.3,
|
|
98
|
+
perWordBonus: 0.1,
|
|
99
|
+
maxVocabularyBonus: 0.5,
|
|
100
|
+
minWordsForMatch: 2
|
|
101
|
+
}
|
|
102
|
+
};
|
|
103
|
+
DEFAULT_RANKING_WEIGHTS = {
|
|
104
|
+
discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
|
|
105
|
+
typescript: {
|
|
106
|
+
semantic: 0.43,
|
|
107
|
+
bm25: 0.42,
|
|
108
|
+
vocab: 0.15,
|
|
109
|
+
vocabBypassThreshold: 0.4
|
|
110
|
+
},
|
|
111
|
+
language: {
|
|
112
|
+
semantic: 0.7,
|
|
113
|
+
bm25: 0.3
|
|
114
|
+
},
|
|
115
|
+
markdown: {
|
|
116
|
+
semantic: 0.62,
|
|
117
|
+
bm25: 0.33,
|
|
118
|
+
docIntentBoost: 0.03,
|
|
119
|
+
headingPhraseCoverageMin: 0.25,
|
|
120
|
+
headingPhraseCoverageSpan: 0.75
|
|
121
|
+
},
|
|
122
|
+
json: {
|
|
123
|
+
bm25: 0.4,
|
|
124
|
+
literalBaseWeight: 0.6
|
|
125
|
+
},
|
|
126
|
+
literal: DEFAULT_LITERAL_BOOST_WEIGHTS
|
|
27
127
|
};
|
|
28
128
|
});
|
|
29
129
|
|
|
@@ -171,6 +271,7 @@ var init_lexicon = __esm(() => {
|
|
|
171
271
|
// src/domain/entities/index.ts
|
|
172
272
|
var init_entities = __esm(() => {
|
|
173
273
|
init_searchResult();
|
|
274
|
+
init_rankingWeights();
|
|
174
275
|
init_config();
|
|
175
276
|
init_literal();
|
|
176
277
|
init_lexicon();
|
|
@@ -317,6 +418,9 @@ class BM25Index {
|
|
|
317
418
|
return 0;
|
|
318
419
|
return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
|
|
319
420
|
}
|
|
421
|
+
getInverseDocumentFrequency(term) {
|
|
422
|
+
return this.idf(term.toLowerCase());
|
|
423
|
+
}
|
|
320
424
|
score(tokens, queryTerms) {
|
|
321
425
|
const docLength = tokens.length;
|
|
322
426
|
let score = 0;
|
|
@@ -2540,6 +2644,188 @@ var init_embeddings = __esm(() => {
|
|
|
2540
2644
|
init_globalEmbeddings();
|
|
2541
2645
|
});
|
|
2542
2646
|
|
|
2647
|
+
// src/domain/services/discriminativeTerms.ts
|
|
2648
|
+
function medianSorted(sorted) {
|
|
2649
|
+
const n = sorted.length;
|
|
2650
|
+
if (n === 0)
|
|
2651
|
+
return 0;
|
|
2652
|
+
const mid = Math.floor(n / 2);
|
|
2653
|
+
return n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
|
|
2654
|
+
}
|
|
2655
|
+
function salientTermHitsChunk(term, haystack, tokenSet) {
|
|
2656
|
+
if (tokenSet.has(term) || haystack.includes(term)) {
|
|
2657
|
+
return true;
|
|
2658
|
+
}
|
|
2659
|
+
if (term.length < PREFIX_MATCH_MIN_LEN) {
|
|
2660
|
+
return false;
|
|
2661
|
+
}
|
|
2662
|
+
for (const w of tokenSet) {
|
|
2663
|
+
if (w.length < PREFIX_MATCH_MIN_LEN)
|
|
2664
|
+
continue;
|
|
2665
|
+
if (term.startsWith(w) || w.startsWith(term)) {
|
|
2666
|
+
return true;
|
|
2667
|
+
}
|
|
2668
|
+
}
|
|
2669
|
+
return false;
|
|
2670
|
+
}
|
|
2671
|
+
function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
|
|
2672
|
+
const empty2 = () => ({
|
|
2673
|
+
boost: 0,
|
|
2674
|
+
penaltyFactor: 1,
|
|
2675
|
+
salientTerms: [],
|
|
2676
|
+
matchedSalient: [],
|
|
2677
|
+
missingSalient: [],
|
|
2678
|
+
salientCoverage: 1
|
|
2679
|
+
});
|
|
2680
|
+
const uniqueTerms = [...new Set(tokenize(query))];
|
|
2681
|
+
if (uniqueTerms.length === 0) {
|
|
2682
|
+
return empty2();
|
|
2683
|
+
}
|
|
2684
|
+
const indexed = [];
|
|
2685
|
+
for (const term of uniqueTerms) {
|
|
2686
|
+
const idf = bm25Index.getInverseDocumentFrequency(term);
|
|
2687
|
+
if (idf > 0) {
|
|
2688
|
+
indexed.push({ term, idf });
|
|
2689
|
+
}
|
|
2690
|
+
}
|
|
2691
|
+
if (indexed.length === 0) {
|
|
2692
|
+
return empty2();
|
|
2693
|
+
}
|
|
2694
|
+
const idfSorted = [...indexed.map((x) => x.idf)].sort((a, b) => a - b);
|
|
2695
|
+
const medianIdf = medianSorted(idfSorted);
|
|
2696
|
+
const salientEntries = indexed.filter((x) => x.idf >= medianIdf);
|
|
2697
|
+
const salientTerms = [...new Set(salientEntries.map((x) => x.term))];
|
|
2698
|
+
const idfByTerm = new Map;
|
|
2699
|
+
for (const { term, idf } of salientEntries) {
|
|
2700
|
+
idfByTerm.set(term, Math.max(idfByTerm.get(term) ?? 0, idf));
|
|
2701
|
+
}
|
|
2702
|
+
let totalW = 0;
|
|
2703
|
+
for (const idf of idfByTerm.values()) {
|
|
2704
|
+
totalW += idf;
|
|
2705
|
+
}
|
|
2706
|
+
const haystack = [chunkName ?? "", chunkText].join(`
|
|
2707
|
+
`).toLowerCase();
|
|
2708
|
+
const tokenSet = new Set(tokenize(chunkName ? `${chunkName}
|
|
2709
|
+
${chunkText}` : chunkText));
|
|
2710
|
+
const matchedSalient = [];
|
|
2711
|
+
for (const term of salientTerms) {
|
|
2712
|
+
const idf = idfByTerm.get(term) ?? 0;
|
|
2713
|
+
if (idf <= 0)
|
|
2714
|
+
continue;
|
|
2715
|
+
if (salientTermHitsChunk(term, haystack, tokenSet)) {
|
|
2716
|
+
matchedSalient.push(term);
|
|
2717
|
+
}
|
|
2718
|
+
}
|
|
2719
|
+
const matchedSet = new Set(matchedSalient);
|
|
2720
|
+
const missingSalient = salientTerms.filter((t) => !matchedSet.has(t));
|
|
2721
|
+
let matchedW = 0;
|
|
2722
|
+
for (const term of matchedSalient) {
|
|
2723
|
+
matchedW += idfByTerm.get(term) ?? 0;
|
|
2724
|
+
}
|
|
2725
|
+
const salientCoverage = totalW > 0 ? matchedW / totalW : 1;
|
|
2726
|
+
const { boostCap, penaltyMax, penaltyFloor } = weights;
|
|
2727
|
+
const boost = boostCap * salientCoverage;
|
|
2728
|
+
let penaltyFactor = 1 - penaltyMax * (1 - salientCoverage);
|
|
2729
|
+
if (penaltyFactor < penaltyFloor) {
|
|
2730
|
+
penaltyFactor = penaltyFloor;
|
|
2731
|
+
}
|
|
2732
|
+
return {
|
|
2733
|
+
boost,
|
|
2734
|
+
penaltyFactor,
|
|
2735
|
+
salientTerms,
|
|
2736
|
+
matchedSalient,
|
|
2737
|
+
missingSalient,
|
|
2738
|
+
salientCoverage
|
|
2739
|
+
};
|
|
2740
|
+
}
|
|
2741
|
+
var PREFIX_MATCH_MIN_LEN = 4;
|
|
2742
|
+
var init_discriminativeTerms = __esm(() => {
|
|
2743
|
+
init_rankingWeights();
|
|
2744
|
+
});
|
|
2745
|
+
|
|
2746
|
+
// src/domain/services/matchScales.ts
|
|
2747
|
+
function semanticPctFromCosine(cosine) {
|
|
2748
|
+
return clamp01((cosine + 1) / 2);
|
|
2749
|
+
}
|
|
2750
|
+
function clamp01(x) {
|
|
2751
|
+
if (Number.isNaN(x) || !Number.isFinite(x))
|
|
2752
|
+
return 0;
|
|
2753
|
+
return Math.max(0, Math.min(1, x));
|
|
2754
|
+
}
|
|
2755
|
+
function num(ctx, key) {
|
|
2756
|
+
const v = ctx[key];
|
|
2757
|
+
return typeof v === "number" && Number.isFinite(v) ? v : 0;
|
|
2758
|
+
}
|
|
2759
|
+
function additiveStructuredBoost(ctx) {
|
|
2760
|
+
return num(ctx, "pathBoost") + num(ctx, "fileTypeBoost") + num(ctx, "chunkTypeBoost") + num(ctx, "exportBoost");
|
|
2761
|
+
}
|
|
2762
|
+
function attachMatchScales(result, rw) {
|
|
2763
|
+
const ctx = result.context ?? {};
|
|
2764
|
+
const mid = result.moduleId;
|
|
2765
|
+
let semanticMatch = 0;
|
|
2766
|
+
let structuredMatch = 0;
|
|
2767
|
+
if (mid === "language/typescript") {
|
|
2768
|
+
const cos = num(ctx, "semanticScore");
|
|
2769
|
+
const bm25 = num(ctx, "bm25Score");
|
|
2770
|
+
const vocab = num(ctx, "vocabScore");
|
|
2771
|
+
const phraseCov = num(ctx, "phraseCoverage");
|
|
2772
|
+
const tw = rw.typescript;
|
|
2773
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
2774
|
+
const denom = tw.bm25 + tw.vocab + 0.000000001;
|
|
2775
|
+
const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
|
|
2776
|
+
structuredMatch = clamp01(lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25));
|
|
2777
|
+
} else if (mid.startsWith("language/")) {
|
|
2778
|
+
const cos = num(ctx, "semanticScore");
|
|
2779
|
+
const bm25 = num(ctx, "bm25Score");
|
|
2780
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
2781
|
+
structuredMatch = clamp01(bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2));
|
|
2782
|
+
} else if (mid === "docs/markdown") {
|
|
2783
|
+
const cos = num(ctx, "semanticScore");
|
|
2784
|
+
const bm25 = num(ctx, "bm25Score");
|
|
2785
|
+
const docBoost = num(ctx, "docBoost");
|
|
2786
|
+
const headingBoost = num(ctx, "headingBoost");
|
|
2787
|
+
const phraseCov = num(ctx, "phraseCoverage");
|
|
2788
|
+
const mw = rw.markdown;
|
|
2789
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
2790
|
+
structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
|
|
2791
|
+
} else if (mid === "core") {
|
|
2792
|
+
semanticMatch = 0;
|
|
2793
|
+
const nBm = num(ctx, "bm25Score");
|
|
2794
|
+
const sym = num(ctx, "symbolScore");
|
|
2795
|
+
structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
|
|
2796
|
+
} else if (mid === "data/json") {
|
|
2797
|
+
semanticMatch = 0;
|
|
2798
|
+
const bm25 = num(ctx, "bm25Score");
|
|
2799
|
+
const litM = num(ctx, "literalMultiplier");
|
|
2800
|
+
structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
|
|
2801
|
+
} else {
|
|
2802
|
+
semanticMatch = 0;
|
|
2803
|
+
structuredMatch = clamp01(result.score);
|
|
2804
|
+
}
|
|
2805
|
+
return { ...result, semanticMatch, structuredMatch };
|
|
2806
|
+
}
|
|
2807
|
+
function compareSearchResultsByRankBy(a, b, rankBy) {
|
|
2808
|
+
if (rankBy === "combined") {
|
|
2809
|
+
return b.score - a.score;
|
|
2810
|
+
}
|
|
2811
|
+
const sa = a.semanticMatch ?? 0;
|
|
2812
|
+
const sb = b.semanticMatch ?? 0;
|
|
2813
|
+
const ta = a.structuredMatch ?? 0;
|
|
2814
|
+
const tb = b.structuredMatch ?? 0;
|
|
2815
|
+
if (rankBy === "semantic") {
|
|
2816
|
+
if (Math.abs(sb - sa) > 0.000000001)
|
|
2817
|
+
return sb - sa;
|
|
2818
|
+
if (Math.abs(tb - ta) > 0.000000001)
|
|
2819
|
+
return tb - ta;
|
|
2820
|
+
return b.score - a.score;
|
|
2821
|
+
}
|
|
2822
|
+
if (Math.abs(tb - ta) > 0.000000001)
|
|
2823
|
+
return tb - ta;
|
|
2824
|
+
if (Math.abs(sb - sa) > 0.000000001)
|
|
2825
|
+
return sb - sa;
|
|
2826
|
+
return b.score - a.score;
|
|
2827
|
+
}
|
|
2828
|
+
|
|
2543
2829
|
// src/domain/services/keywords.ts
|
|
2544
2830
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2545
2831
|
const keywords = new Set;
|
|
@@ -3198,16 +3484,16 @@ var init_literalExtractor = __esm(() => {
|
|
|
3198
3484
|
});
|
|
3199
3485
|
|
|
3200
3486
|
// src/domain/services/literalScorer.ts
|
|
3201
|
-
function calculateLiteralMultiplier(matchType, confidence) {
|
|
3202
|
-
return
|
|
3487
|
+
function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
|
|
3488
|
+
return weights.multipliers[matchType][confidence];
|
|
3203
3489
|
}
|
|
3204
|
-
function calculateMaxMultiplier(matches) {
|
|
3490
|
+
function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
|
|
3205
3491
|
if (!matches || matches.length === 0) {
|
|
3206
3492
|
return 1;
|
|
3207
3493
|
}
|
|
3208
|
-
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence)));
|
|
3494
|
+
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence, weights)));
|
|
3209
3495
|
}
|
|
3210
|
-
function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
3496
|
+
function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3211
3497
|
if (!matches || matches.length === 0) {
|
|
3212
3498
|
return {
|
|
3213
3499
|
multiplier: 1,
|
|
@@ -3218,7 +3504,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3218
3504
|
let bestMatch = null;
|
|
3219
3505
|
let bestMultiplier = 0;
|
|
3220
3506
|
for (const match of matches) {
|
|
3221
|
-
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
|
|
3507
|
+
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
|
|
3222
3508
|
if (mult > bestMultiplier) {
|
|
3223
3509
|
bestMultiplier = mult;
|
|
3224
3510
|
bestMatch = match;
|
|
@@ -3232,32 +3518,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3232
3518
|
matchCount: matches.length
|
|
3233
3519
|
};
|
|
3234
3520
|
}
|
|
3235
|
-
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
|
|
3521
|
+
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3236
3522
|
if (!matches || matches.length === 0) {
|
|
3237
3523
|
return baseScore;
|
|
3238
3524
|
}
|
|
3239
|
-
const multiplier = calculateMaxMultiplier(matches);
|
|
3525
|
+
const multiplier = calculateMaxMultiplier(matches, weights);
|
|
3240
3526
|
if (!hasSemanticOrBm25) {
|
|
3241
|
-
return
|
|
3527
|
+
return weights.baseScore * multiplier;
|
|
3242
3528
|
}
|
|
3243
3529
|
return baseScore * multiplier;
|
|
3244
3530
|
}
|
|
3245
|
-
var
|
|
3531
|
+
var DEFAULT_LW;
|
|
3246
3532
|
var init_literalScorer = __esm(() => {
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
MULTIPLIERS: {
|
|
3250
|
-
definition: { high: 2.5, medium: 2, low: 1.5 },
|
|
3251
|
-
reference: { high: 2, medium: 1.5, low: 1.3 },
|
|
3252
|
-
import: { high: 1.5, medium: 1.3, low: 1.1 }
|
|
3253
|
-
},
|
|
3254
|
-
VOCABULARY: {
|
|
3255
|
-
BASE_MULTIPLIER: 1.3,
|
|
3256
|
-
PER_WORD_BONUS: 0.1,
|
|
3257
|
-
MAX_VOCABULARY_BONUS: 0.5,
|
|
3258
|
-
MIN_WORDS_FOR_MATCH: 2
|
|
3259
|
-
}
|
|
3260
|
-
};
|
|
3533
|
+
init_rankingWeights();
|
|
3534
|
+
DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
|
|
3261
3535
|
});
|
|
3262
3536
|
|
|
3263
3537
|
// src/domain/services/lexicon.ts
|
|
@@ -4212,6 +4486,7 @@ var init_chunkContext = __esm(() => {
|
|
|
4212
4486
|
|
|
4213
4487
|
// src/domain/services/index.ts
|
|
4214
4488
|
var init_services = __esm(() => {
|
|
4489
|
+
init_discriminativeTerms();
|
|
4215
4490
|
init_keywords();
|
|
4216
4491
|
init_queryIntent();
|
|
4217
4492
|
init_queryLiteralParser();
|
|
@@ -5035,6 +5310,9 @@ class TypeScriptModule {
|
|
|
5035
5310
|
minScore = DEFAULT_MIN_SCORE2,
|
|
5036
5311
|
filePatterns
|
|
5037
5312
|
} = options;
|
|
5313
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
5314
|
+
const tw = rw.typescript;
|
|
5315
|
+
const lt = rw.literal;
|
|
5038
5316
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
5039
5317
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
5040
5318
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -5141,17 +5419,19 @@ class TypeScriptModule {
|
|
|
5141
5419
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5142
5420
|
const exportBoost = calculateExportBoost(chunk);
|
|
5143
5421
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5144
|
-
const baseScore =
|
|
5422
|
+
const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
|
|
5145
5423
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
5146
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
5147
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
5424
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
5425
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
5148
5426
|
const finalScore = boostedScore + additiveBoost;
|
|
5427
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
5428
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5149
5429
|
processedChunkIds.add(chunk.id);
|
|
5150
|
-
if (
|
|
5430
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
|
|
5151
5431
|
results.push({
|
|
5152
5432
|
filepath,
|
|
5153
5433
|
chunk,
|
|
5154
|
-
score:
|
|
5434
|
+
score: adjustedScore,
|
|
5155
5435
|
moduleId: this.id,
|
|
5156
5436
|
context: {
|
|
5157
5437
|
semanticScore,
|
|
@@ -5163,6 +5443,10 @@ class TypeScriptModule {
|
|
|
5163
5443
|
fileTypeBoost,
|
|
5164
5444
|
chunkTypeBoost,
|
|
5165
5445
|
exportBoost,
|
|
5446
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
5447
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
5448
|
+
discriminativeBoost: disc.boost,
|
|
5449
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5166
5450
|
literalMultiplier: literalContribution.multiplier,
|
|
5167
5451
|
literalMatchType: literalContribution.bestMatchType,
|
|
5168
5452
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5215,15 +5499,17 @@ class TypeScriptModule {
|
|
|
5215
5499
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5216
5500
|
const exportBoost = calculateExportBoost(chunk);
|
|
5217
5501
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5218
|
-
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
|
|
5219
|
-
const baseScore = semanticScore > 0 ?
|
|
5220
|
-
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
|
|
5502
|
+
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
|
|
5503
|
+
const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
|
|
5504
|
+
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
|
|
5221
5505
|
const finalScore = boostedScore + additiveBoost;
|
|
5506
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
5507
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5222
5508
|
processedChunkIds.add(chunkId);
|
|
5223
5509
|
results.push({
|
|
5224
5510
|
filepath,
|
|
5225
5511
|
chunk,
|
|
5226
|
-
score:
|
|
5512
|
+
score: adjustedScore,
|
|
5227
5513
|
moduleId: this.id,
|
|
5228
5514
|
context: {
|
|
5229
5515
|
semanticScore,
|
|
@@ -5235,6 +5521,10 @@ class TypeScriptModule {
|
|
|
5235
5521
|
fileTypeBoost,
|
|
5236
5522
|
chunkTypeBoost,
|
|
5237
5523
|
exportBoost,
|
|
5524
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
5525
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
5526
|
+
discriminativeBoost: disc.boost,
|
|
5527
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5238
5528
|
literalMultiplier: literalContribution.multiplier,
|
|
5239
5529
|
literalMatchType: literalContribution.bestMatchType,
|
|
5240
5530
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5271,13 +5561,14 @@ class TypeScriptModule {
|
|
|
5271
5561
|
return references;
|
|
5272
5562
|
}
|
|
5273
5563
|
}
|
|
5274
|
-
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10,
|
|
5564
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
5275
5565
|
var init_typescript = __esm(() => {
|
|
5276
5566
|
init_embeddings();
|
|
5277
5567
|
init_services();
|
|
5278
5568
|
init_config2();
|
|
5279
5569
|
init_parseCode();
|
|
5280
5570
|
init_storage();
|
|
5571
|
+
init_entities();
|
|
5281
5572
|
TYPESCRIPT_EXTENSIONS = [
|
|
5282
5573
|
".ts",
|
|
5283
5574
|
".tsx",
|
|
@@ -6390,6 +6681,9 @@ class PythonModule {
|
|
|
6390
6681
|
minScore = DEFAULT_MIN_SCORE3,
|
|
6391
6682
|
filePatterns
|
|
6392
6683
|
} = options;
|
|
6684
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
6685
|
+
const lw = rw.language;
|
|
6686
|
+
const lt = rw.literal;
|
|
6393
6687
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
6394
6688
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
6395
6689
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -6482,17 +6776,19 @@ class PythonModule {
|
|
|
6482
6776
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
6483
6777
|
const exportBoost = calculateExportBoost2(chunk);
|
|
6484
6778
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
6485
|
-
const baseScore =
|
|
6779
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
6486
6780
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
6487
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
6488
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
6781
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
6782
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
6489
6783
|
const finalScore = boostedScore + additiveBoost;
|
|
6784
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6785
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
6490
6786
|
processedChunkIds.add(chunk.id);
|
|
6491
|
-
if (
|
|
6787
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
6492
6788
|
results.push({
|
|
6493
6789
|
filepath,
|
|
6494
6790
|
chunk,
|
|
6495
|
-
score:
|
|
6791
|
+
score: adjustedScore,
|
|
6496
6792
|
moduleId: this.id,
|
|
6497
6793
|
context: {
|
|
6498
6794
|
semanticScore,
|
|
@@ -6501,6 +6797,10 @@ class PythonModule {
|
|
|
6501
6797
|
fileTypeBoost,
|
|
6502
6798
|
chunkTypeBoost,
|
|
6503
6799
|
exportBoost,
|
|
6800
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6801
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6802
|
+
discriminativeBoost: disc.boost,
|
|
6803
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
6504
6804
|
literalMultiplier: literalContribution.multiplier,
|
|
6505
6805
|
literalMatchType: literalContribution.bestMatchType,
|
|
6506
6806
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -6535,15 +6835,17 @@ class PythonModule {
|
|
|
6535
6835
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
6536
6836
|
const exportBoost = calculateExportBoost2(chunk);
|
|
6537
6837
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
6538
|
-
const literalContribution = calculateLiteralContribution(matches, false);
|
|
6539
|
-
const baseScore = semanticScore > 0 ?
|
|
6540
|
-
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
|
|
6838
|
+
const literalContribution = calculateLiteralContribution(matches, false, lt);
|
|
6839
|
+
const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
|
|
6840
|
+
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
|
|
6541
6841
|
const finalScore = boostedScore + additiveBoost;
|
|
6842
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6843
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
6542
6844
|
processedChunkIds.add(chunkId);
|
|
6543
6845
|
results.push({
|
|
6544
6846
|
filepath,
|
|
6545
6847
|
chunk,
|
|
6546
|
-
score:
|
|
6848
|
+
score: adjustedScore,
|
|
6547
6849
|
moduleId: this.id,
|
|
6548
6850
|
context: {
|
|
6549
6851
|
semanticScore,
|
|
@@ -6552,6 +6854,10 @@ class PythonModule {
|
|
|
6552
6854
|
fileTypeBoost,
|
|
6553
6855
|
chunkTypeBoost,
|
|
6554
6856
|
exportBoost,
|
|
6857
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6858
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6859
|
+
discriminativeBoost: disc.boost,
|
|
6860
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
6555
6861
|
literalMultiplier: literalContribution.multiplier,
|
|
6556
6862
|
literalMatchType: literalContribution.bestMatchType,
|
|
6557
6863
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -6564,13 +6870,14 @@ class PythonModule {
|
|
|
6564
6870
|
return results.slice(0, topK);
|
|
6565
6871
|
}
|
|
6566
6872
|
}
|
|
6567
|
-
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10,
|
|
6873
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
|
|
6568
6874
|
var init_python = __esm(() => {
|
|
6569
6875
|
init_embeddings();
|
|
6570
6876
|
init_services();
|
|
6571
6877
|
init_config2();
|
|
6572
6878
|
init_storage();
|
|
6573
6879
|
init_parsing();
|
|
6880
|
+
init_entities();
|
|
6574
6881
|
PYTHON_EXTENSIONS = [".py", ".pyw"];
|
|
6575
6882
|
supportsFile2 = isPythonFile;
|
|
6576
6883
|
});
|
|
@@ -6922,6 +7229,9 @@ class GoModule {
|
|
|
6922
7229
|
minScore = DEFAULT_MIN_SCORE4,
|
|
6923
7230
|
filePatterns
|
|
6924
7231
|
} = options;
|
|
7232
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7233
|
+
const lw = rw.language;
|
|
7234
|
+
const lt = rw.literal;
|
|
6925
7235
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
6926
7236
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
6927
7237
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7014,17 +7324,19 @@ class GoModule {
|
|
|
7014
7324
|
const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
|
|
7015
7325
|
const exportBoost = calculateExportBoost3(chunk);
|
|
7016
7326
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7017
|
-
const baseScore =
|
|
7327
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7018
7328
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7019
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7020
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7329
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7330
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7021
7331
|
const finalScore = boostedScore + additiveBoost;
|
|
7332
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7333
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7022
7334
|
processedChunkIds.add(chunk.id);
|
|
7023
|
-
if (
|
|
7335
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7024
7336
|
results.push({
|
|
7025
7337
|
filepath,
|
|
7026
7338
|
chunk,
|
|
7027
|
-
score:
|
|
7339
|
+
score: adjustedScore,
|
|
7028
7340
|
moduleId: this.id,
|
|
7029
7341
|
context: {
|
|
7030
7342
|
semanticScore,
|
|
@@ -7033,6 +7345,10 @@ class GoModule {
|
|
|
7033
7345
|
fileTypeBoost,
|
|
7034
7346
|
chunkTypeBoost,
|
|
7035
7347
|
exportBoost,
|
|
7348
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7349
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7350
|
+
discriminativeBoost: disc.boost,
|
|
7351
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7036
7352
|
literalMultiplier: literalContribution.multiplier,
|
|
7037
7353
|
literalMatchType: literalContribution.bestMatchType,
|
|
7038
7354
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7045,13 +7361,14 @@ class GoModule {
|
|
|
7045
7361
|
return results.slice(0, topK);
|
|
7046
7362
|
}
|
|
7047
7363
|
}
|
|
7048
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
7364
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
|
|
7049
7365
|
var init_go = __esm(() => {
|
|
7050
7366
|
init_embeddings();
|
|
7051
7367
|
init_services();
|
|
7052
7368
|
init_config2();
|
|
7053
7369
|
init_storage();
|
|
7054
7370
|
init_parsing();
|
|
7371
|
+
init_entities();
|
|
7055
7372
|
GO_EXTENSIONS = [".go"];
|
|
7056
7373
|
supportsFile3 = isGoFile;
|
|
7057
7374
|
});
|
|
@@ -7482,6 +7799,9 @@ class RustModule {
|
|
|
7482
7799
|
minScore = DEFAULT_MIN_SCORE5,
|
|
7483
7800
|
filePatterns
|
|
7484
7801
|
} = options;
|
|
7802
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7803
|
+
const lw = rw.language;
|
|
7804
|
+
const lt = rw.literal;
|
|
7485
7805
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7486
7806
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7487
7807
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7574,17 +7894,19 @@ class RustModule {
|
|
|
7574
7894
|
const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
|
|
7575
7895
|
const exportBoost = calculateExportBoost4(chunk);
|
|
7576
7896
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7577
|
-
const baseScore =
|
|
7897
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7578
7898
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7579
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7580
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7899
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7900
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7581
7901
|
const finalScore = boostedScore + additiveBoost;
|
|
7902
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7903
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7582
7904
|
processedChunkIds.add(chunk.id);
|
|
7583
|
-
if (
|
|
7905
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7584
7906
|
results.push({
|
|
7585
7907
|
filepath,
|
|
7586
7908
|
chunk,
|
|
7587
|
-
score:
|
|
7909
|
+
score: adjustedScore,
|
|
7588
7910
|
moduleId: this.id,
|
|
7589
7911
|
context: {
|
|
7590
7912
|
semanticScore,
|
|
@@ -7593,6 +7915,10 @@ class RustModule {
|
|
|
7593
7915
|
fileTypeBoost,
|
|
7594
7916
|
chunkTypeBoost,
|
|
7595
7917
|
exportBoost,
|
|
7918
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7919
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7920
|
+
discriminativeBoost: disc.boost,
|
|
7921
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7596
7922
|
literalMultiplier: literalContribution.multiplier,
|
|
7597
7923
|
literalMatchType: literalContribution.bestMatchType,
|
|
7598
7924
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7605,13 +7931,14 @@ class RustModule {
|
|
|
7605
7931
|
return results.slice(0, topK);
|
|
7606
7932
|
}
|
|
7607
7933
|
}
|
|
7608
|
-
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10,
|
|
7934
|
+
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
|
|
7609
7935
|
var init_rust = __esm(() => {
|
|
7610
7936
|
init_embeddings();
|
|
7611
7937
|
init_services();
|
|
7612
7938
|
init_config2();
|
|
7613
7939
|
init_storage();
|
|
7614
7940
|
init_parsing();
|
|
7941
|
+
init_entities();
|
|
7615
7942
|
RUST_EXTENSIONS = [".rs"];
|
|
7616
7943
|
supportsFile4 = isRustFile;
|
|
7617
7944
|
});
|
|
@@ -7741,6 +8068,8 @@ class JsonModule {
|
|
|
7741
8068
|
minScore = DEFAULT_MIN_SCORE6,
|
|
7742
8069
|
filePatterns
|
|
7743
8070
|
} = options;
|
|
8071
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8072
|
+
const jw = rw.json;
|
|
7744
8073
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7745
8074
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7746
8075
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7794,9 +8123,9 @@ class JsonModule {
|
|
|
7794
8123
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
7795
8124
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7796
8125
|
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
7797
|
-
const baseScore =
|
|
7798
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
7799
|
-
const literalBase = literalMatches.length > 0 && bm25Score === 0 ?
|
|
8126
|
+
const baseScore = jw.bm25 * bm25Score;
|
|
8127
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
|
|
8128
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
|
|
7800
8129
|
const finalScore = boostedScore + literalBase;
|
|
7801
8130
|
processedChunkIds.add(chunk.id);
|
|
7802
8131
|
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
@@ -7829,7 +8158,7 @@ class JsonModule {
|
|
|
7829
8158
|
if (!chunk)
|
|
7830
8159
|
continue;
|
|
7831
8160
|
const literalContribution = calculateLiteralContribution(matches, false);
|
|
7832
|
-
const score =
|
|
8161
|
+
const score = rw.literal.baseScore * literalContribution.multiplier;
|
|
7833
8162
|
processedChunkIds.add(chunkId);
|
|
7834
8163
|
results.push({
|
|
7835
8164
|
filepath,
|
|
@@ -7850,11 +8179,12 @@ class JsonModule {
|
|
|
7850
8179
|
return results.slice(0, topK);
|
|
7851
8180
|
}
|
|
7852
8181
|
}
|
|
7853
|
-
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10,
|
|
8182
|
+
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
|
|
7854
8183
|
var init_json = __esm(() => {
|
|
7855
8184
|
init_services();
|
|
7856
8185
|
init_config2();
|
|
7857
8186
|
init_storage();
|
|
8187
|
+
init_entities();
|
|
7858
8188
|
JSON_EXTENSIONS = [".json"];
|
|
7859
8189
|
supportsFile5 = isJsonFile;
|
|
7860
8190
|
});
|
|
@@ -8089,6 +8419,8 @@ class MarkdownModule {
|
|
|
8089
8419
|
minScore = DEFAULT_MIN_SCORE7,
|
|
8090
8420
|
filePatterns
|
|
8091
8421
|
} = options;
|
|
8422
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8423
|
+
const mw = rw.markdown;
|
|
8092
8424
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8093
8425
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
8094
8426
|
let allFiles;
|
|
@@ -8154,15 +8486,18 @@ class MarkdownModule {
|
|
|
8154
8486
|
"what",
|
|
8155
8487
|
"explain"
|
|
8156
8488
|
].includes(t))) {
|
|
8157
|
-
docBoost =
|
|
8158
|
-
}
|
|
8159
|
-
const
|
|
8160
|
-
const
|
|
8161
|
-
|
|
8489
|
+
docBoost = mw.docIntentBoost;
|
|
8490
|
+
}
|
|
8491
|
+
const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
|
|
8492
|
+
const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
|
|
8493
|
+
const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
|
|
8494
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8495
|
+
const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
|
|
8496
|
+
if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
|
|
8162
8497
|
results.push({
|
|
8163
8498
|
filepath,
|
|
8164
8499
|
chunk,
|
|
8165
|
-
score:
|
|
8500
|
+
score: finalScore,
|
|
8166
8501
|
moduleId: this.id,
|
|
8167
8502
|
context: {
|
|
8168
8503
|
semanticScore,
|
|
@@ -8171,7 +8506,11 @@ class MarkdownModule {
|
|
|
8171
8506
|
phraseCoverage: phraseMatch.coverage,
|
|
8172
8507
|
docBoost,
|
|
8173
8508
|
headingBoost,
|
|
8174
|
-
headingLevel: chunk.metadata?.headingLevel
|
|
8509
|
+
headingLevel: chunk.metadata?.headingLevel,
|
|
8510
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8511
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8512
|
+
discriminativeBoost: disc.boost,
|
|
8513
|
+
matchedSalientTerms: disc.matchedSalient
|
|
8175
8514
|
}
|
|
8176
8515
|
});
|
|
8177
8516
|
}
|
|
@@ -8180,11 +8519,12 @@ class MarkdownModule {
|
|
|
8180
8519
|
return results.slice(0, topK);
|
|
8181
8520
|
}
|
|
8182
8521
|
}
|
|
8183
|
-
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10,
|
|
8522
|
+
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
|
|
8184
8523
|
var init_markdown = __esm(() => {
|
|
8185
8524
|
init_embeddings();
|
|
8186
8525
|
init_services();
|
|
8187
8526
|
init_config2();
|
|
8527
|
+
init_entities();
|
|
8188
8528
|
init_storage();
|
|
8189
8529
|
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
8190
8530
|
supportsFile6 = isMarkdownFile;
|
|
@@ -11615,6 +11955,7 @@ minimatch.unescape = unescape;
|
|
|
11615
11955
|
init_types();
|
|
11616
11956
|
init_config2();
|
|
11617
11957
|
init_services();
|
|
11958
|
+
init_entities();
|
|
11618
11959
|
// src/domain/usecases/exactSearch.ts
|
|
11619
11960
|
init_simpleSearch();
|
|
11620
11961
|
var DEFAULT_IGNORED_DIRS = [
|
|
@@ -11782,7 +12123,9 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11782
12123
|
if (ensureFresh) {
|
|
11783
12124
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
11784
12125
|
}
|
|
11785
|
-
|
|
12126
|
+
if (!options.quiet) {
|
|
12127
|
+
console.log(`Searching for: "${query}"`);
|
|
12128
|
+
}
|
|
11786
12129
|
const config = await loadConfig(rootDir);
|
|
11787
12130
|
await registerBuiltInModules();
|
|
11788
12131
|
const globalManifest = await loadGlobalManifest2(rootDir, config);
|
|
@@ -11845,10 +12188,18 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11845
12188
|
}
|
|
11846
12189
|
}
|
|
11847
12190
|
}
|
|
11848
|
-
|
|
12191
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
12192
|
+
let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
|
|
12193
|
+
for (const r of ranked) {
|
|
12194
|
+
if (r.context?.exactMatchFusion) {
|
|
12195
|
+
r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
|
|
12196
|
+
}
|
|
12197
|
+
}
|
|
12198
|
+
const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
|
|
12199
|
+
ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
|
|
11849
12200
|
const topK = options.topK ?? 10;
|
|
11850
12201
|
return {
|
|
11851
|
-
results:
|
|
12202
|
+
results: ranked.slice(0, topK),
|
|
11852
12203
|
exactMatches,
|
|
11853
12204
|
fusionApplied
|
|
11854
12205
|
};
|
|
@@ -11939,7 +12290,9 @@ function formatSearchResults2(results) {
|
|
|
11939
12290
|
const nameInfo = chunk.name ? ` (${chunk.name})` : "";
|
|
11940
12291
|
output += `${i + 1}. ${location}${nameInfo}
|
|
11941
12292
|
`;
|
|
11942
|
-
|
|
12293
|
+
const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
|
|
12294
|
+
const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
|
|
12295
|
+
output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
|
|
11943
12296
|
output += ` | via ${formatModuleName(result.moduleId)}`;
|
|
11944
12297
|
if (chunk.isExported) {
|
|
11945
12298
|
output += " | exported";
|
|
@@ -12032,6 +12385,7 @@ function formatHybridSearchResults(hybridResults) {
|
|
|
12032
12385
|
}
|
|
12033
12386
|
|
|
12034
12387
|
// src/index.ts
|
|
12388
|
+
init_entities();
|
|
12035
12389
|
async function index(directory, options = {}) {
|
|
12036
12390
|
return indexDirectory(directory, options);
|
|
12037
12391
|
}
|
|
@@ -12060,6 +12414,8 @@ var src_default = raggrep;
|
|
|
12060
12414
|
export {
|
|
12061
12415
|
search2 as search,
|
|
12062
12416
|
reset,
|
|
12417
|
+
mergeRankingWeights,
|
|
12418
|
+
mergeLiteralWeights,
|
|
12063
12419
|
index,
|
|
12064
12420
|
hybridSearch2 as hybridSearch,
|
|
12065
12421
|
formatSearchResults2 as formatSearchResults,
|
|
@@ -12071,7 +12427,9 @@ export {
|
|
|
12071
12427
|
cleanup,
|
|
12072
12428
|
SilentLogger,
|
|
12073
12429
|
InlineProgressLogger,
|
|
12430
|
+
DEFAULT_RANKING_WEIGHTS,
|
|
12431
|
+
DEFAULT_LITERAL_BOOST_WEIGHTS,
|
|
12074
12432
|
ConsoleLogger
|
|
12075
12433
|
};
|
|
12076
12434
|
|
|
12077
|
-
//# debugId=
|
|
12435
|
+
//# debugId=E54DFEDA0CA026F464756E2164756E21
|