raggrep 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -23,7 +23,107 @@ var init_searchResult = __esm(() => {
  minScore: 0.15,
  filePatterns: [],
  pathFilter: [],
- ensureFresh: true
+ ensureFresh: true,
+ rankingWeights: {},
+ quiet: false,
+ rankBy: "structured"
+ };
+ });
+
+ // src/domain/entities/rankingWeights.ts
+ function mergeLiteralWeights(def, partial) {
+ if (!partial) {
+ return def;
+ }
+ return {
+ baseScore: partial.baseScore ?? def.baseScore,
+ multipliers: {
+ definition: {
+ ...def.multipliers.definition,
+ ...partial.multipliers?.definition
+ },
+ reference: {
+ ...def.multipliers.reference,
+ ...partial.multipliers?.reference
+ },
+ import: { ...def.multipliers.import, ...partial.multipliers?.import }
+ },
+ vocabulary: { ...def.vocabulary, ...partial.vocabulary }
+ };
+ }
+ function mergeRankingWeights(partial) {
+ if (!partial) {
+ return DEFAULT_RANKING_WEIGHTS;
+ }
+ return {
+ discriminative: {
+ ...DEFAULT_RANKING_WEIGHTS.discriminative,
+ ...partial.discriminative
+ },
+ typescript: {
+ ...DEFAULT_RANKING_WEIGHTS.typescript,
+ ...partial.typescript
+ },
+ language: {
+ ...DEFAULT_RANKING_WEIGHTS.language,
+ ...partial.language
+ },
+ markdown: {
+ ...DEFAULT_RANKING_WEIGHTS.markdown,
+ ...partial.markdown
+ },
+ json: {
+ ...DEFAULT_RANKING_WEIGHTS.json,
+ ...partial.json
+ },
+ literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
+ };
+ }
+ var DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, DEFAULT_RANKING_WEIGHTS;
+ var init_rankingWeights = __esm(() => {
+ DEFAULT_DISCRIMINATIVE_WEIGHTS = {
+ boostCap: 0.1,
+ penaltyMax: 0.16,
+ penaltyFloor: 0.72
+ };
+ DEFAULT_LITERAL_BOOST_WEIGHTS = {
+ baseScore: 0.5,
+ multipliers: {
+ definition: { high: 2.5, medium: 2, low: 1.5 },
+ reference: { high: 2, medium: 1.5, low: 1.3 },
+ import: { high: 1.5, medium: 1.3, low: 1.1 }
+ },
+ vocabulary: {
+ baseMultiplier: 1.3,
+ perWordBonus: 0.1,
+ maxVocabularyBonus: 0.5,
+ minWordsForMatch: 2
+ }
+ };
+ DEFAULT_RANKING_WEIGHTS = {
+ discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
+ typescript: {
+ semantic: 0.43,
+ bm25: 0.42,
+ vocab: 0.15,
+ vocabBypassThreshold: 0.4
+ },
+ language: {
+ semantic: 0.7,
+ bm25: 0.3
+ },
+ markdown: {
+ semantic: 0.62,
+ bm25: 0.33,
+ docIntentBoost: 0.03,
+ headingPhraseCoverageMin: 0.25,
+ headingPhraseCoverageSpan: 0.75
+ },
+ json: {
+ bm25: 0.4,
+ literalBaseWeight: 0.6
+ },
+ literal: DEFAULT_LITERAL_BOOST_WEIGHTS
  };
  });
 
@@ -171,6 +271,7 @@ var init_lexicon = __esm(() => {
  // src/domain/entities/index.ts
  var init_entities = __esm(() => {
  init_searchResult();
+ init_rankingWeights();
  init_config();
  init_literal();
  init_lexicon();
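Note (not part of the diff): the rankingWeights entity introduced above gives every scoring surface (discriminative, typescript, language, markdown, json, literal) a single defaults object, and mergeRankingWeights deep-merges a partial override onto DEFAULT_RANKING_WEIGHTS section by section. A minimal sketch of that merge, assuming the package is imported under its published name "raggrep" and that the override value is purely illustrative:

    import { mergeRankingWeights, DEFAULT_RANKING_WEIGHTS } from "raggrep";

    // Override only the TypeScript semantic weight; the other fields of the
    // typescript section and every other section fall back to the defaults.
    const rw = mergeRankingWeights({ typescript: { semantic: 0.5 } });
    console.log(DEFAULT_RANKING_WEIGHTS.typescript.semantic); // 0.43
    console.log(rw.typescript); // { semantic: 0.5, bm25: 0.42, vocab: 0.15, vocabBypassThreshold: 0.4 }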
@@ -317,6 +418,9 @@ class BM25Index {
  return 0;
  return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
  }
+ getInverseDocumentFrequency(term) {
+ return this.idf(term.toLowerCase());
+ }
  score(tokens, queryTerms) {
  const docLength = tokens.length;
  let score = 0;
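Note (not part of the diff): getInverseDocumentFrequency is a thin public wrapper over the existing idf statistic, only lowercasing the term first. A rough worked check of the formula with hypothetical counts, not taken from any real index:

    // idf(term) = ln(1 + (totalDocs - docFreq + 0.5) / (docFreq + 0.5))
    Math.log(1 + (100 - 3 + 0.5) / (3 + 0.5));   // ≈ 3.36 — rare terms score high
    Math.log(1 + (100 - 60 + 0.5) / (60 + 0.5)); // ≈ 0.51 — common terms score low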
@@ -2169,13 +2273,13 @@ var init_modelCache = __esm(() => {
  init_modelCatalog();
  });
 
- // src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
+ // src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
  import {
  pipeline,
  env
- } from "@xenova/transformers";
+ } from "@huggingface/transformers";
 
- class XenovaTransformersEmbeddingProvider {
+ class HuggingFaceTransformersEmbeddingProvider {
  extractor = null;
  config;
  isInitializing = false;
@@ -2183,7 +2287,7 @@ class XenovaTransformersEmbeddingProvider {
  constructor(config) {
  this.config = {
  model: config?.model ?? "bge-small-en-v1.5",
- runtime: config?.runtime ?? "xenova",
+ runtime: config?.runtime ?? "huggingface",
  showProgress: config?.showProgress ?? false,
  logger: config?.logger
  };
@@ -2307,7 +2411,7 @@ class XenovaTransformersEmbeddingProvider {
  }
  }
  var BATCH_SIZE = 32;
- var init_xenovaEmbeddingProvider = __esm(() => {
+ var init_huggingfaceEmbeddingProvider = __esm(() => {
  init_embeddingPaths();
  init_modelCatalog();
  init_modelCache();
@@ -2315,13 +2419,18 @@ var init_xenovaEmbeddingProvider = __esm(() => {
  env.allowLocalModels = true;
  });
 
- // src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
+ // src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
+ var exports_xenovaEmbeddingProvider = {};
+ __export(exports_xenovaEmbeddingProvider, {
+ XenovaTransformersEmbeddingProvider: () => XenovaTransformersEmbeddingProvider,
+ TransformersEmbeddingProvider: () => TransformersEmbeddingProvider
+ });
  import {
  pipeline as pipeline2,
  env as env2
- } from "@huggingface/transformers";
+ } from "@xenova/transformers";
 
- class HuggingFaceTransformersEmbeddingProvider {
+ class XenovaTransformersEmbeddingProvider {
  extractor = null;
  config;
  isInitializing = false;
@@ -2329,7 +2438,7 @@ class HuggingFaceTransformersEmbeddingProvider {
  constructor(config) {
  this.config = {
  model: config?.model ?? "bge-small-en-v1.5",
- runtime: config?.runtime ?? "huggingface",
+ runtime: config?.runtime ?? "xenova",
  showProgress: config?.showProgress ?? false,
  logger: config?.logger
  };
@@ -2452,29 +2561,30 @@ class HuggingFaceTransformersEmbeddingProvider {
  this.extractor = null;
  }
  }
- var BATCH_SIZE2 = 32;
- var init_huggingfaceEmbeddingProvider = __esm(() => {
+ var BATCH_SIZE2 = 32, TransformersEmbeddingProvider;
+ var init_xenovaEmbeddingProvider = __esm(() => {
  init_embeddingPaths();
  init_modelCatalog();
  init_modelCache();
  env2.cacheDir = RAGGREP_MODEL_CACHE_DIR;
  env2.allowLocalModels = true;
+ TransformersEmbeddingProvider = XenovaTransformersEmbeddingProvider;
  });
 
  // src/infrastructure/embeddings/embeddingProviderFactory.ts
  function resolveRuntime(config) {
  return config.runtime ?? "huggingface";
  }
- function createEmbeddingProvider(config) {
+ async function createEmbeddingProvider(config) {
  const runtime = resolveRuntime(config);
  if (runtime === "huggingface") {
  return new HuggingFaceTransformersEmbeddingProvider(config);
  }
- return new XenovaTransformersEmbeddingProvider(config);
+ const { XenovaTransformersEmbeddingProvider: XenovaTransformersEmbeddingProvider2 } = await Promise.resolve().then(() => (init_xenovaEmbeddingProvider(), exports_xenovaEmbeddingProvider));
+ return new XenovaTransformersEmbeddingProvider2(config);
  }
  var init_embeddingProviderFactory = __esm(() => {
  init_huggingfaceEmbeddingProvider();
- init_xenovaEmbeddingProvider();
  });
 
  // src/infrastructure/embeddings/globalEmbeddings.ts
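Note (not part of the diff): the provider factory is now async so the legacy @xenova/transformers runtime is only pulled in on demand — the default "huggingface" branch stays eager, while the "xenova" branch resolves the provider through the bundler's deferred init/exports pair shown above. At the source level this corresponds roughly to a dynamic import; a sketch, assuming the TypeScript sources live at the paths named in the section comments:

    async function createEmbeddingProvider(config) {
      if ((config.runtime ?? "huggingface") === "huggingface") {
        return new HuggingFaceTransformersEmbeddingProvider(config);
      }
      // Loaded lazily so @xenova/transformers is never touched unless requested.
      const { XenovaTransformersEmbeddingProvider } = await import("./xenovaEmbeddingProvider");
      return new XenovaTransformersEmbeddingProvider(config);
    }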
@@ -2499,7 +2609,7 @@ function getEmbeddingConfig() {
  }
  async function ensureGlobalProvider() {
  if (!globalProvider) {
- globalProvider = createEmbeddingProvider(globalConfig);
+ globalProvider = await createEmbeddingProvider(globalConfig);
  await globalProvider.initialize?.(globalConfig);
  }
  return globalProvider;
@@ -2529,13 +2639,193 @@ var init_globalEmbeddings = __esm(() => {
  var init_embeddings = __esm(() => {
  init_modelCatalog();
  init_embeddingPaths();
- init_xenovaEmbeddingProvider();
- init_xenovaEmbeddingProvider();
  init_huggingfaceEmbeddingProvider();
  init_embeddingProviderFactory();
  init_globalEmbeddings();
  });
 
+ // src/domain/services/discriminativeTerms.ts
+ function medianSorted(sorted) {
+ const n = sorted.length;
+ if (n === 0)
+ return 0;
+ const mid = Math.floor(n / 2);
+ return n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
+ }
+ function salientTermHitsChunk(term, haystack, tokenSet) {
+ if (tokenSet.has(term) || haystack.includes(term)) {
+ return true;
+ }
+ if (term.length < PREFIX_MATCH_MIN_LEN) {
+ return false;
+ }
+ for (const w of tokenSet) {
+ if (w.length < PREFIX_MATCH_MIN_LEN)
+ continue;
+ if (term.startsWith(w) || w.startsWith(term)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
+ const empty2 = () => ({
+ boost: 0,
+ penaltyFactor: 1,
+ salientTerms: [],
+ matchedSalient: [],
+ missingSalient: [],
+ salientCoverage: 1
+ });
+ const uniqueTerms = [...new Set(tokenize(query))];
+ if (uniqueTerms.length === 0) {
+ return empty2();
+ }
+ const indexed = [];
+ for (const term of uniqueTerms) {
+ const idf = bm25Index.getInverseDocumentFrequency(term);
+ if (idf > 0) {
+ indexed.push({ term, idf });
+ }
+ }
+ if (indexed.length === 0) {
+ return empty2();
+ }
+ const idfSorted = [...indexed.map((x) => x.idf)].sort((a, b) => a - b);
+ const medianIdf = medianSorted(idfSorted);
+ const salientEntries = indexed.filter((x) => x.idf >= medianIdf);
+ const salientTerms = [...new Set(salientEntries.map((x) => x.term))];
+ const idfByTerm = new Map;
+ for (const { term, idf } of salientEntries) {
+ idfByTerm.set(term, Math.max(idfByTerm.get(term) ?? 0, idf));
+ }
+ let totalW = 0;
+ for (const idf of idfByTerm.values()) {
+ totalW += idf;
+ }
+ const haystack = [chunkName ?? "", chunkText].join(`
+ `).toLowerCase();
+ const tokenSet = new Set(tokenize(chunkName ? `${chunkName}
+ ${chunkText}` : chunkText));
+ const matchedSalient = [];
+ for (const term of salientTerms) {
+ const idf = idfByTerm.get(term) ?? 0;
+ if (idf <= 0)
+ continue;
+ if (salientTermHitsChunk(term, haystack, tokenSet)) {
+ matchedSalient.push(term);
+ }
+ }
+ const matchedSet = new Set(matchedSalient);
+ const missingSalient = salientTerms.filter((t) => !matchedSet.has(t));
+ let matchedW = 0;
+ for (const term of matchedSalient) {
+ matchedW += idfByTerm.get(term) ?? 0;
+ }
+ const salientCoverage = totalW > 0 ? matchedW / totalW : 1;
+ const { boostCap, penaltyMax, penaltyFloor } = weights;
+ const boost = boostCap * salientCoverage;
+ let penaltyFactor = 1 - penaltyMax * (1 - salientCoverage);
+ if (penaltyFactor < penaltyFloor) {
+ penaltyFactor = penaltyFloor;
+ }
+ return {
+ boost,
+ penaltyFactor,
+ salientTerms,
+ matchedSalient,
+ missingSalient,
+ salientCoverage
+ };
+ }
+ var PREFIX_MATCH_MIN_LEN = 4;
+ var init_discriminativeTerms = __esm(() => {
+ init_rankingWeights();
+ });
+
+ // src/domain/services/matchScales.ts
+ function semanticPctFromCosine(cosine) {
+ return clamp01((cosine + 1) / 2);
+ }
+ function clamp01(x) {
+ if (Number.isNaN(x) || !Number.isFinite(x))
+ return 0;
+ return Math.max(0, Math.min(1, x));
+ }
+ function num(ctx, key) {
+ const v = ctx[key];
+ return typeof v === "number" && Number.isFinite(v) ? v : 0;
+ }
+ function additiveStructuredBoost(ctx) {
+ return num(ctx, "pathBoost") + num(ctx, "fileTypeBoost") + num(ctx, "chunkTypeBoost") + num(ctx, "exportBoost");
+ }
+ function attachMatchScales(result, rw) {
+ const ctx = result.context ?? {};
+ const mid = result.moduleId;
+ let semanticMatch = 0;
+ let structuredMatch = 0;
+ if (mid === "language/typescript") {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ const vocab = num(ctx, "vocabScore");
+ const phraseCov = num(ctx, "phraseCoverage");
+ const tw = rw.typescript;
+ semanticMatch = semanticPctFromCosine(cos);
+ const denom = tw.bm25 + tw.vocab + 0.000000001;
+ const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
+ structuredMatch = clamp01(lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25));
+ } else if (mid.startsWith("language/")) {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ semanticMatch = semanticPctFromCosine(cos);
+ structuredMatch = clamp01(bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2));
+ } else if (mid === "docs/markdown") {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ const docBoost = num(ctx, "docBoost");
+ const headingBoost = num(ctx, "headingBoost");
+ const phraseCov = num(ctx, "phraseCoverage");
+ const mw = rw.markdown;
+ semanticMatch = semanticPctFromCosine(cos);
+ structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
+ } else if (mid === "core") {
+ semanticMatch = 0;
+ const nBm = num(ctx, "bm25Score");
+ const sym = num(ctx, "symbolScore");
+ structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
+ } else if (mid === "data/json") {
+ semanticMatch = 0;
+ const bm25 = num(ctx, "bm25Score");
+ const litM = num(ctx, "literalMultiplier");
+ structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
+ } else {
+ semanticMatch = 0;
+ structuredMatch = clamp01(result.score);
+ }
+ return { ...result, semanticMatch, structuredMatch };
+ }
+ function compareSearchResultsByRankBy(a, b, rankBy) {
+ if (rankBy === "combined") {
+ return b.score - a.score;
+ }
+ const sa = a.semanticMatch ?? 0;
+ const sb = b.semanticMatch ?? 0;
+ const ta = a.structuredMatch ?? 0;
+ const tb = b.structuredMatch ?? 0;
+ if (rankBy === "semantic") {
+ if (Math.abs(sb - sa) > 0.000000001)
+ return sb - sa;
+ if (Math.abs(tb - ta) > 0.000000001)
+ return tb - ta;
+ return b.score - a.score;
+ }
+ if (Math.abs(tb - ta) > 0.000000001)
+ return tb - ta;
+ if (Math.abs(sb - sa) > 0.000000001)
+ return sb - sa;
+ return b.score - a.score;
+ }
+
  // src/domain/services/keywords.ts
  function extractKeywords(content, name, maxKeywords = 50) {
  const keywords = new Set;
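Note (not part of the diff): scoreDiscriminativeTerms splits the query's tokens at the median IDF, treats the upper half as salient, and turns their IDF-weighted coverage in the chunk into a small additive boost plus a multiplicative penalty. A worked pass with the default weights (boostCap 0.1, penaltyMax 0.16, penaltyFloor 0.72) and an illustrative coverage value:

    // salientCoverage = matchedIdfWeight / totalIdfWeight, say 0.5
    const salientCoverage = 0.5;
    const boost = 0.1 * salientCoverage;                                   // 0.05
    const penaltyFactor = Math.max(0.72, 1 - 0.16 * (1 - salientCoverage)); // 0.92
    // With the default penaltyMax the factor bottoms out at 0.84 (coverage 0),
    // so the 0.72 floor only binds for larger custom penalties.
    // Each module then rescales: adjustedScore = (finalScore + boost) * penaltyFactor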
@@ -3194,16 +3484,16 @@ var init_literalExtractor = __esm(() => {
  });
 
  // src/domain/services/literalScorer.ts
- function calculateLiteralMultiplier(matchType, confidence) {
- return LITERAL_SCORING_CONSTANTS.MULTIPLIERS[matchType][confidence];
+ function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
+ return weights.multipliers[matchType][confidence];
  }
- function calculateMaxMultiplier(matches) {
+ function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return 1;
  }
- return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence)));
+ return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence, weights)));
  }
- function calculateLiteralContribution(matches, hasSemanticOrBm25) {
+ function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return {
  multiplier: 1,
@@ -3214,7 +3504,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
  let bestMatch = null;
  let bestMultiplier = 0;
  for (const match of matches) {
- const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
+ const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
  if (mult > bestMultiplier) {
  bestMultiplier = mult;
  bestMatch = match;
@@ -3228,32 +3518,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
  matchCount: matches.length
  };
  }
- function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
+ function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return baseScore;
  }
- const multiplier = calculateMaxMultiplier(matches);
+ const multiplier = calculateMaxMultiplier(matches, weights);
  if (!hasSemanticOrBm25) {
- return LITERAL_SCORING_CONSTANTS.BASE_SCORE * multiplier;
+ return weights.baseScore * multiplier;
  }
  return baseScore * multiplier;
  }
- var LITERAL_SCORING_CONSTANTS;
+ var DEFAULT_LW;
  var init_literalScorer = __esm(() => {
- LITERAL_SCORING_CONSTANTS = {
- BASE_SCORE: 0.5,
- MULTIPLIERS: {
- definition: { high: 2.5, medium: 2, low: 1.5 },
- reference: { high: 2, medium: 1.5, low: 1.3 },
- import: { high: 1.5, medium: 1.3, low: 1.1 }
- },
- VOCABULARY: {
- BASE_MULTIPLIER: 1.3,
- PER_WORD_BONUS: 0.1,
- MAX_VOCABULARY_BONUS: 0.5,
- MIN_WORDS_FOR_MATCH: 2
- }
- };
+ init_rankingWeights();
+ DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
  });
 
  // src/domain/services/lexicon.ts
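Note (not part of the diff): the literal scorer now reads its multipliers from the shared literal weights rather than a module-local constant, with the old values carried over as DEFAULT_LITERAL_BOOST_WEIGHTS, so the arithmetic is unchanged by default. Hypothetical numbers for illustration:

    // definition match at high confidence → multiplier 2.5
    // chunk with no semantic/BM25 signal: score = weights.baseScore * multiplier
    0.5 * 2.5; // 1.25
    // chunk that already has a hybrid score of, say, 0.4: score = 0.4 * multiplier
    0.4 * 2.5; // 1.0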
@@ -4208,6 +4486,7 @@ var init_chunkContext = __esm(() => {
 
  // src/domain/services/index.ts
  var init_services = __esm(() => {
+ init_discriminativeTerms();
  init_keywords();
  init_queryIntent();
  init_queryLiteralParser();
@@ -5031,6 +5310,9 @@ class TypeScriptModule {
  minScore = DEFAULT_MIN_SCORE2,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const tw = rw.typescript;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -5137,17 +5419,19 @@ class TypeScriptModule {
  const chunkTypeBoost = calculateChunkTypeBoost(chunk);
  const exportBoost = calculateExportBoost(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
- const baseScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + VOCAB_WEIGHT * vocabScore;
+ const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > VOCAB_THRESHOLD || phraseMatch.isSignificant) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
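Note (not part of the diff): the TypeScript module now combines its weights (defaults semantic 0.43, bm25 0.42, vocab 0.15) and applies the discriminative adjustment last. The numbers below are made up purely to trace the formula:

    const tw = { semantic: 0.43, bm25: 0.42, vocab: 0.15 };
    const semanticScore = 0.6, bm25Score = 0.5, vocabScore = 0;
    const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore; // 0.468
    const boostedScore = baseScore * 1;          // no literal matches → multiplier 1
    const finalScore = boostedScore + 0.05;      // additive path/type/export/phrase boosts, say 0.05
    const adjusted = (finalScore + 0.1 * 1) * 1; // full salient coverage: +boostCap, no penalty
    // adjusted ≈ 0.618, compared against minScore (default 0.15)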
@@ -5159,6 +5443,10 @@ class TypeScriptModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -5211,15 +5499,17 @@ class TypeScriptModule {
  const chunkTypeBoost = calculateChunkTypeBoost(chunk);
  const exportBoost = calculateExportBoost(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
- const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
- const baseScore = semanticScore > 0 ? SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + VOCAB_WEIGHT * vocabScore : LITERAL_SCORING_CONSTANTS.BASE_SCORE;
- const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
+ const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
+ const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
+ const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -5231,6 +5521,10 @@ class TypeScriptModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -5267,13 +5561,14 @@ class TypeScriptModule {
  return references;
  }
  }
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.6, BM25_WEIGHT = 0.25, VOCAB_WEIGHT = 0.15, VOCAB_THRESHOLD = 0.4, TYPESCRIPT_EXTENSIONS, supportsFile;
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
  var init_typescript = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_parseCode();
  init_storage();
+ init_entities();
  TYPESCRIPT_EXTENSIONS = [
  ".ts",
  ".tsx",
@@ -6386,6 +6681,9 @@ class PythonModule {
  minScore = DEFAULT_MIN_SCORE3,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -6478,17 +6776,19 @@ class PythonModule {
  const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
  const exportBoost = calculateExportBoost2(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -6497,6 +6797,10 @@ class PythonModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -6531,15 +6835,17 @@ class PythonModule {
  const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
  const exportBoost = calculateExportBoost2(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const literalContribution = calculateLiteralContribution(matches, false);
- const baseScore = semanticScore > 0 ? SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score : LITERAL_SCORING_CONSTANTS.BASE_SCORE;
- const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
+ const literalContribution = calculateLiteralContribution(matches, false, lt);
+ const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
+ const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -6548,6 +6854,10 @@ class PythonModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -6560,13 +6870,14 @@ class PythonModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, PYTHON_EXTENSIONS, supportsFile2;
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
  var init_python = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  PYTHON_EXTENSIONS = [".py", ".pyw"];
  supportsFile2 = isPythonFile;
  });
@@ -6918,6 +7229,9 @@ class GoModule {
  minScore = DEFAULT_MIN_SCORE4,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -7010,17 +7324,19 @@ class GoModule {
  const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
  const exportBoost = calculateExportBoost3(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -7029,6 +7345,10 @@ class GoModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -7041,13 +7361,14 @@ class GoModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, GO_EXTENSIONS, supportsFile3;
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
  var init_go = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  GO_EXTENSIONS = [".go"];
  supportsFile3 = isGoFile;
  });
@@ -7478,6 +7799,9 @@ class RustModule {
  minScore = DEFAULT_MIN_SCORE5,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -7570,17 +7894,19 @@ class RustModule {
  const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
  const exportBoost = calculateExportBoost4(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT4 * semanticScore + BM25_WEIGHT4 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -7589,6 +7915,10 @@ class RustModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -7601,13 +7931,14 @@ class RustModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, SEMANTIC_WEIGHT4 = 0.7, BM25_WEIGHT4 = 0.3, RUST_EXTENSIONS, supportsFile4;
+ var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
  var init_rust = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  RUST_EXTENSIONS = [".rs"];
  supportsFile4 = isRustFile;
  });
@@ -7737,6 +8068,8 @@ class JsonModule {
  minScore = DEFAULT_MIN_SCORE6,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const jw = rw.json;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -7790,9 +8123,9 @@ class JsonModule {
  const bm25Score = bm25Scores.get(chunk.id) || 0;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
  const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
- const baseScore = BM25_WEIGHT5 * bm25Score;
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
- const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
+ const baseScore = jw.bm25 * bm25Score;
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
+ const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
  const finalScore = boostedScore + literalBase;
  processedChunkIds.add(chunk.id);
  if (finalScore >= minScore || literalMatches.length > 0) {
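Note (not part of the diff): for JSON chunks the old BM25_WEIGHT5/LITERAL_WEIGHT constants survive as jw.bm25 = 0.4 and jw.literalBaseWeight = 0.6. Illustrative arithmetic with the default literal weights:

    // chunk matched only through a query literal (bm25Score === 0),
    // reference match at medium confidence (multiplier 1.5):
    // finalScore = applyLiteralBoost(0, …) + literalBase
    0.5 * 1.5 + 0.5 * 0.6; // 1.05
    // chunk with bm25Score 0.5 and no literal matches:
    0.4 * 0.5;             // 0.2, against DEFAULT_MIN_SCORE6 = 0.1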
@@ -7825,7 +8158,7 @@ class JsonModule {
  if (!chunk)
  continue;
  const literalContribution = calculateLiteralContribution(matches, false);
- const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
+ const score = rw.literal.baseScore * literalContribution.multiplier;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
@@ -7846,11 +8179,12 @@ class JsonModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, BM25_WEIGHT5 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile5;
+ var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
  var init_json = __esm(() => {
  init_services();
  init_config2();
  init_storage();
+ init_entities();
  JSON_EXTENSIONS = [".json"];
  supportsFile5 = isJsonFile;
  });
@@ -8085,6 +8419,8 @@ class MarkdownModule {
  minScore = DEFAULT_MIN_SCORE7,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const mw = rw.markdown;
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
  let allFiles;
@@ -8150,15 +8486,18 @@ class MarkdownModule {
  "what",
  "explain"
  ].includes(t))) {
- docBoost = 0.05;
- }
- const headingBoost = calculateHeadingLevelBoost(chunk);
- const hybridScore = SEMANTIC_WEIGHT5 * semanticScore + BM25_WEIGHT6 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
- if (hybridScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
+ docBoost = mw.docIntentBoost;
+ }
+ const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
+ const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
+ const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
+ if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
  results.push({
  filepath,
  chunk,
- score: hybridScore,
+ score: finalScore,
  moduleId: this.id,
  context: {
  semanticScore,
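Note (not part of the diff): the markdown heading boost is no longer flat; it is scaled by phrase coverage between headingPhraseCoverageMin (0.25) and the full value (min + span = 1.0). Tracing the formula with a hypothetical raw boost:

    const mw = { headingPhraseCoverageMin: 0.25, headingPhraseCoverageSpan: 0.75 };
    const rawHeadingBoost = 0.08; // whatever calculateHeadingLevelBoost returns for the chunk
    rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * 1); // 0.08 — phrase fully covered
    rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * 0); // 0.02 — phrase tokens present, none covered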
@@ -8167,7 +8506,11 @@ class MarkdownModule {
  phraseCoverage: phraseMatch.coverage,
  docBoost,
  headingBoost,
- headingLevel: chunk.metadata?.headingLevel
+ headingLevel: chunk.metadata?.headingLevel,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient
  }
  });
  }
@@ -8176,11 +8519,12 @@ class MarkdownModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, SEMANTIC_WEIGHT5 = 0.7, BM25_WEIGHT6 = 0.3, MARKDOWN_EXTENSIONS, supportsFile6;
+ var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
  var init_markdown = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
+ init_entities();
  init_storage();
  MARKDOWN_EXTENSIONS = [".md", ".txt"];
  supportsFile6 = isMarkdownFile;
@@ -11611,6 +11955,7 @@ minimatch.unescape = unescape;
  init_types();
  init_config2();
  init_services();
+ init_entities();
  // src/domain/usecases/exactSearch.ts
  init_simpleSearch();
  var DEFAULT_IGNORED_DIRS = [
@@ -11778,7 +12123,9 @@ async function hybridSearch(rootDir, query, options = {}) {
  if (ensureFresh) {
  await ensureIndexFresh(rootDir, { quiet: true });
  }
- console.log(`Searching for: "${query}"`);
+ if (!options.quiet) {
+ console.log(`Searching for: "${query}"`);
+ }
  const config = await loadConfig(rootDir);
  await registerBuiltInModules();
  const globalManifest = await loadGlobalManifest2(rootDir, config);
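Note (not part of the diff): the new quiet option (default false in the search defaults) suppresses the "Searching for" banner, which is useful when raggrep is driven programmatically. A sketch, assuming the package is imported under the name "raggrep":

    import { hybridSearch } from "raggrep";

    // No "Searching for: ..." line is printed when quiet is set.
    const { results } = await hybridSearch(process.cwd(), "ranking weights", { quiet: true });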
@@ -11841,10 +12188,18 @@ async function hybridSearch(rootDir, query, options = {}) {
  }
  }
  }
- filteredResults.sort((a, b) => b.score - a.score);
+ const rw = mergeRankingWeights(options.rankingWeights);
+ let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
+ for (const r of ranked) {
+ if (r.context?.exactMatchFusion) {
+ r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
+ }
+ }
+ const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
+ ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
  const topK = options.topK ?? 10;
  return {
- results: filteredResults.slice(0, topK),
+ results: ranked.slice(0, topK),
  exactMatches,
  fusionApplied
  };
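Note (not part of the diff): hybridSearch now attaches semanticMatch and structuredMatch scales to every result via attachMatchScales, multiplies structuredMatch by 1.5 (clamped to 1) for exact-match fusion hits, and orders results with compareSearchResultsByRankBy. The default rankBy is "structured", with "semantic" and "combined" (the old pure-score ordering) as alternatives. A usage sketch, again assuming the "raggrep" import name:

    import { hybridSearch, formatSearchResults } from "raggrep";

    // Prefer results whose lexical/structural evidence is strongest…
    const structured = await hybridSearch(".", "merge ranking weights", { rankBy: "structured", quiet: true });
    // …or fall back to the embedding-first ordering.
    const semantic = await hybridSearch(".", "merge ranking weights", { rankBy: "semantic", quiet: true });
    console.log(formatSearchResults(structured.results));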
@@ -11935,7 +12290,9 @@ function formatSearchResults2(results) {
  const nameInfo = chunk.name ? ` (${chunk.name})` : "";
  output += `${i + 1}. ${location}${nameInfo}
  `;
- output += ` Score: ${(result.score * 100).toFixed(1)}% | Type: ${chunk.type}`;
+ const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
+ const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
+ output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
  output += ` | via ${formatModuleName(result.moduleId)}`;
  if (chunk.isExported) {
  output += " | exported";
@@ -12028,6 +12385,7 @@ function formatHybridSearchResults(hybridResults) {
  }
 
  // src/index.ts
+ init_entities();
  async function index(directory, options = {}) {
  return indexDirectory(directory, options);
  }
@@ -12056,6 +12414,8 @@ var src_default = raggrep;
12056
12414
  export {
12057
12415
  search2 as search,
12058
12416
  reset,
12417
+ mergeRankingWeights,
12418
+ mergeLiteralWeights,
12059
12419
  index,
12060
12420
  hybridSearch2 as hybridSearch,
12061
12421
  formatSearchResults2 as formatSearchResults,
@@ -12067,7 +12427,9 @@ export {
  cleanup,
  SilentLogger,
  InlineProgressLogger,
+ DEFAULT_RANKING_WEIGHTS,
+ DEFAULT_LITERAL_BOOST_WEIGHTS,
  ConsoleLogger
  };
 
- //# debugId=5C9139B3A95BD0AB64756E2164756E21
+ //# debugId=E54DFEDA0CA026F464756E2164756E21