raggrep 0.17.0 → 0.18.0

package/dist/cli/main.js CHANGED
@@ -69,13 +69,13 @@ var init_modelCache = __esm(() => {
  init_modelCatalog();
  });

- // src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
+ // src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
  import {
  pipeline,
  env
- } from "@xenova/transformers";
+ } from "@huggingface/transformers";

- class XenovaTransformersEmbeddingProvider {
+ class HuggingFaceTransformersEmbeddingProvider {
  extractor = null;
  config;
  isInitializing = false;
@@ -83,7 +83,7 @@ class XenovaTransformersEmbeddingProvider {
  constructor(config) {
  this.config = {
  model: config?.model ?? "bge-small-en-v1.5",
- runtime: config?.runtime ?? "xenova",
+ runtime: config?.runtime ?? "huggingface",
  showProgress: config?.showProgress ?? false,
  logger: config?.logger
  };
@@ -207,7 +207,7 @@ class XenovaTransformersEmbeddingProvider {
  }
  }
  var BATCH_SIZE = 32;
- var init_xenovaEmbeddingProvider = __esm(() => {
+ var init_huggingfaceEmbeddingProvider = __esm(() => {
  init_embeddingPaths();
  init_modelCatalog();
  init_modelCache();
@@ -215,13 +215,18 @@ var init_xenovaEmbeddingProvider = __esm(() => {
  env.allowLocalModels = true;
  });

- // src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
+ // src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
+ var exports_xenovaEmbeddingProvider = {};
+ __export(exports_xenovaEmbeddingProvider, {
+ XenovaTransformersEmbeddingProvider: () => XenovaTransformersEmbeddingProvider,
+ TransformersEmbeddingProvider: () => TransformersEmbeddingProvider
+ });
  import {
  pipeline as pipeline2,
  env as env2
- } from "@huggingface/transformers";
+ } from "@xenova/transformers";

- class HuggingFaceTransformersEmbeddingProvider {
+ class XenovaTransformersEmbeddingProvider {
  extractor = null;
  config;
  isInitializing = false;
@@ -229,7 +234,7 @@ class HuggingFaceTransformersEmbeddingProvider {
  constructor(config) {
  this.config = {
  model: config?.model ?? "bge-small-en-v1.5",
- runtime: config?.runtime ?? "huggingface",
+ runtime: config?.runtime ?? "xenova",
  showProgress: config?.showProgress ?? false,
  logger: config?.logger
  };
@@ -352,29 +357,30 @@ class HuggingFaceTransformersEmbeddingProvider {
  this.extractor = null;
  }
  }
- var BATCH_SIZE2 = 32;
- var init_huggingfaceEmbeddingProvider = __esm(() => {
+ var BATCH_SIZE2 = 32, TransformersEmbeddingProvider;
+ var init_xenovaEmbeddingProvider = __esm(() => {
  init_embeddingPaths();
  init_modelCatalog();
  init_modelCache();
  env2.cacheDir = RAGGREP_MODEL_CACHE_DIR;
  env2.allowLocalModels = true;
+ TransformersEmbeddingProvider = XenovaTransformersEmbeddingProvider;
  });

  // src/infrastructure/embeddings/embeddingProviderFactory.ts
  function resolveRuntime(config) {
  return config.runtime ?? "huggingface";
  }
- function createEmbeddingProvider(config) {
+ async function createEmbeddingProvider(config) {
  const runtime = resolveRuntime(config);
  if (runtime === "huggingface") {
  return new HuggingFaceTransformersEmbeddingProvider(config);
  }
- return new XenovaTransformersEmbeddingProvider(config);
+ const { XenovaTransformersEmbeddingProvider: XenovaTransformersEmbeddingProvider2 } = await Promise.resolve().then(() => (init_xenovaEmbeddingProvider(), exports_xenovaEmbeddingProvider));
+ return new XenovaTransformersEmbeddingProvider2(config);
  }
  var init_embeddingProviderFactory = __esm(() => {
  init_huggingfaceEmbeddingProvider();
- init_xenovaEmbeddingProvider();
  });

  // src/infrastructure/embeddings/globalEmbeddings.ts
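The factory is now async so the legacy `@xenova/transformers` runtime is only evaluated when a config actually selects it; the `Promise.resolve().then(...)` call above is the bundler's rendering of a dynamic `import()`. A minimal sketch of the same lazy-fallback pattern (`DefaultProvider`, `LegacyProvider`, and the module path are hypothetical stand-ins, not raggrep's real names):

```js
// Sketch only: the default runtime stays statically bundled, while the
// fallback module is loaded on first use via dynamic import().
class DefaultProvider {
  constructor(config) { this.config = config; }
}

async function createProvider(config) {
  if ((config.runtime ?? "huggingface") === "huggingface") {
    return new DefaultProvider(config); // eager path, no extra module load
  }
  // Lazy path: module code runs only when the legacy runtime is requested.
  const { LegacyProvider } = await import("./legacyProvider.js"); // hypothetical path
  return new LegacyProvider(config);
}
```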
@@ -399,7 +405,7 @@ function getEmbeddingConfig() {
  }
  async function ensureGlobalProvider() {
  if (!globalProvider) {
- globalProvider = createEmbeddingProvider(globalConfig);
+ globalProvider = await createEmbeddingProvider(globalConfig);
  await globalProvider.initialize?.(globalConfig);
  }
  return globalProvider;
@@ -432,8 +438,6 @@ var init_globalEmbeddings = __esm(() => {
  var init_embeddings = __esm(() => {
  init_modelCatalog();
  init_embeddingPaths();
- init_xenovaEmbeddingProvider();
- init_xenovaEmbeddingProvider();
  init_huggingfaceEmbeddingProvider();
  init_embeddingProviderFactory();
  init_globalEmbeddings();
@@ -1167,7 +1171,107 @@ var init_searchResult = __esm(() => {
  minScore: 0.15,
  filePatterns: [],
  pathFilter: [],
- ensureFresh: true
+ ensureFresh: true,
+ rankingWeights: {},
+ quiet: false,
+ rankBy: "structured"
+ };
+ });
+
+ // src/domain/entities/rankingWeights.ts
+ function mergeLiteralWeights(def, partial) {
+ if (!partial) {
+ return def;
+ }
+ return {
+ baseScore: partial.baseScore ?? def.baseScore,
+ multipliers: {
+ definition: {
+ ...def.multipliers.definition,
+ ...partial.multipliers?.definition
+ },
+ reference: {
+ ...def.multipliers.reference,
+ ...partial.multipliers?.reference
+ },
+ import: { ...def.multipliers.import, ...partial.multipliers?.import }
+ },
+ vocabulary: { ...def.vocabulary, ...partial.vocabulary }
+ };
+ }
+ function mergeRankingWeights(partial) {
+ if (!partial) {
+ return DEFAULT_RANKING_WEIGHTS;
+ }
+ return {
+ discriminative: {
+ ...DEFAULT_RANKING_WEIGHTS.discriminative,
+ ...partial.discriminative
+ },
+ typescript: {
+ ...DEFAULT_RANKING_WEIGHTS.typescript,
+ ...partial.typescript
+ },
+ language: {
+ ...DEFAULT_RANKING_WEIGHTS.language,
+ ...partial.language
+ },
+ markdown: {
+ ...DEFAULT_RANKING_WEIGHTS.markdown,
+ ...partial.markdown
+ },
+ json: {
+ ...DEFAULT_RANKING_WEIGHTS.json,
+ ...partial.json
+ },
+ literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
+ };
+ }
+ var DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, DEFAULT_RANKING_WEIGHTS;
+ var init_rankingWeights = __esm(() => {
+ DEFAULT_DISCRIMINATIVE_WEIGHTS = {
+ boostCap: 0.1,
+ penaltyMax: 0.16,
+ penaltyFloor: 0.72
+ };
+ DEFAULT_LITERAL_BOOST_WEIGHTS = {
+ baseScore: 0.5,
+ multipliers: {
+ definition: { high: 2.5, medium: 2, low: 1.5 },
+ reference: { high: 2, medium: 1.5, low: 1.3 },
+ import: { high: 1.5, medium: 1.3, low: 1.1 }
+ },
+ vocabulary: {
+ baseMultiplier: 1.3,
+ perWordBonus: 0.1,
+ maxVocabularyBonus: 0.5,
+ minWordsForMatch: 2
+ }
+ };
+ DEFAULT_RANKING_WEIGHTS = {
+ discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
+ typescript: {
+ semantic: 0.43,
+ bm25: 0.42,
+ vocab: 0.15,
+ vocabBypassThreshold: 0.4
+ },
+ language: {
+ semantic: 0.7,
+ bm25: 0.3
+ },
+ markdown: {
+ semantic: 0.62,
+ bm25: 0.33,
+ docIntentBoost: 0.03,
+ headingPhraseCoverageMin: 0.25,
+ headingPhraseCoverageSpan: 0.75
+ },
+ json: {
+ bm25: 0.4,
+ literalBaseWeight: 0.6
+ },
+ literal: DEFAULT_LITERAL_BOOST_WEIGHTS
  };
  });

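`mergeRankingWeights` deep-merges a partial override onto `DEFAULT_RANKING_WEIGHTS`, so callers only specify the knobs they want to change and everything else keeps the defaults above. A quick sketch of the merge semantics, assuming the function is imported from the entities module:

```js
// Partial override: only the named keys change; siblings keep their defaults.
const rw = mergeRankingWeights({
  typescript: { semantic: 0.5 },   // bumps the semantic weight for TS chunks
  literal: { baseScore: 0.6 },     // nested literal weights merge field by field
});

rw.typescript.semantic;                  // 0.5  (overridden)
rw.typescript.bm25;                      // 0.42 (default preserved)
rw.literal.baseScore;                    // 0.6  (overridden)
rw.literal.multipliers.definition.high;  // 2.5  (default preserved by mergeLiteralWeights)
```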
@@ -1315,6 +1419,7 @@ var init_lexicon = __esm(() => {
  // src/domain/entities/index.ts
  var init_entities = __esm(() => {
  init_searchResult();
+ init_rankingWeights();
  init_config();
  init_literal();
  init_lexicon();
@@ -1435,6 +1540,9 @@ class BM25Index {
  return 0;
  return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
  }
+ getInverseDocumentFrequency(term) {
+ return this.idf(term.toLowerCase());
+ }
  score(tokens, queryTerms) {
  const docLength = tokens.length;
  let score = 0;
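`getInverseDocumentFrequency` is a thin public accessor over the existing BM25 `idf`, which uses the standard smoothed form `log(1 + (N - df + 0.5) / (df + 0.5))`. A worked example of what that returns (plain arithmetic, not raggrep API):

```js
// idf for a corpus of 1000 documents, per the formula above.
const totalDocs = 1000;
const idf = (docFreq) =>
  Math.log(1 + (totalDocs - docFreq + 0.5) / (docFreq + 0.5));

idf(5);   // ≈ 5.20: rare term, highly discriminative
idf(500); // ≈ 0.69: appears in half the corpus, weak signal
```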
@@ -3261,6 +3369,188 @@ var init_core = __esm(() => {
  init_symbols();
  });

+ // src/domain/services/discriminativeTerms.ts
+ function medianSorted(sorted) {
+ const n = sorted.length;
+ if (n === 0)
+ return 0;
+ const mid = Math.floor(n / 2);
+ return n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
+ }
+ function salientTermHitsChunk(term, haystack, tokenSet) {
+ if (tokenSet.has(term) || haystack.includes(term)) {
+ return true;
+ }
+ if (term.length < PREFIX_MATCH_MIN_LEN) {
+ return false;
+ }
+ for (const w of tokenSet) {
+ if (w.length < PREFIX_MATCH_MIN_LEN)
+ continue;
+ if (term.startsWith(w) || w.startsWith(term)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
+ const empty2 = () => ({
+ boost: 0,
+ penaltyFactor: 1,
+ salientTerms: [],
+ matchedSalient: [],
+ missingSalient: [],
+ salientCoverage: 1
+ });
+ const uniqueTerms = [...new Set(tokenize(query))];
+ if (uniqueTerms.length === 0) {
+ return empty2();
+ }
+ const indexed = [];
+ for (const term of uniqueTerms) {
+ const idf = bm25Index.getInverseDocumentFrequency(term);
+ if (idf > 0) {
+ indexed.push({ term, idf });
+ }
+ }
+ if (indexed.length === 0) {
+ return empty2();
+ }
+ const idfSorted = [...indexed.map((x) => x.idf)].sort((a, b) => a - b);
+ const medianIdf = medianSorted(idfSorted);
+ const salientEntries = indexed.filter((x) => x.idf >= medianIdf);
+ const salientTerms = [...new Set(salientEntries.map((x) => x.term))];
+ const idfByTerm = new Map;
+ for (const { term, idf } of salientEntries) {
+ idfByTerm.set(term, Math.max(idfByTerm.get(term) ?? 0, idf));
+ }
+ let totalW = 0;
+ for (const idf of idfByTerm.values()) {
+ totalW += idf;
+ }
+ const haystack = [chunkName ?? "", chunkText].join(`
+ `).toLowerCase();
+ const tokenSet = new Set(tokenize(chunkName ? `${chunkName}
+ ${chunkText}` : chunkText));
+ const matchedSalient = [];
+ for (const term of salientTerms) {
+ const idf = idfByTerm.get(term) ?? 0;
+ if (idf <= 0)
+ continue;
+ if (salientTermHitsChunk(term, haystack, tokenSet)) {
+ matchedSalient.push(term);
+ }
+ }
+ const matchedSet = new Set(matchedSalient);
+ const missingSalient = salientTerms.filter((t) => !matchedSet.has(t));
+ let matchedW = 0;
+ for (const term of matchedSalient) {
+ matchedW += idfByTerm.get(term) ?? 0;
+ }
+ const salientCoverage = totalW > 0 ? matchedW / totalW : 1;
+ const { boostCap, penaltyMax, penaltyFloor } = weights;
+ const boost = boostCap * salientCoverage;
+ let penaltyFactor = 1 - penaltyMax * (1 - salientCoverage);
+ if (penaltyFactor < penaltyFloor) {
+ penaltyFactor = penaltyFloor;
+ }
+ return {
+ boost,
+ penaltyFactor,
+ salientTerms,
+ matchedSalient,
+ missingSalient,
+ salientCoverage
+ };
+ }
+ var PREFIX_MATCH_MIN_LEN = 4;
+ var init_discriminativeTerms = __esm(() => {
+ init_rankingWeights();
+ });
+
+ // src/domain/services/matchScales.ts
+ function semanticPctFromCosine(cosine) {
+ return clamp01((cosine + 1) / 2);
+ }
+ function clamp01(x) {
+ if (Number.isNaN(x) || !Number.isFinite(x))
+ return 0;
+ return Math.max(0, Math.min(1, x));
+ }
+ function num(ctx, key) {
+ const v = ctx[key];
+ return typeof v === "number" && Number.isFinite(v) ? v : 0;
+ }
+ function additiveStructuredBoost(ctx) {
+ return num(ctx, "pathBoost") + num(ctx, "fileTypeBoost") + num(ctx, "chunkTypeBoost") + num(ctx, "exportBoost");
+ }
+ function attachMatchScales(result, rw) {
+ const ctx = result.context ?? {};
+ const mid = result.moduleId;
+ let semanticMatch = 0;
+ let structuredMatch = 0;
+ if (mid === "language/typescript") {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ const vocab = num(ctx, "vocabScore");
+ const phraseCov = num(ctx, "phraseCoverage");
+ const tw = rw.typescript;
+ semanticMatch = semanticPctFromCosine(cos);
+ const denom = tw.bm25 + tw.vocab + 0.000000001;
+ const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
+ structuredMatch = clamp01(lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25));
+ } else if (mid.startsWith("language/")) {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ semanticMatch = semanticPctFromCosine(cos);
+ structuredMatch = clamp01(bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2));
+ } else if (mid === "docs/markdown") {
+ const cos = num(ctx, "semanticScore");
+ const bm25 = num(ctx, "bm25Score");
+ const docBoost = num(ctx, "docBoost");
+ const headingBoost = num(ctx, "headingBoost");
+ const phraseCov = num(ctx, "phraseCoverage");
+ const mw = rw.markdown;
+ semanticMatch = semanticPctFromCosine(cos);
+ structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
+ } else if (mid === "core") {
+ semanticMatch = 0;
+ const nBm = num(ctx, "bm25Score");
+ const sym = num(ctx, "symbolScore");
+ structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
+ } else if (mid === "data/json") {
+ semanticMatch = 0;
+ const bm25 = num(ctx, "bm25Score");
+ const litM = num(ctx, "literalMultiplier");
+ structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
+ } else {
+ semanticMatch = 0;
+ structuredMatch = clamp01(result.score);
+ }
+ return { ...result, semanticMatch, structuredMatch };
+ }
+ function compareSearchResultsByRankBy(a, b, rankBy) {
+ if (rankBy === "combined") {
+ return b.score - a.score;
+ }
+ const sa = a.semanticMatch ?? 0;
+ const sb = b.semanticMatch ?? 0;
+ const ta = a.structuredMatch ?? 0;
+ const tb = b.structuredMatch ?? 0;
+ if (rankBy === "semantic") {
+ if (Math.abs(sb - sa) > 0.000000001)
+ return sb - sa;
+ if (Math.abs(tb - ta) > 0.000000001)
+ return tb - ta;
+ return b.score - a.score;
+ }
+ if (Math.abs(tb - ta) > 0.000000001)
+ return tb - ta;
+ if (Math.abs(sb - sa) > 0.000000001)
+ return sb - sa;
+ return b.score - a.score;
+ }
+
  // src/domain/services/keywords.ts
  function extractKeywords(content, name, maxKeywords = 50) {
  const keywords = new Set;
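`scoreDiscriminativeTerms` splits the query into salient (above-median IDF) terms, measures how much of that IDF weight the chunk actually covers, and converts coverage into a small additive boost plus a bounded multiplicative penalty. A worked example with the default weights (`boostCap` 0.1, `penaltyMax` 0.16, `penaltyFloor` 0.72), assuming a chunk matches salient terms worth 3.0 of 4.0 total IDF weight:

```js
// Coverage -> boost/penalty arithmetic, mirroring scoreDiscriminativeTerms.
const weights = { boostCap: 0.1, penaltyMax: 0.16, penaltyFloor: 0.72 };

const salientCoverage = 3.0 / 4.0;                                   // 0.75
const boost = weights.boostCap * salientCoverage;                    // 0.075
let penaltyFactor = 1 - weights.penaltyMax * (1 - salientCoverage);  // 0.96
penaltyFactor = Math.max(penaltyFactor, weights.penaltyFloor);       // floor not hit here

// The language modules then apply: adjusted = (finalScore + boost) * penaltyFactor
const adjusted = (0.5 + boost) * penaltyFactor;                      // ≈ 0.552
```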
@@ -3919,16 +4209,16 @@ var init_literalExtractor = __esm(() => {
  });

  // src/domain/services/literalScorer.ts
- function calculateLiteralMultiplier(matchType, confidence) {
- return LITERAL_SCORING_CONSTANTS.MULTIPLIERS[matchType][confidence];
+ function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
+ return weights.multipliers[matchType][confidence];
  }
- function calculateMaxMultiplier(matches) {
+ function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return 1;
  }
- return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence)));
+ return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence, weights)));
  }
- function calculateLiteralContribution(matches, hasSemanticOrBm25) {
+ function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return {
  multiplier: 1,
@@ -3939,7 +4229,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
  let bestMatch = null;
  let bestMultiplier = 0;
  for (const match of matches) {
- const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
+ const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
  if (mult > bestMultiplier) {
  bestMultiplier = mult;
  bestMatch = match;
@@ -3953,32 +4243,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
  matchCount: matches.length
  };
  }
- function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
+ function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
  if (!matches || matches.length === 0) {
  return baseScore;
  }
- const multiplier = calculateMaxMultiplier(matches);
+ const multiplier = calculateMaxMultiplier(matches, weights);
  if (!hasSemanticOrBm25) {
- return LITERAL_SCORING_CONSTANTS.BASE_SCORE * multiplier;
+ return weights.baseScore * multiplier;
  }
  return baseScore * multiplier;
  }
- var LITERAL_SCORING_CONSTANTS;
+ var DEFAULT_LW;
  var init_literalScorer = __esm(() => {
- LITERAL_SCORING_CONSTANTS = {
- BASE_SCORE: 0.5,
- MULTIPLIERS: {
- definition: { high: 2.5, medium: 2, low: 1.5 },
- reference: { high: 2, medium: 1.5, low: 1.3 },
- import: { high: 1.5, medium: 1.3, low: 1.1 }
- },
- VOCABULARY: {
- BASE_MULTIPLIER: 1.3,
- PER_WORD_BONUS: 0.1,
- MAX_VOCABULARY_BONUS: 0.5,
- MIN_WORDS_FOR_MATCH: 2
- }
- };
+ init_rankingWeights();
+ DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
  });

  // src/domain/services/lexicon.ts
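The literal scorer keeps its arithmetic but now reads its table from the ranking weights (`DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal`) instead of the removed `LITERAL_SCORING_CONSTANTS`. With the default values the boost behaves like this (the match objects below are minimal hypothetical shapes matching the fields the scorer reads):

```js
// definition/high has the largest multiplier in the default table: 2.5.
const matches = [
  { indexedLiteral: { matchType: "definition" }, queryLiteral: { confidence: "high" } },
];

applyLiteralBoost(0.4, matches, true);  // 0.4 * 2.5 = 1.0  (scales the hybrid score)
applyLiteralBoost(0, matches, false);   // 0.5 * 2.5 = 1.25 (seeds from weights.baseScore)
```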
@@ -4933,6 +5211,7 @@ var init_chunkContext = __esm(() => {

  // src/domain/services/index.ts
  var init_services = __esm(() => {
+ init_discriminativeTerms();
  init_keywords();
  init_queryIntent();
  init_queryLiteralParser();
@@ -5756,6 +6035,9 @@ class TypeScriptModule {
  minScore = DEFAULT_MIN_SCORE2,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const tw = rw.typescript;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -5862,17 +6144,19 @@ class TypeScriptModule {
  const chunkTypeBoost = calculateChunkTypeBoost(chunk);
  const exportBoost = calculateExportBoost(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
- const baseScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + VOCAB_WEIGHT * vocabScore;
+ const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > VOCAB_THRESHOLD || phraseMatch.isSignificant) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -5884,6 +6168,10 @@ class TypeScriptModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -5936,15 +6224,17 @@ class TypeScriptModule {
  const chunkTypeBoost = calculateChunkTypeBoost(chunk);
  const exportBoost = calculateExportBoost(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
- const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
- const baseScore = semanticScore > 0 ? SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + VOCAB_WEIGHT * vocabScore : LITERAL_SCORING_CONSTANTS.BASE_SCORE;
- const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
+ const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
+ const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
+ const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -5956,6 +6246,10 @@ class TypeScriptModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -5992,13 +6286,14 @@ class TypeScriptModule {
  return references;
  }
  }
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.6, BM25_WEIGHT = 0.25, VOCAB_WEIGHT = 0.15, VOCAB_THRESHOLD = 0.4, TYPESCRIPT_EXTENSIONS, supportsFile;
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
  var init_typescript = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_parseCode();
  init_storage();
+ init_entities();
  TYPESCRIPT_EXTENSIONS = [
  ".ts",
  ".tsx",
@@ -7111,6 +7406,9 @@ class PythonModule {
  minScore = DEFAULT_MIN_SCORE3,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -7203,17 +7501,19 @@ class PythonModule {
  const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
  const exportBoost = calculateExportBoost2(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -7222,6 +7522,10 @@ class PythonModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -7256,15 +7560,17 @@ class PythonModule {
  const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
  const exportBoost = calculateExportBoost2(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const literalContribution = calculateLiteralContribution(matches, false);
- const baseScore = semanticScore > 0 ? SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score : LITERAL_SCORING_CONSTANTS.BASE_SCORE;
- const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
+ const literalContribution = calculateLiteralContribution(matches, false, lt);
+ const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
+ const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -7273,6 +7579,10 @@ class PythonModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -7285,13 +7595,14 @@ class PythonModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, PYTHON_EXTENSIONS, supportsFile2;
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
  var init_python = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  PYTHON_EXTENSIONS = [".py", ".pyw"];
  supportsFile2 = isPythonFile;
  });
@@ -7643,6 +7954,9 @@ class GoModule {
  minScore = DEFAULT_MIN_SCORE4,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -7735,17 +8049,19 @@ class GoModule {
  const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
  const exportBoost = calculateExportBoost3(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -7754,6 +8070,10 @@ class GoModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -7766,13 +8086,14 @@ class GoModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, GO_EXTENSIONS, supportsFile3;
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
  var init_go = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  GO_EXTENSIONS = [".go"];
  supportsFile3 = isGoFile;
  });
@@ -8203,6 +8524,9 @@ class RustModule {
  minScore = DEFAULT_MIN_SCORE5,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const lw = rw.language;
+ const lt = rw.literal;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -8295,17 +8619,19 @@ class RustModule {
  const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
  const exportBoost = calculateExportBoost4(chunk);
  const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
- const baseScore = SEMANTIC_WEIGHT4 * semanticScore + BM25_WEIGHT4 * bm25Score;
+ const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
- const literalContribution = calculateLiteralContribution(literalMatches, true);
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
+ const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
  const finalScore = boostedScore + additiveBoost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
  processedChunkIds.add(chunk.id);
- if (finalScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
+ if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
  results.push({
  filepath,
  chunk,
- score: finalScore,
+ score: adjustedScore,
  moduleId: this.id,
  context: {
  semanticScore,
@@ -8314,6 +8640,10 @@ class RustModule {
  fileTypeBoost,
  chunkTypeBoost,
  exportBoost,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient,
  literalMultiplier: literalContribution.multiplier,
  literalMatchType: literalContribution.bestMatchType,
  literalConfidence: literalContribution.bestConfidence,
@@ -8326,13 +8656,14 @@ class RustModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, SEMANTIC_WEIGHT4 = 0.7, BM25_WEIGHT4 = 0.3, RUST_EXTENSIONS, supportsFile4;
+ var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
  var init_rust = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
  init_storage();
  init_parsing();
+ init_entities();
  RUST_EXTENSIONS = [".rs"];
  supportsFile4 = isRustFile;
  });
@@ -8462,6 +8793,8 @@ class JsonModule {
  minScore = DEFAULT_MIN_SCORE6,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const jw = rw.json;
  const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
@@ -8515,9 +8848,9 @@ class JsonModule {
  const bm25Score = bm25Scores.get(chunk.id) || 0;
  const literalMatches = literalMatchMap.get(chunk.id) || [];
  const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
- const baseScore = BM25_WEIGHT5 * bm25Score;
- const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
- const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
+ const baseScore = jw.bm25 * bm25Score;
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
+ const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
  const finalScore = boostedScore + literalBase;
  processedChunkIds.add(chunk.id);
  if (finalScore >= minScore || literalMatches.length > 0) {
@@ -8550,7 +8883,7 @@ class JsonModule {
  if (!chunk)
  continue;
  const literalContribution = calculateLiteralContribution(matches, false);
- const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
+ const score = rw.literal.baseScore * literalContribution.multiplier;
  processedChunkIds.add(chunkId);
  results.push({
  filepath,
@@ -8571,11 +8904,12 @@ class JsonModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, BM25_WEIGHT5 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile5;
+ var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
  var init_json = __esm(() => {
  init_services();
  init_config2();
  init_storage();
+ init_entities();
  JSON_EXTENSIONS = [".json"];
  supportsFile5 = isJsonFile;
  });
@@ -8810,6 +9144,8 @@ class MarkdownModule {
  minScore = DEFAULT_MIN_SCORE7,
  filePatterns
  } = options;
+ const rw = mergeRankingWeights(options.rankingWeights);
+ const mw = rw.markdown;
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
  let allFiles;
@@ -8875,15 +9211,18 @@ class MarkdownModule {
  "what",
  "explain"
  ].includes(t))) {
- docBoost = 0.05;
- }
- const headingBoost = calculateHeadingLevelBoost(chunk);
- const hybridScore = SEMANTIC_WEIGHT5 * semanticScore + BM25_WEIGHT6 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
- if (hybridScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
+ docBoost = mw.docIntentBoost;
+ }
+ const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
+ const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
+ const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
+ const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
+ const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
+ if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
  results.push({
  filepath,
  chunk,
- score: hybridScore,
+ score: finalScore,
  moduleId: this.id,
  context: {
  semanticScore,
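The heading boost is no longer flat: it is scaled by phrase coverage through `headingPhraseCoverageMin` (0.25) and `headingPhraseCoverageSpan` (0.75), so a heading that matches none of the query phrase keeps only a quarter of its raw boost. A worked example (`rawHeadingBoost` is a hypothetical value from `calculateHeadingLevelBoost`):

```js
const rawHeadingBoost = 0.08; // hypothetical output of calculateHeadingLevelBoost
const gate = (coverage) => 0.25 + 0.75 * coverage;

rawHeadingBoost * gate(0);   // 0.02: zero phrase coverage keeps 25%
rawHeadingBoost * gate(0.5); // 0.05: half coverage keeps 62.5%
rawHeadingBoost * gate(1);   // 0.08: full coverage keeps the whole boost
```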
@@ -8892,7 +9231,11 @@ class MarkdownModule {
  phraseCoverage: phraseMatch.coverage,
  docBoost,
  headingBoost,
- headingLevel: chunk.metadata?.headingLevel
+ headingLevel: chunk.metadata?.headingLevel,
+ discriminativeCoverage: disc.salientCoverage,
+ discriminativePenaltyFactor: disc.penaltyFactor,
+ discriminativeBoost: disc.boost,
+ matchedSalientTerms: disc.matchedSalient
  }
  });
  }
@@ -8901,11 +9244,12 @@ class MarkdownModule {
  return results.slice(0, topK);
  }
  }
- var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, SEMANTIC_WEIGHT5 = 0.7, BM25_WEIGHT6 = 0.3, MARKDOWN_EXTENSIONS, supportsFile6;
+ var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
  var init_markdown = __esm(() => {
  init_embeddings();
  init_services();
  init_config2();
+ init_entities();
  init_storage();
  MARKDOWN_EXTENSIONS = [".md", ".txt"];
  supportsFile6 = isMarkdownFile;
@@ -11950,7 +12294,9 @@ async function hybridSearch(rootDir, query, options = {}) {
  if (ensureFresh) {
  await ensureIndexFresh(rootDir, { quiet: true });
  }
- console.log(`Searching for: "${query}"`);
+ if (!options.quiet) {
+ console.log(`Searching for: "${query}"`);
+ }
  const config = await loadConfig(rootDir);
  await registerBuiltInModules();
  const globalManifest = await loadGlobalManifest2(rootDir, config);
@@ -12013,10 +12359,18 @@ async function hybridSearch(rootDir, query, options = {}) {
  }
  }
  }
- filteredResults.sort((a, b) => b.score - a.score);
+ const rw = mergeRankingWeights(options.rankingWeights);
+ let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
+ for (const r of ranked) {
+ if (r.context?.exactMatchFusion) {
+ r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
+ }
+ }
+ const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
+ ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
  const topK = options.topK ?? 10;
  return {
- results: filteredResults.slice(0, topK),
+ results: ranked.slice(0, topK),
  exactMatches,
  fusionApplied
  };
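`hybridSearch` now attaches `semanticMatch`/`structuredMatch` scales to every result and sorts by the requested `rankBy` mode instead of the raw fused score alone. A usage sketch, assuming `hybridSearch` is reachable from the package's public API (option names match the code above):

```js
const { results } = await hybridSearch(process.cwd(), "token refresh logic", {
  topK: 5,
  quiet: true,          // suppresses the 'Searching for: ...' banner
  rankBy: "semantic",   // sort by semanticMatch, tie-break on structuredMatch, then score
  rankingWeights: { typescript: { semantic: 0.5 } }, // partial, deep-merged
});

for (const r of results) {
  console.log(r.score, r.semanticMatch, r.structuredMatch);
}
```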
@@ -12107,7 +12461,9 @@ function formatSearchResults2(results) {
  const nameInfo = chunk.name ? ` (${chunk.name})` : "";
  output += `${i + 1}. ${location}${nameInfo}
  `;
- output += ` Score: ${(result.score * 100).toFixed(1)}% | Type: ${chunk.type}`;
+ const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
+ const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
+ output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
  output += ` | via ${formatModuleName(result.moduleId)}`;
  if (chunk.isExported) {
  output += " | exported";
@@ -12205,6 +12561,7 @@ var init_search = __esm(() => {
  init_registry();
  init_indexer();
  init_services();
+ init_entities();
  init_usecases();
  init_filesystem();
  });
@@ -12739,7 +13096,7 @@ import { stat as stat3 } from "fs/promises";
  // package.json
  var package_default = {
  name: "raggrep",
- version: "0.17.0",
+ version: "0.18.0",
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
  type: "module",
  main: "./dist/index.js",
@@ -12767,9 +13124,12 @@ var package_default = {
  prepublishOnly: "bun run build",
  raggrep: "bun run src/app/cli/main.ts",
  test: "bun test",
- typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p scripts/tsconfig.json",
- "bench:embeddings": "bun run scripts/benchmark-embedding-runtimes.ts",
- "bench:retrieval": "bun run scripts/benchmark-retrieval-quality.ts",
+ typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p research/tsconfig.json",
+ "bench:embeddings": "bun run research/bench/benchmark-embedding-runtimes.ts",
+ "bench:retrieval": "bun run research/bench/benchmark-retrieval-quality.ts",
+ "eval:golden": "bun run research/eval/run-golden-queries.ts",
+ "bench:golden-convex": "bun run research/bench/benchmark-raggrep-golden-queries.ts",
+ "bench:golden-hillclimb": "bun run research/bench/benchmark-raggrep-hillclimb.ts",
  dev: "bun run src/app/cli/main.ts"
  },
  keywords: [
@@ -12801,7 +13161,7 @@ var package_default = {
  "@xenova/transformers": "^2.17.0",
  chokidar: "^5.0.0",
  fdir: "^6.5.0",
- glob: "^10.0.0",
+ glob: "^11.0.0",
  minimatch: "^10.1.1",
  typescript: "^5.0.0",
  "web-tree-sitter": "^0.26.3"
@@ -12809,6 +13169,10 @@ var package_default = {
  devDependencies: {
  "@types/bun": "latest",
  "@types/node": "^20.0.0"
+ },
+ overrides: {
+ sharp: "^0.34.5",
+ "global-agent": "^4.1.3"
  }
  };

@@ -12914,6 +13278,14 @@ function parseFlags(args2) {
  console.error("--dir / -C requires a path to the project directory to index or search.");
  process.exit(1);
  }
+ } else if (arg === "--rank-by") {
+ const v = args2[++i];
+ if (v === "structured" || v === "semantic" || v === "combined") {
+ flags.rankBy = v;
+ } else {
+ console.error(`--rank-by must be structured, semantic, or combined (got: ${v})`);
+ process.exit(1);
+ }
  } else if (arg === "--tool") {
  flags.forceTool = true;
  } else if (arg === "--skill") {
@@ -13045,6 +13417,7 @@ Options:
  -s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
  -t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
  -f, --filter <path> Filter by path or glob pattern (can be used multiple times)
+ --rank-by <mode> Order results: structured (default), semantic, or combined (fused score only)
  -T, --timing Show timing breakdown for performance profiling
  -h, --help Show this help message

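Given the flag parsing and help text above, an invocation would look like `raggrep <command> "parse query literals" --rank-by semantic` (the exact subcommand name is not shown in these hunks); `--rank-by combined` restores ordering by the fused score alone, and omitting the flag keeps the new `structured` default.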
@@ -13143,6 +13516,7 @@ Examples:
  minScore: flags.minScore,
  filePatterns,
  pathFilter: flags.pathFilter,
+ rankBy: flags.rankBy,
  ensureFresh: false
  });
  console.log(formatHybridSearchResults2(hybridResults));
@@ -13378,4 +13752,4 @@ Run 'raggrep <command> --help' for more information.
  }
  main();

- //# debugId=6B05E3A822FD1AE664756E2164756E21
+ //# debugId=9CA948E12F18492C64756E2164756E21