raggrep 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -40
- package/dist/app/search/index.d.ts +2 -1
- package/dist/cli/main.js +445 -79
- package/dist/cli/main.js.map +20 -17
- package/dist/domain/entities/index.d.ts +3 -1
- package/dist/domain/entities/rankingWeights.d.ts +84 -0
- package/dist/domain/entities/searchResult.d.ts +28 -1
- package/dist/domain/services/bm25.d.ts +5 -0
- package/dist/domain/services/discriminativeTerms.d.ts +28 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/literalScorer.d.ts +9 -23
- package/dist/domain/services/matchScales.d.ts +19 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +433 -75
- package/dist/index.js.map +20 -17
- package/dist/types.d.ts +1 -1
- package/package.json +7 -4
package/dist/cli/main.js
CHANGED
|
@@ -1171,7 +1171,107 @@ var init_searchResult = __esm(() => {
|
|
|
1171
1171
|
minScore: 0.15,
|
|
1172
1172
|
filePatterns: [],
|
|
1173
1173
|
pathFilter: [],
|
|
1174
|
-
ensureFresh: true
|
|
1174
|
+
ensureFresh: true,
|
|
1175
|
+
rankingWeights: {},
|
|
1176
|
+
quiet: false,
|
|
1177
|
+
rankBy: "structured"
|
|
1178
|
+
};
|
|
1179
|
+
});
|
|
1180
|
+
|
|
1181
|
+
// src/domain/entities/rankingWeights.ts
|
|
1182
|
+
/**
 * Merge a partial literal-boost override onto a complete set of defaults.
 *
 * @param {object} def - Complete literal weights: `baseScore`, `multipliers`
 *   (`definition` / `reference` / `import`) and `vocabulary`.
 * @param {object} [partial] - Optional override; any subset of the same shape.
 * @returns {object} `def` itself when no override is given, otherwise a new
 *   object in which every field comes from `partial` when it carries a value
 *   and from `def` otherwise.
 */
function mergeLiteralWeights(def, partial) {
  if (!partial) {
    return def;
  }
  // Copy `base`, then apply only the override keys that carry a real value.
  // A plain object spread would copy an explicit `undefined`/`null` over the
  // default, which is inconsistent with the `??` fallback used for baseScore
  // and turns the affected weight into NaN once it is used in arithmetic.
  const overlay = (base, override) => {
    const merged = { ...base };
    for (const [key, value] of Object.entries(override ?? {})) {
      if (value !== undefined && value !== null) {
        merged[key] = value;
      }
    }
    return merged;
  };
  const multiplierOverrides = partial.multipliers;
  return {
    baseScore: partial.baseScore ?? def.baseScore,
    multipliers: {
      definition: overlay(def.multipliers.definition, multiplierOverrides?.definition),
      reference: overlay(def.multipliers.reference, multiplierOverrides?.reference),
      import: overlay(def.multipliers.import, multiplierOverrides?.import)
    },
    vocabulary: overlay(def.vocabulary, partial.vocabulary)
  };
}
|
|
1202
|
+
/**
 * Build the effective ranking weights for a search by layering a caller
 * supplied partial override on top of DEFAULT_RANKING_WEIGHTS.
 *
 * @param {object} [partial] - Optional override with any of the sections
 *   `discriminative`, `typescript`, `language`, `markdown`, `json`, `literal`.
 * @returns {object} DEFAULT_RANKING_WEIGHTS itself when no override is given,
 *   otherwise a new object with every section merged field by field. The
 *   `literal` section is delegated to mergeLiteralWeights.
 */
function mergeRankingWeights(partial) {
  if (!partial) {
    return DEFAULT_RANKING_WEIGHTS;
  }
  // Apply only override keys that carry a real value: spreading an explicit
  // `undefined`/`null` over a default would replace a numeric weight with a
  // non-number and poison every score computed from it.
  const overlay = (base, override) => {
    const merged = { ...base };
    for (const [key, value] of Object.entries(override ?? {})) {
      if (value !== undefined && value !== null) {
        merged[key] = value;
      }
    }
    return merged;
  };
  return {
    discriminative: overlay(DEFAULT_RANKING_WEIGHTS.discriminative, partial.discriminative),
    typescript: overlay(DEFAULT_RANKING_WEIGHTS.typescript, partial.typescript),
    language: overlay(DEFAULT_RANKING_WEIGHTS.language, partial.language),
    markdown: overlay(DEFAULT_RANKING_WEIGHTS.markdown, partial.markdown),
    json: overlay(DEFAULT_RANKING_WEIGHTS.json, partial.json),
    literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
  };
}
|
|
1230
|
+
// Module-level bindings for the built-in ranking weights. They stay undefined
// until init_rankingWeights() has run.
var DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, DEFAULT_RANKING_WEIGHTS;
// Initializer for src/domain/entities/rankingWeights.ts. `__esm` is the
// bundler's module wrapper, defined outside this excerpt.
var init_rankingWeights = __esm(() => {
  // Defaults for scoreDiscriminativeTerms:
  //   boostCap     - multiplied by the salient-term coverage to get the additive boost
  //   penaltyMax   - multiplied by the missing coverage and subtracted from 1
  //   penaltyFloor - lower bound applied to the resulting penalty factor
  DEFAULT_DISCRIMINATIVE_WEIGHTS = {
    boostCap: 0.1,
    penaltyMax: 0.16,
    penaltyFloor: 0.72
  };
  // Defaults for the literal scorer.
  DEFAULT_LITERAL_BOOST_WEIGHTS = {
    // Score used in place of the base score when a literal match has no
    // semantic/BM25 signal behind it (see applyLiteralBoost).
    baseScore: 0.5,
    // Looked up as multipliers[matchType][confidence] by calculateLiteralMultiplier.
    multipliers: {
      definition: { high: 2.5, medium: 2, low: 1.5 },
      reference: { high: 2, medium: 1.5, low: 1.3 },
      import: { high: 1.5, medium: 1.3, low: 1.1 }
    },
    // NOTE(review): the consumers of these vocabulary settings are outside
    // this excerpt; confirm their exact meaning against the literal scorer.
    vocabulary: {
      baseMultiplier: 1.3,
      perWordBonus: 0.1,
      maxVocabularyBonus: 0.5,
      minWordsForMatch: 2
    }
  };
  DEFAULT_RANKING_WEIGHTS = {
    discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
    // TypeScript module: base score is
    // semantic * semanticScore + bm25 * bm25Score + vocab * vocabScore.
    // A vocabScore above vocabBypassThreshold admits a chunk even when its
    // adjusted score is below minScore.
    typescript: {
      semantic: 0.43,
      bm25: 0.42,
      vocab: 0.15,
      vocabBypassThreshold: 0.4
    },
    // Python, Go and Rust modules: base score is
    // semantic * semanticScore + bm25 * bm25Score.
    language: {
      semantic: 0.7,
      bm25: 0.3
    },
    // Markdown weights. attachMatchScales reads `bm25`; the other fields are
    // presumably consumed by the markdown module, which is outside this
    // excerpt (TODO confirm).
    markdown: {
      semantic: 0.62,
      bm25: 0.33,
      docIntentBoost: 0.03,
      headingPhraseCoverageMin: 0.25,
      headingPhraseCoverageSpan: 0.75
    },
    // JSON weights; presumably consumed by the JSON module, which is outside
    // this excerpt (TODO confirm).
    json: {
      bm25: 0.4,
      literalBaseWeight: 0.6
    },
    literal: DEFAULT_LITERAL_BOOST_WEIGHTS
  };
});
|
|
1177
1277
|
|
|
@@ -1319,6 +1419,7 @@ var init_lexicon = __esm(() => {
|
|
|
1319
1419
|
// src/domain/entities/index.ts
|
|
1320
1420
|
var init_entities = __esm(() => {
|
|
1321
1421
|
init_searchResult();
|
|
1422
|
+
init_rankingWeights();
|
|
1322
1423
|
init_config();
|
|
1323
1424
|
init_literal();
|
|
1324
1425
|
init_lexicon();
|
|
@@ -1439,6 +1540,9 @@ class BM25Index {
|
|
|
1439
1540
|
return 0;
|
|
1440
1541
|
return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
|
|
1441
1542
|
}
|
|
1543
|
+
getInverseDocumentFrequency(term) {
|
|
1544
|
+
return this.idf(term.toLowerCase());
|
|
1545
|
+
}
|
|
1442
1546
|
score(tokens, queryTerms) {
|
|
1443
1547
|
const docLength = tokens.length;
|
|
1444
1548
|
let score = 0;
|
|
@@ -3265,6 +3369,188 @@ var init_core = __esm(() => {
|
|
|
3265
3369
|
init_symbols();
|
|
3266
3370
|
});
|
|
3267
3371
|
|
|
3372
|
+
// src/domain/services/discriminativeTerms.ts
|
|
3373
|
+
/**
 * Median of an array of numbers that is already sorted ascending.
 *
 * @param {number[]} sorted - Values in ascending order.
 * @returns {number} 0 for an empty array, the middle element for an odd
 *   length, or the mean of the two middle elements for an even length.
 */
function medianSorted(sorted) {
  const count = sorted.length;
  if (count === 0) {
    return 0;
  }
  const remainder = count % 2;
  const upperMiddle = (count - remainder) / 2;
  if (remainder === 1) {
    return sorted[upperMiddle];
  }
  const lowerMiddle = upperMiddle - 1;
  return (sorted[lowerMiddle] + sorted[upperMiddle]) / 2;
}
|
|
3380
|
+
/**
 * Decide whether a salient query term occurs in a chunk.
 *
 * A term hits when it is one of the chunk's tokens, when it is a substring of
 * the haystack text, or when it and some chunk token are prefixes of one
 * another. Prefix matching only applies when both the term and the token are
 * at least PREFIX_MATCH_MIN_LEN characters long.
 *
 * @param {string} term - Query term to look for.
 * @param {string} haystack - Chunk text searched with a substring test.
 * @param {Set<string>} tokenSet - Tokens of the chunk.
 * @returns {boolean} True when the term is considered present.
 */
function salientTermHitsChunk(term, haystack, tokenSet) {
  const exactHit = tokenSet.has(term) || haystack.includes(term);
  if (exactHit) {
    return true;
  }
  const termLongEnough = term.length >= PREFIX_MATCH_MIN_LEN;
  if (!termLongEnough) {
    return false;
  }
  return [...tokenSet].some(
    (token) =>
      token.length >= PREFIX_MATCH_MIN_LEN &&
      (term.startsWith(token) || token.startsWith(term))
  );
}
|
|
3396
|
+
/**
 * Score how well a chunk covers the most discriminative terms of a query.
 *
 * Query terms are tokenized and de-duplicated; terms whose IDF in the BM25
 * index is not positive are ignored. The remaining terms whose IDF is at or
 * above the median IDF are the "salient" terms. Coverage is the IDF-weighted
 * share of salient terms found in the chunk (see salientTermHitsChunk).
 *
 * @param {object} bm25Index - Index exposing getInverseDocumentFrequency(term).
 * @param {string} query - Raw query text.
 * @param {string} chunkText - Chunk content.
 * @param {string} [chunkName] - Optional chunk name, searched alongside the content.
 * @param {object} [weights] - `{ boostCap, penaltyMax, penaltyFloor }`.
 * @returns {{boost: number, penaltyFactor: number, salientTerms: string[],
 *   matchedSalient: string[], missingSalient: string[], salientCoverage: number}}
 *   A neutral result (boost 0, penalty factor 1, coverage 1) when the query
 *   has no terms with a positive IDF.
 */
function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
  const neutralResult = () => ({
    boost: 0,
    penaltyFactor: 1,
    salientTerms: [],
    matchedSalient: [],
    missingSalient: [],
    salientCoverage: 1
  });

  const queryTerms = [...new Set(tokenize(query))];
  const scoredTerms = queryTerms
    .map((term) => ({ term, idf: bm25Index.getInverseDocumentFrequency(term) }))
    .filter((entry) => entry.idf > 0);
  if (scoredTerms.length === 0) {
    return neutralResult();
  }

  // Salient terms are those at or above the median IDF of the indexed terms.
  const ascendingIdf = scoredTerms.map((entry) => entry.idf).sort((lo, hi) => lo - hi);
  const idfCutoff = medianSorted(ascendingIdf);
  const idfByTerm = new Map();
  for (const { term, idf } of scoredTerms) {
    if (idf >= idfCutoff) {
      idfByTerm.set(term, Math.max(idfByTerm.get(term) ?? 0, idf));
    }
  }
  const salientTerms = [...idfByTerm.keys()];
  const totalWeight = [...idfByTerm.values()].reduce((sum, idf) => sum + idf, 0);

  // The substring haystack always includes the name slot; the token set only
  // includes the name when one was supplied.
  const haystack = [chunkName ?? "", chunkText].join("\n").toLowerCase();
  const tokenSource = chunkName ? `${chunkName}\n${chunkText}` : chunkText;
  const tokenSet = new Set(tokenize(tokenSource));

  const matchedSalient = salientTerms.filter(
    (term) => (idfByTerm.get(term) ?? 0) > 0 && salientTermHitsChunk(term, haystack, tokenSet)
  );
  const matchedLookup = new Set(matchedSalient);
  const missingSalient = salientTerms.filter((term) => !matchedLookup.has(term));
  const matchedWeight = matchedSalient.reduce((sum, term) => sum + (idfByTerm.get(term) ?? 0), 0);

  const salientCoverage = totalWeight > 0 ? matchedWeight / totalWeight : 1;
  const rawPenalty = 1 - weights.penaltyMax * (1 - salientCoverage);
  return {
    boost: weights.boostCap * salientCoverage,
    penaltyFactor: rawPenalty < weights.penaltyFloor ? weights.penaltyFloor : rawPenalty,
    salientTerms,
    matchedSalient,
    missingSalient,
    salientCoverage
  };
}
|
|
3466
|
+
// Minimum length that both a query term and a chunk token must have before
// salientTermHitsChunk will treat one being a prefix of the other as a hit.
var PREFIX_MATCH_MIN_LEN = 4;
// Initializer for src/domain/services/discriminativeTerms.ts: makes sure the
// ranking-weight defaults are initialized, since scoreDiscriminativeTerms
// defaults its `weights` argument to DEFAULT_DISCRIMINATIVE_WEIGHTS.
var init_discriminativeTerms = __esm(() => {
  init_rankingWeights();
});
|
|
3470
|
+
|
|
3471
|
+
// src/domain/services/matchScales.ts
|
|
3472
|
+
/**
 * Map a cosine-style score from [-1, 1] onto [0, 1].
 *
 * @param {number} cosine - Input score.
 * @returns {number} `(cosine + 1) / 2`, passed through clamp01.
 */
function semanticPctFromCosine(cosine) {
  const rescaled = (cosine + 1) / 2;
  return clamp01(rescaled);
}
|
|
3475
|
+
/**
 * Clamp a number to the closed interval [0, 1].
 *
 * @param {number} x - Value to clamp.
 * @returns {number} 0 for NaN, +/-Infinity or any non-number; otherwise `x`
 *   limited to [0, 1].
 */
function clamp01(x) {
  // Number.isFinite is false for NaN, both infinities and non-number values.
  if (!Number.isFinite(x)) {
    return 0;
  }
  const atLeastZero = Math.max(0, x);
  return Math.min(1, atLeastZero);
}
|
|
3480
|
+
/**
 * Read a numeric field from a context object.
 *
 * @param {object} ctx - Object to read from.
 * @param {string} key - Property name.
 * @returns {number} The property value when it is a finite number, else 0.
 */
function num(ctx, key) {
  const candidate = ctx[key];
  if (typeof candidate !== "number") {
    return 0;
  }
  return Number.isFinite(candidate) ? candidate : 0;
}
|
|
3484
|
+
/**
 * Sum the four additive boost components stored on a result context.
 *
 * @param {object} ctx - Result context; fields that are missing or not finite
 *   numbers count as 0 (via num).
 * @returns {number} pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost.
 */
function additiveStructuredBoost(ctx) {
  const boostKeys = ["pathBoost", "fileTypeBoost", "chunkTypeBoost", "exportBoost"];
  // No initial value: the fold starts from the first component, which keeps
  // the same left-to-right addition order as writing the sum out by hand.
  return boostKeys
    .map((key) => num(ctx, key))
    .reduce((total, value) => total + value);
}
|
|
3487
|
+
/**
 * Return a copy of a search result annotated with two [0, 1] scales:
 * `semanticMatch` and `structuredMatch`.
 *
 * The formulas depend on `result.moduleId`:
 *  - "language/typescript": semantic from `semanticScore`; structured from a
 *    weighted BM25/vocab blend plus capped additive and phrase boosts.
 *  - other "language/*": semantic from `semanticScore`; structured from BM25
 *    plus capped additive and phrase boosts.
 *  - "docs/markdown": semantic from `semanticScore`; structured from weighted
 *    BM25 plus doc/heading boosts and a capped phrase boost.
 *  - "core": no semantic part; structured blends BM25 and symbol scores.
 *  - "data/json": no semantic part; structured is BM25 when above 0.02,
 *    otherwise derived from the literal multiplier.
 *  - anything else: no semantic part; structured is the clamped result score.
 *
 * @param {object} result - Search result with `moduleId`, `score` and an
 *   optional `context` of numeric signals.
 * @param {object} rw - Ranking weights; `rw.typescript` and `rw.markdown`
 *   are read for those modules only.
 * @returns {object} Shallow copy of `result` with `semanticMatch` and
 *   `structuredMatch` added.
 */
function attachMatchScales(result, rw) {
  const ctx = result.context ?? {};
  // A missing or non-string moduleId must land in the generic fallback branch
  // rather than throwing on `.startsWith`.
  const mid = typeof result.moduleId === "string" ? result.moduleId : "";
  let semanticMatch = 0;
  let structuredMatch = 0;
  if (mid === "language/typescript") {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    const vocab = num(ctx, "vocabScore");
    const phraseCov = num(ctx, "phraseCoverage");
    const tw = rw.typescript;
    semanticMatch = semanticPctFromCosine(cos);
    // The tiny epsilon keeps the division defined when both weights are 0.
    const denom = tw.bm25 + tw.vocab + 0.000000001;
    const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
    structuredMatch = clamp01(
      lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25)
    );
  } else if (mid.startsWith("language/")) {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    semanticMatch = semanticPctFromCosine(cos);
    structuredMatch = clamp01(
      bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2)
    );
  } else if (mid === "docs/markdown") {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    const docBoost = num(ctx, "docBoost");
    const headingBoost = num(ctx, "headingBoost");
    const phraseCov = num(ctx, "phraseCoverage");
    const mw = rw.markdown;
    semanticMatch = semanticPctFromCosine(cos);
    structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
  } else if (mid === "core") {
    semanticMatch = 0;
    const nBm = num(ctx, "bm25Score");
    const sym = num(ctx, "symbolScore");
    structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
  } else if (mid === "data/json") {
    semanticMatch = 0;
    const bm25 = num(ctx, "bm25Score");
    const litM = num(ctx, "literalMultiplier");
    structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
  } else {
    semanticMatch = 0;
    structuredMatch = clamp01(result.score);
  }
  return { ...result, semanticMatch, structuredMatch };
}
|
|
3532
|
+
/**
 * Comparator that orders search results best-first for a given ranking mode.
 *
 * @param {object} a - First result (`score`, optional `semanticMatch` / `structuredMatch`).
 * @param {object} b - Second result.
 * @param {string} rankBy - "combined" compares `score` only; "semantic"
 *   compares semanticMatch, then structuredMatch, then score; any other value
 *   compares structuredMatch, then semanticMatch, then score.
 * @returns {number} Positive when `b` should come before `a`, negative when
 *   `a` should come first. Match scales that differ by at most 1e-9 are
 *   treated as tied and the next criterion decides.
 */
function compareSearchResultsByRankBy(a, b, rankBy) {
  const scoreGap = b.score - a.score;
  if (rankBy === "combined") {
    return scoreGap;
  }
  const semanticGap = (b.semanticMatch ?? 0) - (a.semanticMatch ?? 0);
  const structuredGap = (b.structuredMatch ?? 0) - (a.structuredMatch ?? 0);
  const gapsByPriority = rankBy === "semantic"
    ? [semanticGap, structuredGap]
    : [structuredGap, semanticGap];
  for (const gap of gapsByPriority) {
    if (Math.abs(gap) > 0.000000001) {
      return gap;
    }
  }
  return scoreGap;
}
|
|
3553
|
+
|
|
3268
3554
|
// src/domain/services/keywords.ts
|
|
3269
3555
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
3270
3556
|
const keywords = new Set;
|
|
@@ -3923,16 +4209,16 @@ var init_literalExtractor = __esm(() => {
|
|
|
3923
4209
|
});
|
|
3924
4210
|
|
|
3925
4211
|
// src/domain/services/literalScorer.ts
|
|
3926
|
-
function calculateLiteralMultiplier(matchType, confidence) {
|
|
3927
|
-
return
|
|
4212
|
+
/**
 * Look up the score multiplier for a literal match.
 *
 * @param {string} matchType - Key into `weights.multipliers`
 *   (the defaults define "definition", "reference" and "import").
 * @param {string} confidence - Key within that group
 *   (the defaults define "high", "medium" and "low").
 * @param {object} [weights] - Literal weights; defaults to DEFAULT_LW.
 * @returns {number} The configured multiplier.
 */
function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
  const { multipliers } = weights;
  const byConfidence = multipliers[matchType];
  return byConfidence[confidence];
}
|
|
3929
|
-
function calculateMaxMultiplier(matches) {
|
|
4215
|
+
/**
 * Highest literal multiplier among a set of literal matches.
 *
 * @param {Array<object>} matches - Literal matches; each exposes
 *   `indexedLiteral.matchType` and `queryLiteral.confidence`.
 * @param {object} [weights] - Literal weights; defaults to DEFAULT_LW.
 * @returns {number} 1 when there are no matches, otherwise the largest
 *   multiplier returned by calculateLiteralMultiplier.
 */
function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
  const nothingMatched = !matches || matches.length === 0;
  if (nothingMatched) {
    return 1;
  }
  const multipliers = matches.map(({ indexedLiteral, queryLiteral }) =>
    calculateLiteralMultiplier(indexedLiteral.matchType, queryLiteral.confidence, weights)
  );
  return Math.max(...multipliers);
}
|
|
3935
|
-
function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
4221
|
+
function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3936
4222
|
if (!matches || matches.length === 0) {
|
|
3937
4223
|
return {
|
|
3938
4224
|
multiplier: 1,
|
|
@@ -3943,7 +4229,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3943
4229
|
let bestMatch = null;
|
|
3944
4230
|
let bestMultiplier = 0;
|
|
3945
4231
|
for (const match of matches) {
|
|
3946
|
-
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
|
|
4232
|
+
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
|
|
3947
4233
|
if (mult > bestMultiplier) {
|
|
3948
4234
|
bestMultiplier = mult;
|
|
3949
4235
|
bestMatch = match;
|
|
@@ -3957,32 +4243,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3957
4243
|
matchCount: matches.length
|
|
3958
4244
|
};
|
|
3959
4245
|
}
|
|
3960
|
-
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
|
|
4246
|
+
/**
 * Apply the literal-match multiplier to a score.
 *
 * @param {number} baseScore - Score before literal boosting.
 * @param {Array<object>} matches - Literal matches for the chunk.
 * @param {boolean} hasSemanticOrBm25 - Whether `baseScore` is backed by a
 *   semantic/BM25 signal. When false, `weights.baseScore` is boosted instead.
 * @param {object} [weights] - Literal weights; defaults to DEFAULT_LW.
 * @returns {number} `baseScore` unchanged when there are no matches,
 *   otherwise the chosen score times the highest literal multiplier.
 */
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
  const nothingMatched = !matches || matches.length === 0;
  if (nothingMatched) {
    return baseScore;
  }
  const multiplier = calculateMaxMultiplier(matches, weights);
  const scoreToBoost = hasSemanticOrBm25 ? baseScore : weights.baseScore;
  return scoreToBoost * multiplier;
}
|
|
3970
|
-
var
|
|
4256
|
+
// Default `weights` argument of the literal scorer functions
// (calculateLiteralMultiplier, calculateMaxMultiplier,
// calculateLiteralContribution, applyLiteralBoost). It stays undefined until
// init_literalScorer() has run.
var DEFAULT_LW;
// Initializer for src/domain/services/literalScorer.ts: initializes the
// ranking-weight defaults first, then aliases their `literal` section.
var init_literalScorer = __esm(() => {
  init_rankingWeights();
  DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
});
|
|
3987
4261
|
|
|
3988
4262
|
// src/domain/services/lexicon.ts
|
|
@@ -4937,6 +5211,7 @@ var init_chunkContext = __esm(() => {
|
|
|
4937
5211
|
|
|
4938
5212
|
// src/domain/services/index.ts
|
|
4939
5213
|
var init_services = __esm(() => {
|
|
5214
|
+
init_discriminativeTerms();
|
|
4940
5215
|
init_keywords();
|
|
4941
5216
|
init_queryIntent();
|
|
4942
5217
|
init_queryLiteralParser();
|
|
@@ -5760,6 +6035,9 @@ class TypeScriptModule {
|
|
|
5760
6035
|
minScore = DEFAULT_MIN_SCORE2,
|
|
5761
6036
|
filePatterns
|
|
5762
6037
|
} = options;
|
|
6038
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
6039
|
+
const tw = rw.typescript;
|
|
6040
|
+
const lt = rw.literal;
|
|
5763
6041
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
5764
6042
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
5765
6043
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -5866,17 +6144,19 @@ class TypeScriptModule {
|
|
|
5866
6144
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5867
6145
|
const exportBoost = calculateExportBoost(chunk);
|
|
5868
6146
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5869
|
-
const baseScore =
|
|
6147
|
+
const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
|
|
5870
6148
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
5871
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
5872
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
6149
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
6150
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
5873
6151
|
const finalScore = boostedScore + additiveBoost;
|
|
6152
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6153
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5874
6154
|
processedChunkIds.add(chunk.id);
|
|
5875
|
-
if (
|
|
6155
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
|
|
5876
6156
|
results.push({
|
|
5877
6157
|
filepath,
|
|
5878
6158
|
chunk,
|
|
5879
|
-
score:
|
|
6159
|
+
score: adjustedScore,
|
|
5880
6160
|
moduleId: this.id,
|
|
5881
6161
|
context: {
|
|
5882
6162
|
semanticScore,
|
|
@@ -5888,6 +6168,10 @@ class TypeScriptModule {
|
|
|
5888
6168
|
fileTypeBoost,
|
|
5889
6169
|
chunkTypeBoost,
|
|
5890
6170
|
exportBoost,
|
|
6171
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6172
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6173
|
+
discriminativeBoost: disc.boost,
|
|
6174
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5891
6175
|
literalMultiplier: literalContribution.multiplier,
|
|
5892
6176
|
literalMatchType: literalContribution.bestMatchType,
|
|
5893
6177
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5940,15 +6224,17 @@ class TypeScriptModule {
|
|
|
5940
6224
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5941
6225
|
const exportBoost = calculateExportBoost(chunk);
|
|
5942
6226
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5943
|
-
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
|
|
5944
|
-
const baseScore = semanticScore > 0 ?
|
|
5945
|
-
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
|
|
6227
|
+
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
|
|
6228
|
+
const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
|
|
6229
|
+
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
|
|
5946
6230
|
const finalScore = boostedScore + additiveBoost;
|
|
6231
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6232
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5947
6233
|
processedChunkIds.add(chunkId);
|
|
5948
6234
|
results.push({
|
|
5949
6235
|
filepath,
|
|
5950
6236
|
chunk,
|
|
5951
|
-
score:
|
|
6237
|
+
score: adjustedScore,
|
|
5952
6238
|
moduleId: this.id,
|
|
5953
6239
|
context: {
|
|
5954
6240
|
semanticScore,
|
|
@@ -5960,6 +6246,10 @@ class TypeScriptModule {
|
|
|
5960
6246
|
fileTypeBoost,
|
|
5961
6247
|
chunkTypeBoost,
|
|
5962
6248
|
exportBoost,
|
|
6249
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6250
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6251
|
+
discriminativeBoost: disc.boost,
|
|
6252
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5963
6253
|
literalMultiplier: literalContribution.multiplier,
|
|
5964
6254
|
literalMatchType: literalContribution.bestMatchType,
|
|
5965
6255
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5996,13 +6286,14 @@ class TypeScriptModule {
|
|
|
5996
6286
|
return references;
|
|
5997
6287
|
}
|
|
5998
6288
|
}
|
|
5999
|
-
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10,
|
|
6289
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
6000
6290
|
var init_typescript = __esm(() => {
|
|
6001
6291
|
init_embeddings();
|
|
6002
6292
|
init_services();
|
|
6003
6293
|
init_config2();
|
|
6004
6294
|
init_parseCode();
|
|
6005
6295
|
init_storage();
|
|
6296
|
+
init_entities();
|
|
6006
6297
|
TYPESCRIPT_EXTENSIONS = [
|
|
6007
6298
|
".ts",
|
|
6008
6299
|
".tsx",
|
|
@@ -7115,6 +7406,9 @@ class PythonModule {
|
|
|
7115
7406
|
minScore = DEFAULT_MIN_SCORE3,
|
|
7116
7407
|
filePatterns
|
|
7117
7408
|
} = options;
|
|
7409
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7410
|
+
const lw = rw.language;
|
|
7411
|
+
const lt = rw.literal;
|
|
7118
7412
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7119
7413
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7120
7414
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7207,17 +7501,19 @@ class PythonModule {
|
|
|
7207
7501
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
7208
7502
|
const exportBoost = calculateExportBoost2(chunk);
|
|
7209
7503
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7210
|
-
const baseScore =
|
|
7504
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7211
7505
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7212
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7213
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7506
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7507
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7214
7508
|
const finalScore = boostedScore + additiveBoost;
|
|
7509
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7510
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7215
7511
|
processedChunkIds.add(chunk.id);
|
|
7216
|
-
if (
|
|
7512
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7217
7513
|
results.push({
|
|
7218
7514
|
filepath,
|
|
7219
7515
|
chunk,
|
|
7220
|
-
score:
|
|
7516
|
+
score: adjustedScore,
|
|
7221
7517
|
moduleId: this.id,
|
|
7222
7518
|
context: {
|
|
7223
7519
|
semanticScore,
|
|
@@ -7226,6 +7522,10 @@ class PythonModule {
|
|
|
7226
7522
|
fileTypeBoost,
|
|
7227
7523
|
chunkTypeBoost,
|
|
7228
7524
|
exportBoost,
|
|
7525
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7526
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7527
|
+
discriminativeBoost: disc.boost,
|
|
7528
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7229
7529
|
literalMultiplier: literalContribution.multiplier,
|
|
7230
7530
|
literalMatchType: literalContribution.bestMatchType,
|
|
7231
7531
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7260,15 +7560,17 @@ class PythonModule {
|
|
|
7260
7560
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
7261
7561
|
const exportBoost = calculateExportBoost2(chunk);
|
|
7262
7562
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7263
|
-
const literalContribution = calculateLiteralContribution(matches, false);
|
|
7264
|
-
const baseScore = semanticScore > 0 ?
|
|
7265
|
-
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
|
|
7563
|
+
const literalContribution = calculateLiteralContribution(matches, false, lt);
|
|
7564
|
+
const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
|
|
7565
|
+
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
|
|
7266
7566
|
const finalScore = boostedScore + additiveBoost;
|
|
7567
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7568
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7267
7569
|
processedChunkIds.add(chunkId);
|
|
7268
7570
|
results.push({
|
|
7269
7571
|
filepath,
|
|
7270
7572
|
chunk,
|
|
7271
|
-
score:
|
|
7573
|
+
score: adjustedScore,
|
|
7272
7574
|
moduleId: this.id,
|
|
7273
7575
|
context: {
|
|
7274
7576
|
semanticScore,
|
|
@@ -7277,6 +7579,10 @@ class PythonModule {
|
|
|
7277
7579
|
fileTypeBoost,
|
|
7278
7580
|
chunkTypeBoost,
|
|
7279
7581
|
exportBoost,
|
|
7582
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7583
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7584
|
+
discriminativeBoost: disc.boost,
|
|
7585
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7280
7586
|
literalMultiplier: literalContribution.multiplier,
|
|
7281
7587
|
literalMatchType: literalContribution.bestMatchType,
|
|
7282
7588
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7289,13 +7595,14 @@ class PythonModule {
|
|
|
7289
7595
|
return results.slice(0, topK);
|
|
7290
7596
|
}
|
|
7291
7597
|
}
|
|
7292
|
-
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10,
|
|
7598
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
|
|
7293
7599
|
var init_python = __esm(() => {
|
|
7294
7600
|
init_embeddings();
|
|
7295
7601
|
init_services();
|
|
7296
7602
|
init_config2();
|
|
7297
7603
|
init_storage();
|
|
7298
7604
|
init_parsing();
|
|
7605
|
+
init_entities();
|
|
7299
7606
|
PYTHON_EXTENSIONS = [".py", ".pyw"];
|
|
7300
7607
|
supportsFile2 = isPythonFile;
|
|
7301
7608
|
});
|
|
@@ -7647,6 +7954,9 @@ class GoModule {
|
|
|
7647
7954
|
minScore = DEFAULT_MIN_SCORE4,
|
|
7648
7955
|
filePatterns
|
|
7649
7956
|
} = options;
|
|
7957
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7958
|
+
const lw = rw.language;
|
|
7959
|
+
const lt = rw.literal;
|
|
7650
7960
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7651
7961
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7652
7962
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7739,17 +8049,19 @@ class GoModule {
|
|
|
7739
8049
|
const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
|
|
7740
8050
|
const exportBoost = calculateExportBoost3(chunk);
|
|
7741
8051
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7742
|
-
const baseScore =
|
|
8052
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7743
8053
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7744
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7745
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
8054
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
8055
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7746
8056
|
const finalScore = boostedScore + additiveBoost;
|
|
8057
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8058
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7747
8059
|
processedChunkIds.add(chunk.id);
|
|
7748
|
-
if (
|
|
8060
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7749
8061
|
results.push({
|
|
7750
8062
|
filepath,
|
|
7751
8063
|
chunk,
|
|
7752
|
-
score:
|
|
8064
|
+
score: adjustedScore,
|
|
7753
8065
|
moduleId: this.id,
|
|
7754
8066
|
context: {
|
|
7755
8067
|
semanticScore,
|
|
@@ -7758,6 +8070,10 @@ class GoModule {
|
|
|
7758
8070
|
fileTypeBoost,
|
|
7759
8071
|
chunkTypeBoost,
|
|
7760
8072
|
exportBoost,
|
|
8073
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8074
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8075
|
+
discriminativeBoost: disc.boost,
|
|
8076
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7761
8077
|
literalMultiplier: literalContribution.multiplier,
|
|
7762
8078
|
literalMatchType: literalContribution.bestMatchType,
|
|
7763
8079
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7770,13 +8086,14 @@ class GoModule {
|
|
|
7770
8086
|
return results.slice(0, topK);
|
|
7771
8087
|
}
|
|
7772
8088
|
}
|
|
7773
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
8089
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
|
|
7774
8090
|
var init_go = __esm(() => {
|
|
7775
8091
|
init_embeddings();
|
|
7776
8092
|
init_services();
|
|
7777
8093
|
init_config2();
|
|
7778
8094
|
init_storage();
|
|
7779
8095
|
init_parsing();
|
|
8096
|
+
init_entities();
|
|
7780
8097
|
GO_EXTENSIONS = [".go"];
|
|
7781
8098
|
supportsFile3 = isGoFile;
|
|
7782
8099
|
});
|
|
@@ -8207,6 +8524,9 @@ class RustModule {
|
|
|
8207
8524
|
minScore = DEFAULT_MIN_SCORE5,
|
|
8208
8525
|
filePatterns
|
|
8209
8526
|
} = options;
|
|
8527
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8528
|
+
const lw = rw.language;
|
|
8529
|
+
const lt = rw.literal;
|
|
8210
8530
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
8211
8531
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8212
8532
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -8299,17 +8619,19 @@ class RustModule {
|
|
|
8299
8619
|
const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
|
|
8300
8620
|
const exportBoost = calculateExportBoost4(chunk);
|
|
8301
8621
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
8302
|
-
const baseScore =
|
|
8622
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
8303
8623
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
8304
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
8305
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
8624
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
8625
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
8306
8626
|
const finalScore = boostedScore + additiveBoost;
|
|
8627
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8628
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
8307
8629
|
processedChunkIds.add(chunk.id);
|
|
8308
|
-
if (
|
|
8630
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
8309
8631
|
results.push({
|
|
8310
8632
|
filepath,
|
|
8311
8633
|
chunk,
|
|
8312
|
-
score:
|
|
8634
|
+
score: adjustedScore,
|
|
8313
8635
|
moduleId: this.id,
|
|
8314
8636
|
context: {
|
|
8315
8637
|
semanticScore,
|
|
@@ -8318,6 +8640,10 @@ class RustModule {
|
|
|
8318
8640
|
fileTypeBoost,
|
|
8319
8641
|
chunkTypeBoost,
|
|
8320
8642
|
exportBoost,
|
|
8643
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8644
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8645
|
+
discriminativeBoost: disc.boost,
|
|
8646
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
8321
8647
|
literalMultiplier: literalContribution.multiplier,
|
|
8322
8648
|
literalMatchType: literalContribution.bestMatchType,
|
|
8323
8649
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -8330,13 +8656,14 @@ class RustModule {
|
|
|
8330
8656
|
return results.slice(0, topK);
|
|
8331
8657
|
}
|
|
8332
8658
|
}
|
|
8333
|
-
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10,
|
|
8659
|
+
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
|
|
8334
8660
|
var init_rust = __esm(() => {
|
|
8335
8661
|
init_embeddings();
|
|
8336
8662
|
init_services();
|
|
8337
8663
|
init_config2();
|
|
8338
8664
|
init_storage();
|
|
8339
8665
|
init_parsing();
|
|
8666
|
+
init_entities();
|
|
8340
8667
|
RUST_EXTENSIONS = [".rs"];
|
|
8341
8668
|
supportsFile4 = isRustFile;
|
|
8342
8669
|
});
|
|
@@ -8466,6 +8793,8 @@ class JsonModule {
|
|
|
8466
8793
|
minScore = DEFAULT_MIN_SCORE6,
|
|
8467
8794
|
filePatterns
|
|
8468
8795
|
} = options;
|
|
8796
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8797
|
+
const jw = rw.json;
|
|
8469
8798
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
8470
8799
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8471
8800
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -8519,9 +8848,9 @@ class JsonModule {
|
|
|
8519
8848
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
8520
8849
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
8521
8850
|
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
8522
|
-
const baseScore =
|
|
8523
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
8524
|
-
const literalBase = literalMatches.length > 0 && bm25Score === 0 ?
|
|
8851
|
+
const baseScore = jw.bm25 * bm25Score;
|
|
8852
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
|
|
8853
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
|
|
8525
8854
|
const finalScore = boostedScore + literalBase;
|
|
8526
8855
|
processedChunkIds.add(chunk.id);
|
|
8527
8856
|
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
@@ -8554,7 +8883,7 @@ class JsonModule {
|
|
|
8554
8883
|
if (!chunk)
|
|
8555
8884
|
continue;
|
|
8556
8885
|
const literalContribution = calculateLiteralContribution(matches, false);
|
|
8557
|
-
const score =
|
|
8886
|
+
const score = rw.literal.baseScore * literalContribution.multiplier;
|
|
8558
8887
|
processedChunkIds.add(chunkId);
|
|
8559
8888
|
results.push({
|
|
8560
8889
|
filepath,
|
|
@@ -8575,11 +8904,12 @@ class JsonModule {
|
|
|
8575
8904
|
return results.slice(0, topK);
|
|
8576
8905
|
}
|
|
8577
8906
|
}
|
|
8578
|
-
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10,
|
|
8907
|
+
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
|
|
8579
8908
|
var init_json = __esm(() => {
|
|
8580
8909
|
init_services();
|
|
8581
8910
|
init_config2();
|
|
8582
8911
|
init_storage();
|
|
8912
|
+
init_entities();
|
|
8583
8913
|
JSON_EXTENSIONS = [".json"];
|
|
8584
8914
|
supportsFile5 = isJsonFile;
|
|
8585
8915
|
});
|
|
@@ -8814,6 +9144,8 @@ class MarkdownModule {
|
|
|
8814
9144
|
minScore = DEFAULT_MIN_SCORE7,
|
|
8815
9145
|
filePatterns
|
|
8816
9146
|
} = options;
|
|
9147
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
9148
|
+
const mw = rw.markdown;
|
|
8817
9149
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8818
9150
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
8819
9151
|
let allFiles;
|
|
@@ -8879,15 +9211,18 @@ class MarkdownModule {
|
|
|
8879
9211
|
"what",
|
|
8880
9212
|
"explain"
|
|
8881
9213
|
].includes(t))) {
|
|
8882
|
-
docBoost =
|
|
8883
|
-
}
|
|
8884
|
-
const
|
|
8885
|
-
const
|
|
8886
|
-
|
|
9214
|
+
docBoost = mw.docIntentBoost;
|
|
9215
|
+
}
|
|
9216
|
+
const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
|
|
9217
|
+
const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
|
|
9218
|
+
const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
|
|
9219
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
9220
|
+
const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
|
|
9221
|
+
if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
|
|
8887
9222
|
results.push({
|
|
8888
9223
|
filepath,
|
|
8889
9224
|
chunk,
|
|
8890
|
-
score:
|
|
9225
|
+
score: finalScore,
|
|
8891
9226
|
moduleId: this.id,
|
|
8892
9227
|
context: {
|
|
8893
9228
|
semanticScore,
|
|
@@ -8896,7 +9231,11 @@ class MarkdownModule {
|
|
|
8896
9231
|
phraseCoverage: phraseMatch.coverage,
|
|
8897
9232
|
docBoost,
|
|
8898
9233
|
headingBoost,
|
|
8899
|
-
headingLevel: chunk.metadata?.headingLevel
|
|
9234
|
+
headingLevel: chunk.metadata?.headingLevel,
|
|
9235
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
9236
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
9237
|
+
discriminativeBoost: disc.boost,
|
|
9238
|
+
matchedSalientTerms: disc.matchedSalient
|
|
8900
9239
|
}
|
|
8901
9240
|
});
|
|
8902
9241
|
}
|
|
@@ -8905,11 +9244,12 @@ class MarkdownModule {
|
|
|
8905
9244
|
return results.slice(0, topK);
|
|
8906
9245
|
}
|
|
8907
9246
|
}
|
|
8908
|
-
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10,
|
|
9247
|
+
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
|
|
8909
9248
|
var init_markdown = __esm(() => {
|
|
8910
9249
|
init_embeddings();
|
|
8911
9250
|
init_services();
|
|
8912
9251
|
init_config2();
|
|
9252
|
+
init_entities();
|
|
8913
9253
|
init_storage();
|
|
8914
9254
|
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
8915
9255
|
supportsFile6 = isMarkdownFile;
|
|
@@ -11954,7 +12294,9 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11954
12294
|
if (ensureFresh) {
|
|
11955
12295
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
11956
12296
|
}
|
|
11957
|
-
|
|
12297
|
+
if (!options.quiet) {
|
|
12298
|
+
console.log(`Searching for: "${query}"`);
|
|
12299
|
+
}
|
|
11958
12300
|
const config = await loadConfig(rootDir);
|
|
11959
12301
|
await registerBuiltInModules();
|
|
11960
12302
|
const globalManifest = await loadGlobalManifest2(rootDir, config);
|
|
@@ -12017,10 +12359,18 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
12017
12359
|
}
|
|
12018
12360
|
}
|
|
12019
12361
|
}
|
|
12020
|
-
|
|
12362
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
12363
|
+
let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
|
|
12364
|
+
for (const r of ranked) {
|
|
12365
|
+
if (r.context?.exactMatchFusion) {
|
|
12366
|
+
r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
|
|
12367
|
+
}
|
|
12368
|
+
}
|
|
12369
|
+
const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
|
|
12370
|
+
ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
|
|
12021
12371
|
const topK = options.topK ?? 10;
|
|
12022
12372
|
return {
|
|
12023
|
-
results:
|
|
12373
|
+
results: ranked.slice(0, topK),
|
|
12024
12374
|
exactMatches,
|
|
12025
12375
|
fusionApplied
|
|
12026
12376
|
};
|
|
@@ -12111,7 +12461,9 @@ function formatSearchResults2(results) {
|
|
|
12111
12461
|
const nameInfo = chunk.name ? ` (${chunk.name})` : "";
|
|
12112
12462
|
output += `${i + 1}. ${location}${nameInfo}
|
|
12113
12463
|
`;
|
|
12114
|
-
|
|
12464
|
+
const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
|
|
12465
|
+
const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
|
|
12466
|
+
output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
|
|
12115
12467
|
output += ` | via ${formatModuleName(result.moduleId)}`;
|
|
12116
12468
|
if (chunk.isExported) {
|
|
12117
12469
|
output += " | exported";
|
|
@@ -12209,6 +12561,7 @@ var init_search = __esm(() => {
|
|
|
12209
12561
|
init_registry();
|
|
12210
12562
|
init_indexer();
|
|
12211
12563
|
init_services();
|
|
12564
|
+
init_entities();
|
|
12212
12565
|
init_usecases();
|
|
12213
12566
|
init_filesystem();
|
|
12214
12567
|
});
|
|
@@ -12743,7 +13096,7 @@ import { stat as stat3 } from "fs/promises";
|
|
|
12743
13096
|
// package.json
|
|
12744
13097
|
var package_default = {
|
|
12745
13098
|
name: "raggrep",
|
|
12746
|
-
version: "0.
|
|
13099
|
+
version: "0.18.0",
|
|
12747
13100
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
12748
13101
|
type: "module",
|
|
12749
13102
|
main: "./dist/index.js",
|
|
@@ -12771,9 +13124,12 @@ var package_default = {
|
|
|
12771
13124
|
prepublishOnly: "bun run build",
|
|
12772
13125
|
raggrep: "bun run src/app/cli/main.ts",
|
|
12773
13126
|
test: "bun test",
|
|
12774
|
-
typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p
|
|
12775
|
-
"bench:embeddings": "bun run
|
|
12776
|
-
"bench:retrieval": "bun run
|
|
13127
|
+
typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p research/tsconfig.json",
|
|
13128
|
+
"bench:embeddings": "bun run research/bench/benchmark-embedding-runtimes.ts",
|
|
13129
|
+
"bench:retrieval": "bun run research/bench/benchmark-retrieval-quality.ts",
|
|
13130
|
+
"eval:golden": "bun run research/eval/run-golden-queries.ts",
|
|
13131
|
+
"bench:golden-convex": "bun run research/bench/benchmark-raggrep-golden-queries.ts",
|
|
13132
|
+
"bench:golden-hillclimb": "bun run research/bench/benchmark-raggrep-hillclimb.ts",
|
|
12777
13133
|
dev: "bun run src/app/cli/main.ts"
|
|
12778
13134
|
},
|
|
12779
13135
|
keywords: [
|
|
@@ -12922,6 +13278,14 @@ function parseFlags(args2) {
|
|
|
12922
13278
|
console.error("--dir / -C requires a path to the project directory to index or search.");
|
|
12923
13279
|
process.exit(1);
|
|
12924
13280
|
}
|
|
13281
|
+
} else if (arg === "--rank-by") {
|
|
13282
|
+
const v = args2[++i];
|
|
13283
|
+
if (v === "structured" || v === "semantic" || v === "combined") {
|
|
13284
|
+
flags.rankBy = v;
|
|
13285
|
+
} else {
|
|
13286
|
+
console.error(`--rank-by must be structured, semantic, or combined (got: ${v})`);
|
|
13287
|
+
process.exit(1);
|
|
13288
|
+
}
|
|
12925
13289
|
} else if (arg === "--tool") {
|
|
12926
13290
|
flags.forceTool = true;
|
|
12927
13291
|
} else if (arg === "--skill") {
|
|
@@ -13053,6 +13417,7 @@ Options:
|
|
|
13053
13417
|
-s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
|
|
13054
13418
|
-t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
|
|
13055
13419
|
-f, --filter <path> Filter by path or glob pattern (can be used multiple times)
|
|
13420
|
+
--rank-by <mode> Order results: structured (default), semantic, or combined (fused score only)
|
|
13056
13421
|
-T, --timing Show timing breakdown for performance profiling
|
|
13057
13422
|
-h, --help Show this help message
|
|
13058
13423
|
|
|
@@ -13151,6 +13516,7 @@ Examples:
|
|
|
13151
13516
|
minScore: flags.minScore,
|
|
13152
13517
|
filePatterns,
|
|
13153
13518
|
pathFilter: flags.pathFilter,
|
|
13519
|
+
rankBy: flags.rankBy,
|
|
13154
13520
|
ensureFresh: false
|
|
13155
13521
|
});
|
|
13156
13522
|
console.log(formatHybridSearchResults2(hybridResults));
|
|
@@ -13386,4 +13752,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
13386
13752
|
}
|
|
13387
13753
|
main();
|
|
13388
13754
|
|
|
13389
|
-
//# debugId=
|
|
13755
|
+
//# debugId=9CA948E12F18492C64756E2164756E21
|