raggrep 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -40
- package/dist/app/search/index.d.ts +2 -1
- package/dist/cli/main.js +471 -97
- package/dist/cli/main.js.map +24 -21
- package/dist/domain/entities/index.d.ts +3 -1
- package/dist/domain/entities/rankingWeights.d.ts +84 -0
- package/dist/domain/entities/searchResult.d.ts +28 -1
- package/dist/domain/services/bm25.d.ts +5 -0
- package/dist/domain/services/discriminativeTerms.d.ts +28 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/literalScorer.d.ts +9 -23
- package/dist/domain/services/matchScales.d.ts +19 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +454 -92
- package/dist/index.js.map +24 -21
- package/dist/infrastructure/embeddings/embeddingProviderFactory.d.ts +6 -1
- package/dist/infrastructure/embeddings/index.d.ts +6 -3
- package/dist/infrastructure/index.d.ts +1 -1
- package/dist/types.d.ts +1 -1
- package/package.json +12 -5
package/dist/cli/main.js
CHANGED
|
@@ -69,13 +69,13 @@ var init_modelCache = __esm(() => {
|
|
|
69
69
|
init_modelCatalog();
|
|
70
70
|
});
|
|
71
71
|
|
|
72
|
-
// src/infrastructure/embeddings/
|
|
72
|
+
// src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
|
|
73
73
|
import {
|
|
74
74
|
pipeline,
|
|
75
75
|
env
|
|
76
|
-
} from "@
|
|
76
|
+
} from "@huggingface/transformers";
|
|
77
77
|
|
|
78
|
-
class
|
|
78
|
+
class HuggingFaceTransformersEmbeddingProvider {
|
|
79
79
|
extractor = null;
|
|
80
80
|
config;
|
|
81
81
|
isInitializing = false;
|
|
@@ -83,7 +83,7 @@ class XenovaTransformersEmbeddingProvider {
|
|
|
83
83
|
constructor(config) {
|
|
84
84
|
this.config = {
|
|
85
85
|
model: config?.model ?? "bge-small-en-v1.5",
|
|
86
|
-
runtime: config?.runtime ?? "
|
|
86
|
+
runtime: config?.runtime ?? "huggingface",
|
|
87
87
|
showProgress: config?.showProgress ?? false,
|
|
88
88
|
logger: config?.logger
|
|
89
89
|
};
|
|
@@ -207,7 +207,7 @@ class XenovaTransformersEmbeddingProvider {
|
|
|
207
207
|
}
|
|
208
208
|
}
|
|
209
209
|
var BATCH_SIZE = 32;
|
|
210
|
-
var
|
|
210
|
+
var init_huggingfaceEmbeddingProvider = __esm(() => {
|
|
211
211
|
init_embeddingPaths();
|
|
212
212
|
init_modelCatalog();
|
|
213
213
|
init_modelCache();
|
|
@@ -215,13 +215,18 @@ var init_xenovaEmbeddingProvider = __esm(() => {
|
|
|
215
215
|
env.allowLocalModels = true;
|
|
216
216
|
});
|
|
217
217
|
|
|
218
|
-
// src/infrastructure/embeddings/
|
|
218
|
+
// src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
|
|
219
|
+
var exports_xenovaEmbeddingProvider = {};
|
|
220
|
+
__export(exports_xenovaEmbeddingProvider, {
|
|
221
|
+
XenovaTransformersEmbeddingProvider: () => XenovaTransformersEmbeddingProvider,
|
|
222
|
+
TransformersEmbeddingProvider: () => TransformersEmbeddingProvider
|
|
223
|
+
});
|
|
219
224
|
import {
|
|
220
225
|
pipeline as pipeline2,
|
|
221
226
|
env as env2
|
|
222
|
-
} from "@
|
|
227
|
+
} from "@xenova/transformers";
|
|
223
228
|
|
|
224
|
-
class
|
|
229
|
+
class XenovaTransformersEmbeddingProvider {
|
|
225
230
|
extractor = null;
|
|
226
231
|
config;
|
|
227
232
|
isInitializing = false;
|
|
@@ -229,7 +234,7 @@ class HuggingFaceTransformersEmbeddingProvider {
|
|
|
229
234
|
constructor(config) {
|
|
230
235
|
this.config = {
|
|
231
236
|
model: config?.model ?? "bge-small-en-v1.5",
|
|
232
|
-
runtime: config?.runtime ?? "
|
|
237
|
+
runtime: config?.runtime ?? "xenova",
|
|
233
238
|
showProgress: config?.showProgress ?? false,
|
|
234
239
|
logger: config?.logger
|
|
235
240
|
};
|
|
@@ -352,29 +357,30 @@ class HuggingFaceTransformersEmbeddingProvider {
|
|
|
352
357
|
this.extractor = null;
|
|
353
358
|
}
|
|
354
359
|
}
|
|
355
|
-
var BATCH_SIZE2 = 32;
|
|
356
|
-
var
|
|
360
|
+
var BATCH_SIZE2 = 32, TransformersEmbeddingProvider;
|
|
361
|
+
var init_xenovaEmbeddingProvider = __esm(() => {
|
|
357
362
|
init_embeddingPaths();
|
|
358
363
|
init_modelCatalog();
|
|
359
364
|
init_modelCache();
|
|
360
365
|
env2.cacheDir = RAGGREP_MODEL_CACHE_DIR;
|
|
361
366
|
env2.allowLocalModels = true;
|
|
367
|
+
TransformersEmbeddingProvider = XenovaTransformersEmbeddingProvider;
|
|
362
368
|
});
|
|
363
369
|
|
|
364
370
|
// src/infrastructure/embeddings/embeddingProviderFactory.ts
|
|
365
371
|
function resolveRuntime(config) {
|
|
366
372
|
return config.runtime ?? "huggingface";
|
|
367
373
|
}
|
|
368
|
-
function createEmbeddingProvider(config) {
|
|
374
|
+
async function createEmbeddingProvider(config) {
|
|
369
375
|
const runtime = resolveRuntime(config);
|
|
370
376
|
if (runtime === "huggingface") {
|
|
371
377
|
return new HuggingFaceTransformersEmbeddingProvider(config);
|
|
372
378
|
}
|
|
373
|
-
|
|
379
|
+
const { XenovaTransformersEmbeddingProvider: XenovaTransformersEmbeddingProvider2 } = await Promise.resolve().then(() => (init_xenovaEmbeddingProvider(), exports_xenovaEmbeddingProvider));
|
|
380
|
+
return new XenovaTransformersEmbeddingProvider2(config);
|
|
374
381
|
}
|
|
375
382
|
var init_embeddingProviderFactory = __esm(() => {
|
|
376
383
|
init_huggingfaceEmbeddingProvider();
|
|
377
|
-
init_xenovaEmbeddingProvider();
|
|
378
384
|
});
|
|
379
385
|
|
|
380
386
|
// src/infrastructure/embeddings/globalEmbeddings.ts
|
|
@@ -399,7 +405,7 @@ function getEmbeddingConfig() {
|
|
|
399
405
|
}
|
|
400
406
|
async function ensureGlobalProvider() {
|
|
401
407
|
if (!globalProvider) {
|
|
402
|
-
globalProvider = createEmbeddingProvider(globalConfig);
|
|
408
|
+
globalProvider = await createEmbeddingProvider(globalConfig);
|
|
403
409
|
await globalProvider.initialize?.(globalConfig);
|
|
404
410
|
}
|
|
405
411
|
return globalProvider;
|
|
@@ -432,8 +438,6 @@ var init_globalEmbeddings = __esm(() => {
|
|
|
432
438
|
var init_embeddings = __esm(() => {
|
|
433
439
|
init_modelCatalog();
|
|
434
440
|
init_embeddingPaths();
|
|
435
|
-
init_xenovaEmbeddingProvider();
|
|
436
|
-
init_xenovaEmbeddingProvider();
|
|
437
441
|
init_huggingfaceEmbeddingProvider();
|
|
438
442
|
init_embeddingProviderFactory();
|
|
439
443
|
init_globalEmbeddings();
|
|
@@ -1167,7 +1171,107 @@ var init_searchResult = __esm(() => {
|
|
|
1167
1171
|
minScore: 0.15,
|
|
1168
1172
|
filePatterns: [],
|
|
1169
1173
|
pathFilter: [],
|
|
1170
|
-
ensureFresh: true
|
|
1174
|
+
ensureFresh: true,
|
|
1175
|
+
rankingWeights: {},
|
|
1176
|
+
quiet: false,
|
|
1177
|
+
rankBy: "structured"
|
|
1178
|
+
};
|
|
1179
|
+
});
|
|
1180
|
+
|
|
1181
|
+
// src/domain/entities/rankingWeights.ts
|
|
1182
|
+
function mergeLiteralWeights(def, partial) {
|
|
1183
|
+
if (!partial) {
|
|
1184
|
+
return def;
|
|
1185
|
+
}
|
|
1186
|
+
return {
|
|
1187
|
+
baseScore: partial.baseScore ?? def.baseScore,
|
|
1188
|
+
multipliers: {
|
|
1189
|
+
definition: {
|
|
1190
|
+
...def.multipliers.definition,
|
|
1191
|
+
...partial.multipliers?.definition
|
|
1192
|
+
},
|
|
1193
|
+
reference: {
|
|
1194
|
+
...def.multipliers.reference,
|
|
1195
|
+
...partial.multipliers?.reference
|
|
1196
|
+
},
|
|
1197
|
+
import: { ...def.multipliers.import, ...partial.multipliers?.import }
|
|
1198
|
+
},
|
|
1199
|
+
vocabulary: { ...def.vocabulary, ...partial.vocabulary }
|
|
1200
|
+
};
|
|
1201
|
+
}
|
|
1202
|
+
function mergeRankingWeights(partial) {
|
|
1203
|
+
if (!partial) {
|
|
1204
|
+
return DEFAULT_RANKING_WEIGHTS;
|
|
1205
|
+
}
|
|
1206
|
+
return {
|
|
1207
|
+
discriminative: {
|
|
1208
|
+
...DEFAULT_RANKING_WEIGHTS.discriminative,
|
|
1209
|
+
...partial.discriminative
|
|
1210
|
+
},
|
|
1211
|
+
typescript: {
|
|
1212
|
+
...DEFAULT_RANKING_WEIGHTS.typescript,
|
|
1213
|
+
...partial.typescript
|
|
1214
|
+
},
|
|
1215
|
+
language: {
|
|
1216
|
+
...DEFAULT_RANKING_WEIGHTS.language,
|
|
1217
|
+
...partial.language
|
|
1218
|
+
},
|
|
1219
|
+
markdown: {
|
|
1220
|
+
...DEFAULT_RANKING_WEIGHTS.markdown,
|
|
1221
|
+
...partial.markdown
|
|
1222
|
+
},
|
|
1223
|
+
json: {
|
|
1224
|
+
...DEFAULT_RANKING_WEIGHTS.json,
|
|
1225
|
+
...partial.json
|
|
1226
|
+
},
|
|
1227
|
+
literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
|
|
1228
|
+
};
|
|
1229
|
+
}
|
|
1230
|
+
var DEFAULT_DISCRIMINATIVE_WEIGHTS, DEFAULT_LITERAL_BOOST_WEIGHTS, DEFAULT_RANKING_WEIGHTS;
|
|
1231
|
+
var init_rankingWeights = __esm(() => {
|
|
1232
|
+
DEFAULT_DISCRIMINATIVE_WEIGHTS = {
|
|
1233
|
+
boostCap: 0.1,
|
|
1234
|
+
penaltyMax: 0.16,
|
|
1235
|
+
penaltyFloor: 0.72
|
|
1236
|
+
};
|
|
1237
|
+
DEFAULT_LITERAL_BOOST_WEIGHTS = {
|
|
1238
|
+
baseScore: 0.5,
|
|
1239
|
+
multipliers: {
|
|
1240
|
+
definition: { high: 2.5, medium: 2, low: 1.5 },
|
|
1241
|
+
reference: { high: 2, medium: 1.5, low: 1.3 },
|
|
1242
|
+
import: { high: 1.5, medium: 1.3, low: 1.1 }
|
|
1243
|
+
},
|
|
1244
|
+
vocabulary: {
|
|
1245
|
+
baseMultiplier: 1.3,
|
|
1246
|
+
perWordBonus: 0.1,
|
|
1247
|
+
maxVocabularyBonus: 0.5,
|
|
1248
|
+
minWordsForMatch: 2
|
|
1249
|
+
}
|
|
1250
|
+
};
|
|
1251
|
+
DEFAULT_RANKING_WEIGHTS = {
|
|
1252
|
+
discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
|
|
1253
|
+
typescript: {
|
|
1254
|
+
semantic: 0.43,
|
|
1255
|
+
bm25: 0.42,
|
|
1256
|
+
vocab: 0.15,
|
|
1257
|
+
vocabBypassThreshold: 0.4
|
|
1258
|
+
},
|
|
1259
|
+
language: {
|
|
1260
|
+
semantic: 0.7,
|
|
1261
|
+
bm25: 0.3
|
|
1262
|
+
},
|
|
1263
|
+
markdown: {
|
|
1264
|
+
semantic: 0.62,
|
|
1265
|
+
bm25: 0.33,
|
|
1266
|
+
docIntentBoost: 0.03,
|
|
1267
|
+
headingPhraseCoverageMin: 0.25,
|
|
1268
|
+
headingPhraseCoverageSpan: 0.75
|
|
1269
|
+
},
|
|
1270
|
+
json: {
|
|
1271
|
+
bm25: 0.4,
|
|
1272
|
+
literalBaseWeight: 0.6
|
|
1273
|
+
},
|
|
1274
|
+
literal: DEFAULT_LITERAL_BOOST_WEIGHTS
|
|
1171
1275
|
};
|
|
1172
1276
|
});
|
|
1173
1277
|
|
|
@@ -1315,6 +1419,7 @@ var init_lexicon = __esm(() => {
|
|
|
1315
1419
|
// src/domain/entities/index.ts
|
|
1316
1420
|
var init_entities = __esm(() => {
|
|
1317
1421
|
init_searchResult();
|
|
1422
|
+
init_rankingWeights();
|
|
1318
1423
|
init_config();
|
|
1319
1424
|
init_literal();
|
|
1320
1425
|
init_lexicon();
|
|
@@ -1435,6 +1540,9 @@ class BM25Index {
|
|
|
1435
1540
|
return 0;
|
|
1436
1541
|
return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
|
|
1437
1542
|
}
|
|
1543
|
+
getInverseDocumentFrequency(term) {
|
|
1544
|
+
return this.idf(term.toLowerCase());
|
|
1545
|
+
}
|
|
1438
1546
|
score(tokens, queryTerms) {
|
|
1439
1547
|
const docLength = tokens.length;
|
|
1440
1548
|
let score = 0;
|
|
@@ -3261,6 +3369,188 @@ var init_core = __esm(() => {
|
|
|
3261
3369
|
init_symbols();
|
|
3262
3370
|
});
|
|
3263
3371
|
|
|
3372
|
+
// src/domain/services/discriminativeTerms.ts
|
|
3373
|
+
function medianSorted(sorted) {
|
|
3374
|
+
const n = sorted.length;
|
|
3375
|
+
if (n === 0)
|
|
3376
|
+
return 0;
|
|
3377
|
+
const mid = Math.floor(n / 2);
|
|
3378
|
+
return n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
|
|
3379
|
+
}
|
|
3380
|
+
function salientTermHitsChunk(term, haystack, tokenSet) {
|
|
3381
|
+
if (tokenSet.has(term) || haystack.includes(term)) {
|
|
3382
|
+
return true;
|
|
3383
|
+
}
|
|
3384
|
+
if (term.length < PREFIX_MATCH_MIN_LEN) {
|
|
3385
|
+
return false;
|
|
3386
|
+
}
|
|
3387
|
+
for (const w of tokenSet) {
|
|
3388
|
+
if (w.length < PREFIX_MATCH_MIN_LEN)
|
|
3389
|
+
continue;
|
|
3390
|
+
if (term.startsWith(w) || w.startsWith(term)) {
|
|
3391
|
+
return true;
|
|
3392
|
+
}
|
|
3393
|
+
}
|
|
3394
|
+
return false;
|
|
3395
|
+
}
|
|
3396
|
+
function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
|
|
3397
|
+
const empty2 = () => ({
|
|
3398
|
+
boost: 0,
|
|
3399
|
+
penaltyFactor: 1,
|
|
3400
|
+
salientTerms: [],
|
|
3401
|
+
matchedSalient: [],
|
|
3402
|
+
missingSalient: [],
|
|
3403
|
+
salientCoverage: 1
|
|
3404
|
+
});
|
|
3405
|
+
const uniqueTerms = [...new Set(tokenize(query))];
|
|
3406
|
+
if (uniqueTerms.length === 0) {
|
|
3407
|
+
return empty2();
|
|
3408
|
+
}
|
|
3409
|
+
const indexed = [];
|
|
3410
|
+
for (const term of uniqueTerms) {
|
|
3411
|
+
const idf = bm25Index.getInverseDocumentFrequency(term);
|
|
3412
|
+
if (idf > 0) {
|
|
3413
|
+
indexed.push({ term, idf });
|
|
3414
|
+
}
|
|
3415
|
+
}
|
|
3416
|
+
if (indexed.length === 0) {
|
|
3417
|
+
return empty2();
|
|
3418
|
+
}
|
|
3419
|
+
const idfSorted = [...indexed.map((x) => x.idf)].sort((a, b) => a - b);
|
|
3420
|
+
const medianIdf = medianSorted(idfSorted);
|
|
3421
|
+
const salientEntries = indexed.filter((x) => x.idf >= medianIdf);
|
|
3422
|
+
const salientTerms = [...new Set(salientEntries.map((x) => x.term))];
|
|
3423
|
+
const idfByTerm = new Map;
|
|
3424
|
+
for (const { term, idf } of salientEntries) {
|
|
3425
|
+
idfByTerm.set(term, Math.max(idfByTerm.get(term) ?? 0, idf));
|
|
3426
|
+
}
|
|
3427
|
+
let totalW = 0;
|
|
3428
|
+
for (const idf of idfByTerm.values()) {
|
|
3429
|
+
totalW += idf;
|
|
3430
|
+
}
|
|
3431
|
+
const haystack = [chunkName ?? "", chunkText].join(`
|
|
3432
|
+
`).toLowerCase();
|
|
3433
|
+
const tokenSet = new Set(tokenize(chunkName ? `${chunkName}
|
|
3434
|
+
${chunkText}` : chunkText));
|
|
3435
|
+
const matchedSalient = [];
|
|
3436
|
+
for (const term of salientTerms) {
|
|
3437
|
+
const idf = idfByTerm.get(term) ?? 0;
|
|
3438
|
+
if (idf <= 0)
|
|
3439
|
+
continue;
|
|
3440
|
+
if (salientTermHitsChunk(term, haystack, tokenSet)) {
|
|
3441
|
+
matchedSalient.push(term);
|
|
3442
|
+
}
|
|
3443
|
+
}
|
|
3444
|
+
const matchedSet = new Set(matchedSalient);
|
|
3445
|
+
const missingSalient = salientTerms.filter((t) => !matchedSet.has(t));
|
|
3446
|
+
let matchedW = 0;
|
|
3447
|
+
for (const term of matchedSalient) {
|
|
3448
|
+
matchedW += idfByTerm.get(term) ?? 0;
|
|
3449
|
+
}
|
|
3450
|
+
const salientCoverage = totalW > 0 ? matchedW / totalW : 1;
|
|
3451
|
+
const { boostCap, penaltyMax, penaltyFloor } = weights;
|
|
3452
|
+
const boost = boostCap * salientCoverage;
|
|
3453
|
+
let penaltyFactor = 1 - penaltyMax * (1 - salientCoverage);
|
|
3454
|
+
if (penaltyFactor < penaltyFloor) {
|
|
3455
|
+
penaltyFactor = penaltyFloor;
|
|
3456
|
+
}
|
|
3457
|
+
return {
|
|
3458
|
+
boost,
|
|
3459
|
+
penaltyFactor,
|
|
3460
|
+
salientTerms,
|
|
3461
|
+
matchedSalient,
|
|
3462
|
+
missingSalient,
|
|
3463
|
+
salientCoverage
|
|
3464
|
+
};
|
|
3465
|
+
}
|
|
3466
|
+
var PREFIX_MATCH_MIN_LEN = 4;
|
|
3467
|
+
var init_discriminativeTerms = __esm(() => {
|
|
3468
|
+
init_rankingWeights();
|
|
3469
|
+
});
|
|
3470
|
+
|
|
3471
|
+
// src/domain/services/matchScales.ts
|
|
3472
|
+
function semanticPctFromCosine(cosine) {
|
|
3473
|
+
return clamp01((cosine + 1) / 2);
|
|
3474
|
+
}
|
|
3475
|
+
function clamp01(x) {
|
|
3476
|
+
if (Number.isNaN(x) || !Number.isFinite(x))
|
|
3477
|
+
return 0;
|
|
3478
|
+
return Math.max(0, Math.min(1, x));
|
|
3479
|
+
}
|
|
3480
|
+
function num(ctx, key) {
|
|
3481
|
+
const v = ctx[key];
|
|
3482
|
+
return typeof v === "number" && Number.isFinite(v) ? v : 0;
|
|
3483
|
+
}
|
|
3484
|
+
function additiveStructuredBoost(ctx) {
|
|
3485
|
+
return num(ctx, "pathBoost") + num(ctx, "fileTypeBoost") + num(ctx, "chunkTypeBoost") + num(ctx, "exportBoost");
|
|
3486
|
+
}
|
|
3487
|
+
function attachMatchScales(result, rw) {
|
|
3488
|
+
const ctx = result.context ?? {};
|
|
3489
|
+
const mid = result.moduleId;
|
|
3490
|
+
let semanticMatch = 0;
|
|
3491
|
+
let structuredMatch = 0;
|
|
3492
|
+
if (mid === "language/typescript") {
|
|
3493
|
+
const cos = num(ctx, "semanticScore");
|
|
3494
|
+
const bm25 = num(ctx, "bm25Score");
|
|
3495
|
+
const vocab = num(ctx, "vocabScore");
|
|
3496
|
+
const phraseCov = num(ctx, "phraseCoverage");
|
|
3497
|
+
const tw = rw.typescript;
|
|
3498
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
3499
|
+
const denom = tw.bm25 + tw.vocab + 0.000000001;
|
|
3500
|
+
const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
|
|
3501
|
+
structuredMatch = clamp01(lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25));
|
|
3502
|
+
} else if (mid.startsWith("language/")) {
|
|
3503
|
+
const cos = num(ctx, "semanticScore");
|
|
3504
|
+
const bm25 = num(ctx, "bm25Score");
|
|
3505
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
3506
|
+
structuredMatch = clamp01(bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2));
|
|
3507
|
+
} else if (mid === "docs/markdown") {
|
|
3508
|
+
const cos = num(ctx, "semanticScore");
|
|
3509
|
+
const bm25 = num(ctx, "bm25Score");
|
|
3510
|
+
const docBoost = num(ctx, "docBoost");
|
|
3511
|
+
const headingBoost = num(ctx, "headingBoost");
|
|
3512
|
+
const phraseCov = num(ctx, "phraseCoverage");
|
|
3513
|
+
const mw = rw.markdown;
|
|
3514
|
+
semanticMatch = semanticPctFromCosine(cos);
|
|
3515
|
+
structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
|
|
3516
|
+
} else if (mid === "core") {
|
|
3517
|
+
semanticMatch = 0;
|
|
3518
|
+
const nBm = num(ctx, "bm25Score");
|
|
3519
|
+
const sym = num(ctx, "symbolScore");
|
|
3520
|
+
structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
|
|
3521
|
+
} else if (mid === "data/json") {
|
|
3522
|
+
semanticMatch = 0;
|
|
3523
|
+
const bm25 = num(ctx, "bm25Score");
|
|
3524
|
+
const litM = num(ctx, "literalMultiplier");
|
|
3525
|
+
structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
|
|
3526
|
+
} else {
|
|
3527
|
+
semanticMatch = 0;
|
|
3528
|
+
structuredMatch = clamp01(result.score);
|
|
3529
|
+
}
|
|
3530
|
+
return { ...result, semanticMatch, structuredMatch };
|
|
3531
|
+
}
|
|
3532
|
+
function compareSearchResultsByRankBy(a, b, rankBy) {
|
|
3533
|
+
if (rankBy === "combined") {
|
|
3534
|
+
return b.score - a.score;
|
|
3535
|
+
}
|
|
3536
|
+
const sa = a.semanticMatch ?? 0;
|
|
3537
|
+
const sb = b.semanticMatch ?? 0;
|
|
3538
|
+
const ta = a.structuredMatch ?? 0;
|
|
3539
|
+
const tb = b.structuredMatch ?? 0;
|
|
3540
|
+
if (rankBy === "semantic") {
|
|
3541
|
+
if (Math.abs(sb - sa) > 0.000000001)
|
|
3542
|
+
return sb - sa;
|
|
3543
|
+
if (Math.abs(tb - ta) > 0.000000001)
|
|
3544
|
+
return tb - ta;
|
|
3545
|
+
return b.score - a.score;
|
|
3546
|
+
}
|
|
3547
|
+
if (Math.abs(tb - ta) > 0.000000001)
|
|
3548
|
+
return tb - ta;
|
|
3549
|
+
if (Math.abs(sb - sa) > 0.000000001)
|
|
3550
|
+
return sb - sa;
|
|
3551
|
+
return b.score - a.score;
|
|
3552
|
+
}
|
|
3553
|
+
|
|
3264
3554
|
// src/domain/services/keywords.ts
|
|
3265
3555
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
3266
3556
|
const keywords = new Set;
|
|
@@ -3919,16 +4209,16 @@ var init_literalExtractor = __esm(() => {
|
|
|
3919
4209
|
});
|
|
3920
4210
|
|
|
3921
4211
|
// src/domain/services/literalScorer.ts
|
|
3922
|
-
function calculateLiteralMultiplier(matchType, confidence) {
|
|
3923
|
-
return
|
|
4212
|
+
function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
|
|
4213
|
+
return weights.multipliers[matchType][confidence];
|
|
3924
4214
|
}
|
|
3925
|
-
function calculateMaxMultiplier(matches) {
|
|
4215
|
+
function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
|
|
3926
4216
|
if (!matches || matches.length === 0) {
|
|
3927
4217
|
return 1;
|
|
3928
4218
|
}
|
|
3929
|
-
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence)));
|
|
4219
|
+
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence, weights)));
|
|
3930
4220
|
}
|
|
3931
|
-
function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
4221
|
+
function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3932
4222
|
if (!matches || matches.length === 0) {
|
|
3933
4223
|
return {
|
|
3934
4224
|
multiplier: 1,
|
|
@@ -3939,7 +4229,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3939
4229
|
let bestMatch = null;
|
|
3940
4230
|
let bestMultiplier = 0;
|
|
3941
4231
|
for (const match of matches) {
|
|
3942
|
-
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
|
|
4232
|
+
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
|
|
3943
4233
|
if (mult > bestMultiplier) {
|
|
3944
4234
|
bestMultiplier = mult;
|
|
3945
4235
|
bestMatch = match;
|
|
@@ -3953,32 +4243,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3953
4243
|
matchCount: matches.length
|
|
3954
4244
|
};
|
|
3955
4245
|
}
|
|
3956
|
-
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
|
|
4246
|
+
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3957
4247
|
if (!matches || matches.length === 0) {
|
|
3958
4248
|
return baseScore;
|
|
3959
4249
|
}
|
|
3960
|
-
const multiplier = calculateMaxMultiplier(matches);
|
|
4250
|
+
const multiplier = calculateMaxMultiplier(matches, weights);
|
|
3961
4251
|
if (!hasSemanticOrBm25) {
|
|
3962
|
-
return
|
|
4252
|
+
return weights.baseScore * multiplier;
|
|
3963
4253
|
}
|
|
3964
4254
|
return baseScore * multiplier;
|
|
3965
4255
|
}
|
|
3966
|
-
var
|
|
4256
|
+
var DEFAULT_LW;
|
|
3967
4257
|
var init_literalScorer = __esm(() => {
|
|
3968
|
-
|
|
3969
|
-
|
|
3970
|
-
MULTIPLIERS: {
|
|
3971
|
-
definition: { high: 2.5, medium: 2, low: 1.5 },
|
|
3972
|
-
reference: { high: 2, medium: 1.5, low: 1.3 },
|
|
3973
|
-
import: { high: 1.5, medium: 1.3, low: 1.1 }
|
|
3974
|
-
},
|
|
3975
|
-
VOCABULARY: {
|
|
3976
|
-
BASE_MULTIPLIER: 1.3,
|
|
3977
|
-
PER_WORD_BONUS: 0.1,
|
|
3978
|
-
MAX_VOCABULARY_BONUS: 0.5,
|
|
3979
|
-
MIN_WORDS_FOR_MATCH: 2
|
|
3980
|
-
}
|
|
3981
|
-
};
|
|
4258
|
+
init_rankingWeights();
|
|
4259
|
+
DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
|
|
3982
4260
|
});
|
|
3983
4261
|
|
|
3984
4262
|
// src/domain/services/lexicon.ts
|
|
@@ -4933,6 +5211,7 @@ var init_chunkContext = __esm(() => {
|
|
|
4933
5211
|
|
|
4934
5212
|
// src/domain/services/index.ts
|
|
4935
5213
|
var init_services = __esm(() => {
|
|
5214
|
+
init_discriminativeTerms();
|
|
4936
5215
|
init_keywords();
|
|
4937
5216
|
init_queryIntent();
|
|
4938
5217
|
init_queryLiteralParser();
|
|
@@ -5756,6 +6035,9 @@ class TypeScriptModule {
|
|
|
5756
6035
|
minScore = DEFAULT_MIN_SCORE2,
|
|
5757
6036
|
filePatterns
|
|
5758
6037
|
} = options;
|
|
6038
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
6039
|
+
const tw = rw.typescript;
|
|
6040
|
+
const lt = rw.literal;
|
|
5759
6041
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
5760
6042
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
5761
6043
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -5862,17 +6144,19 @@ class TypeScriptModule {
|
|
|
5862
6144
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5863
6145
|
const exportBoost = calculateExportBoost(chunk);
|
|
5864
6146
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5865
|
-
const baseScore =
|
|
6147
|
+
const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
|
|
5866
6148
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
5867
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
5868
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
6149
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
6150
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
5869
6151
|
const finalScore = boostedScore + additiveBoost;
|
|
6152
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6153
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5870
6154
|
processedChunkIds.add(chunk.id);
|
|
5871
|
-
if (
|
|
6155
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
|
|
5872
6156
|
results.push({
|
|
5873
6157
|
filepath,
|
|
5874
6158
|
chunk,
|
|
5875
|
-
score:
|
|
6159
|
+
score: adjustedScore,
|
|
5876
6160
|
moduleId: this.id,
|
|
5877
6161
|
context: {
|
|
5878
6162
|
semanticScore,
|
|
@@ -5884,6 +6168,10 @@ class TypeScriptModule {
|
|
|
5884
6168
|
fileTypeBoost,
|
|
5885
6169
|
chunkTypeBoost,
|
|
5886
6170
|
exportBoost,
|
|
6171
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6172
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6173
|
+
discriminativeBoost: disc.boost,
|
|
6174
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5887
6175
|
literalMultiplier: literalContribution.multiplier,
|
|
5888
6176
|
literalMatchType: literalContribution.bestMatchType,
|
|
5889
6177
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5936,15 +6224,17 @@ class TypeScriptModule {
|
|
|
5936
6224
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5937
6225
|
const exportBoost = calculateExportBoost(chunk);
|
|
5938
6226
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5939
|
-
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
|
|
5940
|
-
const baseScore = semanticScore > 0 ?
|
|
5941
|
-
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
|
|
6227
|
+
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
|
|
6228
|
+
const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
|
|
6229
|
+
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
|
|
5942
6230
|
const finalScore = boostedScore + additiveBoost;
|
|
6231
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6232
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5943
6233
|
processedChunkIds.add(chunkId);
|
|
5944
6234
|
results.push({
|
|
5945
6235
|
filepath,
|
|
5946
6236
|
chunk,
|
|
5947
|
-
score:
|
|
6237
|
+
score: adjustedScore,
|
|
5948
6238
|
moduleId: this.id,
|
|
5949
6239
|
context: {
|
|
5950
6240
|
semanticScore,
|
|
@@ -5956,6 +6246,10 @@ class TypeScriptModule {
|
|
|
5956
6246
|
fileTypeBoost,
|
|
5957
6247
|
chunkTypeBoost,
|
|
5958
6248
|
exportBoost,
|
|
6249
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6250
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6251
|
+
discriminativeBoost: disc.boost,
|
|
6252
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5959
6253
|
literalMultiplier: literalContribution.multiplier,
|
|
5960
6254
|
literalMatchType: literalContribution.bestMatchType,
|
|
5961
6255
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5992,13 +6286,14 @@ class TypeScriptModule {
|
|
|
5992
6286
|
return references;
|
|
5993
6287
|
}
|
|
5994
6288
|
}
|
|
5995
|
-
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10,
|
|
6289
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
5996
6290
|
var init_typescript = __esm(() => {
|
|
5997
6291
|
init_embeddings();
|
|
5998
6292
|
init_services();
|
|
5999
6293
|
init_config2();
|
|
6000
6294
|
init_parseCode();
|
|
6001
6295
|
init_storage();
|
|
6296
|
+
init_entities();
|
|
6002
6297
|
TYPESCRIPT_EXTENSIONS = [
|
|
6003
6298
|
".ts",
|
|
6004
6299
|
".tsx",
|
|
@@ -7111,6 +7406,9 @@ class PythonModule {
|
|
|
7111
7406
|
minScore = DEFAULT_MIN_SCORE3,
|
|
7112
7407
|
filePatterns
|
|
7113
7408
|
} = options;
|
|
7409
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7410
|
+
const lw = rw.language;
|
|
7411
|
+
const lt = rw.literal;
|
|
7114
7412
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7115
7413
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7116
7414
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7203,17 +7501,19 @@ class PythonModule {
|
|
|
7203
7501
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
7204
7502
|
const exportBoost = calculateExportBoost2(chunk);
|
|
7205
7503
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7206
|
-
const baseScore =
|
|
7504
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7207
7505
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7208
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7209
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7506
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7507
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7210
7508
|
const finalScore = boostedScore + additiveBoost;
|
|
7509
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7510
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7211
7511
|
processedChunkIds.add(chunk.id);
|
|
7212
|
-
if (
|
|
7512
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7213
7513
|
results.push({
|
|
7214
7514
|
filepath,
|
|
7215
7515
|
chunk,
|
|
7216
|
-
score:
|
|
7516
|
+
score: adjustedScore,
|
|
7217
7517
|
moduleId: this.id,
|
|
7218
7518
|
context: {
|
|
7219
7519
|
semanticScore,
|
|
@@ -7222,6 +7522,10 @@ class PythonModule {
|
|
|
7222
7522
|
fileTypeBoost,
|
|
7223
7523
|
chunkTypeBoost,
|
|
7224
7524
|
exportBoost,
|
|
7525
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7526
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7527
|
+
discriminativeBoost: disc.boost,
|
|
7528
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7225
7529
|
literalMultiplier: literalContribution.multiplier,
|
|
7226
7530
|
literalMatchType: literalContribution.bestMatchType,
|
|
7227
7531
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7256,15 +7560,17 @@ class PythonModule {
|
|
|
7256
7560
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
7257
7561
|
const exportBoost = calculateExportBoost2(chunk);
|
|
7258
7562
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7259
|
-
const literalContribution = calculateLiteralContribution(matches, false);
|
|
7260
|
-
const baseScore = semanticScore > 0 ?
|
|
7261
|
-
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
|
|
7563
|
+
const literalContribution = calculateLiteralContribution(matches, false, lt);
|
|
7564
|
+
const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
|
|
7565
|
+
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
|
|
7262
7566
|
const finalScore = boostedScore + additiveBoost;
|
|
7567
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7568
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7263
7569
|
processedChunkIds.add(chunkId);
|
|
7264
7570
|
results.push({
|
|
7265
7571
|
filepath,
|
|
7266
7572
|
chunk,
|
|
7267
|
-
score:
|
|
7573
|
+
score: adjustedScore,
|
|
7268
7574
|
moduleId: this.id,
|
|
7269
7575
|
context: {
|
|
7270
7576
|
semanticScore,
|
|
@@ -7273,6 +7579,10 @@ class PythonModule {
|
|
|
7273
7579
|
fileTypeBoost,
|
|
7274
7580
|
chunkTypeBoost,
|
|
7275
7581
|
exportBoost,
|
|
7582
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7583
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7584
|
+
discriminativeBoost: disc.boost,
|
|
7585
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7276
7586
|
literalMultiplier: literalContribution.multiplier,
|
|
7277
7587
|
literalMatchType: literalContribution.bestMatchType,
|
|
7278
7588
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7285,13 +7595,14 @@ class PythonModule {
|
|
|
7285
7595
|
return results.slice(0, topK);
|
|
7286
7596
|
}
|
|
7287
7597
|
}
|
|
7288
|
-
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10,
|
|
7598
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
|
|
7289
7599
|
var init_python = __esm(() => {
|
|
7290
7600
|
init_embeddings();
|
|
7291
7601
|
init_services();
|
|
7292
7602
|
init_config2();
|
|
7293
7603
|
init_storage();
|
|
7294
7604
|
init_parsing();
|
|
7605
|
+
init_entities();
|
|
7295
7606
|
PYTHON_EXTENSIONS = [".py", ".pyw"];
|
|
7296
7607
|
supportsFile2 = isPythonFile;
|
|
7297
7608
|
});
|
|
@@ -7643,6 +7954,9 @@ class GoModule {
|
|
|
7643
7954
|
minScore = DEFAULT_MIN_SCORE4,
|
|
7644
7955
|
filePatterns
|
|
7645
7956
|
} = options;
|
|
7957
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7958
|
+
const lw = rw.language;
|
|
7959
|
+
const lt = rw.literal;
|
|
7646
7960
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7647
7961
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7648
7962
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7735,17 +8049,19 @@ class GoModule {
|
|
|
7735
8049
|
const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
|
|
7736
8050
|
const exportBoost = calculateExportBoost3(chunk);
|
|
7737
8051
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7738
|
-
const baseScore =
|
|
8052
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7739
8053
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7740
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7741
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
8054
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
8055
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7742
8056
|
const finalScore = boostedScore + additiveBoost;
|
|
8057
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8058
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7743
8059
|
processedChunkIds.add(chunk.id);
|
|
7744
|
-
if (
|
|
8060
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7745
8061
|
results.push({
|
|
7746
8062
|
filepath,
|
|
7747
8063
|
chunk,
|
|
7748
|
-
score:
|
|
8064
|
+
score: adjustedScore,
|
|
7749
8065
|
moduleId: this.id,
|
|
7750
8066
|
context: {
|
|
7751
8067
|
semanticScore,
|
|
@@ -7754,6 +8070,10 @@ class GoModule {
|
|
|
7754
8070
|
fileTypeBoost,
|
|
7755
8071
|
chunkTypeBoost,
|
|
7756
8072
|
exportBoost,
|
|
8073
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8074
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8075
|
+
discriminativeBoost: disc.boost,
|
|
8076
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7757
8077
|
literalMultiplier: literalContribution.multiplier,
|
|
7758
8078
|
literalMatchType: literalContribution.bestMatchType,
|
|
7759
8079
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7766,13 +8086,14 @@ class GoModule {
|
|
|
7766
8086
|
return results.slice(0, topK);
|
|
7767
8087
|
}
|
|
7768
8088
|
}
|
|
7769
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
8089
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
|
|
7770
8090
|
var init_go = __esm(() => {
|
|
7771
8091
|
init_embeddings();
|
|
7772
8092
|
init_services();
|
|
7773
8093
|
init_config2();
|
|
7774
8094
|
init_storage();
|
|
7775
8095
|
init_parsing();
|
|
8096
|
+
init_entities();
|
|
7776
8097
|
GO_EXTENSIONS = [".go"];
|
|
7777
8098
|
supportsFile3 = isGoFile;
|
|
7778
8099
|
});
|
|
@@ -8203,6 +8524,9 @@ class RustModule {
|
|
|
8203
8524
|
minScore = DEFAULT_MIN_SCORE5,
|
|
8204
8525
|
filePatterns
|
|
8205
8526
|
} = options;
|
|
8527
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8528
|
+
const lw = rw.language;
|
|
8529
|
+
const lt = rw.literal;
|
|
8206
8530
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
8207
8531
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8208
8532
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -8295,17 +8619,19 @@ class RustModule {
|
|
|
8295
8619
|
const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
|
|
8296
8620
|
const exportBoost = calculateExportBoost4(chunk);
|
|
8297
8621
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
8298
|
-
const baseScore =
|
|
8622
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
8299
8623
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
8300
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
8301
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
8624
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
8625
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
8302
8626
|
const finalScore = boostedScore + additiveBoost;
|
|
8627
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8628
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
8303
8629
|
processedChunkIds.add(chunk.id);
|
|
8304
|
-
if (
|
|
8630
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
8305
8631
|
results.push({
|
|
8306
8632
|
filepath,
|
|
8307
8633
|
chunk,
|
|
8308
|
-
score:
|
|
8634
|
+
score: adjustedScore,
|
|
8309
8635
|
moduleId: this.id,
|
|
8310
8636
|
context: {
|
|
8311
8637
|
semanticScore,
|
|
@@ -8314,6 +8640,10 @@ class RustModule {
|
|
|
8314
8640
|
fileTypeBoost,
|
|
8315
8641
|
chunkTypeBoost,
|
|
8316
8642
|
exportBoost,
|
|
8643
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8644
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8645
|
+
discriminativeBoost: disc.boost,
|
|
8646
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
8317
8647
|
literalMultiplier: literalContribution.multiplier,
|
|
8318
8648
|
literalMatchType: literalContribution.bestMatchType,
|
|
8319
8649
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -8326,13 +8656,14 @@ class RustModule {
|
|
|
8326
8656
|
return results.slice(0, topK);
|
|
8327
8657
|
}
|
|
8328
8658
|
}
|
|
8329
|
-
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10,
|
|
8659
|
+
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
|
|
8330
8660
|
var init_rust = __esm(() => {
|
|
8331
8661
|
init_embeddings();
|
|
8332
8662
|
init_services();
|
|
8333
8663
|
init_config2();
|
|
8334
8664
|
init_storage();
|
|
8335
8665
|
init_parsing();
|
|
8666
|
+
init_entities();
|
|
8336
8667
|
RUST_EXTENSIONS = [".rs"];
|
|
8337
8668
|
supportsFile4 = isRustFile;
|
|
8338
8669
|
});
|
|
@@ -8462,6 +8793,8 @@ class JsonModule {
|
|
|
8462
8793
|
minScore = DEFAULT_MIN_SCORE6,
|
|
8463
8794
|
filePatterns
|
|
8464
8795
|
} = options;
|
|
8796
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8797
|
+
const jw = rw.json;
|
|
8465
8798
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
8466
8799
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8467
8800
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -8515,9 +8848,9 @@ class JsonModule {
|
|
|
8515
8848
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
8516
8849
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
8517
8850
|
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
8518
|
-
const baseScore =
|
|
8519
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
8520
|
-
const literalBase = literalMatches.length > 0 && bm25Score === 0 ?
|
|
8851
|
+
const baseScore = jw.bm25 * bm25Score;
|
|
8852
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
|
|
8853
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
|
|
8521
8854
|
const finalScore = boostedScore + literalBase;
|
|
8522
8855
|
processedChunkIds.add(chunk.id);
|
|
8523
8856
|
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
@@ -8550,7 +8883,7 @@ class JsonModule {
|
|
|
8550
8883
|
if (!chunk)
|
|
8551
8884
|
continue;
|
|
8552
8885
|
const literalContribution = calculateLiteralContribution(matches, false);
|
|
8553
|
-
const score =
|
|
8886
|
+
const score = rw.literal.baseScore * literalContribution.multiplier;
|
|
8554
8887
|
processedChunkIds.add(chunkId);
|
|
8555
8888
|
results.push({
|
|
8556
8889
|
filepath,
|
|
@@ -8571,11 +8904,12 @@ class JsonModule {
|
|
|
8571
8904
|
return results.slice(0, topK);
|
|
8572
8905
|
}
|
|
8573
8906
|
}
|
|
8574
|
-
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10,
|
|
8907
|
+
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
|
|
8575
8908
|
var init_json = __esm(() => {
|
|
8576
8909
|
init_services();
|
|
8577
8910
|
init_config2();
|
|
8578
8911
|
init_storage();
|
|
8912
|
+
init_entities();
|
|
8579
8913
|
JSON_EXTENSIONS = [".json"];
|
|
8580
8914
|
supportsFile5 = isJsonFile;
|
|
8581
8915
|
});
|
|
@@ -8810,6 +9144,8 @@ class MarkdownModule {
|
|
|
8810
9144
|
minScore = DEFAULT_MIN_SCORE7,
|
|
8811
9145
|
filePatterns
|
|
8812
9146
|
} = options;
|
|
9147
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
9148
|
+
const mw = rw.markdown;
|
|
8813
9149
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8814
9150
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
8815
9151
|
let allFiles;
|
|
@@ -8875,15 +9211,18 @@ class MarkdownModule {
|
|
|
8875
9211
|
"what",
|
|
8876
9212
|
"explain"
|
|
8877
9213
|
].includes(t))) {
|
|
8878
|
-
docBoost =
|
|
8879
|
-
}
|
|
8880
|
-
const
|
|
8881
|
-
const
|
|
8882
|
-
|
|
9214
|
+
docBoost = mw.docIntentBoost;
|
|
9215
|
+
}
|
|
9216
|
+
const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
|
|
9217
|
+
const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
|
|
9218
|
+
const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
|
|
9219
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
9220
|
+
const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
|
|
9221
|
+
if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
|
|
8883
9222
|
results.push({
|
|
8884
9223
|
filepath,
|
|
8885
9224
|
chunk,
|
|
8886
|
-
score:
|
|
9225
|
+
score: finalScore,
|
|
8887
9226
|
moduleId: this.id,
|
|
8888
9227
|
context: {
|
|
8889
9228
|
semanticScore,
|
|
@@ -8892,7 +9231,11 @@ class MarkdownModule {
|
|
|
8892
9231
|
phraseCoverage: phraseMatch.coverage,
|
|
8893
9232
|
docBoost,
|
|
8894
9233
|
headingBoost,
|
|
8895
|
-
headingLevel: chunk.metadata?.headingLevel
|
|
9234
|
+
headingLevel: chunk.metadata?.headingLevel,
|
|
9235
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
9236
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
9237
|
+
discriminativeBoost: disc.boost,
|
|
9238
|
+
matchedSalientTerms: disc.matchedSalient
|
|
8896
9239
|
}
|
|
8897
9240
|
});
|
|
8898
9241
|
}
|
|
@@ -8901,11 +9244,12 @@ class MarkdownModule {
|
|
|
8901
9244
|
return results.slice(0, topK);
|
|
8902
9245
|
}
|
|
8903
9246
|
}
|
|
8904
|
-
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10,
|
|
9247
|
+
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
|
|
8905
9248
|
var init_markdown = __esm(() => {
|
|
8906
9249
|
init_embeddings();
|
|
8907
9250
|
init_services();
|
|
8908
9251
|
init_config2();
|
|
9252
|
+
init_entities();
|
|
8909
9253
|
init_storage();
|
|
8910
9254
|
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
8911
9255
|
supportsFile6 = isMarkdownFile;
|
|
@@ -11950,7 +12294,9 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11950
12294
|
if (ensureFresh) {
|
|
11951
12295
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
11952
12296
|
}
|
|
11953
|
-
|
|
12297
|
+
if (!options.quiet) {
|
|
12298
|
+
console.log(`Searching for: "${query}"`);
|
|
12299
|
+
}
|
|
11954
12300
|
const config = await loadConfig(rootDir);
|
|
11955
12301
|
await registerBuiltInModules();
|
|
11956
12302
|
const globalManifest = await loadGlobalManifest2(rootDir, config);
|
|
@@ -12013,10 +12359,18 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
12013
12359
|
}
|
|
12014
12360
|
}
|
|
12015
12361
|
}
|
|
12016
|
-
|
|
12362
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
12363
|
+
let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
|
|
12364
|
+
for (const r of ranked) {
|
|
12365
|
+
if (r.context?.exactMatchFusion) {
|
|
12366
|
+
r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
|
|
12367
|
+
}
|
|
12368
|
+
}
|
|
12369
|
+
const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
|
|
12370
|
+
ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
|
|
12017
12371
|
const topK = options.topK ?? 10;
|
|
12018
12372
|
return {
|
|
12019
|
-
results:
|
|
12373
|
+
results: ranked.slice(0, topK),
|
|
12020
12374
|
exactMatches,
|
|
12021
12375
|
fusionApplied
|
|
12022
12376
|
};
|
|
@@ -12107,7 +12461,9 @@ function formatSearchResults2(results) {
|
|
|
12107
12461
|
const nameInfo = chunk.name ? ` (${chunk.name})` : "";
|
|
12108
12462
|
output += `${i + 1}. ${location}${nameInfo}
|
|
12109
12463
|
`;
|
|
12110
|
-
|
|
12464
|
+
const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
|
|
12465
|
+
const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
|
|
12466
|
+
output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
|
|
12111
12467
|
output += ` | via ${formatModuleName(result.moduleId)}`;
|
|
12112
12468
|
if (chunk.isExported) {
|
|
12113
12469
|
output += " | exported";
|
|
@@ -12205,6 +12561,7 @@ var init_search = __esm(() => {
|
|
|
12205
12561
|
init_registry();
|
|
12206
12562
|
init_indexer();
|
|
12207
12563
|
init_services();
|
|
12564
|
+
init_entities();
|
|
12208
12565
|
init_usecases();
|
|
12209
12566
|
init_filesystem();
|
|
12210
12567
|
});
|
|
@@ -12739,7 +13096,7 @@ import { stat as stat3 } from "fs/promises";
|
|
|
12739
13096
|
// package.json
|
|
12740
13097
|
var package_default = {
|
|
12741
13098
|
name: "raggrep",
|
|
12742
|
-
version: "0.
|
|
13099
|
+
version: "0.18.0",
|
|
12743
13100
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
12744
13101
|
type: "module",
|
|
12745
13102
|
main: "./dist/index.js",
|
|
@@ -12767,9 +13124,12 @@ var package_default = {
|
|
|
12767
13124
|
prepublishOnly: "bun run build",
|
|
12768
13125
|
raggrep: "bun run src/app/cli/main.ts",
|
|
12769
13126
|
test: "bun test",
|
|
12770
|
-
typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p
|
|
12771
|
-
"bench:embeddings": "bun run
|
|
12772
|
-
"bench:retrieval": "bun run
|
|
13127
|
+
typecheck: "tsc --noEmit -p tsconfig.json && tsc --noEmit -p research/tsconfig.json",
|
|
13128
|
+
"bench:embeddings": "bun run research/bench/benchmark-embedding-runtimes.ts",
|
|
13129
|
+
"bench:retrieval": "bun run research/bench/benchmark-retrieval-quality.ts",
|
|
13130
|
+
"eval:golden": "bun run research/eval/run-golden-queries.ts",
|
|
13131
|
+
"bench:golden-convex": "bun run research/bench/benchmark-raggrep-golden-queries.ts",
|
|
13132
|
+
"bench:golden-hillclimb": "bun run research/bench/benchmark-raggrep-hillclimb.ts",
|
|
12773
13133
|
dev: "bun run src/app/cli/main.ts"
|
|
12774
13134
|
},
|
|
12775
13135
|
keywords: [
|
|
@@ -12801,7 +13161,7 @@ var package_default = {
|
|
|
12801
13161
|
"@xenova/transformers": "^2.17.0",
|
|
12802
13162
|
chokidar: "^5.0.0",
|
|
12803
13163
|
fdir: "^6.5.0",
|
|
12804
|
-
glob: "^
|
|
13164
|
+
glob: "^11.0.0",
|
|
12805
13165
|
minimatch: "^10.1.1",
|
|
12806
13166
|
typescript: "^5.0.0",
|
|
12807
13167
|
"web-tree-sitter": "^0.26.3"
|
|
@@ -12809,6 +13169,10 @@ var package_default = {
|
|
|
12809
13169
|
devDependencies: {
|
|
12810
13170
|
"@types/bun": "latest",
|
|
12811
13171
|
"@types/node": "^20.0.0"
|
|
13172
|
+
},
|
|
13173
|
+
overrides: {
|
|
13174
|
+
sharp: "^0.34.5",
|
|
13175
|
+
"global-agent": "^4.1.3"
|
|
12812
13176
|
}
|
|
12813
13177
|
};
|
|
12814
13178
|
|
|
@@ -12914,6 +13278,14 @@ function parseFlags(args2) {
|
|
|
12914
13278
|
console.error("--dir / -C requires a path to the project directory to index or search.");
|
|
12915
13279
|
process.exit(1);
|
|
12916
13280
|
}
|
|
13281
|
+
} else if (arg === "--rank-by") {
|
|
13282
|
+
const v = args2[++i];
|
|
13283
|
+
if (v === "structured" || v === "semantic" || v === "combined") {
|
|
13284
|
+
flags.rankBy = v;
|
|
13285
|
+
} else {
|
|
13286
|
+
console.error(`--rank-by must be structured, semantic, or combined (got: ${v})`);
|
|
13287
|
+
process.exit(1);
|
|
13288
|
+
}
|
|
12917
13289
|
} else if (arg === "--tool") {
|
|
12918
13290
|
flags.forceTool = true;
|
|
12919
13291
|
} else if (arg === "--skill") {
|
|
@@ -13045,6 +13417,7 @@ Options:
|
|
|
13045
13417
|
-s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
|
|
13046
13418
|
-t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
|
|
13047
13419
|
-f, --filter <path> Filter by path or glob pattern (can be used multiple times)
|
|
13420
|
+
--rank-by <mode> Order results: structured (default), semantic, or combined (fused score only)
|
|
13048
13421
|
-T, --timing Show timing breakdown for performance profiling
|
|
13049
13422
|
-h, --help Show this help message
|
|
13050
13423
|
|
|
@@ -13143,6 +13516,7 @@ Examples:
|
|
|
13143
13516
|
minScore: flags.minScore,
|
|
13144
13517
|
filePatterns,
|
|
13145
13518
|
pathFilter: flags.pathFilter,
|
|
13519
|
+
rankBy: flags.rankBy,
|
|
13146
13520
|
ensureFresh: false
|
|
13147
13521
|
});
|
|
13148
13522
|
console.log(formatHybridSearchResults2(hybridResults));
|
|
@@ -13378,4 +13752,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
13378
13752
|
}
|
|
13379
13753
|
main();
|
|
13380
13754
|
|
|
13381
|
-
//# debugId=
|
|
13755
|
+
//# debugId=9CA948E12F18492C64756E2164756E21
|