raggrep 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -40
- package/dist/app/search/index.d.ts +2 -1
- package/dist/cli/main.js +471 -97
- package/dist/cli/main.js.map +24 -21
- package/dist/domain/entities/index.d.ts +3 -1
- package/dist/domain/entities/rankingWeights.d.ts +84 -0
- package/dist/domain/entities/searchResult.d.ts +28 -1
- package/dist/domain/services/bm25.d.ts +5 -0
- package/dist/domain/services/discriminativeTerms.d.ts +28 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/literalScorer.d.ts +9 -23
- package/dist/domain/services/matchScales.d.ts +19 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +454 -92
- package/dist/index.js.map +24 -21
- package/dist/infrastructure/embeddings/embeddingProviderFactory.d.ts +6 -1
- package/dist/infrastructure/embeddings/index.d.ts +6 -3
- package/dist/infrastructure/index.d.ts +1 -1
- package/dist/types.d.ts +1 -1
- package/package.json +12 -5
package/dist/index.js
CHANGED
|
@@ -23,7 +23,107 @@ var init_searchResult = __esm(() => {
|
|
|
23
23
|
minScore: 0.15,
|
|
24
24
|
filePatterns: [],
|
|
25
25
|
pathFilter: [],
|
|
26
|
-
ensureFresh: true
|
|
26
|
+
ensureFresh: true,
|
|
27
|
+
rankingWeights: {},
|
|
28
|
+
quiet: false,
|
|
29
|
+
rankBy: "structured"
|
|
30
|
+
};
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// src/domain/entities/rankingWeights.ts
|
|
34
|
+
/**
 * Overlay a partial literal-boost configuration on top of a complete default set.
 *
 * Override keys whose value is `undefined` or `null` are ignored, so a partial
 * object assembled from optional inputs cannot blank out a default (which would
 * later surface as `NaN` scores). This mirrors how `baseScore` already falls
 * back through `??`.
 *
 * @param {object} def - Complete weights: `baseScore`,
 *   `multipliers.{definition,reference,import}` and `vocabulary`.
 * @param {object} [partial] - Overrides of the same shape; any level may be omitted.
 * @returns {object} `def` itself when `partial` is falsy, otherwise a newly
 *   built merged object (the inputs are not mutated).
 */
function mergeLiteralWeights(def, partial) {
  if (!partial) {
    return def;
  }
  // Copy `base`, then apply only the override keys that carry a real value.
  const overlay = (base, overrides) => {
    const merged = { ...base };
    for (const [key, value] of Object.entries(overrides ?? {})) {
      if (value !== undefined && value !== null) {
        merged[key] = value;
      }
    }
    return merged;
  };
  const multiplierOverrides = partial.multipliers ?? {};
  return {
    baseScore: partial.baseScore ?? def.baseScore,
    multipliers: {
      definition: overlay(def.multipliers.definition, multiplierOverrides.definition),
      reference: overlay(def.multipliers.reference, multiplierOverrides.reference),
      import: overlay(def.multipliers.import, multiplierOverrides.import)
    },
    vocabulary: overlay(def.vocabulary, partial.vocabulary)
  };
}
|
|
54
|
+
/**
 * Build a complete ranking-weights object from an optional partial override.
 *
 * Each flat section (`discriminative`, `typescript`, `language`, `markdown`,
 * `json`) starts from `DEFAULT_RANKING_WEIGHTS` and takes only the override
 * keys that carry a real value: `undefined` / `null` entries are skipped so
 * they cannot erase a default and turn downstream scores into `NaN`.
 * The nested `literal` section is delegated to `mergeLiteralWeights`.
 *
 * @param {object} [partial] - Per-section overrides; sections and keys may be omitted.
 * @returns {object} `DEFAULT_RANKING_WEIGHTS` itself when `partial` is falsy,
 *   otherwise a newly built merged object.
 */
function mergeRankingWeights(partial) {
  if (!partial) {
    return DEFAULT_RANKING_WEIGHTS;
  }
  // Copy the default section, then apply only override keys with a real value.
  const overlay = (base, overrides) => {
    const merged = { ...base };
    for (const [key, value] of Object.entries(overrides ?? {})) {
      if (value !== undefined && value !== null) {
        merged[key] = value;
      }
    }
    return merged;
  };
  return {
    discriminative: overlay(DEFAULT_RANKING_WEIGHTS.discriminative, partial.discriminative),
    typescript: overlay(DEFAULT_RANKING_WEIGHTS.typescript, partial.typescript),
    language: overlay(DEFAULT_RANKING_WEIGHTS.language, partial.language),
    markdown: overlay(DEFAULT_RANKING_WEIGHTS.markdown, partial.markdown),
    json: overlay(DEFAULT_RANKING_WEIGHTS.json, partial.json),
    literal: mergeLiteralWeights(DEFAULT_RANKING_WEIGHTS.literal, partial.literal)
  };
}
|
|
82
|
+
// Default weight tables used for ranking. The bindings are declared up front
// and populated when init_rankingWeights() runs.
var DEFAULT_DISCRIMINATIVE_WEIGHTS;
var DEFAULT_LITERAL_BOOST_WEIGHTS;
var DEFAULT_RANKING_WEIGHTS;
var init_rankingWeights = __esm(() => {
  // Destructured by scoreDiscriminativeTerms as { boostCap, penaltyMax, penaltyFloor }.
  DEFAULT_DISCRIMINATIVE_WEIGHTS = {
    boostCap: 0.1,
    penaltyMax: 0.16,
    penaltyFloor: 0.72
  };

  // Looked up as multipliers[matchType][confidence] by calculateLiteralMultiplier.
  const literalMultipliers = {
    definition: { high: 2.5, medium: 2, low: 1.5 },
    reference: { high: 2, medium: 1.5, low: 1.3 },
    import: { high: 1.5, medium: 1.3, low: 1.1 }
  };
  const literalVocabulary = {
    baseMultiplier: 1.3,
    perWordBonus: 0.1,
    maxVocabularyBonus: 0.5,
    minWordsForMatch: 2
  };
  DEFAULT_LITERAL_BOOST_WEIGHTS = {
    baseScore: 0.5,
    multipliers: literalMultipliers,
    vocabulary: literalVocabulary
  };

  // Per-module blend weights; the TypeScript module combines
  // semantic / bm25 / vocab scores with these factors.
  const typescriptWeights = {
    semantic: 0.43,
    bm25: 0.42,
    vocab: 0.15,
    vocabBypassThreshold: 0.4
  };
  const languageWeights = {
    semantic: 0.7,
    bm25: 0.3
  };
  const markdownWeights = {
    semantic: 0.62,
    bm25: 0.33,
    docIntentBoost: 0.03,
    headingPhraseCoverageMin: 0.25,
    headingPhraseCoverageSpan: 0.75
  };
  const jsonWeights = {
    bm25: 0.4,
    literalBaseWeight: 0.6
  };

  // The `discriminative` and `literal` sections share identity with the
  // standalone default objects assigned above.
  DEFAULT_RANKING_WEIGHTS = {
    discriminative: DEFAULT_DISCRIMINATIVE_WEIGHTS,
    typescript: typescriptWeights,
    language: languageWeights,
    markdown: markdownWeights,
    json: jsonWeights,
    literal: DEFAULT_LITERAL_BOOST_WEIGHTS
  };
});
|
|
29
129
|
|
|
@@ -171,6 +271,7 @@ var init_lexicon = __esm(() => {
|
|
|
171
271
|
// src/domain/entities/index.ts
|
|
172
272
|
var init_entities = __esm(() => {
|
|
173
273
|
init_searchResult();
|
|
274
|
+
init_rankingWeights();
|
|
174
275
|
init_config();
|
|
175
276
|
init_literal();
|
|
176
277
|
init_lexicon();
|
|
@@ -317,6 +418,9 @@ class BM25Index {
|
|
|
317
418
|
return 0;
|
|
318
419
|
return Math.log(1 + (this.totalDocs - docFreq + 0.5) / (docFreq + 0.5));
|
|
319
420
|
}
|
|
421
|
+
getInverseDocumentFrequency(term) {
|
|
422
|
+
return this.idf(term.toLowerCase());
|
|
423
|
+
}
|
|
320
424
|
score(tokens, queryTerms) {
|
|
321
425
|
const docLength = tokens.length;
|
|
322
426
|
let score = 0;
|
|
@@ -2169,13 +2273,13 @@ var init_modelCache = __esm(() => {
|
|
|
2169
2273
|
init_modelCatalog();
|
|
2170
2274
|
});
|
|
2171
2275
|
|
|
2172
|
-
// src/infrastructure/embeddings/
|
|
2276
|
+
// src/infrastructure/embeddings/huggingfaceEmbeddingProvider.ts
|
|
2173
2277
|
import {
|
|
2174
2278
|
pipeline,
|
|
2175
2279
|
env
|
|
2176
|
-
} from "@
|
|
2280
|
+
} from "@huggingface/transformers";
|
|
2177
2281
|
|
|
2178
|
-
class
|
|
2282
|
+
class HuggingFaceTransformersEmbeddingProvider {
|
|
2179
2283
|
extractor = null;
|
|
2180
2284
|
config;
|
|
2181
2285
|
isInitializing = false;
|
|
@@ -2183,7 +2287,7 @@ class XenovaTransformersEmbeddingProvider {
|
|
|
2183
2287
|
constructor(config) {
|
|
2184
2288
|
this.config = {
|
|
2185
2289
|
model: config?.model ?? "bge-small-en-v1.5",
|
|
2186
|
-
runtime: config?.runtime ?? "
|
|
2290
|
+
runtime: config?.runtime ?? "huggingface",
|
|
2187
2291
|
showProgress: config?.showProgress ?? false,
|
|
2188
2292
|
logger: config?.logger
|
|
2189
2293
|
};
|
|
@@ -2307,7 +2411,7 @@ class XenovaTransformersEmbeddingProvider {
|
|
|
2307
2411
|
}
|
|
2308
2412
|
}
|
|
2309
2413
|
var BATCH_SIZE = 32;
|
|
2310
|
-
var
|
|
2414
|
+
var init_huggingfaceEmbeddingProvider = __esm(() => {
|
|
2311
2415
|
init_embeddingPaths();
|
|
2312
2416
|
init_modelCatalog();
|
|
2313
2417
|
init_modelCache();
|
|
@@ -2315,13 +2419,18 @@ var init_xenovaEmbeddingProvider = __esm(() => {
|
|
|
2315
2419
|
env.allowLocalModels = true;
|
|
2316
2420
|
});
|
|
2317
2421
|
|
|
2318
|
-
// src/infrastructure/embeddings/
|
|
2422
|
+
// src/infrastructure/embeddings/xenovaEmbeddingProvider.ts
|
|
2423
|
+
var exports_xenovaEmbeddingProvider = {};
|
|
2424
|
+
__export(exports_xenovaEmbeddingProvider, {
|
|
2425
|
+
XenovaTransformersEmbeddingProvider: () => XenovaTransformersEmbeddingProvider,
|
|
2426
|
+
TransformersEmbeddingProvider: () => TransformersEmbeddingProvider
|
|
2427
|
+
});
|
|
2319
2428
|
import {
|
|
2320
2429
|
pipeline as pipeline2,
|
|
2321
2430
|
env as env2
|
|
2322
|
-
} from "@
|
|
2431
|
+
} from "@xenova/transformers";
|
|
2323
2432
|
|
|
2324
|
-
class
|
|
2433
|
+
class XenovaTransformersEmbeddingProvider {
|
|
2325
2434
|
extractor = null;
|
|
2326
2435
|
config;
|
|
2327
2436
|
isInitializing = false;
|
|
@@ -2329,7 +2438,7 @@ class HuggingFaceTransformersEmbeddingProvider {
|
|
|
2329
2438
|
constructor(config) {
|
|
2330
2439
|
this.config = {
|
|
2331
2440
|
model: config?.model ?? "bge-small-en-v1.5",
|
|
2332
|
-
runtime: config?.runtime ?? "
|
|
2441
|
+
runtime: config?.runtime ?? "xenova",
|
|
2333
2442
|
showProgress: config?.showProgress ?? false,
|
|
2334
2443
|
logger: config?.logger
|
|
2335
2444
|
};
|
|
@@ -2452,29 +2561,30 @@ class HuggingFaceTransformersEmbeddingProvider {
|
|
|
2452
2561
|
this.extractor = null;
|
|
2453
2562
|
}
|
|
2454
2563
|
}
|
|
2455
|
-
var BATCH_SIZE2 = 32;
|
|
2456
|
-
var
|
|
2564
|
+
var BATCH_SIZE2 = 32, TransformersEmbeddingProvider;
|
|
2565
|
+
var init_xenovaEmbeddingProvider = __esm(() => {
|
|
2457
2566
|
init_embeddingPaths();
|
|
2458
2567
|
init_modelCatalog();
|
|
2459
2568
|
init_modelCache();
|
|
2460
2569
|
env2.cacheDir = RAGGREP_MODEL_CACHE_DIR;
|
|
2461
2570
|
env2.allowLocalModels = true;
|
|
2571
|
+
TransformersEmbeddingProvider = XenovaTransformersEmbeddingProvider;
|
|
2462
2572
|
});
|
|
2463
2573
|
|
|
2464
2574
|
// src/infrastructure/embeddings/embeddingProviderFactory.ts
|
|
2465
2575
|
/**
 * Pick the embedding runtime named by a provider configuration.
 *
 * @param {object} config - Provider configuration; `runtime` is optional.
 * @returns {string} `config.runtime`, or "huggingface" when it is null/undefined.
 */
function resolveRuntime(config) {
  const { runtime } = config;
  return runtime ?? "huggingface";
}
|
|
2468
|
-
function createEmbeddingProvider(config) {
|
|
2578
|
+
async function createEmbeddingProvider(config) {
|
|
2469
2579
|
const runtime = resolveRuntime(config);
|
|
2470
2580
|
if (runtime === "huggingface") {
|
|
2471
2581
|
return new HuggingFaceTransformersEmbeddingProvider(config);
|
|
2472
2582
|
}
|
|
2473
|
-
|
|
2583
|
+
const { XenovaTransformersEmbeddingProvider: XenovaTransformersEmbeddingProvider2 } = await Promise.resolve().then(() => (init_xenovaEmbeddingProvider(), exports_xenovaEmbeddingProvider));
|
|
2584
|
+
return new XenovaTransformersEmbeddingProvider2(config);
|
|
2474
2585
|
}
|
|
2475
2586
|
var init_embeddingProviderFactory = __esm(() => {
|
|
2476
2587
|
init_huggingfaceEmbeddingProvider();
|
|
2477
|
-
init_xenovaEmbeddingProvider();
|
|
2478
2588
|
});
|
|
2479
2589
|
|
|
2480
2590
|
// src/infrastructure/embeddings/globalEmbeddings.ts
|
|
@@ -2499,7 +2609,7 @@ function getEmbeddingConfig() {
|
|
|
2499
2609
|
}
|
|
2500
2610
|
async function ensureGlobalProvider() {
|
|
2501
2611
|
if (!globalProvider) {
|
|
2502
|
-
globalProvider = createEmbeddingProvider(globalConfig);
|
|
2612
|
+
globalProvider = await createEmbeddingProvider(globalConfig);
|
|
2503
2613
|
await globalProvider.initialize?.(globalConfig);
|
|
2504
2614
|
}
|
|
2505
2615
|
return globalProvider;
|
|
@@ -2529,13 +2639,193 @@ var init_globalEmbeddings = __esm(() => {
|
|
|
2529
2639
|
var init_embeddings = __esm(() => {
|
|
2530
2640
|
init_modelCatalog();
|
|
2531
2641
|
init_embeddingPaths();
|
|
2532
|
-
init_xenovaEmbeddingProvider();
|
|
2533
|
-
init_xenovaEmbeddingProvider();
|
|
2534
2642
|
init_huggingfaceEmbeddingProvider();
|
|
2535
2643
|
init_embeddingProviderFactory();
|
|
2536
2644
|
init_globalEmbeddings();
|
|
2537
2645
|
});
|
|
2538
2646
|
|
|
2647
|
+
// src/domain/services/discriminativeTerms.ts
|
|
2648
|
+
/**
 * Median of an array that is already sorted in ascending order.
 *
 * @param {number[]} sorted - Ascending values; the function does not sort them.
 * @returns {number} 0 for an empty array, the middle element for odd lengths,
 *   and the mean of the two middle elements for even lengths.
 */
function medianSorted(sorted) {
  const count = sorted.length;
  if (count === 0) {
    return 0;
  }
  const upperMiddle = Math.trunc(count / 2);
  if (count % 2 === 1) {
    return sorted[upperMiddle];
  }
  const lowerMiddle = upperMiddle - 1;
  return (sorted[lowerMiddle] + sorted[upperMiddle]) / 2;
}
|
|
2655
|
+
/**
 * Decide whether a salient query term is present in a chunk.
 *
 * A hit is an exact token match, a substring match against `haystack`, or,
 * for terms and tokens of at least PREFIX_MATCH_MIN_LEN characters, a prefix
 * relationship in either direction between the term and some chunk token.
 *
 * @param {string} term - Query term to look for.
 * @param {string} haystack - Chunk text searched by substring.
 * @param {Set<string>} tokenSet - Tokens of the chunk.
 * @returns {boolean} True when the term counts as present.
 */
function salientTermHitsChunk(term, haystack, tokenSet) {
  const directHit = tokenSet.has(term) || haystack.includes(term);
  if (directHit) {
    return true;
  }
  // Very short terms are not eligible for the looser prefix comparison.
  const prefixEligible = term.length >= PREFIX_MATCH_MIN_LEN;
  if (!prefixEligible) {
    return false;
  }
  return [...tokenSet].some(
    (token) => token.length >= PREFIX_MATCH_MIN_LEN && (term.startsWith(token) || token.startsWith(term))
  );
}
|
|
2671
|
+
/**
 * Score how well a chunk covers the most discriminative terms of a query.
 *
 * Query terms with a positive IDF are collected; those at or above the median
 * IDF are treated as "salient". Coverage is the IDF-weighted share of salient
 * terms found in the chunk (name + text). The boost grows with coverage and
 * the penalty factor shrinks with missing coverage, never below `penaltyFloor`.
 *
 * @param {object} bm25Index - Must expose `getInverseDocumentFrequency(term)`.
 * @param {string} query - Raw query text.
 * @param {string} chunkText - Chunk body.
 * @param {string} [chunkName] - Optional chunk name, searched together with the body.
 * @param {object} [weights] - `{ boostCap, penaltyMax, penaltyFloor }`.
 * @returns {{boost:number, penaltyFactor:number, salientTerms:string[],
 *   matchedSalient:string[], missingSalient:string[], salientCoverage:number}}
 *   A neutral result (boost 0, factor 1, coverage 1) when the query has no
 *   term with positive IDF.
 */
function scoreDiscriminativeTerms(bm25Index, query, chunkText, chunkName, weights = DEFAULT_DISCRIMINATIVE_WEIGHTS) {
  function neutralResult() {
    return {
      boost: 0,
      penaltyFactor: 1,
      salientTerms: [],
      matchedSalient: [],
      missingSalient: [],
      salientCoverage: 1
    };
  }

  const queryTerms = Array.from(new Set(tokenize(query)));
  if (queryTerms.length === 0) {
    return neutralResult();
  }

  // Keep only the terms the index assigns a positive IDF.
  const weighted = queryTerms
    .map((term) => ({ term, idf: bm25Index.getInverseDocumentFrequency(term) }))
    .filter((entry) => entry.idf > 0);
  if (weighted.length === 0) {
    return neutralResult();
  }

  // Salient terms are those at or above the median IDF of the query.
  const ascendingIdf = weighted.map((entry) => entry.idf).sort((left, right) => left - right);
  const cutoff = medianSorted(ascendingIdf);
  const idfByTerm = new Map(
    weighted.filter((entry) => entry.idf >= cutoff).map((entry) => [entry.term, entry.idf])
  );
  const salientTerms = Array.from(idfByTerm.keys());
  let totalWeight = 0;
  idfByTerm.forEach((idf) => {
    totalWeight += idf;
  });

  const haystack = [chunkName ?? "", chunkText].join("\n").toLowerCase();
  const tokenSet = new Set(tokenize(chunkName ? `${chunkName}\n${chunkText}` : chunkText));

  const matchedSalient = salientTerms.filter((term) => salientTermHitsChunk(term, haystack, tokenSet));
  const matchedLookup = new Set(matchedSalient);
  const missingSalient = salientTerms.filter((term) => !matchedLookup.has(term));
  const matchedWeight = matchedSalient.reduce((sum, term) => sum + idfByTerm.get(term), 0);

  const salientCoverage = totalWeight > 0 ? matchedWeight / totalWeight : 1;
  const rawPenalty = 1 - weights.penaltyMax * (1 - salientCoverage);
  return {
    boost: weights.boostCap * salientCoverage,
    penaltyFactor: rawPenalty < weights.penaltyFloor ? weights.penaltyFloor : rawPenalty,
    salientTerms,
    matchedSalient,
    missingSalient,
    salientCoverage
  };
}
|
|
2741
|
+
var PREFIX_MATCH_MIN_LEN = 4;
|
|
2742
|
+
var init_discriminativeTerms = __esm(() => {
|
|
2743
|
+
init_rankingWeights();
|
|
2744
|
+
});
|
|
2745
|
+
|
|
2746
|
+
// src/domain/services/matchScales.ts
|
|
2747
|
+
/**
 * Map a cosine similarity onto a 0..1 scale.
 *
 * @param {number} cosine - Cosine similarity; -1 maps to 0 and +1 maps to 1.
 * @returns {number} `(cosine + 1) / 2` passed through `clamp01`.
 */
function semanticPctFromCosine(cosine) {
  const shifted = cosine + 1;
  const halved = shifted / 2;
  return clamp01(halved);
}
|
|
2750
|
+
/**
 * Clamp a value into the closed interval [0, 1].
 *
 * @param {number} x - Value to clamp.
 * @returns {number} 0 for NaN, ±Infinity and non-number inputs; otherwise `x`
 *   limited to the range 0..1.
 */
function clamp01(x) {
  // Number.isFinite rejects NaN, both infinities and every non-number value.
  if (!Number.isFinite(x)) {
    return 0;
  }
  if (x <= 0) {
    return 0;
  }
  if (x >= 1) {
    return 1;
  }
  return x;
}
|
|
2755
|
+
/**
 * Read a numeric field from a context object.
 *
 * @param {object} ctx - Object to read from.
 * @param {string} key - Property name.
 * @returns {number} The property when it is a finite number, otherwise 0.
 */
function num(ctx, key) {
  const candidate = ctx[key];
  if (typeof candidate !== "number") {
    return 0;
  }
  return Number.isFinite(candidate) ? candidate : 0;
}
|
|
2759
|
+
/**
 * Sum the additive structural boosts recorded on a result context.
 *
 * @param {object} ctx - Result context; the fields `pathBoost`, `fileTypeBoost`,
 *   `chunkTypeBoost` and `exportBoost` are read through `num`, so missing or
 *   non-numeric fields count as 0.
 * @returns {number} The sum of the four boosts.
 */
function additiveStructuredBoost(ctx) {
  const boostKeys = ["pathBoost", "fileTypeBoost", "chunkTypeBoost", "exportBoost"];
  // No initial value: keeps the left-to-right addition identical to a + b + c + d.
  return boostKeys.map((key) => num(ctx, key)).reduce((total, value) => total + value);
}
|
|
2762
|
+
/**
 * Return a copy of a search result annotated with two 0..1 display scales:
 * `semanticMatch` (derived from the cosine similarity in the context) and
 * `structuredMatch` (derived from lexical scores and structural boosts).
 *
 * The formula depends on `result.moduleId`:
 *   - "language/typescript": bm25/vocab blended by `rw.typescript` weights.
 *   - other "language/*":     raw bm25 plus capped boosts.
 *   - "docs/markdown":        `rw.markdown.bm25`-weighted bm25 plus doc/heading boosts.
 *   - "core":                 0.6 * bm25 + 0.4 * symbol score, no semantic part.
 *   - "data/json":            bm25 when above 0.02, else derived from the literal multiplier.
 *   - anything else:          the result's own score, clamped.
 *
 * A missing or non-string `moduleId` is treated as "anything else" instead of
 * throwing on the `startsWith` check.
 *
 * @param {object} result - Search result with `score`, optional `moduleId` and `context`.
 * @param {object} rw - Merged ranking weights; `rw.typescript` and `rw.markdown`
 *   are read for their respective modules.
 * @returns {object} Shallow copy of `result` with `semanticMatch` and `structuredMatch` added.
 */
function attachMatchScales(result, rw) {
  const ctx = result.context ?? {};
  // Guard: results without a usable module id fall through to the generic branch.
  const mid = typeof result.moduleId === "string" ? result.moduleId : "";
  let semanticMatch = 0;
  let structuredMatch = 0;
  if (mid === "language/typescript") {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    const vocab = num(ctx, "vocabScore");
    const phraseCov = num(ctx, "phraseCoverage");
    const tw = rw.typescript;
    semanticMatch = semanticPctFromCosine(cos);
    // The tiny epsilon keeps the division defined when both weights are zero.
    const denom = tw.bm25 + tw.vocab + 0.000000001;
    const lexCore = (tw.bm25 * bm25 + tw.vocab * vocab) / denom;
    structuredMatch = clamp01(lexCore + Math.min(0.35, additiveStructuredBoost(ctx)) + Math.min(0.15, phraseCov * 0.25));
  } else if (mid.startsWith("language/")) {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    semanticMatch = semanticPctFromCosine(cos);
    structuredMatch = clamp01(bm25 + Math.min(0.3, additiveStructuredBoost(ctx)) + Math.min(0.12, num(ctx, "phraseCoverage") * 0.2));
  } else if (mid === "docs/markdown") {
    const cos = num(ctx, "semanticScore");
    const bm25 = num(ctx, "bm25Score");
    const docBoost = num(ctx, "docBoost");
    const headingBoost = num(ctx, "headingBoost");
    const phraseCov = num(ctx, "phraseCoverage");
    const mw = rw.markdown;
    semanticMatch = semanticPctFromCosine(cos);
    structuredMatch = clamp01(mw.bm25 * bm25 + docBoost + headingBoost + Math.min(0.2, phraseCov * 0.15));
  } else if (mid === "core") {
    semanticMatch = 0;
    const nBm = num(ctx, "bm25Score");
    const sym = num(ctx, "symbolScore");
    structuredMatch = clamp01(0.6 * nBm + 0.4 * sym);
  } else if (mid === "data/json") {
    semanticMatch = 0;
    const bm25 = num(ctx, "bm25Score");
    const litM = num(ctx, "literalMultiplier");
    // With a negligible bm25 score, fall back to a value derived from the literal multiplier.
    structuredMatch = clamp01(bm25 > 0.02 ? bm25 : Math.min(1, 0.35 + Math.min(0.65, (litM - 1) * 0.35)));
  } else {
    semanticMatch = 0;
    structuredMatch = clamp01(result.score);
  }
  return { ...result, semanticMatch, structuredMatch };
}
|
|
2807
|
+
/**
 * Sort comparator for search results (descending), selected by `rankBy`.
 *
 *   - "combined": compare by `score` only.
 *   - "semantic": `semanticMatch`, then `structuredMatch`, then `score`.
 *   - any other value: `structuredMatch`, then `semanticMatch`, then `score`.
 *
 * Differences of at most 1e-9 on the match scales count as ties. Missing
 * match values are treated as 0.
 *
 * @param {object} a - First result.
 * @param {object} b - Second result.
 * @param {string} rankBy - Ranking mode.
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function compareSearchResultsByRankBy(a, b, rankBy) {
  if (rankBy === "combined") {
    return b.score - a.score;
  }
  const TIE_TOLERANCE = 0.000000001;
  const semanticGap = (b.semanticMatch ?? 0) - (a.semanticMatch ?? 0);
  const structuredGap = (b.structuredMatch ?? 0) - (a.structuredMatch ?? 0);
  const gapsByPriority = rankBy === "semantic" ? [semanticGap, structuredGap] : [structuredGap, semanticGap];
  for (const gap of gapsByPriority) {
    if (Math.abs(gap) > TIE_TOLERANCE) {
      return gap;
    }
  }
  return b.score - a.score;
}
|
|
2828
|
+
|
|
2539
2829
|
// src/domain/services/keywords.ts
|
|
2540
2830
|
function extractKeywords(content, name, maxKeywords = 50) {
|
|
2541
2831
|
const keywords = new Set;
|
|
@@ -3194,16 +3484,16 @@ var init_literalExtractor = __esm(() => {
|
|
|
3194
3484
|
});
|
|
3195
3485
|
|
|
3196
3486
|
// src/domain/services/literalScorer.ts
|
|
3197
|
-
function calculateLiteralMultiplier(matchType, confidence) {
|
|
3198
|
-
return
|
|
3487
|
+
/**
 * Look up the score multiplier for a literal match.
 *
 * @param {string} matchType - Key into `weights.multipliers` (the defaults
 *   define "definition", "reference" and "import").
 * @param {string} confidence - Key into that entry (the defaults define
 *   "high", "medium" and "low").
 * @param {object} [weights] - Literal weights; defaults to DEFAULT_LW.
 * @returns {number} The configured multiplier for the given pair.
 */
function calculateLiteralMultiplier(matchType, confidence, weights = DEFAULT_LW) {
  const { multipliers } = weights;
  const byConfidence = multipliers[matchType];
  return byConfidence[confidence];
}
|
|
3200
|
-
function calculateMaxMultiplier(matches) {
|
|
3490
|
+
function calculateMaxMultiplier(matches, weights = DEFAULT_LW) {
|
|
3201
3491
|
if (!matches || matches.length === 0) {
|
|
3202
3492
|
return 1;
|
|
3203
3493
|
}
|
|
3204
|
-
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence)));
|
|
3494
|
+
return Math.max(...matches.map((m) => calculateLiteralMultiplier(m.indexedLiteral.matchType, m.queryLiteral.confidence, weights)));
|
|
3205
3495
|
}
|
|
3206
|
-
function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
3496
|
+
function calculateLiteralContribution(matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3207
3497
|
if (!matches || matches.length === 0) {
|
|
3208
3498
|
return {
|
|
3209
3499
|
multiplier: 1,
|
|
@@ -3214,7 +3504,7 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3214
3504
|
let bestMatch = null;
|
|
3215
3505
|
let bestMultiplier = 0;
|
|
3216
3506
|
for (const match of matches) {
|
|
3217
|
-
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence);
|
|
3507
|
+
const mult = calculateLiteralMultiplier(match.indexedLiteral.matchType, match.queryLiteral.confidence, weights);
|
|
3218
3508
|
if (mult > bestMultiplier) {
|
|
3219
3509
|
bestMultiplier = mult;
|
|
3220
3510
|
bestMatch = match;
|
|
@@ -3228,32 +3518,20 @@ function calculateLiteralContribution(matches, hasSemanticOrBm25) {
|
|
|
3228
3518
|
matchCount: matches.length
|
|
3229
3519
|
};
|
|
3230
3520
|
}
|
|
3231
|
-
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25) {
|
|
3521
|
+
function applyLiteralBoost(baseScore, matches, hasSemanticOrBm25, weights = DEFAULT_LW) {
|
|
3232
3522
|
if (!matches || matches.length === 0) {
|
|
3233
3523
|
return baseScore;
|
|
3234
3524
|
}
|
|
3235
|
-
const multiplier = calculateMaxMultiplier(matches);
|
|
3525
|
+
const multiplier = calculateMaxMultiplier(matches, weights);
|
|
3236
3526
|
if (!hasSemanticOrBm25) {
|
|
3237
|
-
return
|
|
3527
|
+
return weights.baseScore * multiplier;
|
|
3238
3528
|
}
|
|
3239
3529
|
return baseScore * multiplier;
|
|
3240
3530
|
}
|
|
3241
|
-
var
|
|
3531
|
+
var DEFAULT_LW;
|
|
3242
3532
|
var init_literalScorer = __esm(() => {
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
MULTIPLIERS: {
|
|
3246
|
-
definition: { high: 2.5, medium: 2, low: 1.5 },
|
|
3247
|
-
reference: { high: 2, medium: 1.5, low: 1.3 },
|
|
3248
|
-
import: { high: 1.5, medium: 1.3, low: 1.1 }
|
|
3249
|
-
},
|
|
3250
|
-
VOCABULARY: {
|
|
3251
|
-
BASE_MULTIPLIER: 1.3,
|
|
3252
|
-
PER_WORD_BONUS: 0.1,
|
|
3253
|
-
MAX_VOCABULARY_BONUS: 0.5,
|
|
3254
|
-
MIN_WORDS_FOR_MATCH: 2
|
|
3255
|
-
}
|
|
3256
|
-
};
|
|
3533
|
+
init_rankingWeights();
|
|
3534
|
+
DEFAULT_LW = DEFAULT_RANKING_WEIGHTS.literal;
|
|
3257
3535
|
});
|
|
3258
3536
|
|
|
3259
3537
|
// src/domain/services/lexicon.ts
|
|
@@ -4208,6 +4486,7 @@ var init_chunkContext = __esm(() => {
|
|
|
4208
4486
|
|
|
4209
4487
|
// src/domain/services/index.ts
|
|
4210
4488
|
var init_services = __esm(() => {
|
|
4489
|
+
init_discriminativeTerms();
|
|
4211
4490
|
init_keywords();
|
|
4212
4491
|
init_queryIntent();
|
|
4213
4492
|
init_queryLiteralParser();
|
|
@@ -5031,6 +5310,9 @@ class TypeScriptModule {
|
|
|
5031
5310
|
minScore = DEFAULT_MIN_SCORE2,
|
|
5032
5311
|
filePatterns
|
|
5033
5312
|
} = options;
|
|
5313
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
5314
|
+
const tw = rw.typescript;
|
|
5315
|
+
const lt = rw.literal;
|
|
5034
5316
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
5035
5317
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
5036
5318
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -5137,17 +5419,19 @@ class TypeScriptModule {
|
|
|
5137
5419
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5138
5420
|
const exportBoost = calculateExportBoost(chunk);
|
|
5139
5421
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5140
|
-
const baseScore =
|
|
5422
|
+
const baseScore = tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore;
|
|
5141
5423
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
5142
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
5143
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
5424
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
5425
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
5144
5426
|
const finalScore = boostedScore + additiveBoost;
|
|
5427
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
5428
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5145
5429
|
processedChunkIds.add(chunk.id);
|
|
5146
|
-
if (
|
|
5430
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0 || vocabScore > tw.vocabBypassThreshold || phraseMatch.isSignificant) {
|
|
5147
5431
|
results.push({
|
|
5148
5432
|
filepath,
|
|
5149
5433
|
chunk,
|
|
5150
|
-
score:
|
|
5434
|
+
score: adjustedScore,
|
|
5151
5435
|
moduleId: this.id,
|
|
5152
5436
|
context: {
|
|
5153
5437
|
semanticScore,
|
|
@@ -5159,6 +5443,10 @@ class TypeScriptModule {
|
|
|
5159
5443
|
fileTypeBoost,
|
|
5160
5444
|
chunkTypeBoost,
|
|
5161
5445
|
exportBoost,
|
|
5446
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
5447
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
5448
|
+
discriminativeBoost: disc.boost,
|
|
5449
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5162
5450
|
literalMultiplier: literalContribution.multiplier,
|
|
5163
5451
|
literalMatchType: literalContribution.bestMatchType,
|
|
5164
5452
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5211,15 +5499,17 @@ class TypeScriptModule {
|
|
|
5211
5499
|
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
5212
5500
|
const exportBoost = calculateExportBoost(chunk);
|
|
5213
5501
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost + phraseMatch.boost;
|
|
5214
|
-
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false);
|
|
5215
|
-
const baseScore = semanticScore > 0 ?
|
|
5216
|
-
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0);
|
|
5502
|
+
const literalContribution = calculateLiteralContribution(chunkLiteralMatches, false, lt);
|
|
5503
|
+
const baseScore = semanticScore > 0 ? tw.semantic * semanticScore + tw.bm25 * bm25Score + tw.vocab * vocabScore : lt.baseScore;
|
|
5504
|
+
const boostedScore = applyLiteralBoost(baseScore, chunkLiteralMatches, semanticScore > 0, lt);
|
|
5217
5505
|
const finalScore = boostedScore + additiveBoost;
|
|
5506
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
5507
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
5218
5508
|
processedChunkIds.add(chunkId);
|
|
5219
5509
|
results.push({
|
|
5220
5510
|
filepath,
|
|
5221
5511
|
chunk,
|
|
5222
|
-
score:
|
|
5512
|
+
score: adjustedScore,
|
|
5223
5513
|
moduleId: this.id,
|
|
5224
5514
|
context: {
|
|
5225
5515
|
semanticScore,
|
|
@@ -5231,6 +5521,10 @@ class TypeScriptModule {
|
|
|
5231
5521
|
fileTypeBoost,
|
|
5232
5522
|
chunkTypeBoost,
|
|
5233
5523
|
exportBoost,
|
|
5524
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
5525
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
5526
|
+
discriminativeBoost: disc.boost,
|
|
5527
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
5234
5528
|
literalMultiplier: literalContribution.multiplier,
|
|
5235
5529
|
literalMatchType: literalContribution.bestMatchType,
|
|
5236
5530
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -5267,13 +5561,14 @@ class TypeScriptModule {
|
|
|
5267
5561
|
return references;
|
|
5268
5562
|
}
|
|
5269
5563
|
}
|
|
5270
|
-
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10,
|
|
5564
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, TYPESCRIPT_EXTENSIONS, supportsFile;
|
|
5271
5565
|
var init_typescript = __esm(() => {
|
|
5272
5566
|
init_embeddings();
|
|
5273
5567
|
init_services();
|
|
5274
5568
|
init_config2();
|
|
5275
5569
|
init_parseCode();
|
|
5276
5570
|
init_storage();
|
|
5571
|
+
init_entities();
|
|
5277
5572
|
TYPESCRIPT_EXTENSIONS = [
|
|
5278
5573
|
".ts",
|
|
5279
5574
|
".tsx",
|
|
@@ -6386,6 +6681,9 @@ class PythonModule {
|
|
|
6386
6681
|
minScore = DEFAULT_MIN_SCORE3,
|
|
6387
6682
|
filePatterns
|
|
6388
6683
|
} = options;
|
|
6684
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
6685
|
+
const lw = rw.language;
|
|
6686
|
+
const lt = rw.literal;
|
|
6389
6687
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
6390
6688
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
6391
6689
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -6478,17 +6776,19 @@ class PythonModule {
|
|
|
6478
6776
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
6479
6777
|
const exportBoost = calculateExportBoost2(chunk);
|
|
6480
6778
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
6481
|
-
const baseScore =
|
|
6779
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
6482
6780
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
6483
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
6484
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
6781
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
6782
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
6485
6783
|
const finalScore = boostedScore + additiveBoost;
|
|
6784
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6785
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
6486
6786
|
processedChunkIds.add(chunk.id);
|
|
6487
|
-
if (
|
|
6787
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
6488
6788
|
results.push({
|
|
6489
6789
|
filepath,
|
|
6490
6790
|
chunk,
|
|
6491
|
-
score:
|
|
6791
|
+
score: adjustedScore,
|
|
6492
6792
|
moduleId: this.id,
|
|
6493
6793
|
context: {
|
|
6494
6794
|
semanticScore,
|
|
@@ -6497,6 +6797,10 @@ class PythonModule {
|
|
|
6497
6797
|
fileTypeBoost,
|
|
6498
6798
|
chunkTypeBoost,
|
|
6499
6799
|
exportBoost,
|
|
6800
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6801
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6802
|
+
discriminativeBoost: disc.boost,
|
|
6803
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
6500
6804
|
literalMultiplier: literalContribution.multiplier,
|
|
6501
6805
|
literalMatchType: literalContribution.bestMatchType,
|
|
6502
6806
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -6531,15 +6835,17 @@ class PythonModule {
|
|
|
6531
6835
|
const chunkTypeBoost = calculateChunkTypeBoost2(chunk);
|
|
6532
6836
|
const exportBoost = calculateExportBoost2(chunk);
|
|
6533
6837
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
6534
|
-
const literalContribution = calculateLiteralContribution(matches, false);
|
|
6535
|
-
const baseScore = semanticScore > 0 ?
|
|
6536
|
-
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0);
|
|
6838
|
+
const literalContribution = calculateLiteralContribution(matches, false, lt);
|
|
6839
|
+
const baseScore = semanticScore > 0 ? lw.semantic * semanticScore + lw.bm25 * bm25Score : lt.baseScore;
|
|
6840
|
+
const boostedScore = applyLiteralBoost(baseScore, matches, semanticScore > 0, lt);
|
|
6537
6841
|
const finalScore = boostedScore + additiveBoost;
|
|
6842
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
6843
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
6538
6844
|
processedChunkIds.add(chunkId);
|
|
6539
6845
|
results.push({
|
|
6540
6846
|
filepath,
|
|
6541
6847
|
chunk,
|
|
6542
|
-
score:
|
|
6848
|
+
score: adjustedScore,
|
|
6543
6849
|
moduleId: this.id,
|
|
6544
6850
|
context: {
|
|
6545
6851
|
semanticScore,
|
|
@@ -6548,6 +6854,10 @@ class PythonModule {
|
|
|
6548
6854
|
fileTypeBoost,
|
|
6549
6855
|
chunkTypeBoost,
|
|
6550
6856
|
exportBoost,
|
|
6857
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
6858
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
6859
|
+
discriminativeBoost: disc.boost,
|
|
6860
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
6551
6861
|
literalMultiplier: literalContribution.multiplier,
|
|
6552
6862
|
literalMatchType: literalContribution.bestMatchType,
|
|
6553
6863
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -6560,13 +6870,14 @@ class PythonModule {
|
|
|
6560
6870
|
return results.slice(0, topK);
|
|
6561
6871
|
}
|
|
6562
6872
|
}
|
|
6563
|
-
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10,
|
|
6873
|
+
var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, PYTHON_EXTENSIONS, supportsFile2;
|
|
6564
6874
|
var init_python = __esm(() => {
|
|
6565
6875
|
init_embeddings();
|
|
6566
6876
|
init_services();
|
|
6567
6877
|
init_config2();
|
|
6568
6878
|
init_storage();
|
|
6569
6879
|
init_parsing();
|
|
6880
|
+
init_entities();
|
|
6570
6881
|
PYTHON_EXTENSIONS = [".py", ".pyw"];
|
|
6571
6882
|
supportsFile2 = isPythonFile;
|
|
6572
6883
|
});
|
|
@@ -6918,6 +7229,9 @@ class GoModule {
|
|
|
6918
7229
|
minScore = DEFAULT_MIN_SCORE4,
|
|
6919
7230
|
filePatterns
|
|
6920
7231
|
} = options;
|
|
7232
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7233
|
+
const lw = rw.language;
|
|
7234
|
+
const lt = rw.literal;
|
|
6921
7235
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
6922
7236
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
6923
7237
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7010,17 +7324,19 @@ class GoModule {
|
|
|
7010
7324
|
const chunkTypeBoost = calculateChunkTypeBoost3(chunk);
|
|
7011
7325
|
const exportBoost = calculateExportBoost3(chunk);
|
|
7012
7326
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7013
|
-
const baseScore =
|
|
7327
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7014
7328
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7015
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7016
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7329
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7330
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7017
7331
|
const finalScore = boostedScore + additiveBoost;
|
|
7332
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7333
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7018
7334
|
processedChunkIds.add(chunk.id);
|
|
7019
|
-
if (
|
|
7335
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7020
7336
|
results.push({
|
|
7021
7337
|
filepath,
|
|
7022
7338
|
chunk,
|
|
7023
|
-
score:
|
|
7339
|
+
score: adjustedScore,
|
|
7024
7340
|
moduleId: this.id,
|
|
7025
7341
|
context: {
|
|
7026
7342
|
semanticScore,
|
|
@@ -7029,6 +7345,10 @@ class GoModule {
|
|
|
7029
7345
|
fileTypeBoost,
|
|
7030
7346
|
chunkTypeBoost,
|
|
7031
7347
|
exportBoost,
|
|
7348
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7349
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7350
|
+
discriminativeBoost: disc.boost,
|
|
7351
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7032
7352
|
literalMultiplier: literalContribution.multiplier,
|
|
7033
7353
|
literalMatchType: literalContribution.bestMatchType,
|
|
7034
7354
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7041,13 +7361,14 @@ class GoModule {
|
|
|
7041
7361
|
return results.slice(0, topK);
|
|
7042
7362
|
}
|
|
7043
7363
|
}
|
|
7044
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
7364
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, GO_EXTENSIONS, supportsFile3;
|
|
7045
7365
|
var init_go = __esm(() => {
|
|
7046
7366
|
init_embeddings();
|
|
7047
7367
|
init_services();
|
|
7048
7368
|
init_config2();
|
|
7049
7369
|
init_storage();
|
|
7050
7370
|
init_parsing();
|
|
7371
|
+
init_entities();
|
|
7051
7372
|
GO_EXTENSIONS = [".go"];
|
|
7052
7373
|
supportsFile3 = isGoFile;
|
|
7053
7374
|
});
|
|
@@ -7478,6 +7799,9 @@ class RustModule {
|
|
|
7478
7799
|
minScore = DEFAULT_MIN_SCORE5,
|
|
7479
7800
|
filePatterns
|
|
7480
7801
|
} = options;
|
|
7802
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
7803
|
+
const lw = rw.language;
|
|
7804
|
+
const lt = rw.literal;
|
|
7481
7805
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7482
7806
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7483
7807
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7570,17 +7894,19 @@ class RustModule {
|
|
|
7570
7894
|
const chunkTypeBoost = calculateChunkTypeBoost4(chunk);
|
|
7571
7895
|
const exportBoost = calculateExportBoost4(chunk);
|
|
7572
7896
|
const additiveBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
7573
|
-
const baseScore =
|
|
7897
|
+
const baseScore = lw.semantic * semanticScore + lw.bm25 * bm25Score;
|
|
7574
7898
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7575
|
-
const literalContribution = calculateLiteralContribution(literalMatches, true);
|
|
7576
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true);
|
|
7899
|
+
const literalContribution = calculateLiteralContribution(literalMatches, true, lt);
|
|
7900
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, true, lt);
|
|
7577
7901
|
const finalScore = boostedScore + additiveBoost;
|
|
7902
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
7903
|
+
const adjustedScore = (finalScore + disc.boost) * disc.penaltyFactor;
|
|
7578
7904
|
processedChunkIds.add(chunk.id);
|
|
7579
|
-
if (
|
|
7905
|
+
if (adjustedScore >= minScore || bm25Score > 0.3 || literalMatches.length > 0) {
|
|
7580
7906
|
results.push({
|
|
7581
7907
|
filepath,
|
|
7582
7908
|
chunk,
|
|
7583
|
-
score:
|
|
7909
|
+
score: adjustedScore,
|
|
7584
7910
|
moduleId: this.id,
|
|
7585
7911
|
context: {
|
|
7586
7912
|
semanticScore,
|
|
@@ -7589,6 +7915,10 @@ class RustModule {
|
|
|
7589
7915
|
fileTypeBoost,
|
|
7590
7916
|
chunkTypeBoost,
|
|
7591
7917
|
exportBoost,
|
|
7918
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
7919
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
7920
|
+
discriminativeBoost: disc.boost,
|
|
7921
|
+
matchedSalientTerms: disc.matchedSalient,
|
|
7592
7922
|
literalMultiplier: literalContribution.multiplier,
|
|
7593
7923
|
literalMatchType: literalContribution.bestMatchType,
|
|
7594
7924
|
literalConfidence: literalContribution.bestConfidence,
|
|
@@ -7601,13 +7931,14 @@ class RustModule {
|
|
|
7601
7931
|
return results.slice(0, topK);
|
|
7602
7932
|
}
|
|
7603
7933
|
}
|
|
7604
|
-
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10,
|
|
7934
|
+
var DEFAULT_MIN_SCORE5 = 0.15, DEFAULT_TOP_K5 = 10, RUST_EXTENSIONS, supportsFile4;
|
|
7605
7935
|
var init_rust = __esm(() => {
|
|
7606
7936
|
init_embeddings();
|
|
7607
7937
|
init_services();
|
|
7608
7938
|
init_config2();
|
|
7609
7939
|
init_storage();
|
|
7610
7940
|
init_parsing();
|
|
7941
|
+
init_entities();
|
|
7611
7942
|
RUST_EXTENSIONS = [".rs"];
|
|
7612
7943
|
supportsFile4 = isRustFile;
|
|
7613
7944
|
});
|
|
@@ -7737,6 +8068,8 @@ class JsonModule {
|
|
|
7737
8068
|
minScore = DEFAULT_MIN_SCORE6,
|
|
7738
8069
|
filePatterns
|
|
7739
8070
|
} = options;
|
|
8071
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8072
|
+
const jw = rw.json;
|
|
7740
8073
|
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
7741
8074
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
7742
8075
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
@@ -7790,9 +8123,9 @@ class JsonModule {
|
|
|
7790
8123
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
7791
8124
|
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
7792
8125
|
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
7793
|
-
const baseScore =
|
|
7794
|
-
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
7795
|
-
const literalBase = literalMatches.length > 0 && bm25Score === 0 ?
|
|
8126
|
+
const baseScore = jw.bm25 * bm25Score;
|
|
8127
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0, rw.literal);
|
|
8128
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? rw.literal.baseScore * jw.literalBaseWeight : 0;
|
|
7796
8129
|
const finalScore = boostedScore + literalBase;
|
|
7797
8130
|
processedChunkIds.add(chunk.id);
|
|
7798
8131
|
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
@@ -7825,7 +8158,7 @@ class JsonModule {
|
|
|
7825
8158
|
if (!chunk)
|
|
7826
8159
|
continue;
|
|
7827
8160
|
const literalContribution = calculateLiteralContribution(matches, false);
|
|
7828
|
-
const score =
|
|
8161
|
+
const score = rw.literal.baseScore * literalContribution.multiplier;
|
|
7829
8162
|
processedChunkIds.add(chunkId);
|
|
7830
8163
|
results.push({
|
|
7831
8164
|
filepath,
|
|
@@ -7846,11 +8179,12 @@ class JsonModule {
|
|
|
7846
8179
|
return results.slice(0, topK);
|
|
7847
8180
|
}
|
|
7848
8181
|
}
|
|
7849
|
-
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10,
|
|
8182
|
+
var DEFAULT_MIN_SCORE6 = 0.1, DEFAULT_TOP_K6 = 10, JSON_EXTENSIONS, supportsFile5;
|
|
7850
8183
|
var init_json = __esm(() => {
|
|
7851
8184
|
init_services();
|
|
7852
8185
|
init_config2();
|
|
7853
8186
|
init_storage();
|
|
8187
|
+
init_entities();
|
|
7854
8188
|
JSON_EXTENSIONS = [".json"];
|
|
7855
8189
|
supportsFile5 = isJsonFile;
|
|
7856
8190
|
});
|
|
@@ -8085,6 +8419,8 @@ class MarkdownModule {
|
|
|
8085
8419
|
minScore = DEFAULT_MIN_SCORE7,
|
|
8086
8420
|
filePatterns
|
|
8087
8421
|
} = options;
|
|
8422
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
8423
|
+
const mw = rw.markdown;
|
|
8088
8424
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
8089
8425
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
8090
8426
|
let allFiles;
|
|
@@ -8150,15 +8486,18 @@ class MarkdownModule {
|
|
|
8150
8486
|
"what",
|
|
8151
8487
|
"explain"
|
|
8152
8488
|
].includes(t))) {
|
|
8153
|
-
docBoost =
|
|
8154
|
-
}
|
|
8155
|
-
const
|
|
8156
|
-
const
|
|
8157
|
-
|
|
8489
|
+
docBoost = mw.docIntentBoost;
|
|
8490
|
+
}
|
|
8491
|
+
const rawHeadingBoost = calculateHeadingLevelBoost(chunk);
|
|
8492
|
+
const headingBoost = rawHeadingBoost * (mw.headingPhraseCoverageMin + mw.headingPhraseCoverageSpan * (phraseMatch.totalTokenCount > 0 ? phraseMatch.coverage : 1));
|
|
8493
|
+
const hybridScore = mw.semantic * semanticScore + mw.bm25 * bm25Score + docBoost + headingBoost + phraseMatch.boost;
|
|
8494
|
+
const disc = scoreDiscriminativeTerms(bm25Index, query, chunk.content, chunk.name, rw.discriminative);
|
|
8495
|
+
const finalScore = (hybridScore + disc.boost) * disc.penaltyFactor;
|
|
8496
|
+
if (finalScore >= minScore || bm25Score > 0.3 || phraseMatch.isSignificant) {
|
|
8158
8497
|
results.push({
|
|
8159
8498
|
filepath,
|
|
8160
8499
|
chunk,
|
|
8161
|
-
score:
|
|
8500
|
+
score: finalScore,
|
|
8162
8501
|
moduleId: this.id,
|
|
8163
8502
|
context: {
|
|
8164
8503
|
semanticScore,
|
|
@@ -8167,7 +8506,11 @@ class MarkdownModule {
|
|
|
8167
8506
|
phraseCoverage: phraseMatch.coverage,
|
|
8168
8507
|
docBoost,
|
|
8169
8508
|
headingBoost,
|
|
8170
|
-
headingLevel: chunk.metadata?.headingLevel
|
|
8509
|
+
headingLevel: chunk.metadata?.headingLevel,
|
|
8510
|
+
discriminativeCoverage: disc.salientCoverage,
|
|
8511
|
+
discriminativePenaltyFactor: disc.penaltyFactor,
|
|
8512
|
+
discriminativeBoost: disc.boost,
|
|
8513
|
+
matchedSalientTerms: disc.matchedSalient
|
|
8171
8514
|
}
|
|
8172
8515
|
});
|
|
8173
8516
|
}
|
|
@@ -8176,11 +8519,12 @@ class MarkdownModule {
|
|
|
8176
8519
|
return results.slice(0, topK);
|
|
8177
8520
|
}
|
|
8178
8521
|
}
|
|
8179
|
-
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10,
|
|
8522
|
+
var DEFAULT_MIN_SCORE7 = 0.15, DEFAULT_TOP_K7 = 10, MARKDOWN_EXTENSIONS, supportsFile6;
|
|
8180
8523
|
var init_markdown = __esm(() => {
|
|
8181
8524
|
init_embeddings();
|
|
8182
8525
|
init_services();
|
|
8183
8526
|
init_config2();
|
|
8527
|
+
init_entities();
|
|
8184
8528
|
init_storage();
|
|
8185
8529
|
MARKDOWN_EXTENSIONS = [".md", ".txt"];
|
|
8186
8530
|
supportsFile6 = isMarkdownFile;
|
|
@@ -11611,6 +11955,7 @@ minimatch.unescape = unescape;
|
|
|
11611
11955
|
init_types();
|
|
11612
11956
|
init_config2();
|
|
11613
11957
|
init_services();
|
|
11958
|
+
init_entities();
|
|
11614
11959
|
// src/domain/usecases/exactSearch.ts
|
|
11615
11960
|
init_simpleSearch();
|
|
11616
11961
|
var DEFAULT_IGNORED_DIRS = [
|
|
@@ -11778,7 +12123,9 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11778
12123
|
if (ensureFresh) {
|
|
11779
12124
|
await ensureIndexFresh(rootDir, { quiet: true });
|
|
11780
12125
|
}
|
|
11781
|
-
|
|
12126
|
+
if (!options.quiet) {
|
|
12127
|
+
console.log(`Searching for: "${query}"`);
|
|
12128
|
+
}
|
|
11782
12129
|
const config = await loadConfig(rootDir);
|
|
11783
12130
|
await registerBuiltInModules();
|
|
11784
12131
|
const globalManifest = await loadGlobalManifest2(rootDir, config);
|
|
@@ -11841,10 +12188,18 @@ async function hybridSearch(rootDir, query, options = {}) {
|
|
|
11841
12188
|
}
|
|
11842
12189
|
}
|
|
11843
12190
|
}
|
|
11844
|
-
|
|
12191
|
+
const rw = mergeRankingWeights(options.rankingWeights);
|
|
12192
|
+
let ranked = filteredResults.map((r) => attachMatchScales(r, rw));
|
|
12193
|
+
for (const r of ranked) {
|
|
12194
|
+
if (r.context?.exactMatchFusion) {
|
|
12195
|
+
r.structuredMatch = clamp01((r.structuredMatch ?? 0) * 1.5);
|
|
12196
|
+
}
|
|
12197
|
+
}
|
|
12198
|
+
const rankBy = options.rankBy ?? DEFAULT_SEARCH_OPTIONS.rankBy;
|
|
12199
|
+
ranked.sort((a, b) => compareSearchResultsByRankBy(a, b, rankBy));
|
|
11845
12200
|
const topK = options.topK ?? 10;
|
|
11846
12201
|
return {
|
|
11847
|
-
results:
|
|
12202
|
+
results: ranked.slice(0, topK),
|
|
11848
12203
|
exactMatches,
|
|
11849
12204
|
fusionApplied
|
|
11850
12205
|
};
|
|
@@ -11935,7 +12290,9 @@ function formatSearchResults2(results) {
|
|
|
11935
12290
|
const nameInfo = chunk.name ? ` (${chunk.name})` : "";
|
|
11936
12291
|
output += `${i + 1}. ${location}${nameInfo}
|
|
11937
12292
|
`;
|
|
11938
|
-
|
|
12293
|
+
const sm = result.semanticMatch != null ? ` | Semantic: ${(result.semanticMatch * 100).toFixed(1)}%` : "";
|
|
12294
|
+
const st = result.structuredMatch != null ? ` | Structured: ${(result.structuredMatch * 100).toFixed(1)}%` : "";
|
|
12295
|
+
output += ` Score: ${(result.score * 100).toFixed(1)}%${st}${sm} | Type: ${chunk.type}`;
|
|
11939
12296
|
output += ` | via ${formatModuleName(result.moduleId)}`;
|
|
11940
12297
|
if (chunk.isExported) {
|
|
11941
12298
|
output += " | exported";
|
|
@@ -12028,6 +12385,7 @@ function formatHybridSearchResults(hybridResults) {
|
|
|
12028
12385
|
}
|
|
12029
12386
|
|
|
12030
12387
|
// src/index.ts
|
|
12388
|
+
init_entities();
|
|
12031
12389
|
async function index(directory, options = {}) {
|
|
12032
12390
|
return indexDirectory(directory, options);
|
|
12033
12391
|
}
|
|
@@ -12056,6 +12414,8 @@ var src_default = raggrep;
|
|
|
12056
12414
|
export {
|
|
12057
12415
|
search2 as search,
|
|
12058
12416
|
reset,
|
|
12417
|
+
mergeRankingWeights,
|
|
12418
|
+
mergeLiteralWeights,
|
|
12059
12419
|
index,
|
|
12060
12420
|
hybridSearch2 as hybridSearch,
|
|
12061
12421
|
formatSearchResults2 as formatSearchResults,
|
|
@@ -12067,7 +12427,9 @@ export {
|
|
|
12067
12427
|
cleanup,
|
|
12068
12428
|
SilentLogger,
|
|
12069
12429
|
InlineProgressLogger,
|
|
12430
|
+
DEFAULT_RANKING_WEIGHTS,
|
|
12431
|
+
DEFAULT_LITERAL_BOOST_WEIGHTS,
|
|
12070
12432
|
ConsoleLogger
|
|
12071
12433
|
};
|
|
12072
12434
|
|
|
12073
|
-
//# debugId=
|
|
12435
|
+
//# debugId=E54DFEDA0CA026F464756E2164756E21
|