raggrep 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/main.js +167 -138
- package/dist/cli/main.js.map +6 -5
- package/dist/domain/services/index.d.ts +1 -0
- package/dist/domain/services/jsonPathExtractor.d.ts +29 -0
- package/dist/domain/services/jsonPathExtractor.test.d.ts +4 -0
- package/dist/index.js +166 -137
- package/dist/index.js.map +6 -5
- package/dist/modules/data/json/index.d.ts +28 -10
- package/package.json +1 -1
|
@@ -13,3 +13,4 @@ export { parseQueryLiterals } from "./queryLiteralParser";
|
|
|
13
13
|
export { extractLiterals, extractLiteralsWithReferences, } from "./literalExtractor";
|
|
14
14
|
export { calculateLiteralMultiplier, calculateMaxMultiplier, calculateLiteralContribution, applyLiteralBoost, mergeWithLiteralBoost, LITERAL_SCORING_CONSTANTS, type LiteralScoreContribution, type MergeInput, type MergeOutput, } from "./literalScorer";
|
|
15
15
|
export { getSynonyms, expandQuery, DEFAULT_LEXICON, EXPANSION_WEIGHTS, DEFAULT_EXPANSION_OPTIONS, } from "./lexicon";
|
|
16
|
+
export { extractJsonPaths, extractJsonKeywords } from "./jsonPathExtractor";
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON Path Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts dot-notation key paths from JSON objects as literals.
|
|
5
|
+
* Used for literal-based indexing of JSON files.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* // user.json: { name: { first: "john" } }
|
|
9
|
+
* extractJsonPaths({ name: { first: "john" } }, "user")
|
|
10
|
+
* // Returns literals for: "user.name", "user.name.first"
|
|
11
|
+
*/
|
|
12
|
+
import type { ExtractedLiteral } from "../entities/literal";
|
|
13
|
+
/**
|
|
14
|
+
* Extract all key paths from a JSON object as literals.
|
|
15
|
+
* Prefixes all paths with the filename (without extension).
|
|
16
|
+
*
|
|
17
|
+
* @param obj - Parsed JSON object
|
|
18
|
+
* @param fileBasename - Filename without extension (e.g., "user" from "user.json")
|
|
19
|
+
* @returns Array of literals representing all dot-notation paths
|
|
20
|
+
*/
|
|
21
|
+
export declare function extractJsonPaths(obj: unknown, fileBasename: string): ExtractedLiteral[];
|
|
22
|
+
/**
|
|
23
|
+
* Extract keywords from JSON for BM25 indexing.
|
|
24
|
+
* Extracts both keys and string values.
|
|
25
|
+
*
|
|
26
|
+
* @param obj - Parsed JSON object
|
|
27
|
+
* @returns Array of keywords for BM25 indexing
|
|
28
|
+
*/
|
|
29
|
+
export declare function extractJsonKeywords(obj: unknown): string[];
|
package/dist/index.js
CHANGED
|
@@ -2548,44 +2548,10 @@ var init_queryIntent = __esm(() => {
|
|
|
2548
2548
|
});
|
|
2549
2549
|
|
|
2550
2550
|
// src/domain/services/chunking.ts
|
|
2551
|
-
function createLineBasedChunks(content, options = {}) {
|
|
2552
|
-
const {
|
|
2553
|
-
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2554
|
-
overlap = DEFAULT_OVERLAP,
|
|
2555
|
-
minLinesForMultipleChunks = chunkSize
|
|
2556
|
-
} = options;
|
|
2557
|
-
const lines = content.split(`
|
|
2558
|
-
`);
|
|
2559
|
-
const chunks = [];
|
|
2560
|
-
if (lines.length <= minLinesForMultipleChunks) {
|
|
2561
|
-
return [
|
|
2562
|
-
{
|
|
2563
|
-
content,
|
|
2564
|
-
startLine: 1,
|
|
2565
|
-
endLine: lines.length,
|
|
2566
|
-
type: "file"
|
|
2567
|
-
}
|
|
2568
|
-
];
|
|
2569
|
-
}
|
|
2570
|
-
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2571
|
-
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2572
|
-
chunks.push({
|
|
2573
|
-
content: lines.slice(i, endIdx).join(`
|
|
2574
|
-
`),
|
|
2575
|
-
startLine: i + 1,
|
|
2576
|
-
endLine: endIdx,
|
|
2577
|
-
type: "block"
|
|
2578
|
-
});
|
|
2579
|
-
if (endIdx >= lines.length)
|
|
2580
|
-
break;
|
|
2581
|
-
}
|
|
2582
|
-
return chunks;
|
|
2583
|
-
}
|
|
2584
2551
|
function generateChunkId(filepath, startLine, endLine) {
|
|
2585
2552
|
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2586
2553
|
return `${safePath}-${startLine}-${endLine}`;
|
|
2587
2554
|
}
|
|
2588
|
-
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2589
2555
|
|
|
2590
2556
|
// src/domain/services/queryLiteralParser.ts
|
|
2591
2557
|
function parseQueryLiterals(query) {
|
|
@@ -3453,6 +3419,63 @@ var init_lexicon2 = __esm(() => {
|
|
|
3453
3419
|
defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
|
|
3454
3420
|
});
|
|
3455
3421
|
|
|
3422
|
+
// src/domain/services/jsonPathExtractor.ts
|
|
3423
|
+
function extractJsonPaths(obj, fileBasename) {
|
|
3424
|
+
const paths = extractPathsRecursive(obj, fileBasename);
|
|
3425
|
+
return paths.map((path8) => ({
|
|
3426
|
+
value: path8,
|
|
3427
|
+
type: "identifier",
|
|
3428
|
+
matchType: "definition"
|
|
3429
|
+
}));
|
|
3430
|
+
}
|
|
3431
|
+
function extractPathsRecursive(obj, prefix) {
|
|
3432
|
+
const paths = [];
|
|
3433
|
+
if (obj === null || obj === undefined) {
|
|
3434
|
+
return paths;
|
|
3435
|
+
}
|
|
3436
|
+
if (Array.isArray(obj)) {
|
|
3437
|
+
obj.forEach((item, index) => {
|
|
3438
|
+
const indexedPrefix = `${prefix}[${index}]`;
|
|
3439
|
+
paths.push(indexedPrefix);
|
|
3440
|
+
if (item !== null && typeof item === "object") {
|
|
3441
|
+
paths.push(...extractPathsRecursive(item, indexedPrefix));
|
|
3442
|
+
}
|
|
3443
|
+
});
|
|
3444
|
+
} else if (typeof obj === "object") {
|
|
3445
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3446
|
+
const fullPath = `${prefix}.${key}`;
|
|
3447
|
+
paths.push(fullPath);
|
|
3448
|
+
if (value !== null && typeof value === "object") {
|
|
3449
|
+
paths.push(...extractPathsRecursive(value, fullPath));
|
|
3450
|
+
}
|
|
3451
|
+
}
|
|
3452
|
+
}
|
|
3453
|
+
return paths;
|
|
3454
|
+
}
|
|
3455
|
+
function extractJsonKeywords(obj) {
|
|
3456
|
+
const keywords = new Set;
|
|
3457
|
+
const extract = (value, parentKey) => {
|
|
3458
|
+
if (value === null || value === undefined) {
|
|
3459
|
+
return;
|
|
3460
|
+
}
|
|
3461
|
+
if (typeof value === "string") {
|
|
3462
|
+
const words = value.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-./]+/).filter((w) => w.length > 2);
|
|
3463
|
+
words.forEach((w) => keywords.add(w));
|
|
3464
|
+
} else if (Array.isArray(value)) {
|
|
3465
|
+
value.forEach((item) => extract(item));
|
|
3466
|
+
} else if (typeof value === "object") {
|
|
3467
|
+
for (const [key, val] of Object.entries(value)) {
|
|
3468
|
+
keywords.add(key.toLowerCase());
|
|
3469
|
+
const keyWords = key.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-]+/).filter((w) => w.length > 2);
|
|
3470
|
+
keyWords.forEach((w) => keywords.add(w));
|
|
3471
|
+
extract(val, key);
|
|
3472
|
+
}
|
|
3473
|
+
}
|
|
3474
|
+
};
|
|
3475
|
+
extract(obj);
|
|
3476
|
+
return Array.from(keywords);
|
|
3477
|
+
}
|
|
3478
|
+
|
|
3456
3479
|
// src/domain/services/index.ts
|
|
3457
3480
|
var init_services = __esm(() => {
|
|
3458
3481
|
init_keywords();
|
|
@@ -4383,113 +4406,66 @@ function isJsonFile(filepath) {
|
|
|
4383
4406
|
const ext = path11.extname(filepath).toLowerCase();
|
|
4384
4407
|
return JSON_EXTENSIONS.includes(ext);
|
|
4385
4408
|
}
|
|
4386
|
-
function extractJsonKeys(obj, prefix = "") {
|
|
4387
|
-
const keys = [];
|
|
4388
|
-
if (obj === null || obj === undefined) {
|
|
4389
|
-
return keys;
|
|
4390
|
-
}
|
|
4391
|
-
if (Array.isArray(obj)) {
|
|
4392
|
-
obj.forEach((item, index) => {
|
|
4393
|
-
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
4394
|
-
});
|
|
4395
|
-
} else if (typeof obj === "object") {
|
|
4396
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
4397
|
-
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
4398
|
-
keys.push(key);
|
|
4399
|
-
keys.push(...extractJsonKeys(value, fullKey));
|
|
4400
|
-
}
|
|
4401
|
-
}
|
|
4402
|
-
return keys;
|
|
4403
|
-
}
|
|
4404
|
-
function extractJsonKeywords(content) {
|
|
4405
|
-
try {
|
|
4406
|
-
const parsed = JSON.parse(content);
|
|
4407
|
-
const keys = extractJsonKeys(parsed);
|
|
4408
|
-
const stringValues = [];
|
|
4409
|
-
const extractStrings = (obj) => {
|
|
4410
|
-
if (typeof obj === "string") {
|
|
4411
|
-
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
4412
|
-
stringValues.push(...words);
|
|
4413
|
-
} else if (Array.isArray(obj)) {
|
|
4414
|
-
obj.forEach(extractStrings);
|
|
4415
|
-
} else if (obj && typeof obj === "object") {
|
|
4416
|
-
Object.values(obj).forEach(extractStrings);
|
|
4417
|
-
}
|
|
4418
|
-
};
|
|
4419
|
-
extractStrings(parsed);
|
|
4420
|
-
return [...new Set([...keys, ...stringValues])];
|
|
4421
|
-
} catch {
|
|
4422
|
-
return [];
|
|
4423
|
-
}
|
|
4424
|
-
}
|
|
4425
4409
|
|
|
4426
4410
|
class JsonModule {
|
|
4427
4411
|
id = "data/json";
|
|
4428
4412
|
name = "JSON Search";
|
|
4429
|
-
description = "JSON file search with
|
|
4430
|
-
version = "
|
|
4413
|
+
description = "JSON file search with literal-based key path indexing";
|
|
4414
|
+
version = "2.0.0";
|
|
4431
4415
|
supportsFile(filepath) {
|
|
4432
4416
|
return isJsonFile(filepath);
|
|
4433
4417
|
}
|
|
4434
|
-
embeddingConfig = null;
|
|
4435
4418
|
symbolicIndex = null;
|
|
4419
|
+
literalIndex = null;
|
|
4436
4420
|
pendingSummaries = new Map;
|
|
4421
|
+
pendingLiterals = new Map;
|
|
4437
4422
|
rootDir = "";
|
|
4438
4423
|
logger = undefined;
|
|
4439
4424
|
async initialize(config) {
|
|
4440
|
-
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
4441
4425
|
this.logger = config.options?.logger;
|
|
4442
|
-
if (this.logger) {
|
|
4443
|
-
this.embeddingConfig = {
|
|
4444
|
-
...this.embeddingConfig,
|
|
4445
|
-
logger: this.logger
|
|
4446
|
-
};
|
|
4447
|
-
}
|
|
4448
|
-
configureEmbeddings(this.embeddingConfig);
|
|
4449
4426
|
this.pendingSummaries.clear();
|
|
4427
|
+
this.pendingLiterals.clear();
|
|
4450
4428
|
}
|
|
4451
4429
|
async indexFile(filepath, content, ctx) {
|
|
4452
4430
|
if (!isJsonFile(filepath)) {
|
|
4453
4431
|
return null;
|
|
4454
4432
|
}
|
|
4455
4433
|
this.rootDir = ctx.rootDir;
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4459
|
-
}
|
|
4460
|
-
if (textChunks.length === 0) {
|
|
4434
|
+
let parsed;
|
|
4435
|
+
try {
|
|
4436
|
+
parsed = JSON.parse(content);
|
|
4437
|
+
} catch {
|
|
4461
4438
|
return null;
|
|
4462
4439
|
}
|
|
4463
|
-
const
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
const
|
|
4468
|
-
const
|
|
4469
|
-
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
try {
|
|
4477
|
-
return JSON.parse(content);
|
|
4478
|
-
} catch {
|
|
4479
|
-
return {};
|
|
4440
|
+
const fileBasename = path11.basename(filepath, path11.extname(filepath));
|
|
4441
|
+
const jsonPathLiterals = extractJsonPaths(parsed, fileBasename);
|
|
4442
|
+
const lines = content.split(`
|
|
4443
|
+
`);
|
|
4444
|
+
const lineCount = lines.length;
|
|
4445
|
+
const chunkId = generateChunkId(filepath, 1, lineCount);
|
|
4446
|
+
const chunks = [
|
|
4447
|
+
{
|
|
4448
|
+
id: chunkId,
|
|
4449
|
+
content,
|
|
4450
|
+
startLine: 1,
|
|
4451
|
+
endLine: lineCount,
|
|
4452
|
+
type: "file"
|
|
4480
4453
|
}
|
|
4481
|
-
|
|
4454
|
+
];
|
|
4455
|
+
if (jsonPathLiterals.length > 0) {
|
|
4456
|
+
this.pendingLiterals.set(chunkId, {
|
|
4457
|
+
filepath,
|
|
4458
|
+
literals: jsonPathLiterals
|
|
4459
|
+
});
|
|
4460
|
+
}
|
|
4482
4461
|
const stats = await ctx.getFileStats(filepath);
|
|
4483
|
-
const currentConfig = getEmbeddingConfig();
|
|
4484
4462
|
const moduleData = {
|
|
4485
|
-
|
|
4486
|
-
embeddingModel: currentConfig.model,
|
|
4487
|
-
jsonKeys
|
|
4463
|
+
jsonPaths: jsonPathLiterals.map((l) => l.value)
|
|
4488
4464
|
};
|
|
4489
|
-
const keywords = extractJsonKeywords(
|
|
4465
|
+
const keywords = extractJsonKeywords(parsed);
|
|
4490
4466
|
const fileSummary = {
|
|
4491
4467
|
filepath,
|
|
4492
|
-
chunkCount:
|
|
4468
|
+
chunkCount: 1,
|
|
4493
4469
|
chunkTypes: ["file"],
|
|
4494
4470
|
keywords,
|
|
4495
4471
|
exports: [],
|
|
@@ -4512,7 +4488,24 @@ class JsonModule {
|
|
|
4512
4488
|
}
|
|
4513
4489
|
this.symbolicIndex.buildBM25Index();
|
|
4514
4490
|
await this.symbolicIndex.save();
|
|
4491
|
+
this.literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4492
|
+
await this.literalIndex.initialize();
|
|
4493
|
+
const indexedFilepaths = new Set;
|
|
4494
|
+
for (const filepath of this.pendingSummaries.keys()) {
|
|
4495
|
+
indexedFilepaths.add(filepath);
|
|
4496
|
+
}
|
|
4497
|
+
for (const { filepath } of this.pendingLiterals.values()) {
|
|
4498
|
+
indexedFilepaths.add(filepath);
|
|
4499
|
+
}
|
|
4500
|
+
for (const filepath of indexedFilepaths) {
|
|
4501
|
+
this.literalIndex.removeFile(filepath);
|
|
4502
|
+
}
|
|
4503
|
+
for (const [chunkId, { filepath, literals }] of this.pendingLiterals) {
|
|
4504
|
+
this.literalIndex.addLiterals(chunkId, filepath, literals);
|
|
4505
|
+
}
|
|
4506
|
+
await this.literalIndex.save();
|
|
4515
4507
|
this.pendingSummaries.clear();
|
|
4508
|
+
this.pendingLiterals.clear();
|
|
4516
4509
|
}
|
|
4517
4510
|
async search(query, ctx, options = {}) {
|
|
4518
4511
|
const {
|
|
@@ -4520,8 +4513,15 @@ class JsonModule {
|
|
|
4520
4513
|
minScore = DEFAULT_MIN_SCORE3,
|
|
4521
4514
|
filePatterns
|
|
4522
4515
|
} = options;
|
|
4516
|
+
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
4523
4517
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
4524
4518
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4519
|
+
const literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4520
|
+
let literalMatchMap = new Map;
|
|
4521
|
+
try {
|
|
4522
|
+
await literalIndex.initialize();
|
|
4523
|
+
literalMatchMap = literalIndex.buildMatchMap(queryLiterals);
|
|
4524
|
+
} catch {}
|
|
4525
4525
|
let allFiles;
|
|
4526
4526
|
try {
|
|
4527
4527
|
await symbolicIndex.initialize();
|
|
@@ -4541,25 +4541,16 @@ class JsonModule {
|
|
|
4541
4541
|
});
|
|
4542
4542
|
});
|
|
4543
4543
|
}
|
|
4544
|
-
const queryEmbedding = await getEmbedding(query);
|
|
4545
4544
|
const bm25Index = new BM25Index;
|
|
4546
4545
|
const allChunksData = [];
|
|
4547
4546
|
for (const filepath of filesToSearch) {
|
|
4548
4547
|
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
4549
4548
|
if (!fileIndex)
|
|
4550
4549
|
continue;
|
|
4551
|
-
const
|
|
4552
|
-
if (!moduleData?.embeddings)
|
|
4553
|
-
continue;
|
|
4554
|
-
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
4555
|
-
const chunk = fileIndex.chunks[i];
|
|
4556
|
-
const embedding = moduleData.embeddings[i];
|
|
4557
|
-
if (!embedding)
|
|
4558
|
-
continue;
|
|
4550
|
+
for (const chunk of fileIndex.chunks) {
|
|
4559
4551
|
allChunksData.push({
|
|
4560
4552
|
filepath: fileIndex.filepath,
|
|
4561
|
-
chunk
|
|
4562
|
-
embedding
|
|
4553
|
+
chunk
|
|
4563
4554
|
});
|
|
4564
4555
|
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
4565
4556
|
}
|
|
@@ -4569,32 +4560,70 @@ class JsonModule {
|
|
|
4569
4560
|
for (const result of bm25Results) {
|
|
4570
4561
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
4571
4562
|
}
|
|
4572
|
-
const queryTerms = extractQueryTerms(query);
|
|
4573
4563
|
const results = [];
|
|
4574
|
-
|
|
4575
|
-
|
|
4564
|
+
const processedChunkIds = new Set;
|
|
4565
|
+
for (const { filepath, chunk } of allChunksData) {
|
|
4576
4566
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
4577
|
-
const
|
|
4578
|
-
|
|
4567
|
+
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
4568
|
+
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
4569
|
+
const baseScore = BM25_WEIGHT2 * bm25Score;
|
|
4570
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
4571
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
|
|
4572
|
+
const finalScore = boostedScore + literalBase;
|
|
4573
|
+
processedChunkIds.add(chunk.id);
|
|
4574
|
+
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
4579
4575
|
results.push({
|
|
4580
4576
|
filepath,
|
|
4581
4577
|
chunk,
|
|
4582
|
-
score:
|
|
4578
|
+
score: finalScore,
|
|
4583
4579
|
moduleId: this.id,
|
|
4584
4580
|
context: {
|
|
4585
|
-
|
|
4586
|
-
|
|
4581
|
+
bm25Score,
|
|
4582
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4583
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4584
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4585
|
+
literalMatchCount: literalContribution.matchCount
|
|
4587
4586
|
}
|
|
4588
4587
|
});
|
|
4589
4588
|
}
|
|
4590
4589
|
}
|
|
4590
|
+
for (const [chunkId, matches] of literalMatchMap) {
|
|
4591
|
+
if (processedChunkIds.has(chunkId)) {
|
|
4592
|
+
continue;
|
|
4593
|
+
}
|
|
4594
|
+
const filepath = matches[0]?.filepath;
|
|
4595
|
+
if (!filepath)
|
|
4596
|
+
continue;
|
|
4597
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
4598
|
+
if (!fileIndex)
|
|
4599
|
+
continue;
|
|
4600
|
+
const chunk = fileIndex.chunks.find((c) => c.id === chunkId);
|
|
4601
|
+
if (!chunk)
|
|
4602
|
+
continue;
|
|
4603
|
+
const literalContribution = calculateLiteralContribution(matches, false);
|
|
4604
|
+
const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
|
|
4605
|
+
processedChunkIds.add(chunkId);
|
|
4606
|
+
results.push({
|
|
4607
|
+
filepath,
|
|
4608
|
+
chunk,
|
|
4609
|
+
score,
|
|
4610
|
+
moduleId: this.id,
|
|
4611
|
+
context: {
|
|
4612
|
+
bm25Score: 0,
|
|
4613
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4614
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4615
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4616
|
+
literalMatchCount: literalContribution.matchCount,
|
|
4617
|
+
literalOnly: true
|
|
4618
|
+
}
|
|
4619
|
+
});
|
|
4620
|
+
}
|
|
4591
4621
|
results.sort((a, b) => b.score - a.score);
|
|
4592
4622
|
return results.slice(0, topK);
|
|
4593
4623
|
}
|
|
4594
4624
|
}
|
|
4595
|
-
var DEFAULT_MIN_SCORE3 = 0.
|
|
4625
|
+
var DEFAULT_MIN_SCORE3 = 0.1, DEFAULT_TOP_K3 = 10, BM25_WEIGHT2 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile2;
|
|
4596
4626
|
var init_json = __esm(() => {
|
|
4597
|
-
init_embeddings();
|
|
4598
4627
|
init_services();
|
|
4599
4628
|
init_config2();
|
|
4600
4629
|
init_storage();
|
|
@@ -4864,7 +4893,7 @@ ${section.content}` : section.content,
|
|
|
4864
4893
|
].includes(t))) {
|
|
4865
4894
|
docBoost = 0.05;
|
|
4866
4895
|
}
|
|
4867
|
-
const hybridScore =
|
|
4896
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
4868
4897
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
4869
4898
|
results.push({
|
|
4870
4899
|
filepath,
|
|
@@ -4883,7 +4912,7 @@ ${section.content}` : section.content,
|
|
|
4883
4912
|
return results.slice(0, topK);
|
|
4884
4913
|
}
|
|
4885
4914
|
}
|
|
4886
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
4915
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
|
|
4887
4916
|
var init_markdown = __esm(() => {
|
|
4888
4917
|
init_embeddings();
|
|
4889
4918
|
init_services();
|
|
@@ -6058,4 +6087,4 @@ export {
|
|
|
6058
6087
|
ConsoleLogger
|
|
6059
6088
|
};
|
|
6060
6089
|
|
|
6061
|
-
//# debugId=
|
|
6090
|
+
//# debugId=7A45B6717CB7C82E64756E2164756E21
|