grepmax 0.17.7 → 0.17.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Phase 3 design probe. For each platform symbol-name miss, determine whether
|
|
4
|
+
* the expected DEFINITION chunk is:
|
|
5
|
+
* (a) inside the top-200 fusion pool but lost downstream -> a ranking fix
|
|
6
|
+
* (b) outside the pool (but inside the 500-row retrieval) -> recovery fix
|
|
7
|
+
* (c) outside retrieval entirely -> unreachable
|
|
8
|
+
* and whether a 1-hop ref->def walk from the top-K fusion seeds reaches it
|
|
9
|
+
* (i.e. some seed's referenced_symbols contains the query symbol).
|
|
10
|
+
*
|
|
11
|
+
* Run: npx tsx src/eval-graph-recovery-probe.ts
|
|
12
|
+
*/
|
|
13
|
+
// TypeScript emit helper. Exposes property `key` of `source` on `target`
// under the name `alias` (defaults to `key`). A previously installed helper
// on `this` is reused when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused as-is or has to be
        // replaced by a forwarding getter.
        let needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Forwarding getter: reads through to the source on every access.
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // No Object.create available: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
24
|
+
// TypeScript emit helper. Stores `moduleValue` as the `default` member of
// `namespace`: an enumerable data property when Object.create exists, a plain
// assignment otherwise. A previously installed helper on `this` is reused.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, moduleValue) {
        const descriptor = { enumerable: true, value: moduleValue };
        Object.defineProperty(namespace, "default", descriptor);
    }
    : function (namespace, moduleValue) {
        namespace["default"] = moduleValue;
    });
|
|
29
|
+
// TypeScript emit helper. Wraps a required module in a namespace-like object:
// modules flagged `__esModule` are returned untouched; otherwise every own
// key except "default" is bound onto a fresh object and the module itself is
// stored as its `default` member.
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: Object.getOwnPropertyNames when present, else a
    // for-in scan restricted to own properties.
    let listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            const names = [];
            for (const name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names.push(name);
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const key of listOwnKeys(mod)) {
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
46
|
+
// TypeScript emit helper. Drives a generator function as an async function:
// each yielded value is awaited (wrapped in a promise when it is not already
// an instance of `P`), its outcome is fed back via next()/throw(), and the
// returned promise settles with the generator's return value or thrown error.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const wrap = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => { settle(value); });
    };
    return new (P || (P = Promise))((resolve, reject) => {
        function pump(state) {
            if (state.done) {
                resolve(state.value);
                return;
            }
            wrap(state.value).then(onValue, onError);
        }
        function onValue(value) {
            try {
                pump(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        }
        function onError(reason) {
            try {
                pump(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        }
        // Instantiate the generator with the caller's `this`/arguments, then start it.
        generator = generator.apply(thisArg, _arguments || []);
        pump(generator.next());
    });
};
|
|
55
|
+
var _a, _b;
|
|
56
|
+
var _c;
|
|
57
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
58
|
+
(_a = (_c = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_c.GMAX_WORKER_COUNT = "1");
|
|
59
|
+
const path = __importStar(require("node:path"));
|
|
60
|
+
const config_1 = require("./config");
|
|
61
|
+
const vector_db_1 = require("./lib/store/vector-db");
|
|
62
|
+
const exit_1 = require("./lib/utils/exit");
|
|
63
|
+
const pool_1 = require("./lib/workers/pool");
|
|
64
|
+
const filter_builder_1 = require("./lib/utils/filter-builder");
|
|
65
|
+
const PLATFORM_ROOT = path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/beyond/platform");
|
|
66
|
+
// Row limit applied to each of the vector and full-text searches.
const PRE_K = 500;
// Fused ranks below this count as "inside the pool".
const STAGE1_K = 200;
// Number of top fused rows inspected as ref->def seeds.
const SEED_K = 20;
// Rank offset in the fusion score 1 / (RRF_K + rank + 1).
const RRF_K = 60;
// (query symbol, expected definition file) — the rank-0 platform misses.
// Declared per file and flattened into [symbol, file] pairs, in listing order.
const CASES = [
    ["packages/shared/src/errors.ts", ["BeyondError", "ErrorCodes"]],
    ["packages/db/src/index.ts", ["createDb", "createDbAsync"]],
    ["packages/api/src/middleware/auth.ts", ["authMiddleware", "requireAuth", "resolveActor", "getActor"]],
    ["packages/api/src/middleware/error.ts", ["errorHandler"]],
    ["packages/api/src/app.ts", ["initializeApp"]],
].reduce((pairs, [file, symbols]) => pairs.concat(symbols.map((symbol) => [symbol, file])), []);
|
|
83
|
+
/**
 * Normalizes a symbol-list cell to a plain array of strings.
 *
 * Accepts either a real array or any object exposing a `toArray()` method;
 * non-string entries are dropped. Anything else, or a `toArray()` that throws
 * or yields a non-array, produces an empty array.
 *
 * @param {*} val Raw cell value.
 * @returns {string[]} The string entries, possibly empty.
 */
function toStrArr(val) {
    const isString = (entry) => typeof entry === "string";
    if (!val) {
        return [];
    }
    if (Array.isArray(val)) {
        return val.filter(isString);
    }
    if (typeof val.toArray !== "function") {
        return [];
    }
    let strings = [];
    try {
        const converted = val.toArray();
        if (Array.isArray(converted)) {
            strings = converted.filter(isString);
        }
    }
    catch (_err) {
        // Best effort: an unreadable cell counts as "no symbols".
        strings = [];
    }
    return strings;
}
|
|
100
|
+
/**
 * Classifies where the chunk that defines `sym` lands in the retrieval
 * pipeline, and whether the top fused rows reference it.
 *
 * Steps:
 *  1. Encode `sym` and run a vector search and a full-text search, each
 *     limited to PRE_K rows and restricted to paths under PLATFORM_ROOT.
 *  2. Fuse both rankings with reciprocal-rank fusion
 *     (score += 1 / (RRF_K + rank + 1) per list a row appears in).
 *  3. Find the first fused row whose path ends with `/${expectedFile}`
 *     (case-insensitive) and whose `defined_symbols` contains `sym`.
 *  4. If no such row was retrieved, query the table directly to tell
 *     "exists but not retrieved" apart from "not in the index".
 *  5. Count how many of the top SEED_K fused rows list `sym` in
 *     `referenced_symbols`.
 *
 * @param {object} table Table handle; must provide `vectorSearch()`,
 *   `search()` and `query()` builders supporting
 *   `.select().where().limit().toArray()`.
 * @param {string} sym Symbol name used both as the query and as the symbol to find.
 * @param {string} expectedFile Path suffix of the file expected to define `sym`.
 * @returns {Promise<{sym: string, expectedFile: string, loc: string,
 *   seedsRefSym: number, firstRefSeedRank: number}>} `loc` is one of
 *   `pool#<rank>`, `union#<rank>(><STAGE1_K>)`, `OUTSIDE-<PRE_K>` or
 *   `NOT-IN-INDEX?`; `firstRefSeedRank` is 1-based, or -1 when no seed
 *   references `sym`.
 */
function probe(table, sym, expectedFile) {
    return __awaiter(this, void 0, void 0, function* () {
        const pool = (0, pool_1.getWorkerPool)();
        const { dense } = yield pool.encodeQuery(sym);
        const prefix = PLATFORM_ROOT.endsWith("/") ? PLATFORM_ROOT : `${PLATFORM_ROOT}/`;
        const where = `path LIKE '${(0, filter_builder_1.escapeSqlString)(prefix)}%'`;
        const columns = ["id", "path", "chunk_index", "defined_symbols", "referenced_symbols"];
        const vectorRows = yield table.vectorSearch(dense).select([...columns, "_distance"]).where(where).limit(PRE_K).toArray();
        let ftsRows = [];
        try {
            ftsRows = yield table.search(sym).select([...columns, "_score"]).where(where).limit(PRE_K).toArray();
        }
        catch (err) {
            // Still best effort, but say so: without FTS rows the fusion below is
            // vector-only and the reported ranks are not comparable to a real run.
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`[probe] FTS search failed for "${sym}"; continuing with vector rows only: ${reason}`);
        }
        // Reciprocal-rank fusion over both result lists, keyed by row identity.
        const scores = new Map();
        const docMap = new Map();
        const keyOf = (d) => d.id || `${d.path}:${d.chunk_index}`;
        const addScore = (k, r) => { scores.set(k, (scores.get(k) || 0) + 1 / (RRF_K + r + 1)); };
        vectorRows.forEach((d, r) => {
            const k = keyOf(d);
            docMap.set(k, d);
            addScore(k, r);
        });
        ftsRows.forEach((d, r) => {
            const k = keyOf(d);
            // Keep the vector row when both searches returned the same chunk.
            if (!docMap.has(k)) {
                docMap.set(k, d);
            }
            addScore(k, r);
        });
        const fused = Array.from(scores.entries())
            .sort((a, b) => b[1] - a[1])
            .map(([k]) => docMap.get(k));
        // Locate the expected definition chunk among retrieved rows (one pass).
        const expectedSuffix = `/${expectedFile.toLowerCase()}`;
        const inExpectedFile = (d) => String(d.path).toLowerCase().endsWith(expectedSuffix);
        const isExpected = (d) => inExpectedFile(d) && toStrArr(d.defined_symbols).includes(sym);
        const defIndex = fused.findIndex(isExpected);
        const defInUnion = defIndex >= 0;
        const defRetrievalRank = defInUnion ? defIndex + 1 : -1; // 1-based rank in fusion order
        const defInPool = defInUnion && defIndex < STAGE1_K;
        // If not in the union, query directly to confirm it exists in the index.
        let defExistsInIndex = defInUnion;
        if (!defInUnion) {
            const direct = yield table.query().select(columns).where(`${where} AND array_contains(defined_symbols, '${(0, filter_builder_1.escapeSqlString)(sym)}')`).limit(50).toArray();
            defExistsInIndex = direct.some((d) => inExpectedFile(d));
        }
        // ref->def reachability: among top-SEED_K fusion seeds, how many reference `sym`?
        let seedsRefSym = 0;
        let firstRefSeedRank = -1;
        fused.slice(0, SEED_K).forEach((d, i) => {
            if (!toStrArr(d.referenced_symbols).includes(sym)) {
                return;
            }
            seedsRefSym++;
            if (firstRefSeedRank < 0) {
                firstRefSeedRank = i + 1;
            }
        });
        // Labels are derived from the constants so they cannot drift from the cut-offs.
        let loc;
        if (defInUnion) {
            loc = defInPool ? `pool#${defRetrievalRank}` : `union#${defRetrievalRank}(>${STAGE1_K})`;
        }
        else {
            loc = defExistsInIndex ? `OUTSIDE-${PRE_K}` : "NOT-IN-INDEX?";
        }
        return { sym, expectedFile, loc, seedsRefSym, firstRefSeedRank };
    });
}
|
|
157
|
+
/**
 * Script entry point: opens the vector DB, runs `probe` for every entry in
 * CASES sequentially, prints one table row per case, then exits with code 0
 * through `gracefulExit`.
 *
 * The DB handle is closed in a `finally` so it is released even when
 * `ensureTable` or a probe rejects; the rejection still propagates to the
 * caller. The seed-count denominator is printed from SEED_K so the output
 * stays in step with the constant.
 *
 * @returns {Promise<void>}
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        const db = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        try {
            const table = yield db.ensureTable();
            console.log("sym expectedDefChunk seeds_ref firstRefSeed");
            for (const [sym, file] of CASES) {
                const r = yield probe(table, sym, file);
                const firstRef = r.firstRefSeedRank > 0 ? `rank${r.firstRefSeedRank}` : "-";
                console.log(`${r.sym.padEnd(15)} ${r.loc.padEnd(24)} ${String(r.seedsRefSym).padStart(3)}/${SEED_K} ${firstRef}`);
            }
        }
        finally {
            yield db.close();
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
|
|
170
|
+
main().catch((e) => { console.error(e); process.exit(1); });
|
package/dist/eval-oss.js
CHANGED
|
@@ -133,25 +133,44 @@ const DATASETS = {
|
|
|
133
133
|
cases: PLATFORM_CASES,
|
|
134
134
|
},
|
|
135
135
|
};
|
|
136
|
-
// A chunk matches when its file path ends with the expected file AND
|
|
137
|
-
//
|
|
138
|
-
//
|
|
139
|
-
//
|
|
140
|
-
//
|
|
141
|
-
|
|
136
|
+
// A chunk matches when its file path ends with the expected file AND either:
// (b) it declares the queried symbol (`defined_symbols` includes it), OR
// (a) the expected line falls within the chunk's line range.
//
// (b) is the primary, drift-robust criterion for these symbol-lookup cases: it
// credits the searcher for surfacing the chunk that *defines* the symbol,
// regardless of where the hand-curated `expectedLine` lands relative to
// post-reindex chunk boundaries. Before this, stale expectedLine values (e.g.
// `requireAuth` def moved to lines 57-76 but the case said 45) and one-line
// boundary off-by-ones (BeyondError chunk starts at line 37; the line check
// tested `36 >= 37`) scored 7/15 platform cases as misses even though the
// defining chunk was returned at ranks 1-3 — masking real recall (0.333 → 0.800).
// (a) is kept as a fallback for re-export / binding-site cases (express `query`,
// lodash `merge`) whose answer chunk legitimately doesn't carry the symbol in
// `defined_symbols`.
//
// Line range: start is `generated_metadata.start_line`, else `chunk.start_line`,
// else 0; the range spans `generated_metadata.num_lines` lines, or 200 lines
// when that count is missing or not positive.
//
// `defined_symbols` is read as a plain array or as an object exposing
// `toArray()`; an unreadable value simply falls through to the line check.
function chunkMatches(chunk, expectedFile, expectedLine, expectedSymbol) {
    const isNil = (v) => v === null || v === undefined;
    // First non-nullish candidate (mirrors a `??` chain without newer syntax).
    const firstDefined = (...candidates) => candidates.find((v) => !isNil(v));
    // Reads a symbol list that may be a plain array or a `toArray()` provider.
    const readSymbols = (raw) => {
        if (Array.isArray(raw)) {
            return raw;
        }
        if (raw && typeof raw.toArray === "function") {
            try {
                const converted = raw.toArray();
                return Array.isArray(converted) ? converted : [];
            }
            catch (_err) {
                return [];
            }
        }
        return [];
    };
    const metadata = chunk.metadata;
    const path = String((isNil(metadata) ? undefined : metadata.path) || "").toLowerCase();
    // A path ending in `/${file}` also ends in `file`, so one suffix test covers both.
    if (!path.endsWith(expectedFile.toLowerCase())) {
        return false;
    }
    // (b) defining-chunk match
    if (expectedSymbol && readSymbols(chunk.defined_symbols).includes(expectedSymbol)) {
        return true;
    }
    // (a) line-range match — chunks are 0-indexed start_line; expected line is 1-indexed
    const generated = isNil(chunk.generated_metadata) ? {} : chunk.generated_metadata;
    const start = Number(firstDefined(generated.start_line, chunk.start_line, 0));
    const numLines = Number(firstDefined(generated.num_lines, 0));
    const end = numLines > 0 ? start + numLines : start + 200;
    const zeroBasedLine = expectedLine - 1;
    return zeroBasedLine >= start && zeroBasedLine <= end;
}
|
|
153
169
|
function evaluateOss(response, c, timeMs) {
|
|
154
|
-
const idx = response.data.findIndex((chunk) =>
|
|
170
|
+
const idx = response.data.findIndex((chunk) =>
|
|
171
|
+
// `query` is the symbol name (sverklo P1 shape), so it doubles as the
|
|
172
|
+
// expected defined-symbol for the drift-robust match branch.
|
|
173
|
+
chunkMatches(chunk, c.expectedFile, c.expectedLine, c.query));
|
|
155
174
|
const rank = idx + 1; // 0 = miss
|
|
156
175
|
const rr = rank > 0 ? 1 / rank : 0;
|
|
157
176
|
const recall10 = rank > 0 && rank <= 10 ? 1 : 0;
|
|
@@ -10,12 +10,40 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.Searcher = void 0;
|
|
13
|
+
exports.asSymbolQuery = asSymbolQuery;
|
|
13
14
|
exports.buildWhereClause = buildWhereClause;
|
|
14
15
|
const config_1 = require("../../config");
|
|
15
16
|
const filter_builder_1 = require("../utils/filter-builder");
|
|
16
17
|
const pool_1 = require("../workers/pool");
|
|
17
18
|
const intent_1 = require("./intent");
|
|
18
19
|
const pagerank_1 = require("./pagerank");
|
|
20
|
+
// Normalizes a defined_symbols / referenced_symbols cell to a plain array of
// strings. The cell may be a real array or an object exposing `.toArray()`
// (e.g. a LanceDB Arrow proxy); non-string entries are dropped, and anything
// unreadable yields an empty array.
function readSymbolArray(val) {
    const stringsOnly = (list) => list.filter((entry) => typeof entry === "string");
    if (Array.isArray(val)) {
        return stringsOnly(val);
    }
    const convertible = Boolean(val) && typeof val.toArray === "function";
    if (!convertible) {
        return [];
    }
    try {
        const converted = val.toArray();
        return Array.isArray(converted) ? stringsOnly(converted) : [];
    }
    catch (_err) {
        // Best effort: a failing conversion counts as "no symbols".
        return [];
    }
}
|
|
39
|
+
// A query that is a single bare identifier ("BeyondError", "requireAuth", "map")
// is almost always a symbol lookup — the user wants the chunk that *defines*
// that symbol. Returns the trimmed identifier, or null for natural-language
// queries. Drives the symbol-definition promotion in search().
//
// An identifier here is ASCII-only: a letter, `_` or `$`, followed by letters,
// digits, `_` or `$`. Non-string input returns null instead of throwing, since
// this helper is exported and may be called outside search().
function asSymbolQuery(query) {
    if (typeof query !== "string") {
        return null;
    }
    const q = query.trim();
    return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(q) ? q : null;
}
|
|
19
47
|
function buildWhereClause(pathPrefix, filters, searchIntent) {
|
|
20
48
|
var _a;
|
|
21
49
|
const parts = [];
|
|
@@ -339,7 +367,7 @@ class Searcher {
|
|
|
339
367
|
}
|
|
340
368
|
search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
|
|
341
369
|
return __awaiter(this, void 0, void 0, function* () {
|
|
342
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
|
|
370
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
343
371
|
const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
|
|
344
372
|
// ColBERT rerank is opt-in as of v0.17.1. On the 97-case eval it
|
|
345
373
|
// regresses MRR@10 by ~3% and doubles query latency; sweep across
|
|
@@ -348,6 +376,8 @@ class Searcher {
|
|
|
348
376
|
let doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
|
|
349
377
|
const explain = (_b = _search_options === null || _search_options === void 0 ? void 0 : _search_options.explain) !== null && _b !== void 0 ? _b : false;
|
|
350
378
|
const searchIntent = intent || (0, intent_1.detectIntent)(query);
|
|
379
|
+
// Bare-identifier queries get symbol-definition promotion (see below).
|
|
380
|
+
const symbolQuery = asSymbolQuery(query);
|
|
351
381
|
const pool = (0, pool_1.getWorkerPool)();
|
|
352
382
|
if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
|
|
353
383
|
const err = new Error("Aborted");
|
|
@@ -372,7 +402,7 @@ class Searcher {
|
|
|
372
402
|
try {
|
|
373
403
|
table = yield this.db.ensureTable();
|
|
374
404
|
}
|
|
375
|
-
catch (
|
|
405
|
+
catch (_o) {
|
|
376
406
|
return { data: [] };
|
|
377
407
|
}
|
|
378
408
|
// Ensure FTS index exists (lazy init, retry periodically on failure)
|
|
@@ -396,11 +426,14 @@ class Searcher {
|
|
|
396
426
|
// it in the lightweight path only when the flag is on so we don't bloat the
|
|
397
427
|
// default query path.
|
|
398
428
|
const pagerankEnabled = process.env.GMAX_PAGERANK === "1" && !!pathPrefix;
|
|
429
|
+
// Symbol-definition promotion needs defined_symbols on every candidate, not
|
|
430
|
+
// just the final display set — load it for bare-symbol queries too.
|
|
431
|
+
const needDefinedSymbols = pagerankEnabled || symbolQuery !== null;
|
|
399
432
|
const LIGHTWEIGHT_COLUMNS = [
|
|
400
433
|
"id", "path", "hash", "chunk_index", "start_line", "end_line",
|
|
401
434
|
"is_anchor", "chunk_type", "role", "complexity", "is_exported",
|
|
402
435
|
"content", "parent_symbol", "referenced_symbols", "pooled_colbert_48d",
|
|
403
|
-
...(
|
|
436
|
+
...(needDefinedSymbols ? ["defined_symbols"] : []),
|
|
404
437
|
];
|
|
405
438
|
// _distance is auto-added by vectorSearch, _score by FTS — include each
|
|
406
439
|
// in the respective query to suppress LanceDB deprecation warnings
|
|
@@ -438,7 +471,7 @@ class Searcher {
|
|
|
438
471
|
this.ftsAvailable = true;
|
|
439
472
|
console.warn("[Searcher] Rebuilt FTS index with position support — retry search");
|
|
440
473
|
}
|
|
441
|
-
catch (
|
|
474
|
+
catch (_p) { }
|
|
442
475
|
}
|
|
443
476
|
else {
|
|
444
477
|
console.warn(`[Searcher] FTS search failed (will retry later): ${msg}`);
|
|
@@ -552,6 +585,30 @@ class Searcher {
|
|
|
552
585
|
return { data: [] };
|
|
553
586
|
}
|
|
554
587
|
const rerankCandidates = stage2Candidates.slice(0, RERANK_TOP);
|
|
588
|
+
// Symbol-definition promotion (1/2): membership. For a bare-symbol query,
|
|
589
|
+
// ensure the chunk(s) that actually DEFINE the symbol reach the rerank set
|
|
590
|
+
// even when the cosine / RERANK_TOP cuts would drop them — e.g. ErrorCodes
|
|
591
|
+
// sits at pooled-cosine rank 24 (> RERANK_TOP=20) and resolveActor at fusion
|
|
592
|
+
// rank 91 (> the stage-2 cut). Pulled from the top-200 fusion pool, bounded
|
|
593
|
+
// so the rerank batch stays small. Must run before Phase B so the injected
|
|
594
|
+
// chunks get their colbert data fetched for reranking. The score boost in
|
|
595
|
+
// (2/2) below then lets them win dedup over their own method-child chunks.
|
|
596
|
+
if (symbolQuery && rerankCandidates.length > 0) {
|
|
597
|
+
const present = new Set(rerankCandidates.map((d) => d.id).filter(Boolean));
|
|
598
|
+
const MAX_INJECT = 5;
|
|
599
|
+
let injected = 0;
|
|
600
|
+
for (const d of topCandidates) {
|
|
601
|
+
if (injected >= MAX_INJECT)
|
|
602
|
+
break;
|
|
603
|
+
if (!d.id || present.has(d.id))
|
|
604
|
+
continue;
|
|
605
|
+
if (readSymbolArray(d.defined_symbols).includes(symbolQuery)) {
|
|
606
|
+
rerankCandidates.push(d);
|
|
607
|
+
present.add(d.id);
|
|
608
|
+
injected++;
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
}
|
|
555
612
|
// Phase B: Lazy-load colbert data only for the ~20 rerank candidates
|
|
556
613
|
if (doRerank && rerankCandidates.length > 0) {
|
|
557
614
|
const rerankIds = rerankCandidates
|
|
@@ -599,13 +656,25 @@ class Searcher {
|
|
|
599
656
|
// Small tie-breaker so later items don't all share 0
|
|
600
657
|
return fusedScore || 1 / (idx + 1);
|
|
601
658
|
});
|
|
659
|
+
// Symbol-definition promotion (2/2): score. Multiplicatively boost any
|
|
660
|
+
// candidate that defines the queried symbol so the definition chunk outranks
|
|
661
|
+
// its own method-child chunks (e.g. the `BeyondError` class chunk vs its
|
|
662
|
+
// constructor/toJSON, which otherwise score higher on the literal and evict
|
|
663
|
+
// the parent in overlap dedup). Multiplicative keeps it scale-invariant
|
|
664
|
+
// across the rerank-on (ColBERT maxsim) and rerank-off (fusion) score ranges.
|
|
665
|
+
const envDefBoost = Number.parseFloat((_k = process.env.GMAX_DEF_BOOST) !== null && _k !== void 0 ? _k : "");
|
|
666
|
+
const DEF_MATCH_BOOST = Number.isFinite(envDefBoost) && envDefBoost >= 1 ? envDefBoost : 5;
|
|
602
667
|
const scored = rerankCandidates.map((doc, idx) => {
|
|
603
668
|
var _a, _b;
|
|
604
669
|
const base = (_a = scores === null || scores === void 0 ? void 0 : scores[idx]) !== null && _a !== void 0 ? _a : 0;
|
|
605
670
|
const key = doc.id || `${doc.path}:${doc.chunk_index}`;
|
|
606
671
|
const fusedScore = (_b = candidateScores.get(key)) !== null && _b !== void 0 ? _b : 0;
|
|
607
672
|
const blended = base + FUSED_WEIGHT * fusedScore;
|
|
608
|
-
|
|
673
|
+
let boosted = this.applyStructureBoost(doc, blended, searchIntent);
|
|
674
|
+
if (symbolQuery &&
|
|
675
|
+
readSymbolArray(doc.defined_symbols).includes(symbolQuery)) {
|
|
676
|
+
boosted *= DEF_MATCH_BOOST;
|
|
677
|
+
}
|
|
609
678
|
return {
|
|
610
679
|
record: doc,
|
|
611
680
|
score: boosted,
|
|
@@ -622,7 +691,7 @@ class Searcher {
|
|
|
622
691
|
try {
|
|
623
692
|
const { scores: prScores, max: prMax } = yield (0, pagerank_1.loadOrComputePageRank)(this.db, pathPrefix);
|
|
624
693
|
if (prMax > 0) {
|
|
625
|
-
const envWeight = Number.parseFloat((
|
|
694
|
+
const envWeight = Number.parseFloat((_l = process.env.GMAX_PR_WEIGHT) !== null && _l !== void 0 ? _l : "");
|
|
626
695
|
const PR_WEIGHT = Number.isFinite(envWeight) && envWeight >= 0 ? envWeight : 0.05;
|
|
627
696
|
for (const item of scored) {
|
|
628
697
|
const raw = item.record.defined_symbols;
|
|
@@ -637,7 +706,7 @@ class Searcher {
|
|
|
637
706
|
defs = arr.filter((v) => typeof v === "string");
|
|
638
707
|
}
|
|
639
708
|
}
|
|
640
|
-
catch (
|
|
709
|
+
catch (_q) { }
|
|
641
710
|
}
|
|
642
711
|
const norm = (0, pagerank_1.pageRankBoostForSymbols)(defs, prScores, prMax);
|
|
643
712
|
item.score += PR_WEIGHT * norm;
|
|
@@ -655,7 +724,7 @@ class Searcher {
|
|
|
655
724
|
// Item 10: Per-file diversification
|
|
656
725
|
const seenFiles = new Map();
|
|
657
726
|
const diversified = [];
|
|
658
|
-
const envMaxPerFile = Number.parseInt((
|
|
727
|
+
const envMaxPerFile = Number.parseInt((_m = process.env.GMAX_MAX_PER_FILE) !== null && _m !== void 0 ? _m : "", 10);
|
|
659
728
|
const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
|
|
660
729
|
for (const item of uniqueScored) {
|
|
661
730
|
const path = item.record.path || "";
|
package/package.json
CHANGED