grepmax 0.7.25 → 0.7.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -10,10 +10,67 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.Searcher = void 0;
|
|
13
|
+
exports.buildWhereClause = buildWhereClause;
|
|
13
14
|
const config_1 = require("../../config");
|
|
14
15
|
const filter_builder_1 = require("../utils/filter-builder");
|
|
15
16
|
const pool_1 = require("../workers/pool");
|
|
16
17
|
const intent_1 = require("./intent");
|
|
18
|
+
/**
 * Builds the SQL WHERE clause that narrows a search to the requested rows.
 *
 * Each supported filter contributes one predicate; the predicates are joined
 * with " AND ". Every user-supplied value is passed through
 * `escapeSqlString` before being embedded in the clause.
 *
 * NOTE(review): values are interpolated into LIKE patterns, so `%` and `_`
 * inside a path act as wildcards unless `escapeSqlString` neutralises them —
 * confirm against the filter-builder helper.
 *
 * @param {string|undefined|null} pathPrefix - When truthy, results must have a
 *   `path` starting with the normalized prefix; it is also prepended to the
 *   `exclude` filter.
 * @param {object|undefined|null} filters - Optional filter bag. Recognised
 *   string keys: `file`, `exclude`, `language`, `role`, `project_roots`
 *   (comma-separated), `exclude_project_roots` (comma-separated), `def`,
 *   `ref`. Non-string or empty values are ignored.
 * @param {{type: string, filters?: {definitionsOnly?: boolean}}} searchIntent -
 *   Detected intent; consulted only when no `def` filter is given.
 * @returns {string|undefined} The combined clause, or `undefined` when no
 *   predicate applies.
 */
function buildWhereClause(pathPrefix, filters, searchIntent) {
    const { escapeSqlString, normalizePath } = filter_builder_1;
    const opts = filters === null || filters === undefined ? {} : filters;
    const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
    // Turns "a, b/,," into ["a/", "b/"]. Blank entries are dropped because an
    // empty root would become the prefix "/" and match (or exclude) every
    // absolute path; surrounding whitespace is trimmed so "a, b" works.
    const toRootPrefixes = (csv) => csv
        .split(",")
        .map((root) => root.trim())
        .filter((root) => root.length > 0)
        .map((root) => (root.endsWith("/") ? root : `${root}/`));
    const parts = [];

    if (pathPrefix) {
        parts.push(`path LIKE '${escapeSqlString(normalizePath(pathPrefix))}%'`);
    }

    // File name filter: match the final path segment exactly.
    if (isNonEmptyString(opts.file)) {
        parts.push(`path LIKE '%/${escapeSqlString(opts.file)}'`);
    }

    // Exclude filter: resolved against pathPrefix when one is given.
    if (isNonEmptyString(opts.exclude)) {
        const absExclude = pathPrefix
            ? normalizePath(pathPrefix + opts.exclude)
            : opts.exclude;
        parts.push(`path NOT LIKE '${escapeSqlString(absExclude)}%'`);
    }

    // Language filter: matched by file extension, with or without the dot.
    if (isNonEmptyString(opts.language)) {
        const ext = opts.language.startsWith(".")
            ? opts.language
            : `.${opts.language}`;
        parts.push(`path LIKE '%${escapeSqlString(ext)}'`);
    }

    if (isNonEmptyString(opts.role)) {
        parts.push(`role = '${escapeSqlString(opts.role)}'`);
    }

    // Project roots: a row qualifies when it lives under ANY listed root.
    if (isNonEmptyString(opts.project_roots)) {
        const clauses = toRootPrefixes(opts.project_roots).map((prefix) => `path LIKE '${escapeSqlString(prefix)}%'`);
        // Skip the group entirely when nothing usable remains; "()" is not valid SQL.
        if (clauses.length > 0) {
            parts.push(`(${clauses.join(" OR ")})`);
        }
    }

    // Excluded project roots: a row must be outside EVERY listed root.
    if (isNonEmptyString(opts.exclude_project_roots)) {
        for (const prefix of toRootPrefixes(opts.exclude_project_roots)) {
            parts.push(`path NOT LIKE '${escapeSqlString(prefix)}%'`);
        }
    }

    // Definition filter: an explicit symbol wins; otherwise a DEFINITION
    // intent that asks for definitions only restricts by role/symbols.
    if (isNonEmptyString(opts.def)) {
        parts.push(`array_contains(defined_symbols, '${escapeSqlString(opts.def)}')`);
    }
    else if (searchIntent.type === "DEFINITION") {
        const intentFilters = searchIntent.filters;
        if (intentFilters !== null &&
            intentFilters !== undefined &&
            intentFilters.definitionsOnly) {
            parts.push(`(role = 'DEFINITION' OR array_length(defined_symbols) > 0)`);
        }
    }

    // Reference filter.
    if (isNonEmptyString(opts.ref)) {
        parts.push(`array_contains(referenced_symbols, '${escapeSqlString(opts.ref)}')`);
    }

    return parts.length > 0 ? parts.join(" AND ") : undefined;
}
|
|
17
74
|
class Searcher {
|
|
18
75
|
constructor(db) {
|
|
19
76
|
this.db = db;
|
|
@@ -254,7 +311,7 @@ class Searcher {
|
|
|
254
311
|
}
|
|
255
312
|
search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
|
|
256
313
|
return __awaiter(this, void 0, void 0, function* () {
|
|
257
|
-
var _a, _b, _c, _d, _e, _f, _g
|
|
314
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
258
315
|
const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
|
|
259
316
|
const doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : true;
|
|
260
317
|
const searchIntent = intent || (0, intent_1.detectIntent)(query);
|
|
@@ -273,74 +330,8 @@ class Searcher {
|
|
|
273
330
|
if (colbertDim !== config_1.CONFIG.COLBERT_DIM) {
|
|
274
331
|
throw new Error(`[Searcher] Query ColBERT dim (${colbertDim}) != Config (${config_1.CONFIG.COLBERT_DIM})`);
|
|
275
332
|
}
|
|
276
|
-
const
|
|
277
|
-
|
|
278
|
-
whereClauseParts.push(`path LIKE '${(0, filter_builder_1.escapeSqlString)((0, filter_builder_1.normalizePath)(pathPrefix))}%'`);
|
|
279
|
-
}
|
|
280
|
-
// Handle file name filter
|
|
281
|
-
const fileFilter = _filters === null || _filters === void 0 ? void 0 : _filters.file;
|
|
282
|
-
if (typeof fileFilter === "string" && fileFilter) {
|
|
283
|
-
whereClauseParts.push(`path LIKE '%/${(0, filter_builder_1.escapeSqlString)(fileFilter)}'`);
|
|
284
|
-
}
|
|
285
|
-
// Handle exclude filter
|
|
286
|
-
const excludeFilter = _filters === null || _filters === void 0 ? void 0 : _filters.exclude;
|
|
287
|
-
if (typeof excludeFilter === "string" && excludeFilter) {
|
|
288
|
-
const absExclude = pathPrefix
|
|
289
|
-
? (0, filter_builder_1.normalizePath)(pathPrefix + excludeFilter)
|
|
290
|
-
: excludeFilter;
|
|
291
|
-
whereClauseParts.push(`path NOT LIKE '${(0, filter_builder_1.escapeSqlString)(absExclude)}%'`);
|
|
292
|
-
}
|
|
293
|
-
// Handle language filter (by file extension)
|
|
294
|
-
const langFilter = _filters === null || _filters === void 0 ? void 0 : _filters.language;
|
|
295
|
-
if (typeof langFilter === "string" && langFilter) {
|
|
296
|
-
const ext = langFilter.startsWith(".") ? langFilter : `.${langFilter}`;
|
|
297
|
-
whereClauseParts.push(`path LIKE '%${(0, filter_builder_1.escapeSqlString)(ext)}'`);
|
|
298
|
-
}
|
|
299
|
-
// Handle role filter
|
|
300
|
-
const roleFilter = _filters === null || _filters === void 0 ? void 0 : _filters.role;
|
|
301
|
-
if (typeof roleFilter === "string" && roleFilter) {
|
|
302
|
-
whereClauseParts.push(`role = '${(0, filter_builder_1.escapeSqlString)(roleFilter)}'`);
|
|
303
|
-
}
|
|
304
|
-
// Handle project roots filter (from search_all projects param)
|
|
305
|
-
const projectRoots = _filters === null || _filters === void 0 ? void 0 : _filters.project_roots;
|
|
306
|
-
if (typeof projectRoots === "string" && projectRoots) {
|
|
307
|
-
const roots = projectRoots.split(",");
|
|
308
|
-
const clauses = roots.map((r) => {
|
|
309
|
-
const prefix = r.endsWith("/") ? r : `${r}/`;
|
|
310
|
-
return `path LIKE '${(0, filter_builder_1.escapeSqlString)(prefix)}%'`;
|
|
311
|
-
});
|
|
312
|
-
whereClauseParts.push(`(${clauses.join(" OR ")})`);
|
|
313
|
-
}
|
|
314
|
-
// Handle exclude project roots filter
|
|
315
|
-
const excludeRoots = _filters === null || _filters === void 0 ? void 0 : _filters.exclude_project_roots;
|
|
316
|
-
if (typeof excludeRoots === "string" && excludeRoots) {
|
|
317
|
-
for (const r of excludeRoots.split(",")) {
|
|
318
|
-
const prefix = r.endsWith("/") ? r : `${r}/`;
|
|
319
|
-
whereClauseParts.push(`path NOT LIKE '${(0, filter_builder_1.escapeSqlString)(prefix)}%'`);
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
// Handle --def (definition) filter
|
|
323
|
-
const defFilter = _filters === null || _filters === void 0 ? void 0 : _filters.def;
|
|
324
|
-
if (typeof defFilter === "string" && defFilter) {
|
|
325
|
-
whereClauseParts.push(`array_contains(defined_symbols, '${(0, filter_builder_1.escapeSqlString)(defFilter)}')`);
|
|
326
|
-
}
|
|
327
|
-
else if (searchIntent.type === "DEFINITION" &&
|
|
328
|
-
((_b = searchIntent.filters) === null || _b === void 0 ? void 0 : _b.definitionsOnly)) {
|
|
329
|
-
// If intent is DEFINITION but no specific symbol provided, filter by role
|
|
330
|
-
whereClauseParts.push(`(role = 'DEFINITION' OR array_length(defined_symbols) > 0)`);
|
|
331
|
-
}
|
|
332
|
-
// Handle --ref (reference) filter
|
|
333
|
-
const refFilter = _filters === null || _filters === void 0 ? void 0 : _filters.ref;
|
|
334
|
-
if (typeof refFilter === "string" && refFilter) {
|
|
335
|
-
whereClauseParts.push(`array_contains(referenced_symbols, '${(0, filter_builder_1.escapeSqlString)(refFilter)}')`);
|
|
336
|
-
}
|
|
337
|
-
else if (searchIntent.type === "USAGE" &&
|
|
338
|
-
((_c = searchIntent.filters) === null || _c === void 0 ? void 0 : _c.usagesOnly)) {
|
|
339
|
-
// If intent is USAGE, we might want to filter out definitions?
|
|
340
|
-
// For now, let's just rely on boosting.
|
|
341
|
-
}
|
|
342
|
-
const whereClause = whereClauseParts.length > 0 ? whereClauseParts.join(" AND ") : undefined;
|
|
343
|
-
const envPreK = Number.parseInt((_d = process.env.GMAX_PRE_K) !== null && _d !== void 0 ? _d : "", 10);
|
|
333
|
+
const whereClause = buildWhereClause(pathPrefix, _filters, searchIntent);
|
|
334
|
+
const envPreK = Number.parseInt((_b = process.env.GMAX_PRE_K) !== null && _b !== void 0 ? _b : "", 10);
|
|
344
335
|
const PRE_RERANK_K = Number.isFinite(envPreK) && envPreK > 0
|
|
345
336
|
? envPreK
|
|
346
337
|
: Math.max(finalLimit * 5, 500);
|
|
@@ -348,7 +339,7 @@ class Searcher {
|
|
|
348
339
|
try {
|
|
349
340
|
table = yield this.db.ensureTable();
|
|
350
341
|
}
|
|
351
|
-
catch (
|
|
342
|
+
catch (_h) {
|
|
352
343
|
return { data: [] };
|
|
353
344
|
}
|
|
354
345
|
// Ensure FTS index exists (lazy init, retry periodically on failure)
|
|
@@ -417,17 +408,17 @@ class Searcher {
|
|
|
417
408
|
.filter(Boolean);
|
|
418
409
|
// Item 8: Widen PRE_RERANK_K
|
|
419
410
|
// Retrieve a wide set for Stage 1 filtering
|
|
420
|
-
const envStage1 = Number.parseInt((
|
|
411
|
+
const envStage1 = Number.parseInt((_c = process.env.GMAX_STAGE1_K) !== null && _c !== void 0 ? _c : "", 10);
|
|
421
412
|
const STAGE1_K = Number.isFinite(envStage1) && envStage1 > 0 ? envStage1 : 200;
|
|
422
413
|
const topCandidates = fused.slice(0, STAGE1_K);
|
|
423
414
|
// Item 9: Two-stage rerank
|
|
424
415
|
// Stage 1: Cheap pooled cosine filter
|
|
425
416
|
let stage2Candidates = topCandidates;
|
|
426
|
-
const envStage2K = Number.parseInt((
|
|
417
|
+
const envStage2K = Number.parseInt((_d = process.env.GMAX_STAGE2_K) !== null && _d !== void 0 ? _d : "", 10);
|
|
427
418
|
const STAGE2_K = Number.isFinite(envStage2K) && envStage2K > 0 ? envStage2K : 40;
|
|
428
|
-
const envRerankTop = Number.parseInt((
|
|
419
|
+
const envRerankTop = Number.parseInt((_e = process.env.GMAX_RERANK_TOP) !== null && _e !== void 0 ? _e : "", 10);
|
|
429
420
|
const RERANK_TOP = Number.isFinite(envRerankTop) && envRerankTop > 0 ? envRerankTop : 20;
|
|
430
|
-
const envBlend = Number.parseFloat((
|
|
421
|
+
const envBlend = Number.parseFloat((_f = process.env.GMAX_RERANK_BLEND) !== null && _f !== void 0 ? _f : "");
|
|
431
422
|
const FUSED_WEIGHT = Number.isFinite(envBlend) && envBlend >= 0 ? envBlend : 0.5;
|
|
432
423
|
if (queryPooled && topCandidates.length > STAGE2_K) {
|
|
433
424
|
const cosineScores = topCandidates.map((doc) => {
|
|
@@ -493,7 +484,7 @@ class Searcher {
|
|
|
493
484
|
// Item 10: Per-file diversification
|
|
494
485
|
const seenFiles = new Map();
|
|
495
486
|
const diversified = [];
|
|
496
|
-
const envMaxPerFile = Number.parseInt((
|
|
487
|
+
const envMaxPerFile = Number.parseInt((_g = process.env.GMAX_MAX_PER_FILE) !== null && _g !== void 0 ? _g : "", 10);
|
|
497
488
|
const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
|
|
498
489
|
for (const item of uniqueScored) {
|
|
499
490
|
const path = item.record.path || "";
|
package/package.json
CHANGED