grepmax 0.17.6 → 0.17.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -339,13 +339,13 @@ class Searcher {
339
339
  }
340
340
  search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
341
341
  return __awaiter(this, void 0, void 0, function* () {
342
- var _a, _b, _c, _d, _e, _f, _g, _h, _j;
342
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
343
343
  const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
344
344
  // ColBERT rerank is opt-in as of v0.17.1. On the 97-case eval it
345
345
  // regresses MRR@10 by ~3% and doubles query latency; sweep across
346
346
  // FUSED_WEIGHT ∈ {0,0.1,0.5,1,2} showed rerank scores dominate
347
347
  // fused scores ~30:1 so blend tuning can't recover the loss.
348
- const doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
348
+ let doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
349
349
  const explain = (_b = _search_options === null || _search_options === void 0 ? void 0 : _search_options.explain) !== null && _b !== void 0 ? _b : false;
350
350
  const searchIntent = intent || (0, intent_1.detectIntent)(query);
351
351
  const pool = (0, pool_1.getWorkerPool)();
@@ -372,7 +372,7 @@ class Searcher {
372
372
  try {
373
373
  table = yield this.db.ensureTable();
374
374
  }
375
- catch (_k) {
375
+ catch (_m) {
376
376
  return { data: [] };
377
377
  }
378
378
  // Ensure FTS index exists (lazy init, retry periodically on failure)
@@ -438,7 +438,7 @@ class Searcher {
438
438
  this.ftsAvailable = true;
439
439
  console.warn("[Searcher] Rebuilt FTS index with position support — retry search");
440
440
  }
441
- catch (_l) { }
441
+ catch (_o) { }
442
442
  }
443
443
  else {
444
444
  console.warn(`[Searcher] FTS search failed (will retry later): ${msg}`);
@@ -474,9 +474,46 @@ class Searcher {
474
474
  // Free raw search results — docMap holds the only needed references
475
475
  vectorResults.length = 0;
476
476
  ftsResults.length = 0;
477
+ // Candidate-concentration gate (Bundle B, v0.17.2 OSS-fixture finding):
478
+ // ColBERT rerank is shape-sensitive. When the post-fusion pool clusters
479
+ // into one file (single-file-repo / concentrated shape, e.g. lodash) rerank
480
+ // lifts recall sharply (+0.283 MRR on the OSS bench); on modular/spread
481
+ // pools (express, platform) it regresses. So detect the concentrated regime
482
+ // here and *add* rerank-on for it. This only ever flips doRerank false→true:
483
+ // an explicit GMAX_RERANK=1 (doRerank already true) is never overridden off.
484
+ if (!doRerank) {
485
+ const envConcThreshold = Number.parseFloat((_d = process.env.GMAX_CONCENTRATION_THRESHOLD) !== null && _d !== void 0 ? _d : "");
486
+ // Unset, non-numeric, or <= 0 keeps the gate active at the default 0.7; a value > 1
487
+ // disables it (no possible share reaches it), giving a rerank-fully-off
488
+ // baseline for sweeps without touching the doRerank default. 0.7 is the
489
+ // sweep winner: highest threshold (least spurious firing) that still
490
+ // retains lodash's +0.15 MRR lift while leaving express/platform flat.
491
+ const CONCENTRATION_THRESHOLD = Number.isFinite(envConcThreshold) && envConcThreshold > 0
492
+ ? envConcThreshold
493
+ : 0.7;
494
+ // Histogram a fixed top-K window (not finalLimit) so the threshold stays
495
+ // calibrated across callers regardless of the result window they request.
496
+ const CONCENTRATION_K = 10;
497
+ const window = fused.slice(0, CONCENTRATION_K);
498
+ if (window.length > 0 && CONCENTRATION_THRESHOLD <= 1) {
499
+ const buckets = new Map();
500
+ for (const doc of window) {
501
+ buckets.set(doc.path, ((_e = buckets.get(doc.path)) !== null && _e !== void 0 ? _e : 0) + 1);
502
+ }
503
+ let maxBucket = 0;
504
+ for (const count of buckets.values()) {
505
+ if (count > maxBucket)
506
+ maxBucket = count;
507
+ }
508
+ const share = maxBucket / window.length;
509
+ if (share >= CONCENTRATION_THRESHOLD) {
510
+ doRerank = true;
511
+ }
512
+ }
513
+ }
477
514
  // Item 8: Widen PRE_RERANK_K
478
515
  // Retrieve a wide set for Stage 1 filtering
479
- const envStage1 = Number.parseInt((_d = process.env.GMAX_STAGE1_K) !== null && _d !== void 0 ? _d : "", 10);
516
+ const envStage1 = Number.parseInt((_f = process.env.GMAX_STAGE1_K) !== null && _f !== void 0 ? _f : "", 10);
480
517
  const STAGE1_K = Number.isFinite(envStage1) && envStage1 > 0 ? envStage1 : 200;
481
518
  const topCandidates = fused.slice(0, STAGE1_K);
482
519
  // Free docMap — topCandidates already holds record references
@@ -484,11 +521,11 @@ class Searcher {
484
521
  // Item 9: Two-stage rerank
485
522
  // Stage 1: Cheap pooled cosine filter
486
523
  let stage2Candidates = topCandidates;
487
- const envStage2K = Number.parseInt((_e = process.env.GMAX_STAGE2_K) !== null && _e !== void 0 ? _e : "", 10);
524
+ const envStage2K = Number.parseInt((_g = process.env.GMAX_STAGE2_K) !== null && _g !== void 0 ? _g : "", 10);
488
525
  const STAGE2_K = Number.isFinite(envStage2K) && envStage2K > 0 ? envStage2K : 40;
489
- const envRerankTop = Number.parseInt((_f = process.env.GMAX_RERANK_TOP) !== null && _f !== void 0 ? _f : "", 10);
526
+ const envRerankTop = Number.parseInt((_h = process.env.GMAX_RERANK_TOP) !== null && _h !== void 0 ? _h : "", 10);
490
527
  const RERANK_TOP = Number.isFinite(envRerankTop) && envRerankTop > 0 ? envRerankTop : 20;
491
- const envBlend = Number.parseFloat((_g = process.env.GMAX_RERANK_BLEND) !== null && _g !== void 0 ? _g : "");
528
+ const envBlend = Number.parseFloat((_j = process.env.GMAX_RERANK_BLEND) !== null && _j !== void 0 ? _j : "");
492
529
  const FUSED_WEIGHT = Number.isFinite(envBlend) && envBlend >= 0 ? envBlend : 0.5;
493
530
  if (queryPooled && topCandidates.length > STAGE2_K) {
494
531
  const cosineScores = topCandidates.map((doc) => {
@@ -585,7 +622,7 @@ class Searcher {
585
622
  try {
586
623
  const { scores: prScores, max: prMax } = yield (0, pagerank_1.loadOrComputePageRank)(this.db, pathPrefix);
587
624
  if (prMax > 0) {
588
- const envWeight = Number.parseFloat((_h = process.env.GMAX_PR_WEIGHT) !== null && _h !== void 0 ? _h : "");
625
+ const envWeight = Number.parseFloat((_k = process.env.GMAX_PR_WEIGHT) !== null && _k !== void 0 ? _k : "");
589
626
  const PR_WEIGHT = Number.isFinite(envWeight) && envWeight >= 0 ? envWeight : 0.05;
590
627
  for (const item of scored) {
591
628
  const raw = item.record.defined_symbols;
@@ -600,7 +637,7 @@ class Searcher {
600
637
  defs = arr.filter((v) => typeof v === "string");
601
638
  }
602
639
  }
603
- catch (_m) { }
640
+ catch (_p) { }
604
641
  }
605
642
  const norm = (0, pagerank_1.pageRankBoostForSymbols)(defs, prScores, prMax);
606
643
  item.score += PR_WEIGHT * norm;
@@ -618,7 +655,7 @@ class Searcher {
618
655
  // Item 10: Per-file diversification
619
656
  const seenFiles = new Map();
620
657
  const diversified = [];
621
- const envMaxPerFile = Number.parseInt((_j = process.env.GMAX_MAX_PER_FILE) !== null && _j !== void 0 ? _j : "", 10);
658
+ const envMaxPerFile = Number.parseInt((_l = process.env.GMAX_MAX_PER_FILE) !== null && _l !== void 0 ? _l : "", 10);
622
659
  const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
623
660
  for (const item of uniqueScored) {
624
661
  const path = item.record.path || "";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.6",
3
+ "version": "0.17.7",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.6",
3
+ "version": "0.17.7",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",