grepmax 0.17.8 → 0.17.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,12 +10,40 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.Searcher = void 0;
13
+ exports.asSymbolQuery = asSymbolQuery;
13
14
  exports.buildWhereClause = buildWhereClause;
14
15
  const config_1 = require("../../config");
15
16
  const filter_builder_1 = require("../utils/filter-builder");
16
17
  const pool_1 = require("../workers/pool");
17
18
  const intent_1 = require("./intent");
18
19
  const pagerank_1 = require("./pagerank");
20
/**
 * Normalises a `defined_symbols` / `referenced_symbols` column value into a
 * plain array of strings.
 *
 * The value may arrive as a plain JS array or as an object exposing a
 * `toArray()` method (the surrounding code treats this as a LanceDB Arrow
 * proxy). Anything else, including null/undefined, yields an empty array.
 *
 * @param {unknown} val - Raw column value read from a search record.
 * @returns {string[]} The string entries of the column; never null.
 */
function readSymbolArray(val) {
    // Non-string entries are dropped so callers can safely use `.includes()`.
    const onlyStrings = (items) => items.filter((v) => typeof v === "string");
    if (!val)
        return [];
    if (Array.isArray(val))
        return onlyStrings(val);
    if (typeof val.toArray !== "function")
        return [];
    try {
        const converted = val.toArray();
        return Array.isArray(converted) ? onlyStrings(converted) : [];
    }
    catch (_a) {
        // Deliberate best-effort: a proxy that fails to materialise is treated
        // as "no symbols" rather than failing the caller.
        return [];
    }
}
39
/**
 * Classifies a search query as a bare-identifier symbol lookup.
 *
 * A query that is a single bare identifier ("BeyondError", "requireAuth",
 * "map") is almost always a symbol lookup: the user wants the chunk that
 * *defines* that symbol. Drives the symbol-definition promotion in search().
 *
 * @param {string} query - Raw user query; surrounding whitespace is ignored.
 * @returns {string|null} The trimmed identifier, or null for natural-language
 *   queries and for non-string input.
 */
function asSymbolQuery(query) {
    // Exported helper: tolerate null/undefined/non-string input instead of
    // throwing from `.trim()`; such input is simply "not a symbol query".
    if (typeof query !== "string")
        return null;
    const q = query.trim();
    // ASCII identifier shape only: a letter, `_` or `$`, then word chars or `$`.
    return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(q) ? q : null;
}
19
47
  function buildWhereClause(pathPrefix, filters, searchIntent) {
20
48
  var _a;
21
49
  const parts = [];
@@ -339,7 +367,7 @@ class Searcher {
339
367
  }
340
368
  search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
341
369
  return __awaiter(this, void 0, void 0, function* () {
342
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
370
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
343
371
  const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
344
372
  // ColBERT rerank is opt-in as of v0.17.1. On the 97-case eval it
345
373
  // regresses MRR@10 by ~3% and doubles query latency; sweep across
@@ -348,6 +376,8 @@ class Searcher {
348
376
  let doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
349
377
  const explain = (_b = _search_options === null || _search_options === void 0 ? void 0 : _search_options.explain) !== null && _b !== void 0 ? _b : false;
350
378
  const searchIntent = intent || (0, intent_1.detectIntent)(query);
379
+ // Bare-identifier queries get symbol-definition promotion (see below).
380
+ const symbolQuery = asSymbolQuery(query);
351
381
  const pool = (0, pool_1.getWorkerPool)();
352
382
  if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
353
383
  const err = new Error("Aborted");
@@ -372,7 +402,7 @@ class Searcher {
372
402
  try {
373
403
  table = yield this.db.ensureTable();
374
404
  }
375
- catch (_m) {
405
+ catch (_o) {
376
406
  return { data: [] };
377
407
  }
378
408
  // Ensure FTS index exists (lazy init, retry periodically on failure)
@@ -396,11 +426,14 @@ class Searcher {
396
426
  // it in the lightweight path only when the flag is on so we don't bloat the
397
427
  // default query path.
398
428
  const pagerankEnabled = process.env.GMAX_PAGERANK === "1" && !!pathPrefix;
429
+ // Symbol-definition promotion needs defined_symbols on every candidate, not
430
+ // just the final display set — load it for bare-symbol queries too.
431
+ const needDefinedSymbols = pagerankEnabled || symbolQuery !== null;
399
432
  const LIGHTWEIGHT_COLUMNS = [
400
433
  "id", "path", "hash", "chunk_index", "start_line", "end_line",
401
434
  "is_anchor", "chunk_type", "role", "complexity", "is_exported",
402
435
  "content", "parent_symbol", "referenced_symbols", "pooled_colbert_48d",
403
- ...(pagerankEnabled ? ["defined_symbols"] : []),
436
+ ...(needDefinedSymbols ? ["defined_symbols"] : []),
404
437
  ];
405
438
  // _distance is auto-added by vectorSearch, _score by FTS — include each
406
439
  // in the respective query to suppress LanceDB deprecation warnings
@@ -438,7 +471,7 @@ class Searcher {
438
471
  this.ftsAvailable = true;
439
472
  console.warn("[Searcher] Rebuilt FTS index with position support — retry search");
440
473
  }
441
- catch (_o) { }
474
+ catch (_p) { }
442
475
  }
443
476
  else {
444
477
  console.warn(`[Searcher] FTS search failed (will retry later): ${msg}`);
@@ -552,6 +585,30 @@ class Searcher {
552
585
  return { data: [] };
553
586
  }
554
587
  const rerankCandidates = stage2Candidates.slice(0, RERANK_TOP);
588
+ // Symbol-definition promotion (1/2): membership. For a bare-symbol query,
589
+ // ensure the chunk(s) that actually DEFINE the symbol reach the rerank set
590
+ // even when the cosine / RERANK_TOP cuts would drop them — e.g. ErrorCodes
591
+ // sits at pooled-cosine rank 24 (> RERANK_TOP=20) and resolveActor at fusion
592
+ // rank 91 (> the stage-2 cut). Pulled from the top-200 fusion pool, bounded
593
+ // so the rerank batch stays small. Must run before Phase B so the injected
594
+ // chunks get their colbert data fetched for reranking. The score boost in
595
+ // (2/2) below then lets them win dedup over their own method-child chunks.
596
+ if (symbolQuery && rerankCandidates.length > 0) {
597
+ const present = new Set(rerankCandidates.map((d) => d.id).filter(Boolean));
598
+ const MAX_INJECT = 5;
599
+ let injected = 0;
600
+ for (const d of topCandidates) {
601
+ if (injected >= MAX_INJECT)
602
+ break;
603
+ if (!d.id || present.has(d.id))
604
+ continue;
605
+ if (readSymbolArray(d.defined_symbols).includes(symbolQuery)) {
606
+ rerankCandidates.push(d);
607
+ present.add(d.id);
608
+ injected++;
609
+ }
610
+ }
611
+ }
555
612
  // Phase B: Lazy-load colbert data only for the ~20 rerank candidates
556
613
  if (doRerank && rerankCandidates.length > 0) {
557
614
  const rerankIds = rerankCandidates
@@ -599,13 +656,25 @@ class Searcher {
599
656
  // Small tie-breaker so later items don't all share 0
600
657
  return fusedScore || 1 / (idx + 1);
601
658
  });
659
+ // Symbol-definition promotion (2/2): score. Multiplicatively boost any
660
+ // candidate that defines the queried symbol so the definition chunk outranks
661
+ // its own method-child chunks (e.g. the `BeyondError` class chunk vs its
662
+ // constructor/toJSON, which otherwise score higher on the literal and evict
663
+ // the parent in overlap dedup). Multiplicative keeps it scale-invariant
664
+ // across the rerank-on (ColBERT maxsim) and rerank-off (fusion) score ranges.
665
+ const envDefBoost = Number.parseFloat((_k = process.env.GMAX_DEF_BOOST) !== null && _k !== void 0 ? _k : "");
666
+ const DEF_MATCH_BOOST = Number.isFinite(envDefBoost) && envDefBoost >= 1 ? envDefBoost : 5;
602
667
  const scored = rerankCandidates.map((doc, idx) => {
603
668
  var _a, _b;
604
669
  const base = (_a = scores === null || scores === void 0 ? void 0 : scores[idx]) !== null && _a !== void 0 ? _a : 0;
605
670
  const key = doc.id || `${doc.path}:${doc.chunk_index}`;
606
671
  const fusedScore = (_b = candidateScores.get(key)) !== null && _b !== void 0 ? _b : 0;
607
672
  const blended = base + FUSED_WEIGHT * fusedScore;
608
- const boosted = this.applyStructureBoost(doc, blended, searchIntent);
673
+ let boosted = this.applyStructureBoost(doc, blended, searchIntent);
674
+ if (symbolQuery &&
675
+ readSymbolArray(doc.defined_symbols).includes(symbolQuery)) {
676
+ boosted *= DEF_MATCH_BOOST;
677
+ }
609
678
  return {
610
679
  record: doc,
611
680
  score: boosted,
@@ -622,7 +691,7 @@ class Searcher {
622
691
  try {
623
692
  const { scores: prScores, max: prMax } = yield (0, pagerank_1.loadOrComputePageRank)(this.db, pathPrefix);
624
693
  if (prMax > 0) {
625
- const envWeight = Number.parseFloat((_k = process.env.GMAX_PR_WEIGHT) !== null && _k !== void 0 ? _k : "");
694
+ const envWeight = Number.parseFloat((_l = process.env.GMAX_PR_WEIGHT) !== null && _l !== void 0 ? _l : "");
626
695
  const PR_WEIGHT = Number.isFinite(envWeight) && envWeight >= 0 ? envWeight : 0.05;
627
696
  for (const item of scored) {
628
697
  const raw = item.record.defined_symbols;
@@ -637,7 +706,7 @@ class Searcher {
637
706
  defs = arr.filter((v) => typeof v === "string");
638
707
  }
639
708
  }
640
- catch (_p) { }
709
+ catch (_q) { }
641
710
  }
642
711
  const norm = (0, pagerank_1.pageRankBoostForSymbols)(defs, prScores, prMax);
643
712
  item.score += PR_WEIGHT * norm;
@@ -655,7 +724,7 @@ class Searcher {
655
724
  // Item 10: Per-file diversification
656
725
  const seenFiles = new Map();
657
726
  const diversified = [];
658
- const envMaxPerFile = Number.parseInt((_l = process.env.GMAX_MAX_PER_FILE) !== null && _l !== void 0 ? _l : "", 10);
727
+ const envMaxPerFile = Number.parseInt((_m = process.env.GMAX_MAX_PER_FILE) !== null && _m !== void 0 ? _m : "", 10);
659
728
  const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
660
729
  for (const item of uniqueScored) {
661
730
  const path = item.record.path || "";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.8",
3
+ "version": "0.17.9",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.8",
3
+ "version": "0.17.9",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",