grepmax 0.17.8 → 0.17.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -553,8 +553,78 @@ class TreeSitterChunker {
553
553
  referencedSymbols.push(name);
554
554
  }
555
555
  };
556
// Node types that denote a bare identifier in the supported grammars
// (`ErrorCodes`, as opposed to the dotted form `a.ErrorCodes`).
//   "constant"            Ruby
//   "name"                PHP
//   "simple_identifier"   Kotlin, Swift
//   "field_identifier"    Go
const LEAF_ID_TYPES = new Set([
    "identifier",
    "type_identifier",
    "constant",
    "name",
    "simple_identifier",
    "property_identifier",
    "field_identifier",
]);
/**
 * Tells whether a syntax node is a bare identifier.
 *
 * @param {object|null|undefined} n - Candidate node; may be absent.
 * @returns {boolean} true only when `n` is truthy, `n.type` is listed in
 *   LEAF_ID_TYPES, and the node has no named children (a missing
 *   `namedChildren` list counts as having none).
 */
const isLeafId = (n) => {
    if (!n) {
        return false;
    }
    if (!LEAF_ID_TYPES.has(n.type)) {
        return false;
    }
    const kids = n.namedChildren;
    const kidCount = kids === null || kids === undefined ? undefined : kids.length;
    // A node without a usable child count is treated as childless.
    return (kidCount === null || kidCount === undefined ? 0 : kidCount) === 0;
};
573
/**
 * Returns the first named child of a node.
 *
 * @param {object} n - Node to inspect; must not be null/undefined.
 * @returns {object|null} `n.namedChildren[0]`, or null when the list is
 *   missing, empty, or its first slot is null/undefined.
 */
const firstNamed = (n) => {
    const kids = n.namedChildren;
    if (kids === null || kids === undefined) {
        return null;
    }
    const head = kids[0];
    return head === null || head === undefined ? null : head;
};
574
/**
 * Extracts the type name from an instantiation node (`new Foo`, `Foo{}`,
 * `new Foo()`).
 *
 * The node's `type` field is used when the node exposes
 * `childForFieldName` and that lookup yields a node; otherwise the first
 * named child is used. A bare identifier is returned as its text; any other
 * shape is handed to `simpleRefName` (defined elsewhere in this file —
 * presumably it reduces a qualified name to its rightmost segment; confirm
 * against that helper).
 *
 * @param {object} n - Instantiation node.
 * @returns {string|null} The resolved name, or null when no candidate child
 *   exists (or whatever `simpleRefName` returns for a non-leaf child).
 */
const instantiatedTypeName = (n) => {
    let typeNode = null;
    if (n.childForFieldName) {
        typeNode = n.childForFieldName("type");
    }
    if (typeNode === null || typeNode === undefined) {
        // No explicit `type` field in this grammar: fall back positionally.
        typeNode = firstNamed(n);
    }
    if (!typeNode) {
        return null;
    }
    return isLeafId(typeNode) ? typeNode.text : simpleRefName(typeNode);
};
587
/**
 * Finds the first type name among a node's named children.
 *
 * A child matches when it is itself a `type_identifier`, or when it is a
 * `user_type` wrapper that directly contains a `type_identifier` with
 * non-empty text. Used for Java `instanceof_expression` and Kotlin/Swift
 * `check_expression` (`x is T`).
 *
 * @param {object} n - Type-test node.
 * @returns {string|null} Text of the first match, or null when none exists.
 */
const firstTypeIdent = (n) => {
    const isTypeIdent = (node) => node.type === "type_identifier";
    const children = n.namedChildren;
    if (children === null || children === undefined) {
        return null;
    }
    for (const child of children) {
        if (isTypeIdent(child)) {
            return child.text;
        }
        if (child.type !== "user_type") {
            continue;
        }
        const inner = child.namedChildren;
        const wrapped = inner === null || inner === undefined
            ? undefined
            : inner.find((x) => isTypeIdent(x));
        // An empty-text identifier is skipped so a later sibling can match.
        if (wrapped !== null && wrapped !== undefined && wrapped.text) {
            return wrapped.text;
        }
    }
    return null;
};
603
// Node types for member / scope access, one per grammar:
//   member_expression                  TS/JS
//   attribute                          Python
//   selector_expression                Go
//   field_access                       Java
//   member_access_expression           C#
//   scoped_identifier                  Rust
//   scope_resolution                   Ruby
//   navigation_expression              Kotlin, Swift
//   field_expression                   Scala
//   class_constant_access_expression   PHP
// Only the head (object/scope) is captured, and only when it is a
// Capitalized leaf identifier: `ErrorCodes.VALIDATION` and
// `ErrorCodes::NOT_FOUND` produce an edge to `ErrorCodes`, whereas
// `this.x`, `req.body` and lowercase locals produce none.
const MEMBER_ACCESS_TYPES = new Set([
    "member_expression",
    "attribute",
    "selector_expression",
    "field_access",
    "member_access_expression",
    "scoped_identifier",
    "scope_resolution",
    "navigation_expression",
    "field_expression",
    "class_constant_access_expression",
]);
619
// Node types that instantiate a type; the type is named by the node's
// `type` field or, failing that, its first named child:
//   object_creation_expression   Java, C#, PHP
//   composite_literal            Go
//   struct_expression            Rust
//   instance_expression          Scala
const INSTANTIATION_TYPES = new Set([
    "object_creation_expression",
    "composite_literal",
    "struct_expression",
    "instance_expression",
]);
556
626
  const extractRefs = (n) => {
557
- var _a, _b, _c, _d, _e, _f, _g, _h;
627
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j;
558
628
  // Handle JS/TS (call_expression), Python (call), Lua (function_call)
559
629
  if (n.type === "call_expression" ||
560
630
  n.type === "call" ||
@@ -617,19 +687,26 @@ class TreeSitterChunker {
617
687
  }
618
688
  }
619
689
  }
620
- // Identifier-as-value references (TS/JS): edges the call-expression
621
- // capture above misses. These feed the graph-walk consumers (PPR,
690
+ // Identifier-as-value references: edges the call-expression capture
691
+ // above misses. These feed the graph-walk consumers (PPR,
622
692
  // `gmax dead <ClassName>`, audit) that need class/enum references,
623
- // not just method-call names.
693
+ // not just method-call names. Node types are grammar-specific but
694
+ // each falls into one of three shapes — instantiation, type-test, or
695
+ // member/scope access — handled uniformly across the 14 grammars.
696
+ // Shape 1 — instantiation: `new ClassName(...)`, `ClassName{...}`.
624
697
  if (n.type === "new_expression") {
625
- // `new ClassName(...)` — constructor is always a type reference.
698
+ // TS/JS — constructor may be qualified (`ns.ClassName`).
626
699
  const ctor = n.childForFieldName
627
700
  ? n.childForFieldName("constructor")
628
701
  : null;
629
702
  addRef(simpleRefName(ctor));
630
703
  }
631
- else if (n.type === "binary_expression") {
632
- // `x instanceof ClassName` — the right operand is a type reference.
704
+ else if (INSTANTIATION_TYPES.has(n.type)) {
705
+ addRef(instantiatedTypeName(n));
706
+ }
707
+ // Shape 2 — type-test: `x instanceof T`, `x is T`.
708
+ if (n.type === "binary_expression") {
709
+ // TS/JS, PHP — instanceof is a binary operator.
633
710
  const op = n.childForFieldName
634
711
  ? n.childForFieldName("operator")
635
712
  : null;
@@ -640,19 +717,27 @@ class TreeSitterChunker {
640
717
  addRef(simpleRefName(right));
641
718
  }
642
719
  }
643
- else if (n.type === "member_expression") {
644
- // `ClassName.MEMBER` / `Enum.MEMBER` capture the object only when
645
- // it looks like a type/namespace (Capitalized identifier), so we
646
- // get `ErrorCodes` from `ErrorCodes.VALIDATION` without flooding
647
- // the graph with `this.x` / `req.body` / lowercase-local access.
648
- const obj = n.childForFieldName
649
- ? n.childForFieldName("object")
650
- : null;
651
- if (obj && obj.type === "identifier" && /^[A-Z]/.test(obj.text)) {
652
- addRef(obj.text);
720
+ else if (n.type === "instanceof_expression" || // Java
721
+ n.type === "check_expression" // Kotlin, Swift (`x is T`)
722
+ ) {
723
+ addRef(firstTypeIdent(n));
724
+ }
725
+ else if (n.type === "is_pattern_expression") {
726
+ // C# — `x is BeyondError`: the type sits in a constant_pattern.
727
+ const cp = ((_h = n.namedChildren) !== null && _h !== void 0 ? _h : []).find((c) => c.type === "constant_pattern");
728
+ const id = cp ? firstNamed(cp) : null;
729
+ if (isLeafId(id) && id && /^[A-Z]/.test(id.text))
730
+ addRef(id.text);
731
+ }
732
+ // Shape 3 — member / scope access: `ErrorCodes.MEMBER`,
733
+ // `ErrorCodes::MEMBER`. Capitalized head only (skip `this.x`).
734
+ if (MEMBER_ACCESS_TYPES.has(n.type)) {
735
+ const head = firstNamed(n);
736
+ if (head && isLeafId(head) && /^[A-Z]/.test(head.text)) {
737
+ addRef(head.text);
653
738
  }
654
739
  }
655
- for (const child of (_h = n.namedChildren) !== null && _h !== void 0 ? _h : []) {
740
+ for (const child of (_j = n.namedChildren) !== null && _j !== void 0 ? _j : []) {
656
741
  extractRefs(child);
657
742
  }
658
743
  };
@@ -10,12 +10,40 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.Searcher = void 0;
13
+ exports.asSymbolQuery = asSymbolQuery;
13
14
  exports.buildWhereClause = buildWhereClause;
14
15
  const config_1 = require("../../config");
15
16
  const filter_builder_1 = require("../utils/filter-builder");
16
17
  const pool_1 = require("../workers/pool");
17
18
  const intent_1 = require("./intent");
18
19
  const pagerank_1 = require("./pagerank");
20
/**
 * Normalizes a defined_symbols / referenced_symbols column value into a
 * plain array of strings. The value may arrive as a plain array or as a
 * LanceDB Arrow proxy exposing `.toArray()`.
 *
 * @param {*} val - Raw column value.
 * @returns {string[]} The string entries only; an empty array when the
 *   value is falsy, is neither an array nor a `.toArray()` provider, when
 *   `.toArray()` throws, or when it yields something that is not an Array.
 */
function readSymbolArray(val) {
    const keepStrings = (items) => items.filter((item) => typeof item === "string");
    if (!val) {
        return [];
    }
    if (Array.isArray(val)) {
        return keepStrings(val);
    }
    if (typeof val.toArray !== "function") {
        return [];
    }
    try {
        const materialized = val.toArray();
        return Array.isArray(materialized) ? keepStrings(materialized) : [];
    }
    catch (_err) {
        // Best-effort read: a failing proxy is treated as "no symbols".
        return [];
    }
}
39
/**
 * Classifies a search query as a symbol lookup.
 *
 * A query that is a single bare identifier ("BeyondError", "requireAuth",
 * "map") is almost always a symbol lookup — the user wants the chunk that
 * *defines* that symbol. Drives the symbol-definition promotion in search().
 *
 * @param {*} query - Raw query text.
 * @returns {string|null} The trimmed identifier when the whole query is one
 *   identifier (letters, digits, `_`, `$`, not starting with a digit);
 *   null for natural-language queries, empty input, and non-string input.
 */
function asSymbolQuery(query) {
    // This helper is exported, so guard against non-string input instead of
    // throwing from `.trim()`.
    if (typeof query !== "string") {
        return null;
    }
    const q = query.trim();
    return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(q) ? q : null;
}
19
47
  function buildWhereClause(pathPrefix, filters, searchIntent) {
20
48
  var _a;
21
49
  const parts = [];
@@ -339,7 +367,7 @@ class Searcher {
339
367
  }
340
368
  search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
341
369
  return __awaiter(this, void 0, void 0, function* () {
342
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
370
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
343
371
  const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
344
372
  // ColBERT rerank is opt-in as of v0.17.1. On the 97-case eval it
345
373
  // regresses MRR@10 by ~3% and doubles query latency; sweep across
@@ -348,6 +376,8 @@ class Searcher {
348
376
  let doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
349
377
  const explain = (_b = _search_options === null || _search_options === void 0 ? void 0 : _search_options.explain) !== null && _b !== void 0 ? _b : false;
350
378
  const searchIntent = intent || (0, intent_1.detectIntent)(query);
379
+ // Bare-identifier queries get symbol-definition promotion (see below).
380
+ const symbolQuery = asSymbolQuery(query);
351
381
  const pool = (0, pool_1.getWorkerPool)();
352
382
  if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
353
383
  const err = new Error("Aborted");
@@ -372,7 +402,7 @@ class Searcher {
372
402
  try {
373
403
  table = yield this.db.ensureTable();
374
404
  }
375
- catch (_m) {
405
+ catch (_o) {
376
406
  return { data: [] };
377
407
  }
378
408
  // Ensure FTS index exists (lazy init, retry periodically on failure)
@@ -396,11 +426,14 @@ class Searcher {
396
426
  // it in the lightweight path only when the flag is on so we don't bloat the
397
427
  // default query path.
398
428
  const pagerankEnabled = process.env.GMAX_PAGERANK === "1" && !!pathPrefix;
429
+ // Symbol-definition promotion needs defined_symbols on every candidate, not
430
+ // just the final display set — load it for bare-symbol queries too.
431
+ const needDefinedSymbols = pagerankEnabled || symbolQuery !== null;
399
432
  const LIGHTWEIGHT_COLUMNS = [
400
433
  "id", "path", "hash", "chunk_index", "start_line", "end_line",
401
434
  "is_anchor", "chunk_type", "role", "complexity", "is_exported",
402
435
  "content", "parent_symbol", "referenced_symbols", "pooled_colbert_48d",
403
- ...(pagerankEnabled ? ["defined_symbols"] : []),
436
+ ...(needDefinedSymbols ? ["defined_symbols"] : []),
404
437
  ];
405
438
  // _distance is auto-added by vectorSearch, _score by FTS — include each
406
439
  // in the respective query to suppress LanceDB deprecation warnings
@@ -438,7 +471,7 @@ class Searcher {
438
471
  this.ftsAvailable = true;
439
472
  console.warn("[Searcher] Rebuilt FTS index with position support — retry search");
440
473
  }
441
- catch (_o) { }
474
+ catch (_p) { }
442
475
  }
443
476
  else {
444
477
  console.warn(`[Searcher] FTS search failed (will retry later): ${msg}`);
@@ -552,6 +585,30 @@ class Searcher {
552
585
  return { data: [] };
553
586
  }
554
587
  const rerankCandidates = stage2Candidates.slice(0, RERANK_TOP);
588
+ // Symbol-definition promotion (1/2): membership. For a bare-symbol query,
589
+ // ensure the chunk(s) that actually DEFINE the symbol reach the rerank set
590
+ // even when the cosine / RERANK_TOP cuts would drop them — e.g. ErrorCodes
591
+ // sits at pooled-cosine rank 24 (> RERANK_TOP=20) and resolveActor at fusion
592
+ // rank 91 (> the stage-2 cut). Pulled from the top-200 fusion pool, bounded
593
+ // so the rerank batch stays small. Must run before Phase B so the injected
594
+ // chunks get their colbert data fetched for reranking. The score boost in
595
+ // (2/2) below then lets them win dedup over their own method-child chunks.
596
+ if (symbolQuery && rerankCandidates.length > 0) {
597
+ const present = new Set(rerankCandidates.map((d) => d.id).filter(Boolean));
598
+ const MAX_INJECT = 5;
599
+ let injected = 0;
600
+ for (const d of topCandidates) {
601
+ if (injected >= MAX_INJECT)
602
+ break;
603
+ if (!d.id || present.has(d.id))
604
+ continue;
605
+ if (readSymbolArray(d.defined_symbols).includes(symbolQuery)) {
606
+ rerankCandidates.push(d);
607
+ present.add(d.id);
608
+ injected++;
609
+ }
610
+ }
611
+ }
555
612
  // Phase B: Lazy-load colbert data only for the ~20 rerank candidates
556
613
  if (doRerank && rerankCandidates.length > 0) {
557
614
  const rerankIds = rerankCandidates
@@ -599,13 +656,25 @@ class Searcher {
599
656
  // Small tie-breaker so later items don't all share 0
600
657
  return fusedScore || 1 / (idx + 1);
601
658
  });
659
+ // Symbol-definition promotion (2/2): score. Multiplicatively boost any
660
+ // candidate that defines the queried symbol so the definition chunk outranks
661
+ // its own method-child chunks (e.g. the `BeyondError` class chunk vs its
662
+ // constructor/toJSON, which otherwise score higher on the literal and evict
663
+ // the parent in overlap dedup). Multiplicative keeps it scale-invariant
664
+ // across the rerank-on (ColBERT maxsim) and rerank-off (fusion) score ranges.
665
+ const envDefBoost = Number.parseFloat((_k = process.env.GMAX_DEF_BOOST) !== null && _k !== void 0 ? _k : "");
666
+ const DEF_MATCH_BOOST = Number.isFinite(envDefBoost) && envDefBoost >= 1 ? envDefBoost : 5;
602
667
  const scored = rerankCandidates.map((doc, idx) => {
603
668
  var _a, _b;
604
669
  const base = (_a = scores === null || scores === void 0 ? void 0 : scores[idx]) !== null && _a !== void 0 ? _a : 0;
605
670
  const key = doc.id || `${doc.path}:${doc.chunk_index}`;
606
671
  const fusedScore = (_b = candidateScores.get(key)) !== null && _b !== void 0 ? _b : 0;
607
672
  const blended = base + FUSED_WEIGHT * fusedScore;
608
- const boosted = this.applyStructureBoost(doc, blended, searchIntent);
673
+ let boosted = this.applyStructureBoost(doc, blended, searchIntent);
674
+ if (symbolQuery &&
675
+ readSymbolArray(doc.defined_symbols).includes(symbolQuery)) {
676
+ boosted *= DEF_MATCH_BOOST;
677
+ }
609
678
  return {
610
679
  record: doc,
611
680
  score: boosted,
@@ -622,7 +691,7 @@ class Searcher {
622
691
  try {
623
692
  const { scores: prScores, max: prMax } = yield (0, pagerank_1.loadOrComputePageRank)(this.db, pathPrefix);
624
693
  if (prMax > 0) {
625
- const envWeight = Number.parseFloat((_k = process.env.GMAX_PR_WEIGHT) !== null && _k !== void 0 ? _k : "");
694
+ const envWeight = Number.parseFloat((_l = process.env.GMAX_PR_WEIGHT) !== null && _l !== void 0 ? _l : "");
626
695
  const PR_WEIGHT = Number.isFinite(envWeight) && envWeight >= 0 ? envWeight : 0.05;
627
696
  for (const item of scored) {
628
697
  const raw = item.record.defined_symbols;
@@ -637,7 +706,7 @@ class Searcher {
637
706
  defs = arr.filter((v) => typeof v === "string");
638
707
  }
639
708
  }
640
- catch (_p) { }
709
+ catch (_q) { }
641
710
  }
642
711
  const norm = (0, pagerank_1.pageRankBoostForSymbols)(defs, prScores, prMax);
643
712
  item.score += PR_WEIGHT * norm;
@@ -655,7 +724,7 @@ class Searcher {
655
724
  // Item 10: Per-file diversification
656
725
  const seenFiles = new Map();
657
726
  const diversified = [];
658
- const envMaxPerFile = Number.parseInt((_l = process.env.GMAX_MAX_PER_FILE) !== null && _l !== void 0 ? _l : "", 10);
727
+ const envMaxPerFile = Number.parseInt((_m = process.env.GMAX_MAX_PER_FILE) !== null && _m !== void 0 ? _m : "", 10);
659
728
  const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
660
729
  for (const item of uniqueScored) {
661
730
  const path = item.record.path || "";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.8",
3
+ "version": "0.17.10",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.8",
3
+ "version": "0.17.10",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",