sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -14,13 +14,80 @@ import { createHash } from 'crypto';
14
14
  import path from 'path';
15
15
  import fs from 'fs/promises';
16
16
  import { GRAPH_CONFIG, DB_PATHS } from '../infrastructure/config/index.js';
17
- import { getLanguageByPath } from '../infrastructure/language-patterns.js';
17
+ import { getLanguageByPath, resolveLanguage } from '../infrastructure/language-patterns.js';
18
18
  import { getTreeSitterProvider } from '../infrastructure/tree-sitter-provider.js';
19
19
 
20
20
  // Schema version - increment when schema changes require full reindex
21
21
  // Users should run `/index-codebase --full` after upgrading
22
22
  export const SCHEMA_VERSION = 2;
23
23
 
24
+ /**
25
+ * Sentinel `end_line` clamp (2026-05-13). Lua-specific by design.
26
+ *
27
+ * Background: Lua's regex extractor uses `findEndLineKeyword` to find the
28
+ * `end` keyword that closes a function body. The helper tracks nesting
29
+ * depth across `if`/`while`/`for`/`function`/`do` keywords and decrements
30
+ * on `end`. When the depth counter mis-balances (control-flow keywords
31
+ * sharing line context with the closing `end`), the helper falls through
32
+ * to `return lines.length` (the file's last line), producing entities
33
+ * with end_line = EOF that structurally span half the file (LU-003:
34
+ * tablex.deepcopy at 118-120 got rendered as 98-999 because its
35
+ * preceding sibling cycle_aware_copy had bogus end_line=999).
36
+ *
37
+ * Gated to language='lua' EXPLICITLY because:
38
+ * - Tree-sitter languages (Java, Python, JS, TS, Go, Rust, C, C++,
39
+ * Ruby, etc.) get accurate end_lines from grammar-driven extraction —
40
+ * this clamp's pattern doesn't apply.
41
+ * - Other regex-path languages (zig, scala, kotlin, dart, elixir, php)
42
+ * may have similar bugs but haven't been audited. Apply only after
43
+ * per-language validation.
44
+ *
45
+ * Clamp condition (ALL three required):
46
+ * 1. cur.type ∈ NON_CONTAINER_TYPES (function-shaped — these cannot
47
+ * legitimately contain a same-level sibling that starts inside them)
48
+ * 2. cur.end_line >= file_line_count (ends at-or-past EOF)
49
+ * 3. A later entity starts after cur.start_line and before cur.end_line.
50
+ *
51
+ * Clamp target: next entity's start_line - 1. Mutates in place.
52
+ *
53
+ * The container-type gate (NON_CONTAINER_TYPES) is a defence-in-depth
54
+ * even within Lua — Lua doesn't really have classes, but if a future
55
+ * change adds 'module' or 'class' types via metatable detection, they
56
+ * stay protected.
57
+ */
58
+ const NON_CONTAINER_TYPES = new Set([
59
+ 'function', 'method', 'arrowFunction', 'variable', 'const', 'field',
60
+ 'decorator', 'assignedFunc', 'component', 'typeAlias',
61
+ ]);
62
+
63
+ const LUA_CLAMP_ALLOWED_LANGUAGES = new Set(['lua']);
64
+
65
+ export function clampSentinelEndLines(entities, fileLineCount, language) {
66
+ if (!Array.isArray(entities) || entities.length < 2) return entities;
67
+ if (fileLineCount == null || fileLineCount <= 0) return entities;
68
+ if (!LUA_CLAMP_ALLOWED_LANGUAGES.has(language)) return entities;
69
+ for (let i = 0; i < entities.length - 1; i++) {
70
+ const cur = entities[i];
71
+ if (!NON_CONTAINER_TYPES.has(cur?.type)) continue;
72
+ const curEnd = Number(cur?.end_line ?? 0);
73
+ if (!Number.isFinite(curEnd) || curEnd < fileLineCount) continue;
74
+ for (let j = i + 1; j < entities.length; j++) {
75
+ const next = entities[j];
76
+ const nextStart = Number(next?.start_line ?? 0);
77
+ const curStart = Number(cur?.start_line ?? 0);
78
+ if (!Number.isFinite(nextStart) || nextStart <= curStart) continue;
79
+ if (nextStart >= curEnd) break;
80
+ // Sentinel detected: clamp.
81
+ if (nextStart - 1 >= curStart) {
82
+ cur.end_line = nextStart - 1;
83
+ }
84
+ break;
85
+ }
86
+ }
87
+ return entities;
88
+ }
89
+
90
+
24
91
  /**
25
92
  * Normalize an identifier into searchable alias tokens.
26
93
  * Splits camelCase, PascalCase, snake_case, digits and emits both
@@ -317,9 +384,34 @@ export const TREE_SITTER_ENTITY_PRIORITY = Object.freeze({
317
384
  struct: 30,
318
385
  record: 30,
319
386
  module: 25,
387
+ // `variable` is intentionally lowest: when an `export const X = memo(...)`
388
+ // matches BOTH the @component (rank 40) and @variable rules, component wins.
389
+ // When `export const handler = async () => {}` matches BOTH @arrow (rank 20)
390
+ // and @variable, arrowFunction wins. Plain `export const FOO = "bar"` only
391
+ // matches @variable so it lands at rank 5 (kept).
392
+ variable: 5,
320
393
  trait: 25,
321
394
  impl: 20,
322
395
  decorator: 15,
396
+ // Rust macro_rules! definitions — same rank as function/struct/impl since
397
+ // they're top-level definitions with similar discoverability needs.
398
+ macro: 30,
399
+ // Java enum constants (FieldNamingPolicy.UPPER_CAMEL_CASE) — fine-grained
400
+ // anchor inside the enclosing enum class, but worth surfacing for
401
+ // symbol-anchored probes. Rank between variable (5) and decorator (15): low enough
402
+ // to not steal the enum's primary anchor when both match, high enough
403
+ // to win over plain variables in disambiguation.
404
+ enum_constant: 10,
405
+ // Java field declarations (static finals like TypeAdapters.BIT_SET that
406
+ // initialize anonymous inner-class subclasses). Same priority story as
407
+ // enum_constant — useful for anchoring, not primary.
408
+ field: 10,
409
+ // C# property declarations (`public RespCommand Command { get; init; }`) —
410
+ // first-class members per the C# spec, but lower in retrieval priority
411
+ // than methods/classes when both could anchor a result. Same rank as
412
+ // arrowFunction/interface/enum (20): high enough to win over enum_constant
413
+ // when both match, low enough to never overshadow the owning class.
414
+ property: 20,
323
415
  });
324
416
 
325
417
  // Module-scope constants for extractJavaScript() — avoid per-call/per-line allocation.
@@ -396,7 +488,10 @@ export class GraphExtractor {
396
488
  async extractFromFile(filePath, content) {
397
489
  this.currentFile = filePath;
398
490
  const lines = content.split('\n');
399
- const langInfo = getLanguageByPath(filePath);
491
+ // resolveLanguage handles per-file disambiguation of ambiguous extensions
492
+ // (today: `.h` → c-vs-cpp) so header-only C++ libraries get parsed by
493
+ // tree-sitter-cpp rather than tree-sitter-c.
494
+ const langInfo = resolveLanguage(filePath, content);
400
495
 
401
496
  if (!langInfo) {
402
497
  return { entities: [], relationships: [] };
@@ -516,12 +611,64 @@ export class GraphExtractor {
516
611
  let currentClass = null;
517
612
  let braceDepth = 0;
518
613
  let classStartDepth = 0;
614
+ // Track whether we are inside a `/* ... */` or `/** ... */` block
615
+ // comment. Without this, every entity-emission regex below also
616
+ // matches Javadoc `<pre>` examples ("public class MyClass { ... }"),
617
+ // creating phantom classes/methods/calls in the graph. Verified on
618
+ // gson SerializedName.java / Since.java / Until.java where phantom
619
+ // `MyClass`/`User`/`Gson`/`fromJson` entities were polluting
620
+ // search-time symbol attribution via findFirstEntityInRange.
621
+ // The state is a per-line boolean: true if the line BEGINS inside
622
+ // a block comment (and we therefore skip all regex extractions and
623
+ // brace counting on that line). State transitions on the first
624
+ // `/*` open and the first `*/` close encountered, scanned left-to-
625
+ // right. Inline `/* ... */` on a single line is treated as the
626
+ // line containing both open and close — the line ends OUT of the
627
+ // comment, so extraction runs as normal (a rare but harmless edge:
628
+ // identifiers on the same line as a closing `*/` could still be
629
+ // picked up; this matches existing whole-file regex behaviour).
630
+ let inBlockComment = false;
519
631
 
520
632
  for (let i = 0; i < lines.length; i++) {
521
633
  const line = lines[i];
522
634
  const lineNum = i + 1;
523
635
 
524
- // Track brace depth
636
+ const enteredAtStart = inBlockComment;
637
+ // Update inBlockComment state from this line's `/*` opens and
638
+ // `*/` closes. Rather than scanning character-by-character, each
639
+ // iteration jumps to the next delimiter with an indexOf search.
640
+ {
641
+ let scan = 0;
642
+ while (scan < line.length) {
643
+ if (inBlockComment) {
644
+ const close = line.indexOf('*/', scan);
645
+ if (close < 0) { scan = line.length; break; }
646
+ inBlockComment = false;
647
+ scan = close + 2;
648
+ } else {
649
+ const open = line.indexOf('/*', scan);
650
+ if (open < 0) { scan = line.length; break; }
651
+ // Inline line-comment `//` before `/*` on the same line:
652
+ // // /* not really a block */ — the `//` wins.
653
+ const lineCom = line.indexOf('//', scan);
654
+ if (lineCom >= 0 && lineCom < open) { scan = line.length; break; }
655
+ inBlockComment = true;
656
+ scan = open + 2;
657
+ }
658
+ }
659
+ }
660
+
661
+ // Skip lines that both BEGIN and END inside a block comment. A line
662
+ // that opens a comment and stays inside (entered false, ends true) is
663
+ // NOT skipped: code BEFORE the `/*` is still real and gets extracted.
664
+ const lineWasFullyInComment = enteredAtStart && inBlockComment;
665
+ if (lineWasFullyInComment) {
666
+ // Don't count braces, don't run extraction regexes.
667
+ continue;
668
+ }
669
+
670
+ // Track brace depth (raw-line approximation, matches pre-fix
671
+ // behaviour for non-comment lines).
525
672
  braceDepth += (line.match(/{/g) || []).length;
526
673
  braceDepth -= (line.match(/}/g) || []).length;
527
674
 
@@ -530,6 +677,15 @@ export class GraphExtractor {
530
677
  currentClass = null;
531
678
  }
532
679
 
680
+ // If we OPENED a block comment on this line, code BEFORE the
681
+ // `/*` is still real — run extraction on the line as usual; the
682
+ // Javadoc body that follows starts on the next iteration with
683
+ // inBlockComment=true. Same for lines that close a block comment
684
+ // (we already cleared the state above by the time we get here).
685
+ // Defensive: if the line is mostly Javadoc but has trailing code
686
+ // after `*/`, the regex will still capture; that mirrors the
687
+ // existing 99% case (real `public class Foo {` lines).
688
+
533
689
  // Class declarations
534
690
  const classMatch = line.match(/(?:public|private|protected)?\s*(?:static)?\s*(?:final|abstract)?\s*class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w,\s]+))?/);
535
691
  if (classMatch) {
@@ -1112,6 +1268,17 @@ export class GraphExtractor {
1112
1268
  }
1113
1269
  }
1114
1270
 
1271
+ // Sentinel clamp (2026-05-13): Lua-only. The regex `findEndLineKeyword`
1272
+ // falls through to `return lines.length` when the `end` keyword counter
1273
+ // mis-balances (control-flow keywords sharing line context), producing
1274
+ // entities with end_line=EOF that swallow subsequent siblings (LU-003:
1275
+ // tablex.deepcopy at 118-120 was being rendered as 98-999 because the
1276
+ // preceding sibling cycle_aware_copy had bogus end_line=999). The
1277
+ // language gate inside clampSentinelEndLines is explicit — other
1278
+ // regex-path languages (zig, scala, kotlin, etc.) are unaffected and
1279
+ // would need per-language validation before opt-in.
1280
+ clampSentinelEndLines(entities, lines.length, langInfo?.id);
1281
+
1115
1282
  return { entities, relationships };
1116
1283
  }
1117
1284
 
@@ -1305,15 +1472,20 @@ export class GraphExtractor {
1305
1472
  .filter(Boolean);
1306
1473
  }
1307
1474
 
1475
+ _clampSentinelEndLines(entities, fileLineCount) {
1476
+ return clampSentinelEndLines(entities, fileLineCount);
1477
+ }
1478
+
1308
1479
  _normalizeTreeSitterEntities(filePath, symbols, language) {
1309
1480
  const dedupedBySymbolAndLine = new Map();
1310
1481
 
1311
1482
  for (const sym of symbols) {
1312
1483
  if (!sym?.name || !sym?.type) continue;
1313
1484
  const normalizedType = this._normalizeTreeSitterSymbolType(sym.type, sym.name);
1314
- if ((language === 'javascript' || language === 'typescript') && normalizedType === 'variable') {
1315
- continue;
1316
- }
1485
+ // Note: previously dropped 'variable' for js/ts to avoid noise from
1486
+ // every internal `let x = 1`. The current TS/TSX/JS tag query scopes
1487
+ // @variable.definition to `(export_statement (lexical_declaration ...))`
1488
+ // so only EXPORTED top-level consts reach this point — keep them.
1317
1489
  const startLine = Number.isInteger(sym.startLine) ? sym.startLine : 0;
1318
1490
  const endLine = Number.isInteger(sym.endLine) ? sym.endLine : startLine;
1319
1491
  const rank = TREE_SITTER_ENTITY_PRIORITY[normalizedType] || 0;
@@ -1334,9 +1506,14 @@ export class GraphExtractor {
1334
1506
  }
1335
1507
  }
1336
1508
 
1337
- return Array.from(dedupedBySymbolAndLine.values())
1338
- .sort((a, b) => a.start_line - b.start_line)
1339
- .map(({ rank, ...entity }) => entity);
1509
+ const sorted = Array.from(dedupedBySymbolAndLine.values())
1510
+ .sort((a, b) => a.start_line - b.start_line);
1511
+ // Tree-sitter path: NO sentinel clamp. Tree-sitter parsers return
1512
+ // accurate end_lines via grammar-driven extraction; the regex-path
1513
+ // `findEndLineKeyword` fall-through is the only known source of the
1514
+ // bogus-EOF pattern, and only Lua currently goes through that path
1515
+ // (Lua has no tree-sitter grammar registered).
1516
+ return sorted.map(({ rank, ...entity }) => entity);
1340
1517
  }
1341
1518
 
1342
1519
  _normalizeTreeSitterSymbolType(type, name) {
@@ -1347,7 +1524,7 @@ export class GraphExtractor {
1347
1524
  }
1348
1525
 
1349
1526
  _resolveRelationshipTargets(relType, match, language) {
1350
- const isJsTs = language === 'javascript' || language === 'typescript';
1527
+ const isJsTs = language === 'javascript' || language === 'typescript' || language === 'tsx';
1351
1528
 
1352
1529
  if (isJsTs && relType === 'import') {
1353
1530
  const source = match[3]?.trim();
@@ -1852,7 +2029,8 @@ export function createGraphSchema(db) {
1852
2029
  hierarchy_level INTEGER DEFAULT 0,
1853
2030
  code TEXT,
1854
2031
  name_alias TEXT,
1855
- stale_since INTEGER DEFAULT NULL
2032
+ stale_since INTEGER DEFAULT NULL,
2033
+ page_rank REAL DEFAULT 0
1856
2034
  )
1857
2035
  `);
1858
2036
 
@@ -1869,6 +2047,11 @@ export function createGraphSchema(db) {
1869
2047
  db.exec('ALTER TABLE entities ADD COLUMN name_alias TEXT');
1870
2048
  console.log(' Migrated: added name_alias column to entities table');
1871
2049
  }
2050
+ const hasPageRankColumn = columns.some(col => col.name === 'page_rank');
2051
+ if (!hasPageRankColumn) {
2052
+ db.exec('ALTER TABLE entities ADD COLUMN page_rank REAL DEFAULT 0');
2053
+ console.log(' Migrated: added page_rank column to entities table');
2054
+ }
1872
2055
  } catch (err) {
1873
2056
  // Ignore errors - column might already exist or table not created yet
1874
2057
  }
@@ -1936,6 +2119,8 @@ export function createGraphSchema(db) {
1936
2119
  db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_rel_unique ON relationships(source_id, target_id, type, target_name) WHERE source_id IS NOT NULL`);
1937
2120
  // Index on target_id for efficient reverse lookups ("what calls X")
1938
2121
  db.exec(`CREATE INDEX IF NOT EXISTS idx_rel_target_id ON relationships(target_id) WHERE target_id IS NOT NULL`);
2122
+ // Index supports `page_rank DESC` lookups for ss-trace ranking and ranking probes.
2123
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entities_page_rank ON entities(page_rank) WHERE stale_since IS NULL`);
1939
2124
 
1940
2125
  setSchemaVersion(db);
1941
2126