sweet-search 2.5.2 → 2.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -14,13 +14,80 @@ import { createHash } from 'crypto';
|
|
|
14
14
|
import path from 'path';
|
|
15
15
|
import fs from 'fs/promises';
|
|
16
16
|
import { GRAPH_CONFIG, DB_PATHS } from '../infrastructure/config/index.js';
|
|
17
|
-
import { getLanguageByPath } from '../infrastructure/language-patterns.js';
|
|
17
|
+
import { getLanguageByPath, resolveLanguage } from '../infrastructure/language-patterns.js';
|
|
18
18
|
import { getTreeSitterProvider } from '../infrastructure/tree-sitter-provider.js';
|
|
19
19
|
|
|
20
20
|
// Schema version - increment when schema changes require full reindex
|
|
21
21
|
// Users should run `/index-codebase --full` after upgrading
|
|
22
22
|
export const SCHEMA_VERSION = 2;
|
|
23
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Sentinel `end_line` clamp (2026-05-13). Lua-specific by design.
|
|
26
|
+
*
|
|
27
|
+
* Background: Lua's regex extractor uses `findEndLineKeyword` to find the
|
|
28
|
+
* `end` keyword that closes a function body. The helper tracks nesting
|
|
29
|
+
* depth across `if`/`while`/`for`/`function`/`do` keywords and decrements
|
|
30
|
+
* on `end`. When the depth counter mis-balances (control-flow keywords
|
|
31
|
+
* sharing line context with the closing `end`), the helper falls through
|
|
32
|
+
* to `return lines.length` (the file's last line), producing entities
|
|
33
|
+
* with end_line = EOF that structurally span half the file (LU-003:
|
|
34
|
+
* tablex.deepcopy at 118-120 got rendered as 98-999 because its
|
|
35
|
+
* preceding sibling cycle_aware_copy had bogus end_line=999).
|
|
36
|
+
*
|
|
37
|
+
* Gated to language='lua' EXPLICITLY because:
|
|
38
|
+
* - Tree-sitter languages (Java, Python, JS, TS, Go, Rust, C, C++,
|
|
39
|
+
* Ruby, etc.) get accurate end_lines from grammar-driven extraction —
|
|
40
|
+
* this clamp's pattern doesn't apply.
|
|
41
|
+
* - Other regex-path languages (zig, scala, kotlin, dart, elixir, php)
|
|
42
|
+
* may have similar bugs but haven't been audited. Apply only after
|
|
43
|
+
* per-language validation.
|
|
44
|
+
*
|
|
45
|
+
* Clamp condition (ALL three required):
|
|
46
|
+
* 1. cur.type ∈ NON_CONTAINER_TYPES (function-shaped — these cannot
|
|
47
|
+
* legitimately contain a same-level sibling that starts inside them)
|
|
48
|
+
* 2. cur.end_line >= file_line_count (ends at-or-past EOF)
|
|
49
|
+
* 3. A later entity starts after cur.start_line and before cur.end_line.
|
|
50
|
+
*
|
|
51
|
+
* Clamp target: next entity's start_line - 1. Mutates in place.
|
|
52
|
+
*
|
|
53
|
+
* The container-type gate (NON_CONTAINER_TYPES) is a defence-in-depth
|
|
54
|
+
* even within Lua — Lua doesn't really have classes, but if a future
|
|
55
|
+
* change adds 'module' or 'class' types via metatable detection, they
|
|
56
|
+
* stay protected.
|
|
57
|
+
*/
|
|
58
|
+
// Entity types treated as function-shaped (non-container): an entity of one
// of these types is not expected to enclose a following sibling entity, so
// an EOF-spanning end_line on it is treated as a sentinel.
const NON_CONTAINER_TYPES = new Set([
  'function', 'method', 'arrowFunction', 'variable', 'const', 'field',
  'decorator', 'assignedFunc', 'component', 'typeAlias',
]);

// Language ids for which the sentinel clamp is enabled. Anything not listed
// here is returned untouched by clampSentinelEndLines.
const LUA_CLAMP_ALLOWED_LANGUAGES = new Set(['lua']);

/**
 * Clamp bogus end-of-file `end_line` values on function-shaped entities.
 *
 * An entity is clamped only when ALL of the following hold:
 *   1. `language` is in LUA_CLAMP_ALLOWED_LANGUAGES,
 *   2. its `type` is in NON_CONTAINER_TYPES,
 *   3. its `end_line` is at or past `fileLineCount`,
 *   4. some entity later in the array starts strictly after its
 *      `start_line` and strictly before its `end_line`.
 *
 * The clamp target is `start_line - 1` of the NEAREST such later entity, so
 * the result does not depend on the later entities being sorted by
 * `start_line` (for sorted input this is simply the next sibling).
 *
 * Entities are mutated in place; the last array element is never clamped
 * because nothing follows it.
 *
 * @param {Array<object>} entities - Entities carrying `type`, `start_line`
 *   and `end_line`. Nullish entries are tolerated and ignored.
 * @param {number} fileLineCount - Number of lines in the file; nullish or
 *   non-positive values disable the clamp.
 * @param {string} [language] - Language id of the file; the clamp is a
 *   no-op unless it is an allowed language.
 * @returns {Array<object>} The same `entities` reference that was passed in
 *   (returned unchanged when it is not an array).
 */
export function clampSentinelEndLines(entities, fileLineCount, language) {
  if (!Array.isArray(entities) || entities.length < 2) return entities;
  if (fileLineCount == null || fileLineCount <= 0) return entities;
  if (!LUA_CLAMP_ALLOWED_LANGUAGES.has(language)) return entities;

  for (let i = 0; i < entities.length - 1; i++) {
    const cur = entities[i];
    // `has(undefined)` is false, so nullish entries are skipped here too.
    if (!NON_CONTAINER_TYPES.has(cur?.type)) continue;

    const curEnd = Number(cur.end_line ?? 0);
    if (!Number.isFinite(curEnd) || curEnd < fileLineCount) continue;

    // Loop-invariant for the inner scan: compute once per candidate.
    const curStart = Number(cur.start_line ?? 0);
    if (!Number.isFinite(curStart)) continue;

    // Find the closest later entity that starts inside (curStart, curEnd).
    let nearestStart = Infinity;
    for (let j = i + 1; j < entities.length; j++) {
      const nextStart = Number(entities[j]?.start_line ?? 0);
      if (!Number.isFinite(nextStart)) continue;
      if (nextStart > curStart && nextStart < curEnd && nextStart < nearestStart) {
        nearestStart = nextStart;
      }
    }

    // Sentinel detected: clamp, but never below the entity's own start line.
    if (Number.isFinite(nearestStart) && nearestStart - 1 >= curStart) {
      cur.end_line = nearestStart - 1;
    }
  }
  return entities;
}
|
|
89
|
+
|
|
90
|
+
|
|
24
91
|
/**
|
|
25
92
|
* Normalize an identifier into searchable alias tokens.
|
|
26
93
|
* Splits camelCase, PascalCase, snake_case, digits and emits both
|
|
@@ -317,9 +384,34 @@ export const TREE_SITTER_ENTITY_PRIORITY = Object.freeze({
|
|
|
317
384
|
struct: 30,
|
|
318
385
|
record: 30,
|
|
319
386
|
module: 25,
|
|
387
|
+
// `variable` is intentionally lowest: when an `export const X = memo(...)`
|
|
388
|
+
// matches BOTH the @component (rank 40) and @variable rules, component wins.
|
|
389
|
+
// When `export const handler = async () => {}` matches BOTH @arrow (rank 20)
|
|
390
|
+
// and @variable, arrowFunction wins. Plain `export const FOO = "bar"` only
|
|
391
|
+
// matches @variable so it lands at rank 5 (kept).
|
|
392
|
+
variable: 5,
|
|
320
393
|
trait: 25,
|
|
321
394
|
impl: 20,
|
|
322
395
|
decorator: 15,
|
|
396
|
+
// Rust macro_rules! definitions — same rank as function/struct/impl since
|
|
397
|
+
// they're top-level definitions with similar discoverability needs.
|
|
398
|
+
macro: 30,
|
|
399
|
+
// Java enum constants (FieldNamingPolicy.UPPER_CAMEL_CASE) — fine-grained
|
|
400
|
+
// anchor inside the enclosing enum class, but worth surfacing for
|
|
401
|
+
// symbol-anchored probes. Rank between decorator and arrow: low enough
|
|
402
|
+
// to not steal the enum's primary anchor when both match, high enough
|
|
403
|
+
// to win over plain variables in disambiguation.
|
|
404
|
+
enum_constant: 10,
|
|
405
|
+
// Java field declarations (static finals like TypeAdapters.BIT_SET that
|
|
406
|
+
// initialize anonymous inner-class subclasses). Same priority story as
|
|
407
|
+
// enum_constant — useful for anchoring, not primary.
|
|
408
|
+
field: 10,
|
|
409
|
+
// C# property declarations (`public RespCommand Command { get; init; }`) —
|
|
410
|
+
// first-class members per the C# spec, but lower in retrieval priority
|
|
411
|
+
// than methods/classes when both could anchor a result. Same rank as
|
|
412
|
+
// arrowFunction/interface/enum (20): high enough to win over enum_constant
|
|
413
|
+
// when both match, low enough to never overshadow the owning class.
|
|
414
|
+
property: 20,
|
|
323
415
|
});
|
|
324
416
|
|
|
325
417
|
// Module-scope constants for extractJavaScript() — avoid per-call/per-line allocation.
|
|
@@ -396,7 +488,10 @@ export class GraphExtractor {
|
|
|
396
488
|
async extractFromFile(filePath, content) {
|
|
397
489
|
this.currentFile = filePath;
|
|
398
490
|
const lines = content.split('\n');
|
|
399
|
-
|
|
491
|
+
// resolveLanguage handles per-file disambiguation of ambiguous extensions
|
|
492
|
+
// (today: `.h` → c-vs-cpp) so header-only C++ libraries get parsed by
|
|
493
|
+
// tree-sitter-cpp rather than tree-sitter-c.
|
|
494
|
+
const langInfo = resolveLanguage(filePath, content);
|
|
400
495
|
|
|
401
496
|
if (!langInfo) {
|
|
402
497
|
return { entities: [], relationships: [] };
|
|
@@ -516,12 +611,64 @@ export class GraphExtractor {
|
|
|
516
611
|
let currentClass = null;
|
|
517
612
|
let braceDepth = 0;
|
|
518
613
|
let classStartDepth = 0;
|
|
614
|
+
// Track whether we are inside a `/* ... */` or `/** ... */` block
|
|
615
|
+
// comment. Without this, every entity-emission regex below also
|
|
616
|
+
// matches Javadoc `<pre>` examples ("public class MyClass { ... }"),
|
|
617
|
+
// creating phantom classes/methods/calls in the graph. Verified on
|
|
618
|
+
// gson SerializedName.java / Since.java / Until.java where phantom
|
|
619
|
+
// `MyClass`/`User`/`Gson`/`fromJson` entities were polluting
|
|
620
|
+
// search-time symbol attribution via findFirstEntityInRange.
|
|
621
|
+
// The state is a per-line boolean: true if the line BEGINS inside
|
|
622
|
+
// a block comment (and we therefore skip all regex extractions and
|
|
623
|
+
// brace counting on that line). State transitions on the first
|
|
624
|
+
// `/*` open and the first `*/` close encountered, scanned left-to-
|
|
625
|
+
// right. Inline `/* ... */` on a single line is treated as the
|
|
626
|
+
// line containing both open and close — the line ends OUT of the
|
|
627
|
+
// comment, so extraction runs as normal (a rare but harmless edge:
|
|
628
|
+
// identifiers on the same line as a closing `*/` could still be
|
|
629
|
+
// picked up; this matches existing whole-file regex behaviour).
|
|
630
|
+
let inBlockComment = false;
|
|
519
631
|
|
|
520
632
|
for (let i = 0; i < lines.length; i++) {
|
|
521
633
|
const line = lines[i];
|
|
522
634
|
const lineNum = i + 1;
|
|
523
635
|
|
|
524
|
-
|
|
636
|
+
const enteredAtStart = inBlockComment;
|
|
637
|
+
// Update inBlockComment state from this line's `/*` opens and
|
|
638
|
+
// `*/` closes. We scan character-by-character but cheaply: a
|
|
639
|
+
// single pass with two indexOf-style searches per iteration.
|
|
640
|
+
{
|
|
641
|
+
let scan = 0;
|
|
642
|
+
while (scan < line.length) {
|
|
643
|
+
if (inBlockComment) {
|
|
644
|
+
const close = line.indexOf('*/', scan);
|
|
645
|
+
if (close < 0) { scan = line.length; break; }
|
|
646
|
+
inBlockComment = false;
|
|
647
|
+
scan = close + 2;
|
|
648
|
+
} else {
|
|
649
|
+
const open = line.indexOf('/*', scan);
|
|
650
|
+
if (open < 0) { scan = line.length; break; }
|
|
651
|
+
// Inline line-comment `//` before `/*` on the same line:
|
|
652
|
+
//   // /* not really a block */ — the `//` wins.
|
|
653
|
+
const lineCom = line.indexOf('//', scan);
|
|
654
|
+
if (lineCom >= 0 && lineCom < open) { scan = line.length; break; }
|
|
655
|
+
inBlockComment = true;
|
|
656
|
+
scan = open + 2;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
// Skip lines that both begin and end inside a block comment. A line
|
|
662
|
+
// that OPENS a comment mid-line (entered false, ends true) is NOT
|
|
663
|
+
// skipped: code before the `/*` is still real and is extracted below.
|
|
664
|
+
const lineWasFullyInComment = enteredAtStart && inBlockComment;
|
|
665
|
+
if (lineWasFullyInComment) {
|
|
666
|
+
// Don't count braces, don't run extraction regexes.
|
|
667
|
+
continue;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
// Track brace depth (raw-line approximation, matches pre-fix
|
|
671
|
+
// behaviour for non-comment lines).
|
|
525
672
|
braceDepth += (line.match(/{/g) || []).length;
|
|
526
673
|
braceDepth -= (line.match(/}/g) || []).length;
|
|
527
674
|
|
|
@@ -530,6 +677,15 @@ export class GraphExtractor {
|
|
|
530
677
|
currentClass = null;
|
|
531
678
|
}
|
|
532
679
|
|
|
680
|
+
// If we OPENED a block comment on this line, code BEFORE the
|
|
681
|
+
// `/*` is still real — run extraction on the line as usual; the
|
|
682
|
+
// Javadoc body that follows starts on the next iteration with
|
|
683
|
+
// inBlockComment=true. Same for lines that close a block comment
|
|
684
|
+
// (we already cleared the state above by the time we get here).
|
|
685
|
+
// Defensive: if the line is mostly Javadoc but has trailing code
|
|
686
|
+
// after `*/`, the regex will still capture; that mirrors the
|
|
687
|
+
// existing 99% case (real `public class Foo {` lines).
|
|
688
|
+
|
|
533
689
|
// Class declarations
|
|
534
690
|
const classMatch = line.match(/(?:public|private|protected)?\s*(?:static)?\s*(?:final|abstract)?\s*class\s+(\w+)(?:\s+extends\s+(\w+))?(?:\s+implements\s+([\w,\s]+))?/);
|
|
535
691
|
if (classMatch) {
|
|
@@ -1112,6 +1268,17 @@ export class GraphExtractor {
|
|
|
1112
1268
|
}
|
|
1113
1269
|
}
|
|
1114
1270
|
|
|
1271
|
+
// Sentinel clamp (2026-05-13): Lua-only. The regex `findEndLineKeyword`
|
|
1272
|
+
// falls through to `return lines.length` when the `end` keyword counter
|
|
1273
|
+
// mis-balances (control-flow keywords sharing line context), producing
|
|
1274
|
+
// entities with end_line=EOF that swallow subsequent siblings (LU-003:
|
|
1275
|
+
// tablex.deepcopy at 118-120 was being rendered as 98-999 because the
|
|
1276
|
+
// preceding sibling cycle_aware_copy had bogus end_line=999). The
|
|
1277
|
+
// language gate inside clampSentinelEndLines is explicit — other
|
|
1278
|
+
// regex-path languages (zig, scala, kotlin, etc.) are unaffected and
|
|
1279
|
+
// would need per-language validation before opt-in.
|
|
1280
|
+
clampSentinelEndLines(entities, lines.length, langInfo?.id);
|
|
1281
|
+
|
|
1115
1282
|
return { entities, relationships };
|
|
1116
1283
|
}
|
|
1117
1284
|
|
|
@@ -1305,15 +1472,20 @@ export class GraphExtractor {
|
|
|
1305
1472
|
.filter(Boolean);
|
|
1306
1473
|
}
|
|
1307
1474
|
|
|
1475
|
+
_clampSentinelEndLines(entities, fileLineCount) {
|
|
1476
|
+
return clampSentinelEndLines(entities, fileLineCount);
|
|
1477
|
+
}
|
|
1478
|
+
|
|
1308
1479
|
_normalizeTreeSitterEntities(filePath, symbols, language) {
|
|
1309
1480
|
const dedupedBySymbolAndLine = new Map();
|
|
1310
1481
|
|
|
1311
1482
|
for (const sym of symbols) {
|
|
1312
1483
|
if (!sym?.name || !sym?.type) continue;
|
|
1313
1484
|
const normalizedType = this._normalizeTreeSitterSymbolType(sym.type, sym.name);
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1485
|
+
// Note: previously dropped 'variable' for js/ts to avoid noise from
|
|
1486
|
+
// every internal `let x = 1`. The current TS/TSX/JS tag query scopes
|
|
1487
|
+
// @variable.definition to `(export_statement (lexical_declaration ...))`
|
|
1488
|
+
// so only EXPORTED top-level consts reach this point — keep them.
|
|
1317
1489
|
const startLine = Number.isInteger(sym.startLine) ? sym.startLine : 0;
|
|
1318
1490
|
const endLine = Number.isInteger(sym.endLine) ? sym.endLine : startLine;
|
|
1319
1491
|
const rank = TREE_SITTER_ENTITY_PRIORITY[normalizedType] || 0;
|
|
@@ -1334,9 +1506,14 @@ export class GraphExtractor {
|
|
|
1334
1506
|
}
|
|
1335
1507
|
}
|
|
1336
1508
|
|
|
1337
|
-
|
|
1338
|
-
.sort((a, b) => a.start_line - b.start_line)
|
|
1339
|
-
|
|
1509
|
+
const sorted = Array.from(dedupedBySymbolAndLine.values())
|
|
1510
|
+
.sort((a, b) => a.start_line - b.start_line);
|
|
1511
|
+
// Tree-sitter path: NO sentinel clamp. Tree-sitter parsers return
|
|
1512
|
+
// accurate end_lines via grammar-driven extraction; the regex-path
|
|
1513
|
+
// `findEndLineKeyword` fall-through is the only known source of the
|
|
1514
|
+
// bogus-EOF pattern, and only Lua currently goes through that path
|
|
1515
|
+
// (Lua has no tree-sitter grammar registered).
|
|
1516
|
+
return sorted.map(({ rank, ...entity }) => entity);
|
|
1340
1517
|
}
|
|
1341
1518
|
|
|
1342
1519
|
_normalizeTreeSitterSymbolType(type, name) {
|
|
@@ -1347,7 +1524,7 @@ export class GraphExtractor {
|
|
|
1347
1524
|
}
|
|
1348
1525
|
|
|
1349
1526
|
_resolveRelationshipTargets(relType, match, language) {
|
|
1350
|
-
const isJsTs = language === 'javascript' || language === 'typescript';
|
|
1527
|
+
const isJsTs = language === 'javascript' || language === 'typescript' || language === 'tsx';
|
|
1351
1528
|
|
|
1352
1529
|
if (isJsTs && relType === 'import') {
|
|
1353
1530
|
const source = match[3]?.trim();
|
|
@@ -1852,7 +2029,8 @@ export function createGraphSchema(db) {
|
|
|
1852
2029
|
hierarchy_level INTEGER DEFAULT 0,
|
|
1853
2030
|
code TEXT,
|
|
1854
2031
|
name_alias TEXT,
|
|
1855
|
-
stale_since INTEGER DEFAULT NULL
|
|
2032
|
+
stale_since INTEGER DEFAULT NULL,
|
|
2033
|
+
page_rank REAL DEFAULT 0
|
|
1856
2034
|
)
|
|
1857
2035
|
`);
|
|
1858
2036
|
|
|
@@ -1869,6 +2047,11 @@ export function createGraphSchema(db) {
|
|
|
1869
2047
|
db.exec('ALTER TABLE entities ADD COLUMN name_alias TEXT');
|
|
1870
2048
|
console.log(' Migrated: added name_alias column to entities table');
|
|
1871
2049
|
}
|
|
2050
|
+
const hasPageRankColumn = columns.some(col => col.name === 'page_rank');
|
|
2051
|
+
if (!hasPageRankColumn) {
|
|
2052
|
+
db.exec('ALTER TABLE entities ADD COLUMN page_rank REAL DEFAULT 0');
|
|
2053
|
+
console.log(' Migrated: added page_rank column to entities table');
|
|
2054
|
+
}
|
|
1872
2055
|
} catch (err) {
|
|
1873
2056
|
// Ignore errors - column might already exist or table not created yet
|
|
1874
2057
|
}
|
|
@@ -1936,6 +2119,8 @@ export function createGraphSchema(db) {
|
|
|
1936
2119
|
db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_rel_unique ON relationships(source_id, target_id, type, target_name) WHERE source_id IS NOT NULL`);
|
|
1937
2120
|
// Index on target_id for efficient reverse lookups ("what calls X")
|
|
1938
2121
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_rel_target_id ON relationships(target_id) WHERE target_id IS NOT NULL`);
|
|
2122
|
+
// Index supports `page_rank DESC` lookups for ss-trace ranking and ranking probes.
|
|
2123
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entities_page_rank ON entities(page_rank) WHERE stale_since IS NULL`);
|
|
1939
2124
|
|
|
1940
2125
|
setSchemaVersion(db);
|
|
1941
2126
|
|