sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,450 @@
1
+ /**
2
+ * Unified structural context builder.
3
+ *
4
+ * Produces one agent-oriented trace for a symbol: callers, callees, and
5
+ * transitive impact paths, ranked and packed under an adaptive token budget.
6
+ */
7
+
8
+ import { DB_PATHS } from '../infrastructure/config/index.js';
9
+ import { StructuralContextRepository } from '../infrastructure/structural-context-repository.js';
10
+ import { buildAnswerCues } from './structural-answer-cues.js';
11
+ import { callsiteHints } from './structural-callsite-hints.js';
12
+ import { extractHeaderContext } from './structural-header-context.js';
13
+ import { scoreEntity, scoreImpactPath, tokenize, safeMax } from './structural-importance.js';
14
+ import { personalizedPageRank } from './structural-forward-push.js';
15
/**
 * Default token budgets per output tier. Frozen so that the shared table
 * cannot be mutated by accident at runtime.
 */
const BUDGETS = Object.freeze({ preview: 4000, full: 8000, xl: 12000 });

/** Default traversal depth for impact paths when the caller gives none. */
const DEFAULT_MAX_DEPTH = 3;
17
/**
 * Rough token estimate for a piece of text, at 3.5 characters per token.
 *
 * @param {*} text - value to measure; any falsy value counts as zero tokens.
 * @returns {number} estimated token count, rounded up.
 */
function estimateTokens(text) {
  if (!text) return 0;
  const charCount = String(text).length;
  return Math.ceil(charCount / 3.5);
}
20
/**
 * Parse `n` as a base-10 integer and confine it to the range [lo, hi].
 *
 * @param {*} n - value to parse (numbers and numeric strings are accepted).
 * @param {number} lo - lower bound, also returned when `n` does not parse.
 * @param {number} hi - upper bound.
 * @returns {number}
 */
function clamp(n, lo, hi) {
  const parsed = Number.parseInt(n, 10);
  if (Number.isFinite(parsed)) {
    const capped = Math.min(hi, parsed);
    return Math.max(lo, capped);
  }
  return lo;
}
25
+
26
/**
 * Normalised Shannon entropy of the `importance` values of `items`.
 *
 * Negative or missing importances are treated as zero. The result is divided
 * by ln(items.length), so a uniform distribution yields 1.
 *
 * @param {Array<{importance?: number}>} items
 * @returns {number} 0 for empty, single-item, or zero-mass input.
 */
function entropy(items) {
  const count = items.length;
  if (!count) return 0;

  let total = 0;
  items.forEach((item) => {
    total += Math.max(0, item.importance || 0);
  });
  if (total <= 0 || count === 1) return 0;

  let h = 0;
  items.forEach((item) => {
    const p = Math.max(0, item.importance || 0) / total;
    if (p > 0) h -= p * Math.log(p);
  });
  return h / Math.log(count);
}
36
+
37
/**
 * Choose the output tier and token budget for a trace.
 *
 * An explicit budget wins: it is clamped to [1000, 16000] and mapped to a
 * tier by size. Otherwise the tier is derived from how many candidates there
 * are, how dominant the top candidate is over the runner-up, and the
 * normalised entropy of the importance scores.
 *
 * @param {*} explicitBudget - caller-supplied budget; falsy means "adaptive".
 * @param {{callers: Array, callees: Array, impactPaths: Array}} candidates
 * @returns {{tier: string, tokenBudget: number, reason: string}}
 */
function selectBudget(explicitBudget, candidates) {
  if (explicitBudget) {
    const tokenBudget = clamp(explicitBudget, 1000, 16000);
    let tier = 'preview';
    if (tokenBudget >= 11000) tier = 'xl';
    else if (tokenBudget >= 7000) tier = 'full';
    return { tier, tokenBudget, reason: 'explicit' };
  }

  const pool = [...candidates.callers, ...candidates.callees, ...candidates.impactPaths];
  const spread = entropy(pool);
  const ranked = [...pool].sort((a, b) => (b.importance || 0) - (a.importance || 0));

  // Ratio of the best score to the second best; a lone (or absent) candidate
  // is treated as fully dominant.
  let dominance = 99;
  if (ranked.length > 1) {
    dominance = (ranked[0].importance || 0) / Math.max(0.001, ranked[1].importance || 0);
  }

  if (pool.length <= 10 && dominance >= 2.0 && spread < 0.65) {
    return { tier: 'preview', tokenBudget: BUDGETS.preview, reason: 'compact_dominant' };
  }
  if (pool.length > 35 || spread >= 0.82 || candidates.impactPaths.length > 18) {
    return { tier: 'xl', tokenBudget: BUDGETS.xl, reason: 'high_entropy_impact' };
  }
  return { tier: 'full', tokenBudget: BUDGETS.full, reason: 'balanced' };
}
57
+
58
/**
 * Decide how the token budget is split between the target, its callers, its
 * callees, and the impact paths. The four shares always sum to 1.
 *
 * Precedence:
 *   1. Type-like targets (class/struct/trait/...) favour the target itself.
 *   2. Keywords in the query hint (callee / caller / impact wording).
 *   3. The fan-in / fan-out shape of the target.
 *
 * @param {{fanIn?: number, fanOut?: number}} targetFan - edge counts for the target.
 * @param {string} [hint=''] - free-text query hint, matched case-insensitively.
 * @param {{type?: string}|null} [target=null] - resolved target entity, if any.
 * @returns {{target: number, callers: number, callees: number, impact: number}}
 */
function sectionShares(targetFan, hint = '', target = null) {
  const balanced = { target: 0.10, callers: 0.36, callees: 0.34, impact: 0.20 };
  const q = String(hint || '').toLowerCase();

  if (/^(class|struct|trait|interface|enum|type|typeAlias)$/.test(target?.type || '')) {
    return { target: 0.50, callers: 0.25, callees: 0.05, impact: 0.20 };
  }
  if (/\b(callee|callees|downstream|helper|helpers|relies|next)\b/.test(q)) {
    return { target: 0.16, callers: 0.08, callees: 0.54, impact: 0.22 };
  }
  if (/\b(caller|callers|who calls|upstream|references)\b/.test(q)) {
    return { target: 0.16, callers: 0.54, callees: 0.10, impact: 0.20 };
  }
  if (/\b(impact|changing|change|affect|break|handoff)\b/.test(q)) {
    return { target: 0.16, callers: 0.24, callees: 0.20, impact: 0.40 };
  }

  const fanIn = targetFan?.fanIn || 0;
  const fanOut = targetFan?.fanOut || 0;

  // With no edges at all, `fanIn >= fanOut * 2` is trivially true (0 >= 0);
  // return the balanced split rather than a caller-dominant one.
  if (fanIn === 0 && fanOut === 0) return balanced;

  if (fanIn === 0 && fanOut > 0) return { target: 0.10, callers: 0.05, callees: 0.55, impact: 0.30 };
  if (fanIn >= fanOut * 2) return { target: 0.10, callers: 0.55, callees: 0.12, impact: 0.23 };
  if (fanOut >= fanIn * 2) return { target: 0.10, callers: 0.18, callees: 0.52, impact: 0.20 };
  return balanced;
}
69
+
70
/**
 * Read several line ranges of one file and join them into a single snippet,
 * inserting an "elided" marker wherever consecutive ranges leave a gap.
 *
 * The input array is not modified: ranges are sorted on a copy.
 *
 * @param {(filePath: string, start: number, end: number) => (string|null)} readFileRange
 * @param {string} filePath
 * @param {Array<{start: number, end: number}>} slices - line ranges, any order.
 * @returns {string} joined text; ranges that read as empty are omitted.
 */
function readSlices(readFileRange, filePath, slices) {
  const ordered = [...slices].sort((a, b) => a.start - b.start);
  const parts = [];
  let previousEnd = null;
  for (const { start, end } of ordered) {
    if (previousEnd != null && start > previousEnd + 1) {
      parts.push(`// ... (${start - previousEnd - 1} lines elided) ...`);
    }
    const text = readFileRange(filePath, start, end);
    if (text) parts.push(text);
    // Advance even when the read came back empty, so the next gap is
    // measured from this range.
    previousEnd = end;
  }
  return parts.join('\n');
}
83
+
84
/**
 * Trim `text` to at most `tokenCap` estimated tokens by dropping trailing
 * lines. A truncated result ends with a `// ...` marker line.
 *
 * @param {string} text
 * @param {number} tokenCap
 * @returns {string} the original text if it fits, a truncated version, or ''
 *   when nothing fits (or the input is empty / the cap is not positive).
 */
function clampText(text, tokenCap) {
  if (!text || tokenCap <= 0) return '';
  if (estimateTokens(text) <= tokenCap) return text;

  const kept = text.split('\n');
  // Drop one trailing line at a time until the text plus marker fits.
  for (; kept.length > 0; kept.pop()) {
    const candidate = `${kept.join('\n')}\n// ...`;
    if (!(estimateTokens(candidate) > tokenCap)) return candidate;
  }
  return '';
}
93
+
94
/**
 * Produce the code snippet shown for an entity within a token cap.
 *
 * - Entities without a file or line range, or a cap below 80 tokens, yield a
 *   summary-only result (no code).
 * - If the whole body is estimated to fit (9 tokens per line), it is read in
 *   full.
 * - Otherwise a preview is assembled from the first lines, a window around
 *   the call/focus line when it lies inside the entity, and the last line.
 *
 * @param {object} entity - needs `filePath`, `startLine`, `endLine`; may carry `contextLine`.
 * @param {{tokenCap: number, readFileRange: Function, focusLine?: number}} opts
 * @returns {{code: (string|null), codeTokens: number, presentation: string}}
 */
function renderCode(entity, opts) {
  const summaryOnly = () => ({ code: null, codeTokens: 0, presentation: 'summary' });

  if (!entity.filePath || !entity.startLine || !entity.endLine || opts.tokenCap < 80) {
    return summaryOnly();
  }
  const { filePath, startLine, endLine } = entity;

  const lineCount = Math.max(1, endLine - startLine + 1);
  const fitsWhole = lineCount * 9 <= opts.tokenCap;

  let raw;
  if (fitsWhole) {
    raw = opts.readFileRange(filePath, startLine, endLine);
  } else {
    // Head of the definition is always part of the preview.
    const windows = [{ start: startLine, end: Math.min(endLine, startLine + 3) }];
    const focus = entity.contextLine || opts.focusLine;
    if (focus && focus >= startLine && focus <= endLine) {
      windows.push({ start: Math.max(startLine, focus - 2), end: Math.min(endLine, focus + 2) });
    }
    // Closing line, when it is not already covered by the head window.
    if (endLine > startLine + 4) windows.push({ start: endLine, end: endLine });
    raw = readSlices(opts.readFileRange, filePath, mergeSlices(windows));
  }

  const code = clampText(raw || '', opts.tokenCap);
  const codeTokens = estimateTokens(code);
  if (!code) return summaryOnly();
  return { code, codeTokens, presentation: fitsWhole ? 'full' : 'preview' };
}
119
+
120
/**
 * Merge overlapping or directly adjacent line ranges.
 *
 * Neither the input array nor its range objects are modified: sorting runs
 * on a copy and merged ranges are fresh objects.
 *
 * @param {Array<{start: number, end: number}>} slices - line ranges, any order.
 * @returns {Array<{start: number, end: number}>} ranges sorted by start, with
 *   any two that touch or overlap (start <= previous end + 1) combined.
 */
function mergeSlices(slices) {
  const ordered = [...slices].sort((a, b) => a.start - b.start);
  const merged = [];
  for (const slice of ordered) {
    const last = merged[merged.length - 1];
    if (last && slice.start <= last.end + 1) {
      last.end = Math.max(last.end, slice.end);
    } else {
      merged.push({ ...slice });
    }
  }
  return merged;
}
130
+
131
/**
 * One-line description of an entity: name, type, location, and the call line
 * when one is known.
 *
 * @param {{name: string, type: string, filePath?: string, startLine?: number, contextLine?: number}} entity
 * @returns {string} e.g. `foo [function] src/a.js:12 call@40`
 */
function itemSummary(entity) {
  const { name, type, filePath, startLine, contextLine } = entity;
  let location = '(external)';
  if (filePath) location = `${filePath}:${startLine || '?'}`;
  const callSuffix = contextLine ? ` call@${contextLine}` : '';
  return `${name} [${type}] ${location}${callSuffix}`;
}
136
+
137
/**
 * Pack a section (callers or callees) into a token budget.
 *
 * Two passes:
 *   1. Rank items by importance per estimated code cost and greedily pick
 *      which ones may show code ("code winners") within the budget.
 *   2. Walk items by raw importance, emitting a summary for each and code for
 *      the winners, until the item cap or the budget is reached. At least
 *      three items are emitted when available, even if over budget.
 *
 * @param {Array<object>} items - scored entities (each with numeric `importance`).
 * @param {number} budget - token budget for the section.
 * @param {{perItemCap: number, readFileRange: Function}} opts
 * @returns {{items: Array<object>, tokensUsed: number}}
 */
function packSection(items, budget, opts) {
  // Estimated code cost of an item: 9 tokens per source line.
  const lineCost = (item) => ((item.endLine || 0) - (item.startLine || 0) + 1) * 9;

  const byImportance = [...items].sort((a, b) => b.importance - a.importance);
  const byDensity = [...byImportance].sort((a, b) => {
    const costA = Math.max(60, Math.min(1200, lineCost(a)));
    const costB = Math.max(60, Math.min(1200, lineCost(b)));
    return (b.importance / costB) - (a.importance / costA);
  });

  // Pass 1: choose which items are allowed to carry code.
  const codeWinners = new Set();
  let projected = 0;
  for (const item of byDensity) {
    const estimate = Math.max(80, Math.min(opts.perItemCap, lineCost(item)));
    if (projected + estimate > budget) continue;
    codeWinners.add(item.id);
    projected += estimate;
  }

  // Pass 2: emit items in importance order.
  const maxItems = Math.max(3, Math.min(40, Math.floor(budget / 90)));
  const packed = [];
  let used = 0;
  for (const item of byImportance) {
    if (packed.length >= maxItems) break;

    const summary = itemSummary(item);
    const summaryTokens = estimateTokens(summary);
    if (used + summaryTokens > budget && packed.length >= 3) break;

    const remaining = Math.max(0, budget - used - summaryTokens);
    const mayShowCode = codeWinners.has(item.id) && remaining >= 80;
    const codeInfo = mayShowCode
      ? renderCode(item, { ...opts, tokenCap: Math.min(opts.perItemCap, remaining) })
      : { code: null, codeTokens: 0, presentation: 'summary' };

    used += summaryTokens + codeInfo.codeTokens;
    packed.push({
      id: item.id,
      name: item.name,
      type: item.type,
      file: item.filePath,
      startLine: item.startLine,
      endLine: item.endLine,
      contextLine: item.contextLine || null,
      relationship: item.relationship || null,
      depth: item.depth || 1,
      importance: Number(item.importance.toFixed(4)),
      presentation: codeInfo.presentation,
      summary,
      code: codeInfo.code,
      codeTokens: codeInfo.codeTokens,
    });
  }
  return { items: packed, tokensUsed: used };
}
185
+
186
/**
 * Collect transitive impact paths around `target`, breadth-first.
 *
 * Upstream paths (dependents of the target) are gathered first, then
 * downstream paths (dependencies of the target), and finally hint-derived
 * "handoff" paths. Collection stops once `limit` paths exist.
 *
 * @param {object} repo - must provide `getReverseDependents`; `getForwardDependencies`
 *   and `findEntityCandidates` are used when present.
 * @param {object} target - the resolved target entity (needs `id`).
 * @param {{maxDepth?: *, limit?: *, hints?: string[]}} opts - `maxDepth` is
 *   clamped to [1, 4] and `limit` to [10, 250] (default 80).
 * @returns {Array<{id: string, direction: string, path: object[], edgeTypes: string[], depth: number}>}
 */
function buildImpactPaths(repo, target, opts) {
  const maxDepth = clamp(opts.maxDepth ?? DEFAULT_MAX_DEPTH, 1, 4);
  const limit = clamp(opts.limit ?? 80, 10, 250);
  const paths = [];
  const seenPathIds = new Set();

  const seedFrontier = () => new Map([[target.id, { entity: target, path: [target], edgeTypes: [] }]]);
  const record = (entry) => {
    if (seenPathIds.has(entry.id)) return;
    paths.push(entry);
    seenPathIds.add(entry.id);
  };

  // Upstream: walk reverse edges, prepending each dependent to its parent's path.
  let frontier = seedFrontier();
  const upstreamVisited = new Set([target.id]);
  for (let depth = 1; depth <= maxDepth && paths.length < limit; depth += 1) {
    const rows = repo.getReverseDependents([...frontier.keys()], target, {
      includeNamePattern: depth === 1,
      limit: limit * 3,
    });
    const next = new Map();
    for (const row of rows) {
      if (!row.id || row.id === target.id || upstreamVisited.has(row.id)) continue;
      // At depth 1, rows that do not name a frontier node are attached to the target.
      const parent = frontier.get(row.targetId) || (depth === 1 ? frontier.get(target.id) : null);
      if (!parent) continue;
      const path = [row, ...parent.path];
      const edgeTypes = [row.relationship, ...parent.edgeTypes];
      record({ id: `up:${path.map((node) => node.id).join('>')}`, direction: 'upstream', path, edgeTypes, depth });
      next.set(row.id, { entity: row, path, edgeTypes });
      upstreamVisited.add(row.id);
      if (paths.length >= limit) break;
    }
    frontier = next;
    if (frontier.size === 0) break;
  }

  // Downstream: walk forward edges, appending each dependency to its parent's path.
  frontier = seedFrontier();
  const downstreamVisited = new Set([target.id]);
  for (let depth = 1; depth <= maxDepth && paths.length < limit; depth += 1) {
    const rows = repo.getForwardDependencies?.([...frontier.keys()], { limit: limit * 3 }) || [];
    const next = new Map();
    for (const row of rows) {
      if (!row.id || row.id === target.id || downstreamVisited.has(row.id)) continue;
      const parent = frontier.get(row.sourceId);
      if (!parent) continue;
      const path = [...parent.path, row];
      const edgeTypes = [...parent.edgeTypes, row.relationship];
      record({ id: `down:${path.map((node) => node.id).join('>')}`, direction: 'downstream', path, edgeTypes, depth });
      // Ids prefixed `external:` are recorded but never expanded or marked visited.
      if (!String(row.id).startsWith('external:')) {
        next.set(row.id, { entity: row, path, edgeTypes });
        downstreamVisited.add(row.id);
      }
      if (paths.length >= limit) break;
    }
    frontier = next;
    if (frontier.size === 0) break;
  }

  addHintImpactPaths(paths, seenPathIds, repo, target, opts.hints || [], limit);
  return paths;
}
247
+
248
/**
 * Append "handoff" impact paths derived from call-site hint names.
 *
 * For each hint name that resolves to an entity other than the target, adds a
 * depth-1 path target -> hint, then depth-2 paths target -> hint -> dependency
 * for up to 12 forward dependencies of the hint. `paths` and `seen` are
 * mutated in place; nothing is added once `paths` holds `limit` entries.
 *
 * @param {Array<object>} paths - accumulator of impact paths.
 * @param {Set<string>} seen - ids of paths already recorded.
 * @param {object} repo - `findEntityCandidates` / `getForwardDependencies` are optional.
 * @param {object} target - the resolved target entity.
 * @param {Iterable<string>} hints - candidate callee names.
 * @param {number} limit - maximum total number of paths.
 * @returns {void}
 */
function addHintImpactPaths(paths, seen, repo, target, hints, limit) {
  const isFull = () => paths.length >= limit;
  const append = (id, nodes, edgeTypes) => {
    paths.push({ id, direction: 'downstream', path: nodes, edgeTypes, depth: nodes.length - 1 });
    seen.add(id);
  };

  for (const name of hints) {
    if (isFull()) break;
    const hint = repo.findEntityCandidates?.(name, { limit: 1 })?.[0];
    if (!hint || hint.id === target.id) continue;

    const directId = `hint:${target.id}>${hint.id}`;
    if (!seen.has(directId)) append(directId, [target, hint], ['handoff']);

    const dependencies = repo.getForwardDependencies?.([hint.id], { limit: 12 }) || [];
    for (const row of dependencies) {
      if (isFull()) break;
      if (!row.id || row.id === target.id) continue;
      const chainedId = `hint:${target.id}>${hint.id}>${row.id}`;
      if (!seen.has(chainedId)) append(chainedId, [target, hint, row], ['handoff', row.relationship]);
    }
  }
}
268
+
269
/**
 * Render an impact path as `name (file:line) -> name (file:line) -> ...`.
 * Nodes without a file are shown as `external`; a missing line becomes `?`.
 *
 * @param {{path: Array<{name: string, filePath?: string, startLine?: number}>}} path
 * @returns {string}
 */
function formatPath(path) {
  const describeNode = (node) => {
    let location = 'external';
    if (node.filePath) location = `${node.filePath}:${node.startLine || '?'}`;
    return `${node.name} (${location})`;
  };
  const hops = path.path.map((node) => describeNode(node));
  return hops.join(' -> ');
}
275
+
276
/**
 * Builds the unified structural trace for a symbol: the resolved target, its
 * ranked callers and callees, and transitive impact paths, packed under a
 * token budget.
 */
export class StructuralContextBuilder {
  /**
   * @param {object} [options]
   * @param {string} [options.projectRoot] - falls back to the
   *   SWEET_SEARCH_PROJECT_ROOT environment variable, then the working directory.
   * @param {object} [options.repository] - pre-built repository; when absent a
   *   StructuralContextRepository is created.
   * @param {string} [options.graphDbPath] - graph database path for the created repository.
   * @param {*} [options.manifestEpoch] - forwarded to the created repository.
   */
  constructor(options = {}) {
    this.projectRoot = options.projectRoot || process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd();
    if (options.repository) {
      this.repo = options.repository;
    } else {
      const dbPath = options.graphDbPath || DB_PATHS.codeGraph;
      this.repo = new StructuralContextRepository(dbPath, {
        projectRoot: this.projectRoot,
        manifestEpoch: options.manifestEpoch,
      });
    }
  }

  /** Close the underlying repository, if it exposes `close`. */
  close() {
    this.repo?.close?.();
  }

  /**
   * Build the structural context for `symbol`.
   *
   * @param {string} symbol - symbol name; trimmed, must be 1..256 characters.
   * @param {object} [options]
   * @param {string} [options.filePath] - forwarded to candidate lookup.
   * @param {string} [options.queryHint] - free text used for candidate lookup,
   *   scoring tokens, section shares and answer cues.
   * @param {*} [options.maxDepth] - impact traversal depth (clamped to [1, 4]).
   * @param {*} [options.tokenBudget] - explicit token budget; adaptive when falsy.
   * @returns {object} the `structural_context` payload; `target` is null when
   *   the symbol is invalid or not found.
   */
  build(symbol, options = {}) {
    const started = performance.now();
    const cleanSymbol = String(symbol || '').trim();
    if (!cleanSymbol || cleanSymbol.length > 256) {
      return this._empty(cleanSymbol, 'invalid_symbol', started);
    }

    const candidates = this.repo.findEntityCandidates(cleanSymbol, {
      filePath: options.filePath,
      queryHint: options.queryHint,
      limit: 12,
    });
    if (!candidates.length) return this._empty(cleanSymbol, 'not_found', started);

    // The best candidate becomes the target; the rest are reported as disambiguation.
    const [target, ...alternatives] = candidates;
    const readFileRange = this.repo.readFileRange?.bind(this.repo) || (() => null);
    const targetSource = readFileRange(target.filePath, target.startLine, target.endLine);
    const targetHeaderContext = extractHeaderContext(readFileRange, target.filePath);
    const targetCallsiteHints = callsiteHints(targetSource, new Set([target.name]));

    // Direct neighbours.
    const atDepthOne = (entity) => ({ ...entity, depth: 1 });
    const directCallers = this.repo.getCallers(target, { limit: 160 });
    const aliasCallers = this.repo.getAliasCallers?.(target, { limit: 80 }) || [];
    const callersRaw = [...directCallers, ...aliasCallers].map(atDepthOne);

    let calleesRaw = this.repo.getCallees(target, { limit: 160 }).map(atDepthOne);
    if (!calleesRaw.length) {
      // No callee edges: fall back to entities resolved from call-site hints.
      calleesRaw = targetCallsiteHints
        .map((name) => this.repo.findEntityCandidates?.(name, { limit: 1 })?.[0])
        .filter(Boolean)
        .map((entity) => ({ ...entity, relationship: 'handoff', depth: 1 }));
    }

    const impactRaw = buildImpactPaths(this.repo, target, {
      maxDepth: options.maxDepth ?? DEFAULT_MAX_DEPTH,
      limit: 120,
      hints: targetCallsiteHints,
    });

    // Graph signals for every entity that may be scored.
    const ids = [
      target.id,
      ...callersRaw.map((entity) => entity.id),
      ...calleesRaw.map((entity) => entity.id),
      ...impactRaw.flatMap((impact) => impact.path.map((node) => node.id)),
    ];
    const fan = this.repo.getFanCounts(ids);
    const pageRank = this.repo.getPageRank(ids);
    const backwardRun = personalizedPageRank({
      sourceId: target.id,
      loadFrontier: (idsBatch) => this.repo.getFrontierBackwardEdges(idsBatch),
    });
    const forwardRun = personalizedPageRank({
      sourceId: target.id,
      loadFrontier: (idsBatch) => this.repo.getFrontierForwardEdges(idsBatch),
    });

    const maxFanIn = safeMax([...fan.values()].map((counts) => counts.fanIn));
    const maxPageRank = safeMax(pageRank.values());
    const hintTokens = tokenize(options.queryHint || cleanSymbol);
    // Callers are scored with the backward run, callees with the forward run.
    const scoringContext = (run) => ({
      fan, pageRank, hintTokens,
      pprScores: run.scores,
      maxFanIn, maxPageRank,
      maxPpr: safeMax(run.scores.values()),
    });
    const callerCtx = scoringContext(backwardRun);
    const calleeCtx = scoringContext(forwardRun);

    const byImportanceDesc = (a, b) => b.importance - a.importance;
    const callers = callersRaw.map((entity) => ({ ...entity, importance: scoreEntity(entity, callerCtx) }));
    const callees = calleesRaw.map((entity) => ({ ...entity, importance: scoreEntity(entity, calleeCtx) }));
    const impactPaths = impactRaw
      .map((impact) => ({
        ...impact,
        importance: scoreImpactPath(impact, impact.direction === 'downstream' ? calleeCtx : callerCtx),
      }))
      .sort(byImportanceDesc);
    callers.sort(byImportanceDesc);
    callees.sort(byImportanceDesc);

    // Budgeting and packing.
    const budget = selectBudget(options.tokenBudget, { callers, callees, impactPaths });
    const targetFan = fan.get(target.id) || { fanIn: callers.length, fanOut: callees.length };
    const shares = sectionShares(targetFan, options.queryHint, target);
    const shareOf = (fraction) => Math.floor(budget.tokenBudget * fraction);

    const targetInfo = renderCode(target, {
      readFileRange,
      tokenCap: shareOf(shares.target),
      perItemCap: shareOf(shares.target),
    });

    let perItemCap = 800;
    if (budget.tier === 'xl') perItemCap = 1400;
    else if (budget.tier === 'full') perItemCap = 1100;
    const packOpts = { readFileRange, perItemCap };

    const callersPack = packSection(callers, shareOf(shares.callers), packOpts);
    const calleesPack = packSection(callees, shareOf(shares.callees), packOpts);
    const impactPack = this._packImpact(impactPaths, shareOf(shares.impact));

    const targetWithCode = { ...target, code: targetInfo.code };
    // Answer cues see the full target source when it could be read.
    const targetForCues = { ...targetWithCode, code: targetSource || targetInfo.code };
    const tokensUsed = targetInfo.codeTokens
      + estimateTokens(targetHeaderContext)
      + callersPack.tokensUsed
      + calleesPack.tokensUsed
      + impactPack.tokensUsed;

    return {
      format: 'structural_context',
      tool: 'trace',
      symbol: cleanSymbol,
      target: {
        ...targetWithCode,
        fanIn: targetFan.fanIn,
        fanOut: targetFan.fanOut,
        headerContext: targetHeaderContext || null,
        codeTokens: targetInfo.codeTokens,
        presentation: targetInfo.presentation,
        callsiteHints: targetCallsiteHints,
      },
      answerCues: buildAnswerCues({
        target: targetForCues,
        hint: options.queryHint,
        callers,
        callees,
        impactPaths,
        resolveTerm: (name) => this.repo.findSameFileDefinition?.(name, target.filePath),
      }),
      disambiguation: alternatives.map((candidate) => ({
        name: candidate.name,
        type: candidate.type,
        file: candidate.filePath,
        startLine: candidate.startLine,
      })),
      budgetTier: budget.tier,
      budgetReason: budget.reason,
      tokenBudget: budget.tokenBudget,
      tokensUsed,
      maxDepth: clamp(options.maxDepth ?? DEFAULT_MAX_DEPTH, 1, 4),
      stats: {
        totalEntities: this.repo.getEntityCount(),
        callers: callers.length,
        callees: callees.length,
        impactPaths: impactPaths.length,
        entropy: Number(entropy([...callers, ...callees, ...impactPaths]).toFixed(4)),
        latencyMs: Math.round(performance.now() - started),
      },
      sections: {
        callers: { total: callers.length, shown: callersPack.items.length, items: callersPack.items },
        callees: { total: callees.length, shown: calleesPack.items.length, items: calleesPack.items },
        impact: { total: impactPaths.length, shown: impactPack.paths.length, paths: impactPack.paths },
      },
    };
  }

  /**
   * Pack impact paths (already sorted by importance) into a token budget.
   * At least three paths are emitted when available, even if over budget.
   *
   * @param {Array<object>} paths - scored impact paths.
   * @param {number} budget - token budget for the impact section.
   * @returns {{paths: Array<object>, tokensUsed: number}}
   */
  _packImpact(paths, budget) {
    const maxPaths = Math.max(3, Math.min(24, Math.floor(budget / 80)));
    const rows = [];
    let used = 0;
    for (const impact of paths) {
      if (rows.length >= maxPaths) break;
      const row = {
        path: formatPath(impact),
        direction: impact.direction || 'upstream',
        depth: impact.depth,
        edgeTypes: impact.edgeTypes,
        importance: Number(impact.importance.toFixed(4)),
      };
      const cost = estimateTokens(`${row.path} ${row.edgeTypes.join(' ')}`);
      if (used + cost > budget && rows.length >= 3) break;
      used += cost;
      rows.push(row);
    }
    return { paths: rows, tokensUsed: used };
  }

  /**
   * Payload returned when there is no target to describe.
   *
   * @param {string} symbol - the cleaned symbol that was requested.
   * @param {string} reason - reported as `budgetReason` (e.g. 'not_found').
   * @param {number} started - `performance.now()` timestamp taken at entry.
   * @returns {object}
   */
  _empty(symbol, reason, started) {
    const stats = {
      totalEntities: this.repo.getEntityCount(),
      callers: 0,
      callees: 0,
      impactPaths: 0,
      entropy: 0,
      latencyMs: Math.round(performance.now() - started),
    };
    const sections = {
      callers: { total: 0, shown: 0, items: [] },
      callees: { total: 0, shown: 0, items: [] },
      impact: { total: 0, shown: 0, paths: [] },
    };
    return {
      format: 'structural_context',
      tool: 'trace',
      symbol,
      target: null,
      disambiguation: [],
      budgetTier: 'preview',
      budgetReason: reason,
      tokenBudget: BUDGETS.preview,
      tokensUsed: 0,
      maxDepth: DEFAULT_MAX_DEPTH,
      stats,
      sections,
    };
  }
}
447
+
448
+ export { formatStructuralContext } from './structural-context-format.js';
449
+
450
+ export default StructuralContextBuilder;
@@ -0,0 +1,156 @@
1
+ /**
2
+ * Forward Push — query-time Personalized PageRank over a directed call graph.
3
+ *
4
+ * The structural-trace tool calls this *twice* per query:
5
+ * - direction='backward' over reverse edges to rank CALLERS by importance
6
+ * relative to the target ("which of foo's callers are themselves
7
+ * important to foo").
8
+ * - direction='forward' over outgoing edges to rank CALLEES by importance
9
+ * relative to the target ("which of foo's callees coordinate substantive
10
+ * work, not just leaf utilities").
11
+ *
12
+ * The asymmetry matters: standard global PageRank consistently over-promotes
13
+ * leaf utilities (loggers, formatters) when used to rank callees, because
14
+ * those nodes have high in-degree from the rest of the codebase. Direction-
15
+ * matched PPR from the target avoids that bias.
16
+ *
17
+ * Algorithm: Andersen, Chung, Lang (2006) Forward Push. Sub-linear in the
18
+ * graph size; for typical 4-hop subgraphs the runtime is sub-10ms in JS.
19
+ *
20
+ * Domain layer: pure graph reasoning. Subgraph loading is delegated to the
21
+ * caller via the `subgraph` argument so persistence can stay in
22
+ * core/infrastructure/.
23
+ */
24
+
25
+ const DEFAULT_ALPHA = 0.15; // default teleport probability for forwardPush
26
+ const DEFAULT_EPSILON = 1e-4; // default residual threshold for forwardPush
27
+ const DEFAULT_MAX_HOPS = 4; // default hop limit for loadDirectionalSubgraph
28
+ const DEFAULT_MAX_NODES = 5000; // default visited-node cap for loadDirectionalSubgraph
29
+
30
/**
 * Run Forward Push from a source node over a preloaded subgraph.
 *
 * The subgraph encodes a single direction: each entry maps a node to its
 * outgoing neighbours *in the direction we're traversing*. For caller PPR
 * the caller passes in a reversed-edge subgraph; for callee PPR a
 * forward-edge subgraph. The algorithm itself is direction-agnostic.
 *
 * A node is processed only while its residual exceeds
 * `epsilon * max(1, outWeight)`. A node with no outgoing weight keeps its
 * `alpha` share and the rest of its residual is not redistributed.
 *
 * An empty map is returned when `sourceId` is null/undefined or `subgraph`
 * is not a Map. The id `0` and the empty string are accepted as node ids.
 *
 * @param {object} params
 * @param {string|number} params.sourceId
 * @param {Map<string|number, Map<string|number, number>>} params.subgraph - node → (neighbour → edge weight)
 * @param {number} [params.alpha=0.15] - teleport probability
 * @param {number} [params.epsilon=1e-4] - residual threshold
 * @param {number} [params.maxIterations=10000] - safety cap on the number of nodes dequeued
 * @returns {Map<string|number, number>} node → PPR score (excludes the source)
 */
export function forwardPush({ sourceId, subgraph, alpha = DEFAULT_ALPHA, epsilon = DEFAULT_EPSILON, maxIterations = 10000 }) {
  const p = new Map(); // accumulated score per node
  const r = new Map(); // residual mass per node that has not been pushed yet
  // Null check instead of a truthiness check: 0 is a legitimate node id.
  if (sourceId == null || !(subgraph instanceof Map)) return p;

  // Memoised sum of outgoing edge weights; 0 for nodes without an entry.
  const totalOutWeight = new Map();
  const outWeightOf = (node) => {
    let total = totalOutWeight.get(node);
    if (total === undefined) {
      total = 0;
      const edges = subgraph.get(node);
      if (edges) for (const w of edges.values()) total += w;
      totalOutWeight.set(node, total);
    }
    return total;
  };

  r.set(sourceId, 1);
  const queue = [sourceId];
  const inQueue = new Set([sourceId]);
  let head = 0; // read cursor: O(1) dequeue instead of Array.prototype.shift()
  let iter = 0;
  while (head < queue.length && iter < maxIterations) {
    iter++;
    const v = queue[head++];
    inQueue.delete(v);
    const rv = r.get(v) || 0;
    const outW = outWeightOf(v);
    if (rv <= epsilon * Math.max(1, outW)) continue;

    // Settle the alpha share on v, then spread the rest along its edges.
    p.set(v, (p.get(v) || 0) + alpha * rv);
    r.set(v, 0);
    const neighbours = subgraph.get(v);
    if (!neighbours || outW <= 0) continue;

    const push = (1 - alpha) * rv / outW;
    for (const [u, w] of neighbours) {
      const newR = (r.get(u) || 0) + push * w;
      r.set(u, newR);
      const outU = outWeightOf(u);
      if (!inQueue.has(u) && newR > epsilon * Math.max(1, outU)) {
        queue.push(u);
        inQueue.add(u);
      }
    }
  }
  p.delete(sourceId);
  return p;
}
91
+
92
/**
 * Breadth-first load of a bounded, single-direction subgraph around a node.
 *
 * Each round hands the current frontier to `loadFrontier(ids)`, which
 * returns the one-hop edges for those ids in the chosen direction. Edges
 * that are falsy or lack `from`/`to` are ignored. A weight that is not a
 * finite positive number counts as 1, and repeated `from → to` edges have
 * their weights summed. Expansion stops when the frontier is empty, after
 * `maxHops` rounds, or once `maxNodes` nodes have been seen.
 *
 * @param {object} params
 * @param {string|number} params.sourceId
 * @param {(ids: Array<string|number>) => Array<{ from: string|number, to: string|number, weight: number }>} params.loadFrontier
 * @param {number} [params.maxHops=4]
 * @param {number} [params.maxNodes=5000]
 * @returns {{ subgraph: Map<string|number, Map<string|number, number>>, nodeCount: number, hops: number }}
 *   `nodeCount` is the number of distinct nodes seen (source included);
 *   `hops` is the number of `loadFrontier` rounds performed
 */
export function loadDirectionalSubgraph({ sourceId, loadFrontier, maxHops = DEFAULT_MAX_HOPS, maxNodes = DEFAULT_MAX_NODES }) {
  const adjacency = new Map();
  const seen = new Set([sourceId]);
  let wave = [sourceId];
  let rounds = 0;

  while (wave.length > 0 && rounds < maxHops && seen.size < maxNodes) {
    rounds += 1;
    const batch = loadFrontier(wave) || [];
    const discovered = new Set();

    for (const edge of batch) {
      if (!edge || edge.from == null || edge.to == null) continue;
      const { from, to, weight } = edge;

      if (!adjacency.has(from)) adjacency.set(from, new Map());
      const row = adjacency.get(from);

      let w = 1;
      if (Number.isFinite(weight) && weight > 0) w = weight;
      row.set(to, (row.get(to) || 0) + w);

      if (!seen.has(to)) {
        seen.add(to);
        discovered.add(to);
      }
      // Stop reading this batch as soon as the node cap is reached.
      if (seen.size >= maxNodes) break;
    }

    wave = Array.from(discovered);
  }

  return { subgraph: adjacency, nodeCount: seen.size, hops: rounds };
}
135
+
136
/**
 * Convenience wrapper: load a subgraph in the given direction and run
 * Forward Push from the source. Suitable for the structural-trace builder.
 *
 * Options left undefined fall back to the defaults of
 * `loadDirectionalSubgraph` and `forwardPush`.
 *
 * @param {object} params
 * @param {string|number} params.sourceId
 * @param {(ids: Array<string|number>) => Array<{ from, to, weight }>} params.loadFrontier
 * @param {number} [params.alpha]
 * @param {number} [params.epsilon]
 * @param {number} [params.maxHops]
 * @param {number} [params.maxNodes]
 * @param {number} [params.maxIterations] - forwarded to `forwardPush` as its safety cap
 * @returns {{ scores: Map<string|number, number>, nodeCount: number, hops: number }}
 */
export function personalizedPageRank(params) {
  const { sourceId, loadFrontier, alpha, epsilon, maxHops, maxNodes, maxIterations } = params;
  const { subgraph, nodeCount, hops } = loadDirectionalSubgraph({ sourceId, loadFrontier, maxHops, maxNodes });
  // An undefined maxIterations leaves forwardPush on its own default.
  const scores = forwardPush({ sourceId, subgraph, alpha, epsilon, maxIterations });
  return { scores, nodeCount, hops };
}
155
+
156
+ export const __TEST__ = { DEFAULT_ALPHA, DEFAULT_EPSILON };
@@ -0,0 +1,19 @@
1
/**
 * Collect import-like lines from the top of a file.
 *
 * Reads lines 1..maxLines through `readFileRange`, skips blank lines and
 * keeps lines that start (after optional indentation) with `import`,
 * `from`, `use`, `pub use` or `package`, or that are a `require(...)` call
 * or a `const`/`let`/`var` assignment from `require(...)`. Trailing
 * whitespace is stripped from kept lines. Scanning stops once 16 lines are
 * kept, and the joined result is cut to 900 characters (ending in `...`)
 * when it is longer.
 *
 * @param {(filePath: string, start: number, end: number) => string} readFileRange
 * @param {string} filePath
 * @param {number} [maxLines=100] - last line number requested from the reader
 * @returns {string} newline-joined header lines, or '' when either
 *   `readFileRange` or `filePath` is missing
 */
export function extractHeaderContext(readFileRange, filePath, maxLines = 100) {
  if (!readFileRange || !filePath) return '';

  const headerPatterns = [
    /^\s*(import|from|use|pub use|package)\b/,
    /^\s*(const|let|var)\s+\w+\s*=\s*require\s*\(/,
    /^\s*require\s*\(/,
  ];
  const looksLikeHeader = (candidate) => headerPatterns.some((pattern) => pattern.test(candidate));

  const kept = [];
  const rawLines = (readFileRange(filePath, 1, maxLines) || '').split('\n');
  for (const rawLine of rawLines) {
    const stripped = rawLine.trimEnd();
    if (stripped.trim() === '') continue;
    if (looksLikeHeader(stripped)) kept.push(stripped);
    if (kept.length >= 16) break;
  }

  const header = kept.join('\n');
  if (header.length <= 900) return header;
  return `${header.slice(0, 897)}...`;
}
18
+
19
+ export default extractHeaderContext;