sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
package/core/cli.js CHANGED
@@ -20,6 +20,9 @@ if (args[0] === 'init') {
20
20
  } else if (args[0] === 'prewarm-vocab') {
21
21
  const { handlePrewarmVocabCli } = await import('./vocabulary/index.js');
22
22
  await handlePrewarmVocabCli(args.slice(1));
23
+ } else if (args[0] === 'reconcile' || args[0] === 'rebuild') {
24
+ const { handleIncrementalCli } = await import('./incremental-indexing/application/operator-cli.mjs');
25
+ await handleIncrementalCli(args[0], args.slice(1));
23
26
  } else if (args[0] === 'read') {
24
27
  // Filesystem-grounded reader; runs in JS (no native equivalent yet).
25
28
  const { handleReadCli } = await import('./search/search-read.js');
@@ -28,6 +31,10 @@ if (args[0] === 'init') {
28
31
  // Hybrid span-selection reader; runs in JS (depends on LI index + ranking).
29
32
  const { handleReadSemanticCli } = await import('./search/search-read-semantic.js');
30
33
  await handleReadSemanticCli(args.slice(1));
34
+ } else if (args[0] === 'trace') {
35
+ // Unified structural code context: callers, callees, and impact.
36
+ const { handleTraceCli } = await import('./search/search-trace.js');
37
+ await handleTraceCli(args.slice(1));
31
38
  } else if (args[0] === 'index') {
32
39
  // Indexing pipeline. Forwarded to index-codebase-v21.js::main(), which
33
40
  // reads its own flags via process.argv. Setting argv here is required
@@ -39,9 +46,15 @@ if (args[0] === 'init') {
39
46
  // `index` so existing flag combos (--full / --graph-only / --vectors-only /
40
47
  // --files-from-stdin / --late-interaction-model=… / etc.) all work.
41
48
  const indexerArgs = args.slice(1);
42
- process.argv = [process.argv[0], 'index-codebase-v21.js', ...indexerArgs];
43
- const { main: runIndexer } = await import('./indexing/index-codebase-v21.js');
44
- await runIndexer();
49
+ const hasAddHint = indexerArgs.includes('--add') || indexerArgs.some((arg) => arg.startsWith('--add='));
50
+ if (hasAddHint) {
51
+ const { handleIndexAddCli } = await import('./incremental-indexing/application/operator-cli.mjs');
52
+ await handleIndexAddCli(indexerArgs);
53
+ } else {
54
+ process.argv = [process.argv[0], 'index-codebase-v21.js', ...indexerArgs];
55
+ const { main: runIndexer } = await import('./indexing/index-codebase-v21.js');
56
+ await runIndexer();
57
+ }
45
58
  } else if (args[0] === '--serve' || args[0] === '--stop') {
46
59
  // Warm search server lifecycle is implemented in JS.
47
60
  const { runCli } = await import('./search/index.js');
@@ -51,9 +64,16 @@ if (args[0] === 'init') {
51
64
 
52
65
  Usage:
53
66
  sweet-search <query> Search the indexed codebase
67
+ sweet-search trace <symbol> Structural context: callers, callees, impact
54
68
  sweet-search read <file...> Filesystem-grounded read (1-20 files)
55
69
  sweet-search read-semantic <f> <q> Return only file spans relevant to a query
56
70
  sweet-search index [options] Build / update the codebase index
71
+ sweet-search index --add <path> Hint a file as dirty
72
+ sweet-search reconcile status Show incremental epoch and dirty status
73
+ sweet-search reconcile inspect <path> Explain why a file is dirty or clean
74
+ sweet-search reconcile pause|resume Pause or resume automatic reconcile work
75
+ sweet-search rebuild status Show incremental maintenance queue
76
+ sweet-search rebuild force <tier> Queue maintenance for a tier
57
77
  sweet-search init [options] Set up runtime assets and models
58
78
  sweet-search uninstall [opts] Remove local state created by init
59
79
  sweet-search prewarm-vocab [file] Pre-warm vocabulary cache with terms
@@ -70,6 +90,7 @@ Indexing flags (sweet-search index ...):
70
90
  --graph-only Build code graph only
71
91
  --vectors-only Build vectors + HNSW only (skips code graph)
72
92
  --files-from-stdin Read newline-delimited paths from stdin
93
+ --add <path> Queue a dirty-file hint without running the indexer
73
94
  --late-interaction-model=ID Override the LI variant for this run
74
95
  --no-late-interaction Skip LI index build
75
96
  --quiet | --verbose Logging verbosity
package/core/graph/graph-expansion.js CHANGED
@@ -12,6 +12,97 @@
12
12
  // Default edge types to follow during expansion
13
13
  const DEFAULT_EDGE_TYPES = new Set(['imports', 'extends', 'implements', 'uses', 'calls']);
14
14
 
15
+ // SQLite-variable-limit guard. Mirrors SAFE_IN_CLAUSE_BATCH in
16
+ // core/infrastructure/db-utils.js; inlined here so this module stays
17
+ // import-free (callers inject all dependencies). 2-hop expansion can in
18
+ // principle balloon to thousands of IDs when a seed entity has many
19
+ // outgoing edges; without this guard, an `IN(?,?,...)` over a >32k array
20
+ // crashes with "too many SQL variables". Fail fast at 999 with a clear
21
+ // message instead.
22
+ const _SAFE_IN_CLAUSE_BATCH = 999;
23
+ function _assertInClauseSize(n, label) {
24
+ if (n > _SAFE_IN_CLAUSE_BATCH) {
25
+ throw new RangeError(
26
+ `${label}: IN(?,?,...) clause would bind ${n} parameters, exceeding ` +
27
+ `SAFE_IN_CLAUSE_BATCH=${_SAFE_IN_CLAUSE_BATCH}. Chunk via ` +
28
+ `chunkedIn() in core/infrastructure/db-utils.js or upstream-cap the input.`
29
+ );
30
+ }
31
+ }
32
+
33
+ const _VISIBILITY_CACHE = new WeakMap();
34
+
35
+ function _sqlAliasPrefix(alias = '') {
36
+ if (!alias) return '';
37
+ const normalized = String(alias).endsWith('.') ? String(alias).slice(0, -1) : String(alias);
38
+ return normalized ? `${normalized}.` : '';
39
+ }
40
+
41
+ function _visibilityInfo(db) {
42
+ let cached = _VISIBILITY_CACHE.get(db);
43
+ if (cached) return cached;
44
+ const hasColumns = (table, columns) => {
45
+ try {
46
+ const names = new Set(db.prepare(`PRAGMA table_info(${table})`).all().map((c) => c.name));
47
+ return columns.every((c) => names.has(c));
48
+ } catch {
49
+ return false;
50
+ }
51
+ };
52
+ cached = {
53
+ entities: hasColumns('entities', ['epoch_written', 'epoch_retired']),
54
+ relationships: hasColumns('relationships', ['epoch_written', 'epoch_retired']),
55
+ };
56
+ _VISIBILITY_CACHE.set(db, cached);
57
+ return cached;
58
+ }
59
+
60
+ function _entityVisibility(db, manifestEpoch, alias = '', options = {}) {
61
+ const prefix = _sqlAliasPrefix(alias);
62
+ let sql;
63
+ if (!_visibilityInfo(db).entities) {
64
+ sql = `${prefix}stale_since IS NULL`;
65
+ } else if (Number.isInteger(manifestEpoch)) {
66
+ sql = `(${prefix}epoch_written IS NULL OR ${prefix}epoch_written <= ?)
67
+ AND (${prefix}epoch_retired IS NULL OR ${prefix}epoch_retired > ?)
68
+ AND (${prefix}stale_since IS NULL OR (${prefix}epoch_retired IS NOT NULL AND ${prefix}epoch_retired > ?))`;
69
+ } else {
70
+ sql = `${prefix}stale_since IS NULL AND ${prefix}epoch_retired IS NULL`;
71
+ }
72
+ return {
73
+ sql: options.allowNullJoined ? `(${sql} OR ${prefix}id IS NULL)` : sql,
74
+ params: _visibilityInfo(db).entities && Number.isInteger(manifestEpoch)
75
+ ? [manifestEpoch, manifestEpoch, manifestEpoch]
76
+ : [],
77
+ };
78
+ }
79
+
80
+ function _relationshipVisibility(db, manifestEpoch, alias = '') {
81
+ const prefix = _sqlAliasPrefix(alias);
82
+ if (!_visibilityInfo(db).relationships) return { sql: '1=1', params: [] };
83
+ if (Number.isInteger(manifestEpoch)) {
84
+ return {
85
+ sql: `(${prefix}epoch_written IS NULL OR ${prefix}epoch_written <= ?)
86
+ AND (${prefix}epoch_retired IS NULL OR ${prefix}epoch_retired > ?)`,
87
+ params: [manifestEpoch, manifestEpoch],
88
+ };
89
+ }
90
+ return { sql: `${prefix}epoch_retired IS NULL`, params: [] };
91
+ }
92
+
93
+ // Per-stage profiling hooks. No-op unless `globalThis.__stageTimings` is set
94
+ // by scripts/profile-search-stages.mjs (same convention as search-hybrid.js
95
+ // and search-postprocess.js).
96
+ function __ptStart() {
97
+ return globalThis.__stageTimings ? performance.now() : null;
98
+ }
99
+ function __ptEnd(stage, t0) {
100
+ if (t0 == null || !globalThis.__stageTimings) return;
101
+ const ms = performance.now() - t0;
102
+ const buf = globalThis.__stageTimings;
103
+ (buf[stage] = buf[stage] || []).push(ms);
104
+ }
105
+
15
106
  // --- Token Estimation Helpers ---
16
107
 
17
108
  // Language-specific tokens-per-line averages (from CodeSearchNet analysis)
@@ -66,6 +157,7 @@ export function loadChunkTexts(codebaseDbOrRepo, ids) {
66
157
  }
67
158
  // Legacy raw-DB path (backward compat)
68
159
  try {
160
+ _assertInClauseSize(ids.length, 'graph-expansion.getChunkTexts');
69
161
  const ph = ids.map(() => '?').join(',');
70
162
  const rows = codebaseDbOrRepo.prepare(
71
163
  `SELECT id, text FROM vectors WHERE id IN (${ph})`
@@ -201,20 +293,52 @@ export function expandResults(db, results, options = {}) {
201
293
  cosineSimilarity = null,
202
294
  codebaseDb = null,
203
295
  readFileLines = null,
296
+ format = null,
297
+ manifestEpoch = null,
204
298
  } = options;
299
+ // F1 envelope cap (2026-05-07): drop graph-expanded entities whose line span
300
+ // exceeds maxEnvelopeLines. The taxonomy diagnosed mega-class envelopes
301
+ // (Flask App 951L, Scaffold 646L, uv do_lock 555L) as the #1 failure mode —
302
+ // these are pulled from the entity DB by graph expansion, not the chunker.
303
+ // Capped here so the seed chunks (30-60 lines each) keep the top spot.
304
+ //
305
+ // Format-gated to agent: GCSN NL queries don't carry format='agent' so are
306
+ // unaffected. Cap default 500 was selected by dev sweep over {Inf, 500, 300,
307
+ // 200, 150, 100}: cap=500 was the only value with zero regressions on
308
+ // FreshStack uv (lower caps regressed PASS counts). Yields +1 probe PASS
309
+ // (S5-Q9 Flask Scaffold class) and +1 FreshStack PARTIAL (UV-NL-2 do_lock).
310
+ // Held-out probes flat — no overfit signature, but also no held-out transfer
311
+ // since the failure mode (mega-class envelope) isn't present in held-out.
312
+ const maxEnvelopeLines = (() => {
313
+ const raw = process.env.SWEET_SEARCH_MAX_ENVELOPE_LINES;
314
+ if (raw != null && raw !== '') {
315
+ const n = Number.parseInt(raw, 10);
316
+ if (Number.isFinite(n) && n > 0) return n;
317
+ }
318
+ return options.maxEnvelopeLines ?? 500;
319
+ })();
320
+ const isAgentFormat = format === 'agent' || format === 'agent_full'
321
+ || format === 'agent_full_xl' || format === 'agent_preview'
322
+ || process.env.SWEET_SEARCH_FORCE_BM25F_BOOSTS === '1';
323
+ const envelopeCapEnabled = isAgentFormat && Number.isFinite(maxEnvelopeLines);
205
324
  const clampedSemanticWeight = clampSemanticWeight(semanticWeight);
206
325
 
207
326
  if (expandMode === 'none' || results.length === 0) return results;
208
327
 
209
328
  // Collect entity IDs from results
210
- const seedIds = collectSeedIds(db, results);
329
+ const __t_seeds = __ptStart();
330
+ const seedIds = collectSeedIds(db, results, { manifestEpoch });
331
+ __ptEnd('expand:collectSeedIds', __t_seeds);
211
332
  if (seedIds.size === 0) return results;
212
333
 
213
334
  // 1-hop expansion: find neighbors via forward + reverse edges
214
- const expanded = expandOneHop(db, seedIds, edgeTypes);
335
+ const __t_hop1 = __ptStart();
336
+ const expanded = expandOneHop(db, seedIds, edgeTypes, { manifestEpoch });
337
+ __ptEnd('expand:expandOneHop', __t_hop1);
215
338
 
216
339
  // 2-hop expansion (if requested)
217
340
  if (expandMode === '2hop' && expanded.size > 0) {
341
+ const __t_hop2 = __ptStart();
218
342
  if (adaptiveHop2) {
219
343
  expandSecondHopAdaptive(db, seedIds, expanded, edgeTypes, {
220
344
  maxHop2: maxExpanded,
@@ -223,6 +347,7 @@ export function expandResults(db, results, options = {}) {
223
347
  hnswIndex,
224
348
  semanticWeight: clampedSemanticWeight,
225
349
  cosineSimilarity,
350
+ manifestEpoch,
226
351
  });
227
352
  } else {
228
353
  expandSecondHop(db, seedIds, expanded, edgeTypes, {
@@ -230,15 +355,31 @@ export function expandResults(db, results, options = {}) {
230
355
  hnswIndex,
231
356
  semanticWeight: clampedSemanticWeight,
232
357
  cosineSimilarity,
358
+ manifestEpoch,
233
359
  });
234
360
  }
361
+ __ptEnd(adaptiveHop2 ? 'expand:expandSecondHopAdaptive' : 'expand:expandSecondHop', __t_hop2);
235
362
  }
236
363
 
237
364
  if (expanded.size === 0) return results;
238
365
 
239
366
  // Look up entity details for expanded IDs, respecting maxExpanded
240
367
  const expandedIds = [...expanded.keys()].slice(0, maxExpanded);
241
- const expandedResults = lookupEntities(db, expandedIds, expanded);
368
+ const __t_lookup = __ptStart();
369
+ let expandedResults = lookupEntities(db, expandedIds, expanded, { manifestEpoch });
370
+ __ptEnd('expand:lookupEntities', __t_lookup);
371
+
372
+ // F1 envelope cap: drop expanded entities exceeding line cap (agent format only).
373
+ if (envelopeCapEnabled && expandedResults.length > 0) {
374
+ const beforeLen = expandedResults.length;
375
+ expandedResults = expandedResults.filter(er => {
376
+ const lines = (er.endLine - er.startLine) + 1;
377
+ return Number.isFinite(lines) && lines <= maxEnvelopeLines;
378
+ });
379
+ if (process.env.SWEET_SEARCH_DEBUG_ENVELOPE_CAP === '1' && expandedResults.length < beforeLen) {
380
+ console.warn(`[envelope-cap] dropped ${beforeLen - expandedResults.length}/${beforeLen} expanded entities (cap=${maxEnvelopeLines})`);
381
+ }
382
+ }
242
383
 
243
384
  // Score expanded results relative to original results
244
385
  const maxOriginalScore = Math.max(...results.map(r => r.score || 0), 1);
@@ -249,18 +390,22 @@ export function expandResults(db, results, options = {}) {
249
390
  }
250
391
 
251
392
  // Rerank expanded results using composite scoring (file proximity + entity type + semantic)
393
+ const __t_rerank = __ptStart();
252
394
  rerankExpanded(expandedResults, results, {
253
395
  queryInt8,
254
396
  hnswIndex,
255
397
  semanticWeight: clampedSemanticWeight,
256
398
  cosineSimilarity,
257
399
  });
400
+ __ptEnd('expand:rerankExpanded', __t_rerank);
258
401
 
259
402
  // Apply token budget
403
+ const __t_budget = __ptStart();
260
404
  const { results: budgeted, stats: budgetStats } = applyTokenBudget(
261
405
  [...results, ...expandedResults], tokenBudget,
262
406
  { expandedBudget, codebaseDb, readFileLines }
263
407
  );
408
+ __ptEnd('expand:applyTokenBudget', __t_budget);
264
409
 
265
410
  budgeted._budgetStats = budgetStats;
266
411
  return budgeted;
@@ -274,7 +419,7 @@ export function expandResults(db, results, options = {}) {
274
419
  * @param {Array} results
275
420
  * @returns {Set<string>}
276
421
  */
277
- function collectSeedIds(db, results) {
422
+ function collectSeedIds(db, results, options = {}) {
278
423
  const seedIds = new Set();
279
424
  const needsLineMatch = [];
280
425
 
@@ -295,13 +440,28 @@ function collectSeedIds(db, results) {
295
440
 
296
441
  if (needsLineMatch.length === 0) return seedIds;
297
442
 
298
- // Line-range fallback for chunk-id keyed results.
299
- let entityLookup;
443
+ // Per-result indexed point query. Hybrid output is keyed on chunk-ids
444
+ // (path:start-end:n), so this fallback is the COMMON path for graph
445
+ // expansion, not a rare one. The original implementation did a full
446
+ // SELECT * FROM entities and then an O(N×M) JS-side scan to find the
447
+ // smallest overlapping entity per result — costing ~11ms p50 on
448
+ // production-sized indexes (10 results × 100k+ entities = 1M JS-side
449
+ // comparisons + materialization GC). Replaced with a single prepared
450
+ // statement that uses the (file_path, start_line, end_line) index for
451
+ // O(log N) lookup. Reuses the same prepared statement across all
452
+ // needsLineMatch results in one collectSeedIds call.
453
+ let findStmt;
300
454
  try {
301
- entityLookup = db.prepare(`
302
- SELECT id, file_path, start_line, end_line
303
- FROM entities WHERE stale_since IS NULL
304
- `).all();
455
+ const entityVis = _entityVisibility(db, options.manifestEpoch);
456
+ findStmt = db.prepare(`
457
+ SELECT id FROM entities
458
+ WHERE file_path = ?
459
+ AND start_line <= ?
460
+ AND end_line >= ?
461
+ AND ${entityVis.sql}
462
+ ORDER BY (end_line - start_line) ASC
463
+ LIMIT 1
464
+ `);
305
465
  } catch {
306
466
  return seedIds;
307
467
  }
@@ -331,23 +491,19 @@ function collectSeedIds(db, results) {
331
491
  }
332
492
  }
333
493
  if (!filePath || lineStart == null) continue;
334
- // If we still don't have an end line, treat the chunk as a single line.
335
494
  if (lineEnd == null) lineEnd = lineStart;
336
495
 
337
- // Find the SMALLEST entity that overlaps the chunk's [start, end] —
338
- // smaller entities (functions/methods) are more meaningful seeds than
339
- // file-level container entities. Cap to one seed per result to avoid
340
- // unbounded seed-set blow-up that can break the relationships SQL.
341
- let bestId = null;
342
- let bestSize = Infinity;
343
- for (const e of entityLookup) {
344
- if (e.file_path !== filePath) continue;
345
- if (e.start_line == null || e.end_line == null) continue;
346
- if (e.start_line > lineEnd || e.end_line < lineStart) continue;
347
- const size = (e.end_line - e.start_line) + 1;
348
- if (size < bestSize) { bestSize = size; bestId = e.id; }
496
+ // Smallest enclosing/overlapping entity wins (functions/methods over
497
+ // file-level containers). The SQL ORDER BY (end_line - start_line) ASC
498
+ // matches the JS `bestSize` selection in the prior implementation
499
+ // exactly: same overlap predicate, same tie-breaker.
500
+ try {
501
+ const row = findStmt.get(filePath, lineEnd, lineStart, ..._entityVisibility(db, options.manifestEpoch).params);
502
+ if (row?.id) seedIds.add(row.id);
503
+ } catch {
504
+ // Skip this result; preserves prior behavior of silently dropping
505
+ // entries the lookup couldn't match.
349
506
  }
350
- if (bestId) seedIds.add(bestId);
351
507
  }
352
508
 
353
509
  return seedIds;
@@ -361,18 +517,21 @@ function collectSeedIds(db, results) {
361
517
  * @param {Set<string>} edgeTypes
362
518
  * @returns {Map<string, {via: string, direction: string, score: number, hops?: number}>}
363
519
  */
364
- export function expandOneHop(db, seedIds, edgeTypes) {
520
+ export function expandOneHop(db, seedIds, edgeTypes, options = {}) {
365
521
  const expanded = new Map();
366
522
  const seedArray = [...seedIds];
523
+ _assertInClauseSize(seedArray.length, 'graph-expansion.expandOneHop.seeds');
367
524
  const placeholders = seedArray.map(() => '?').join(',');
368
525
 
369
526
  // Forward edges: seed -> neighbor
370
527
  let forwardRels;
371
528
  try {
529
+ const relVis = _relationshipVisibility(db, options.manifestEpoch);
372
530
  forwardRels = db.prepare(`
373
531
  SELECT DISTINCT target_id, type FROM relationships
374
532
  WHERE source_id IN (${placeholders}) AND target_id IS NOT NULL
375
- `).all(...seedArray);
533
+ AND ${relVis.sql}
534
+ `).all(...seedArray, ...relVis.params);
376
535
  } catch {
377
536
  forwardRels = [];
378
537
  }
@@ -380,10 +539,12 @@ export function expandOneHop(db, seedIds, edgeTypes) {
380
539
  // Reverse edges: neighbor -> seed
381
540
  let reverseRels;
382
541
  try {
542
+ const relVis = _relationshipVisibility(db, options.manifestEpoch);
383
543
  reverseRels = db.prepare(`
384
544
  SELECT DISTINCT source_id, type FROM relationships
385
545
  WHERE target_id IN (${placeholders}) AND source_id IS NOT NULL
386
- `).all(...seedArray);
546
+ AND ${relVis.sql}
547
+ `).all(...seedArray, ...relVis.params);
387
548
  } catch {
388
549
  reverseRels = [];
389
550
  }
@@ -427,15 +588,18 @@ export function expandSecondHop(db, seedIds, expanded, edgeTypes, options = {})
427
588
 
428
589
  const hop1Ids = [...expanded.keys()];
429
590
  if (hop1Ids.length === 0) return;
591
+ _assertInClauseSize(hop1Ids.length, 'graph-expansion.expand2Hop.forward');
430
592
 
431
593
  const ph = hop1Ids.map(() => '?').join(',');
432
594
 
433
595
  let hop2Forward;
434
596
  try {
597
+ const relVis = _relationshipVisibility(db, options.manifestEpoch);
435
598
  hop2Forward = db.prepare(`
436
599
  SELECT source_id, target_id, type FROM relationships
437
600
  WHERE source_id IN (${ph}) AND target_id IS NOT NULL
438
- `).all(...hop1Ids);
601
+ AND ${relVis.sql}
602
+ `).all(...hop1Ids, ...relVis.params);
439
603
  } catch {
440
604
  return;
441
605
  }
@@ -520,6 +684,7 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
520
684
 
521
685
  const hop1Ids = [...hop1Expanded.keys()];
522
686
  if (hop1Ids.length === 0) return { added: 0, budgetUsed: 0, candidates: 0 };
687
+ _assertInClauseSize(hop1Ids.length, 'graph-expansion.expand2HopRanked.hop1');
523
688
 
524
689
  const ph = hop1Ids.map(() => '?').join(',');
525
690
 
@@ -531,11 +696,13 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
531
696
  const typeList = [...edgeTypes].map(t => `'${t}'`).join(',');
532
697
  let degreeMap;
533
698
  try {
699
+ const relVis = _relationshipVisibility(db, options.manifestEpoch);
534
700
  const degRows = db.prepare(`
535
701
  SELECT source_id, COUNT(*) as deg FROM relationships
536
702
  WHERE source_id IN (${ph}) AND type IN (${typeList})
703
+ AND ${relVis.sql}
537
704
  GROUP BY source_id
538
- `).all(...hop1Ids);
705
+ `).all(...hop1Ids, ...relVis.params);
539
706
  degreeMap = new Map(degRows.map(r => [r.source_id, r.deg]));
540
707
  } catch {
541
708
  degreeMap = new Map();
@@ -544,12 +711,15 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
544
711
  // Query candidate 2-hop targets with source, weights, and line ranges
545
712
  let rawCandidates;
546
713
  try {
714
+ const entityVis = _entityVisibility(db, options.manifestEpoch, 'e');
715
+ const relVis = _relationshipVisibility(db, options.manifestEpoch, 'r');
547
716
  rawCandidates = db.prepare(`
548
717
  SELECT r.source_id, r.target_id, r.type, r.weight, e.file_path, e.start_line, e.end_line
549
718
  FROM relationships r
550
- JOIN entities e ON e.id = r.target_id AND e.stale_since IS NULL
719
+ JOIN entities e ON e.id = r.target_id AND ${entityVis.sql}
551
720
  WHERE r.source_id IN (${ph}) AND r.target_id IS NOT NULL
552
- `).all(...hop1Ids);
721
+ AND ${relVis.sql}
722
+ `).all(...entityVis.params, ...hop1Ids, ...relVis.params);
553
723
  } catch {
554
724
  return { added: 0, budgetUsed: 0, candidates: 0 };
555
725
  }
@@ -667,16 +837,18 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
667
837
  * @param {Map<string, Object>} expansionMeta
668
838
  * @returns {Array}
669
839
  */
670
- function lookupEntities(db, expandedIds, expansionMeta) {
840
+ function lookupEntities(db, expandedIds, expansionMeta, options = {}) {
671
841
  if (expandedIds.length === 0) return [];
842
+ _assertInClauseSize(expandedIds.length, 'graph-expansion.lookupEntities');
672
843
 
673
844
  const ph = expandedIds.map(() => '?').join(',');
674
845
  let entities;
675
846
  try {
847
+ const entityVis = _entityVisibility(db, options.manifestEpoch);
676
848
  entities = db.prepare(`
677
849
  SELECT id, file_path, type, name, signature, start_line, end_line
678
- FROM entities WHERE id IN (${ph}) AND stale_since IS NULL
679
- `).all(...expandedIds);
850
+ FROM entities WHERE id IN (${ph}) AND ${entityVis.sql}
851
+ `).all(...expandedIds, ...entityVis.params);
680
852
  } catch {
681
853
  return [];
682
854
  }
@@ -852,18 +1024,25 @@ export function applyTokenBudget(results, budget, options = {}) {
852
1024
  * @param {string[]} entityIds
853
1025
  * @returns {{ total: number, byType: Record<string, number> }}
854
1026
  */
855
- export function getExpansionStats(db, entityIds) {
1027
+ export function getExpansionStats(db, entityIds, options = {}) {
856
1028
  if (!entityIds || entityIds.length === 0) return { total: 0, byType: {} };
1029
+ // The query interpolates `${ph}` twice (source_id IN OR target_id IN) and the
1030
+ // `.all()` call binds entityIds twice in one prepared statement, so the
1031
+ // SQLite-parameter ceiling is reached at half the array length — guard the
1032
+ // actual bind count, not the array length.
1033
+ _assertInClauseSize(2 * entityIds.length, 'graph-expansion.getExpansionStats (double-bind OR)');
857
1034
 
858
1035
  const ph = entityIds.map(() => '?').join(',');
859
1036
  let rels;
860
1037
  try {
1038
+ const relVis = _relationshipVisibility(db, options.manifestEpoch);
861
1039
  rels = db.prepare(`
862
1040
  SELECT type, COUNT(*) as count FROM relationships
863
1041
  WHERE (source_id IN (${ph}) OR target_id IN (${ph}))
864
1042
  AND source_id IS NOT NULL AND target_id IS NOT NULL
1043
+ AND ${relVis.sql}
865
1044
  GROUP BY type
866
- `).all(...entityIds, ...entityIds);
1045
+ `).all(...entityIds, ...entityIds, ...relVis.params);
867
1046
  } catch {
868
1047
  return { total: 0, byType: {} };
869
1048
  }