sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -30,10 +30,13 @@
30
30
  */
31
31
 
32
32
  import path from 'node:path';
33
+ import fs from 'node:fs';
33
34
  import { CodebaseRepository } from '../infrastructure/codebase-repository.js';
34
- import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
35
+ import { DB_PATHS, LATE_INTERACTION_CONFIG, PROJECT_ROOT } from '../infrastructure/config/index.js';
35
36
  import { applyPersistedLiModel } from '../infrastructure/init-config.js';
36
37
  import { readFile as readFileExact } from './search-read.js';
38
+ import { withPinnedRead } from './search-reader-pin.js';
39
+ import { emitToolIdentityAuto } from './cli-decoration.js';
37
40
 
38
41
  // Applies the user's persisted LI model exactly once per (projectRoot, env)
39
42
  // pair so encodeQuery/_getLateInteractionIndex below see the right variant.
@@ -62,6 +65,21 @@ const DEFAULTS = {
62
65
  lexicalWeight: 1.0,
63
66
  symbolWeight: 1.5, // symbol-name hits are stronger evidence per-file
64
67
  maxsimWeight: 1.6, // late interaction wins ties
68
+ // Demotion factors applied to the final re-rank score (after MaxSim re-rank).
69
+ // Stage 3 diagnosis (2026-05-13, PHASE6_REDO ss-semantic) found:
70
+ // - chunks with null/unknown symbol metadata frequently win top-1 when
71
+ // they're really file-header fragments or unnamed code blocks
72
+ // (CPP-002, RB-001, C-005, PY-004 dev failures)
73
+ // - tiny chunks (≤ 5 lines) inflate MaxSim by concentrating literal
74
+ // token presence in a small window (RB-001 `module Sinatra`,
75
+ // C-005 single-line `redisContext *redisConnectWithOptions(...)`).
76
+ // Multiplicative demotion at the final-rank stage is conservative: the
77
+ // chunk is still returned, just less likely to be top-1. Tunable; 0.85
78
+ // was chosen by inspecting per-failure score margins (typical wrong-vs-
79
+ // gold gap is 0.01-0.04, so 0.85 reliably flips the cases identified).
80
+ unsymboledDemote: 0.85,
81
+ smallChunkDemote: 0.85,
82
+ smallChunkMaxLines: 5,
65
83
  };
66
84
 
67
85
  const APPROX_CHARS_PER_TOKEN = 4;
@@ -70,36 +88,155 @@ const APPROX_CHARS_PER_TOKEN = 4;
70
88
  // Module-level lazy singletons
71
89
  // ---------------------------------------------------------------------------
72
90
 
73
- let _repo = null;
74
- function _getRepo() {
75
- if (_repo === null) {
76
- try { _repo = new CodebaseRepository(DB_PATHS.codebase); }
77
- catch { _repo = false; }
91
+ const RECONCILE_MANIFEST_FILENAME = 'reconcile-manifest.json';
92
+
93
+ function _projectKey(projectRoot) {
94
+ return path.resolve(projectRoot || PROJECT_ROOT || process.cwd());
95
+ }
96
+
97
+ function _dataDirName() {
98
+ const dir = path.basename(path.dirname(DB_PATHS.codebase || ''));
99
+ return dir && dir !== '.' ? dir : '.sweet-search';
100
+ }
101
+
102
+ function _stateDirForProject(projectRoot) {
103
+ const root = _projectKey(projectRoot);
104
+ if (root === path.resolve(PROJECT_ROOT)) return path.dirname(DB_PATHS.codebase);
105
+ return path.join(root, _dataDirName());
106
+ }
107
+
108
+ function _codebasePathForProject(projectRoot, manifest = null) {
109
+ const descriptor = manifest?.vectors?.path || manifest?.vectors?.dbPath;
110
+ if (descriptor) {
111
+ return _resolveStatePath(projectRoot, descriptor);
112
+ }
113
+ return _defaultCodebasePathForProject(projectRoot);
114
+ }
115
+
116
+ function _defaultCodebasePathForProject(projectRoot) {
117
+ const root = _projectKey(projectRoot);
118
+ if (root === path.resolve(PROJECT_ROOT)) return DB_PATHS.codebase;
119
+ return path.join(_stateDirForProject(root), 'codebase.db');
120
+ }
121
+
122
+ function _readReconcileManifest(projectRoot) {
123
+ try {
124
+ const manifest = JSON.parse(
125
+ fs.readFileSync(path.join(_stateDirForProject(projectRoot), RECONCILE_MANIFEST_FILENAME), 'utf-8'),
126
+ );
127
+ return Number.isInteger(manifest?.epoch) ? manifest : null;
128
+ } catch {
129
+ return null;
130
+ }
131
+ }
132
+
133
+ function _resolveStatePath(projectRoot, filePath) {
134
+ if (!filePath) return null;
135
+ if (path.isAbsolute(filePath)) return filePath;
136
+ return path.join(_stateDirForProject(projectRoot), filePath);
137
+ }
138
+
139
+ function _lateInteractionIndexPath(projectRoot, manifest) {
140
+ const descriptor = manifest?.lateInteraction?.path
141
+ || manifest?.lateInteraction?.indexPath
142
+ || manifest?.lateInteraction?.manifest;
143
+ if (descriptor) {
144
+ const resolved = _resolveStatePath(projectRoot, descriptor);
145
+ const segmentDir = path.dirname(resolved);
146
+ return segmentDir.endsWith('.segments')
147
+ ? segmentDir.slice(0, -'.segments'.length)
148
+ : resolved;
149
+ }
150
+ const root = _projectKey(projectRoot);
151
+ if (root === path.resolve(PROJECT_ROOT)) return DB_PATHS.lateInteraction;
152
+ if (DB_PATHS.lateInteraction && fs.existsSync(DB_PATHS.lateInteraction)) {
153
+ return DB_PATHS.lateInteraction;
78
154
  }
79
- return _repo || null;
155
+ return path.join(_stateDirForProject(root), path.basename(DB_PATHS.lateInteraction));
156
+ }
157
+
158
+ function _sourceStaleness(projectRoot, filePathRel) {
159
+ const manifest = _readReconcileManifest(projectRoot);
160
+ const publishedMs = Date.parse(manifest?.publishedAt || '');
161
+ if (!Number.isFinite(publishedMs)) return null;
162
+ try {
163
+ const abs = path.isAbsolute(filePathRel)
164
+ ? filePathRel
165
+ : path.resolve(projectRoot, filePathRel);
166
+ const stat = fs.statSync(abs);
167
+ if (stat.mtimeMs <= publishedMs) return null;
168
+ return {
169
+ stale: true,
170
+ indexEpoch: manifest.epoch,
171
+ indexPublishedAt: manifest.publishedAt,
172
+ sourceMtime: stat.mtime.toISOString(),
173
+ warning: 'source file is newer than the semantic index; spans were selected from stale index metadata and text was reread from disk',
174
+ };
175
+ } catch {
176
+ return null;
177
+ }
178
+ }
179
+
180
+ const _repos = new Map();
181
+ function _getRepo(projectRoot) {
182
+ const key = _projectKey(projectRoot);
183
+ const manifest = _readReconcileManifest(projectRoot);
184
+ const dbPath = _codebasePathForProject(projectRoot, manifest);
185
+ const baseDbPath = _defaultCodebasePathForProject(projectRoot);
186
+ let entry = _repos.get(key);
187
+ if (!entry || entry.dbPath !== dbPath || entry.baseDbPath !== baseDbPath) {
188
+ entry?.repo?.close?.();
189
+ try {
190
+ entry = { dbPath, baseDbPath, repo: new CodebaseRepository(baseDbPath) };
191
+ _repos.set(key, entry);
192
+ } catch {
193
+ return null;
194
+ }
195
+ }
196
+ const repo = entry.repo;
197
+ repo.refreshManifestEpoch?.();
198
+ return repo;
80
199
  }
81
200
 
82
201
  let _liIndex = null;
83
202
  let _liInitPromise = null;
84
- async function _getLateInteractionIndex() {
85
- if (_liIndex) return _liIndex;
203
+ let _liProjectKey = null;
204
+ let _liManifestEpoch = null;
205
+ async function _getLateInteractionIndex(projectRoot) {
206
+ const projectKey = _projectKey(projectRoot);
207
+ const manifest = _readReconcileManifest(projectRoot);
208
+ const manifestEpoch = Number.isInteger(manifest?.epoch) ? manifest.epoch : null;
209
+ const samePin = _liProjectKey === projectKey && _liManifestEpoch === manifestEpoch;
210
+ if (_liIndex !== null && samePin) return _liIndex || null;
211
+ if (_liIndex !== null && !samePin) {
212
+ _liIndex = null;
213
+ _liInitPromise = null;
214
+ }
86
215
  if (_liInitPromise) return _liInitPromise;
87
216
  if (!LATE_INTERACTION_CONFIG?.enabled) return null;
88
217
  _liInitPromise = (async () => {
89
218
  try {
90
219
  const { LateInteractionIndex } = await import('../ranking/late-interaction-index.js');
91
- const idx = new LateInteractionIndex({});
220
+ const idx = new LateInteractionIndex({
221
+ indexPath: _lateInteractionIndexPath(projectRoot, manifest),
222
+ });
92
223
  await idx.init();
93
224
  // If the index is empty (no segments, no docs), treat as unavailable —
94
225
  // saves a noisy warning later when scoreWithLateInteraction runs.
95
226
  if (!idx.documents || idx.documents.size === 0) {
96
227
  _liIndex = false;
228
+ _liProjectKey = projectKey;
229
+ _liManifestEpoch = manifestEpoch;
97
230
  return null;
98
231
  }
99
232
  _liIndex = idx;
233
+ _liProjectKey = projectKey;
234
+ _liManifestEpoch = manifestEpoch;
100
235
  return idx;
101
236
  } catch {
102
237
  _liIndex = false;
238
+ _liProjectKey = projectKey;
239
+ _liManifestEpoch = manifestEpoch;
103
240
  return null;
104
241
  } finally {
105
242
  _liInitPromise = null;
@@ -130,7 +267,26 @@ function _projectRelative(absOrRelPath, projectRoot) {
130
267
  ? absOrRelPath
131
268
  : path.resolve(root, absOrRelPath);
132
269
  const rel = path.relative(root, abs);
133
- return rel.startsWith('..') || path.isAbsolute(rel) ? abs : rel;
270
+ const normalized = _normalizeRelativePath(rel);
271
+ if (normalized) return normalized;
272
+ try {
273
+ const realRel = path.relative(
274
+ fs.realpathSync.native(root),
275
+ fs.realpathSync.native(abs),
276
+ );
277
+ return _normalizeRelativePath(realRel) || abs;
278
+ } catch {
279
+ return abs;
280
+ }
281
+ }
282
+
283
+ function _normalizeRelativePath(rel) {
284
+ const normalized = rel.replace(/\\/g, '/').replace(/^\.\//, '');
285
+ if (!normalized || normalized === '..' || normalized.startsWith('../') || normalized.includes('/../')) {
286
+ return null;
287
+ }
288
+ if (path.isAbsolute(normalized)) return null;
289
+ return normalized;
134
290
  }
135
291
 
136
292
  function _parseMeta(rawMeta) {
@@ -176,7 +332,7 @@ function _escapeRegex(s) {
176
332
  // ---------------------------------------------------------------------------
177
333
 
178
334
  async function _loadFileChunks(filePathRel, projectRoot) {
179
- const repo = _getRepo();
335
+ const repo = _getRepo(projectRoot);
180
336
  if (!repo) return { chunks: [], language: null };
181
337
  const rows = repo.getChunksByFilePath(filePathRel);
182
338
  if (rows.length === 0) return { chunks: [], language: null };
@@ -268,24 +424,38 @@ function _scoreSymbol(chunks, queryTerms, queryRaw) {
268
424
  const sym = (c.symbol || '').toLowerCase();
269
425
  if (!sym) continue;
270
426
  let s = 0;
271
- if (sym && lowerRaw.includes(sym)) s += 2; // raw query mentions the symbol
427
+ // Word-boundary match prevents short symbols from collecting +2 just for
428
+ // being a substring of an unrelated longer token in the query. Stage 3
429
+ // PHASE6_REDO diagnosis (2026-05-13) found two ss-semantic dev FAILs
430
+ // (JV-004 `Show getType` → `get` chunk got +2 from "get" ⊂ "gettype";
431
+ // LU-001 `trace _class metatable` → `class` chunk got +2 from "class"
432
+ // ⊂ "_class") where this substring-rule over-credited the wrong chunk.
433
+ // The word-boundary form still credits genuine mentions (e.g., "query"
434
+ // as a real query token still matches `query`-symbol chunks — ZG-001
435
+ // ambiguity is preserved). Structural rule, no per-language signal,
436
+ // no stopword growth.
437
+ const reBoundary = new RegExp(`(?:^|[^a-zA-Z0-9_])${_escapeRegex(sym)}(?=[^a-zA-Z0-9_]|$)`, 'i');
438
+ if (sym && reBoundary.test(lowerRaw)) s += 2; // query mentions symbol as a word
272
439
  for (const t of queryTerms) {
273
440
  if (sym === t) s += 3; // exact name match
274
- else if (sym.includes(t)) s += 1; // substring
441
+ else if (sym.includes(t)) s += 1; // substring (chunk symbol contains query token)
275
442
  }
276
443
  if (s > 0) scores.set(c.id, s);
277
444
  }
278
445
  return scores;
279
446
  }
280
447
 
281
- async function _scoreLateInteraction(chunks, query) {
448
+ async function _scoreLateInteraction(chunks, query, projectRoot) {
282
449
  if (chunks.length === 0) return { scores: new Map(), ran: false };
283
- const liIndex = await _getLateInteractionIndex();
450
+ const liIndex = await _getLateInteractionIndex(projectRoot);
284
451
  if (!liIndex) return { scores: new Map(), ran: false };
285
452
 
286
- // Only score chunks whose IDs actually appear in the LI index.
453
+ // Only score chunks whose IDs actually appear in the LI index. Use the
454
+ // public availability API so alias pointers and live tombstone sidecars
455
+ // share the same visibility contract as normal search.
456
+ const available = liIndex.hasTokens(chunks.map(c => c.id));
287
457
  const candidates = chunks
288
- .filter(c => liIndex.documents.has(c.id))
458
+ .filter(c => available.has(c.id))
289
459
  .map(c => ({ id: c.id, score: 0 }));
290
460
  if (candidates.length === 0) return { scores: new Map(), ran: false };
291
461
 
@@ -457,7 +627,7 @@ function _fallbackSpanFromText(fileText, totalLines, maxChars) {
457
627
  * @param {boolean} [req.verbose=false] - include timings + signal contributions
458
628
  * @returns {Promise<Object>}
459
629
  */
460
- export async function readSemantic(req) {
630
+ async function _readSemanticUnpinned(req) {
461
631
  const t0 = performance.now();
462
632
  if (!req || !req.path) throw new Error('path is required');
463
633
  if (!req.query || !String(req.query).trim()) throw new Error('query is required');
@@ -465,6 +635,7 @@ export async function readSemantic(req) {
465
635
  const projectRoot = req.projectRoot || process.cwd();
466
636
  _ensurePersistedLiModelApplied(projectRoot);
467
637
  const filePathRel = _projectRelative(req.path, projectRoot);
638
+ const staleness = _sourceStaleness(projectRoot, filePathRel);
468
639
 
469
640
  const topK = req.topK ?? DEFAULTS.topK;
470
641
  const threshold = req.threshold ?? DEFAULTS.threshold;
@@ -493,6 +664,7 @@ export async function readSemantic(req) {
493
664
  spans: fallback.ok ? [_fallbackSpanFromRead(fallback, maxChars)] : [],
494
665
  charsReturned: fallback.ok ? Math.min((fallback.text || '').length, maxChars) : 0,
495
666
  approxTokensReturned: fallback.ok ? Math.ceil(Math.min((fallback.text || '').length, maxChars) / APPROX_CHARS_PER_TOKEN) : 0,
667
+ ...(staleness ? { staleness, warnings: [staleness.warning] } : {}),
496
668
  timings: { totalMs: +(performance.now() - t0).toFixed(2) },
497
669
  };
498
670
  }
@@ -514,7 +686,7 @@ export async function readSemantic(req) {
514
686
  const tLex1 = performance.now();
515
687
 
516
688
  const tLi0 = performance.now();
517
- const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(chunks, req.query);
689
+ const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(chunks, req.query, projectRoot);
518
690
  const tLi1 = performance.now();
519
691
 
520
692
  // Threshold gate on MaxSim — drop chunks whose LI score is too low. This
@@ -550,6 +722,7 @@ export async function readSemantic(req) {
550
722
  charsReturned: Math.min(fileText.length, maxChars),
551
723
  approxTokensReturned: Math.ceil(Math.min(fileText.length, maxChars) / APPROX_CHARS_PER_TOKEN),
552
724
  signals: verbose ? { liRan, lexicalHits: 0, symbolHits: 0, maxsimHits: 0 } : undefined,
725
+ ...(staleness ? { staleness, warnings: [staleness.warning] } : {}),
553
726
  timings: verbose ? {
554
727
  loadMs: +(tLoad1 - tLoad0).toFixed(2),
555
728
  lexicalMs: +(tLex1 - tLex0).toFixed(2),
@@ -568,12 +741,39 @@ export async function readSemantic(req) {
568
741
  // Final re-rank: prefer late-interaction score when LI ran; otherwise the
569
742
  // RRF score is the authority. This mirrors the SOTA pattern (cheap candidate
570
743
  // pool → expensive LI re-rank on the survivors).
744
+ //
745
+ // Multiplicative score demotions on null/unknown-symbol chunks and on tiny
746
+ // chunks are applied here so the re-rank below sees the corrected score
747
+ // (Stage 3 PHASE6_REDO ss-semantic, 2026-05-13). Demotion is intentionally
748
+ // applied AFTER the MaxSim re-rank threshold gate above — chunks still
749
+ // survive into the result, they're just less likely to win top-1.
750
+ const unsymDemote = req.unsymboledDemote ?? DEFAULTS.unsymboledDemote;
751
+ const smallDemote = req.smallChunkDemote ?? DEFAULTS.smallChunkDemote;
752
+ const smallChunkMaxLines = req.smallChunkMaxLines ?? DEFAULTS.smallChunkMaxLines;
753
+
571
754
  const ranked = fusedTop
572
755
  .map(([id, fusedScore]) => {
573
756
  const c = idToChunk.get(id);
574
757
  if (!c) return null;
575
758
  const li = maxsimScores.get(id);
576
- const finalScore = liRan && li != null ? li : fusedScore;
759
+ const baseScore = liRan && li != null ? li : fusedScore;
760
+ // Stage 3 PHASE6_REDO ss-semantic (2026-05-13): demote only the
761
+ // INTERSECTION of (null-or-unknown symbol) AND (≤ smallChunkMaxLines).
762
+ // Earlier OR-form regressed typescript-lib (interface declarations
763
+ // are legitimately small AND symboled; OR-rule demoted them too).
764
+ // The intersection targets exactly the RB-001 pattern — short
765
+ // unnamed code fragments that win MaxSim by concentrated literal
766
+ // tokens (e.g., 3-line `module Sinatra` decl beating the 24-line
767
+ // Base class body). Multiplicative composition gives 0.85*0.85=0.7225x
768
+ // when both conditions fire.
769
+ const symMeta = c.symbol;
770
+ const isUnsymboled = !symMeta || symMeta === 'unknown';
771
+ const chunkLines = c.endLine - c.startLine + 1;
772
+ const isSmall = chunkLines <= smallChunkMaxLines;
773
+ const demoteFactor = (isUnsymboled && isSmall)
774
+ ? unsymDemote * smallDemote
775
+ : 1;
776
+ const finalScore = baseScore * demoteFactor;
577
777
  return {
578
778
  id,
579
779
  symbol: c.symbol,
@@ -586,6 +786,8 @@ export async function readSemantic(req) {
586
786
  symbol: symbolScores.get(id) || 0,
587
787
  maxsim: liRan ? (maxsimScores.get(id) ?? null) : null,
588
788
  fused: fusedScore,
789
+ baseScore,
790
+ demoteFactor,
589
791
  },
590
792
  };
591
793
  })
@@ -607,6 +809,7 @@ export async function readSemantic(req) {
607
809
  spans,
608
810
  charsReturned: charsUsed,
609
811
  approxTokensReturned: Math.ceil(charsUsed / APPROX_CHARS_PER_TOKEN),
812
+ ...(staleness ? { staleness, warnings: [staleness.warning] } : {}),
610
813
  signals: verbose ? {
611
814
  liRan,
612
815
  lexicalHits: lexicalScores.size,
@@ -624,6 +827,21 @@ export async function readSemantic(req) {
624
827
  };
625
828
  }
626
829
 
830
+ export async function readSemantic(req) {
831
+ const projectRoot = req?.projectRoot || process.cwd();
832
+ return withPinnedRead(
833
+ {
834
+ projectRoot,
835
+ meta: {
836
+ tool: 'read-semantic',
837
+ path: req?.path ?? null,
838
+ query: req?.query ? String(req.query).slice(0, 200) : null,
839
+ },
840
+ },
841
+ () => _readSemanticUnpinned({ ...req, projectRoot }),
842
+ );
843
+ }
844
+
627
845
  // ---------------------------------------------------------------------------
628
846
  // Formatting
629
847
  // ---------------------------------------------------------------------------
@@ -640,6 +858,9 @@ export function formatReadSemanticResult(result, format = 'agent') {
640
858
  lines.push(`[error]`);
641
859
  return lines.join('\n');
642
860
  }
861
+ for (const warning of result.warnings || []) {
862
+ lines.push(`[warning] ${warning}`);
863
+ }
643
864
  for (const span of result.spans) {
644
865
  const label = span.symbols && span.symbols.length
645
866
  ? `${span.symbols.join(', ')} (lines ${span.startLine}-${span.endLine})`
@@ -663,10 +884,18 @@ function _parseArgs(args) {
663
884
  const positional = [];
664
885
  let format = 'agent';
665
886
  let topK; let threshold; let contextLines; let maxChars; let maxTokens; let verbose = false;
887
+ let plain = false; let noBanner = false;
666
888
  for (let i = 0; i < args.length; i++) {
667
889
  const a = args[i];
668
890
  if (a === '--json') format = 'json';
669
891
  else if (a === '--agent') format = 'agent';
892
+ else if (a === '--no-banner') noBanner = true;
893
+ else if (a === '--format' || a.startsWith('--format=')) {
894
+ const v = a === '--format' ? args[++i] : a.slice('--format='.length);
895
+ if (v === 'json' || v === 'agent') format = v;
896
+ else if (v === 'plain') plain = true;
897
+ else throw new Error(`unknown --format value: ${v}`);
898
+ }
670
899
  else if (a === '--verbose') verbose = true;
671
900
  else if (a === '--top' || a === '--top-k' || a === '-k') topK = +args[++i];
672
901
  else if (a === '--threshold') threshold = +args[++i];
@@ -677,7 +906,7 @@ function _parseArgs(args) {
677
906
  else if (a.startsWith('--')) throw new Error(`unknown flag: ${a}`);
678
907
  else positional.push(a);
679
908
  }
680
- return { positional, format, topK, threshold, contextLines, maxChars, maxTokens, verbose };
909
+ return { positional, format, topK, threshold, contextLines, maxChars, maxTokens, verbose, plain, noBanner };
681
910
  }
682
911
 
683
912
  function _printHelp() {
@@ -694,6 +923,8 @@ function _printHelp() {
694
923
  ' --max-chars <n> Hard cap on returned text (default: 8000)',
695
924
  ' --max-tokens <n> Convenience cap (~chars/4)',
696
925
  ' --json Emit JSON',
926
+ ' --format <fmt> json | agent | plain (plain = no identity line)',
927
+ ' --no-banner Suppress the identity line',
697
928
  ' --verbose Include timings + per-signal scores',
698
929
  '',
699
930
  ].join('\n'));
@@ -719,6 +950,9 @@ export async function handleReadSemanticCli(args) {
719
950
  maxTokens: parsed.maxTokens,
720
951
  verbose: parsed.verbose,
721
952
  });
953
+ if (parsed.format !== 'json') {
954
+ emitToolIdentityAuto('read-semantic', `${file} · "${query}"`, { plain: parsed.plain, noBanner: parsed.noBanner });
955
+ }
722
956
  process.stdout.write(formatReadSemanticResult(result, parsed.format));
723
957
  if (parsed.format !== 'json') process.stdout.write('\n');
724
958
  process.exit(result.ok ? 0 : 1);
@@ -726,9 +960,12 @@ export async function handleReadSemanticCli(args) {
726
960
 
727
961
  // Test-only export — clears caches between unit tests.
728
962
  export function __resetReadSemanticCachesForTests() {
729
- _repo = null;
963
+ for (const entry of _repos.values()) entry?.repo?.close?.();
964
+ _repos.clear();
730
965
  _liIndex = null;
731
966
  _liInitPromise = null;
967
+ _liProjectKey = null;
968
+ _liManifestEpoch = null;
732
969
  _encodeQueryFn = null;
733
970
  _appliedLiPerRoot.clear();
734
971
  }