sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  /**
4
- * Incremental Indexing Tracker v2.3
4
+ * Incremental Indexing Tracker v2.4
5
5
  *
6
6
  * Tracks file changes using content hashes to enable incremental reindexing.
7
7
  * Only files that have changed since last index are reprocessed.
@@ -11,9 +11,14 @@
11
11
  * - Forces full reindex when config fingerprint mismatches
12
12
  * - Prevents silent dimension mismatch corruption
13
13
  *
14
- * Sweet Search v2.3: mtime/size fast-path optimization (Phase 0.3)
15
- * - Stores { hash, size, mtime_ns } per file instead of just hash
16
- * - Fast-path: skip content read if (size, mtime_ns) match stored values
14
+ * Sweet Search v2.3: mtime/size/inode fast-path optimization (Phase 0.3)
15
+ *
16
+ * Sweet Search v2.4: xxHash3 content hashes
17
+ * - Uses the shared incremental hashing wrapper and records the hash
18
+ * algorithm in the config fingerprint.
19
+ * - SHA/xxHash state mismatches force a controlled full reindex.
20
+ * - Stores { hash, size, mtime_ns, inode } per file instead of just hash
21
+ * - Fast-path: skip content read if (size, mtime_ns, inode) match stored values
17
22
  * - 10-50x speedup for typical incremental checks when few/no files changed
18
23
  * - Backward compatible migration from v2.2 (hash-only format)
19
24
  *
@@ -23,17 +28,17 @@
23
28
  import fs from 'fs/promises';
24
29
  import { existsSync, openSync, fsyncSync, closeSync } from 'fs';
25
30
  import path from 'path';
26
- import crypto from 'crypto';
27
31
  import { DB_PATHS, EMBEDDING_CONFIG } from '../infrastructure/config/index.js';
32
+ import { contentHashSync, HASH_ALGORITHM } from '../incremental-indexing/infrastructure/hashing.mjs';
28
33
 
29
34
  const STATE_PATH = DB_PATHS.merkle;
30
35
 
31
36
  // =============================================================================
32
- // CONFIG FINGERPRINT (Sweet Search v2.3)
37
+ // CONFIG FINGERPRINT (Sweet Search v2.4)
33
38
  // Detects embedding provider/dimension changes that require full reindex
34
39
  // =============================================================================
35
40
 
36
- const STATE_VERSION = '2.3';
41
+ const STATE_VERSION = '2.4';
37
42
 
38
43
  /**
39
44
  * Build config fingerprint from current embedding configuration
@@ -48,6 +53,7 @@ function buildConfigFingerprint() {
48
53
  // Quantization pipeline version — bump when changing the embedding pipeline
49
54
  // to invalidate all existing indexes. v2 = int8 quantized embeddings.
50
55
  pipelineVersion: 2,
56
+ hashAlgorithm: HASH_ALGORITHM,
51
57
  version: STATE_VERSION,
52
58
  };
53
59
  }
@@ -122,6 +128,18 @@ function validateConfigFingerprint(storedFingerprint) {
122
128
  };
123
129
  }
124
130
 
131
+ if (storedFingerprint.hashAlgorithm !== current.hashAlgorithm) {
132
+ return {
133
+ valid: false,
134
+ reason: 'hash_algorithm_changed',
135
+ details: {
136
+ previous: storedFingerprint.hashAlgorithm ?? 'unknown',
137
+ current: current.hashAlgorithm,
138
+ message: `Content hash algorithm changed: ${storedFingerprint.hashAlgorithm ?? 'unknown'} -> ${current.hashAlgorithm}`,
139
+ },
140
+ };
141
+ }
142
+
125
143
  // State version upgrade (may require reindex for new features)
126
144
  if (storedFingerprint.version !== current.version) {
127
145
  // Version 2.1 -> 2.2 is backward compatible, just add fingerprint
@@ -137,7 +155,7 @@ function validateConfigFingerprint(storedFingerprint) {
137
155
  },
138
156
  };
139
157
  }
140
- // Version 2.2 -> 2.3 is backward compatible (adds mtime/size fast-path)
158
+ // Version 2.2 -> 2.3 is backward compatible (adds mtime/size/inode fast-path)
141
159
  // First run will read all files but store new format with metadata
142
160
  if (storedFingerprint.version === '2.2' && current.version === '2.3') {
143
161
  return {
@@ -147,21 +165,29 @@ function validateConfigFingerprint(storedFingerprint) {
147
165
  details: {
148
166
  previous: storedFingerprint.version,
149
167
  current: current.version,
150
- message: `State version upgraded: ${storedFingerprint.version} -> ${current.version} (mtime/size fast-path enabled)`,
168
+ message: `State version upgraded: ${storedFingerprint.version} -> ${current.version} (mtime/size/inode fast-path enabled)`,
151
169
  },
152
170
  };
153
171
  }
154
- // Future incompatible versions would return valid: false here
172
+ return {
173
+ valid: false,
174
+ reason: 'state_version_changed',
175
+ details: {
176
+ previous: storedFingerprint.version,
177
+ current: current.version,
178
+ message: `State version changed: ${storedFingerprint.version} -> ${current.version}`,
179
+ },
180
+ };
155
181
  }
156
182
 
157
183
  return { valid: true };
158
184
  }
159
185
 
160
186
  /**
161
- * Compute SHA-256 hash of file content
187
+ * Compute the configured content hash of file content.
162
188
  */
163
189
  function hashContent(content) {
164
- return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
190
+ return contentHashSync(content);
165
191
  }
166
192
 
167
193
  // =============================================================================
@@ -170,23 +196,37 @@ function hashContent(content) {
170
196
  // =============================================================================
171
197
 
172
198
  /**
173
- * Get file metadata (size and mtime in nanoseconds) via fs.stat()
199
+ * Get file metadata (size, mtime in nanoseconds, inode) via fs.stat().
200
+ *
201
+ * Per INCREMENTAL_INDEXING_PLAN.md § 9.1, the dirty-detection tuple is
202
+ * `(mtime_ns, size, inode)`. The naive `mtime > recorded` check misses a
203
+ * second write within the same FS resolution tick; the equality-on-tuple
204
+ * check plus inode detects atomic-rename-over-existing-path (vim swap
205
+ * write, JetBrains safe-write) which produces a new inode at the same
206
+ * path with identical mtime/size.
207
+ *
208
+ * 64-bit inodes from APFS/ZFS/XFS routinely exceed Number.MAX_SAFE_INTEGER;
209
+ * we therefore store inode as a JSON string (BigInt has no JSON type) and
210
+ * cast back via `BigInt(stored.inode)` for comparison. `mtime_ns` and `size`
211
+ * are stored the same way so the tuple has one type policy end to end.
212
+ *
174
213
  * @param {string} filePath - Absolute path to file
175
- * @returns {Promise<{size: number, mtime_ns: bigint}>}
214
+ * @returns {Promise<{size: string, mtime_ns: string, inode: string}>}
176
215
  */
177
216
  async function getFileMetadata(filePath) {
178
217
  const stat = await fs.stat(filePath, { bigint: true });
179
218
  return {
180
- size: Number(stat.size),
181
- mtime_ns: stat.mtimeNs.toString(), // Store as string for JSON serialization
219
+ size: stat.size.toString(),
220
+ mtime_ns: stat.mtimeNs.toString(),
221
+ inode: stat.ino.toString(),
182
222
  };
183
223
  }
184
224
 
185
225
  /**
186
226
  * Migrate legacy file entry (hash-only string) to new format
187
227
  * Used for backward compatibility with v2.2 state files
188
- * @param {string|Object} entry - Either a hash string (v2.2) or {hash, size, mtime_ns} object (v2.3)
189
- * @returns {Object|null} - Returns {hash, size, mtime_ns} or null if entry needs full check
228
+ * @param {string|Object} entry - Either a hash string (v2.2) or {hash, size, mtime_ns, inode?} object (v2.3)
229
+ * @returns {Object|null} - Returns {hash, size, mtime_ns, inode?} or null if entry needs full check
190
230
  */
191
231
  function migrateFileEntry(entry) {
192
232
  // v2.3 format: already an object with hash, size, mtime_ns
@@ -201,10 +241,26 @@ function migrateFileEntry(entry) {
201
241
  return null;
202
242
  }
203
243
 
244
+ function statFieldToBigIntString(value) {
245
+ if (value === null || value === undefined) return null;
246
+ try {
247
+ return BigInt(value).toString();
248
+ } catch {
249
+ return null;
250
+ }
251
+ }
252
+
204
253
  /**
205
- * Check if file metadata matches stored values (fast-path)
206
- * @param {Object} stored - Stored entry {hash, size, mtime_ns}
207
- * @param {Object} current - Current metadata {size, mtime_ns}
254
+ * Check if file metadata matches stored values (fast-path).
255
+ *
256
+ * Per INCREMENTAL_INDEXING_PLAN.md § 9.1 the comparison is the full
257
+ * `(mtime_ns, size, inode)` tuple. A stored entry without an inode is
258
+ * deliberately not fast-path eligible: the next run pays one content read
259
+ * and rewrites the complete tuple, which closes the atomic-rename hole for
260
+ * state files produced before inode persistence landed.
261
+ *
262
+ * @param {Object} stored - Stored entry {hash, size, mtime_ns, inode?}
263
+ * @param {Object} current - Current metadata {size, mtime_ns, inode}
208
264
  * @returns {boolean} - True if metadata matches (file unchanged)
209
265
  */
210
266
  function metadataMatches(stored, current) {
@@ -212,7 +268,10 @@ function metadataMatches(stored, current) {
212
268
  if (stored.size === null || stored.mtime_ns === null) {
213
269
  return false;
214
270
  }
215
- return stored.size === current.size && stored.mtime_ns === current.mtime_ns;
271
+ if (statFieldToBigIntString(stored.size) !== statFieldToBigIntString(current.size)) return false;
272
+ if (statFieldToBigIntString(stored.mtime_ns) !== statFieldToBigIntString(current.mtime_ns)) return false;
273
+ if (stored.inode == null || current.inode == null) return false;
274
+ return statFieldToBigIntString(stored.inode) === statFieldToBigIntString(current.inode);
216
275
  }
217
276
 
218
277
  /**
@@ -319,7 +378,7 @@ async function saveState(state) {
319
378
  /**
320
379
  * Determine which files need reindexing
321
380
  *
322
- * Sweet Search v2.3: mtime/size fast-path optimization
381
+ * Sweet Search v2.3: mtime/size/inode fast-path optimization
323
382
  * - First: fs.stat() to get size and mtime (single syscall, ~0.1ms)
324
383
  * - If metadata matches stored values: skip content read (fast-path)
325
384
  * - If metadata differs: read content and compute hash (slow-path)
@@ -365,13 +424,18 @@ export async function getChangedFiles(allFiles, projectRoot) {
365
424
  const filePath = path.join(projectRoot, file);
366
425
  try {
367
426
  const [content, metadata] = await Promise.all([
368
- fs.readFile(filePath, 'utf-8'),
427
+ fs.readFile(filePath),
369
428
  getFileMetadata(filePath),
370
429
  ]);
371
430
  const hash = hashContent(content);
372
431
  return {
373
432
  file,
374
- data: { hash, size: metadata.size, mtime_ns: metadata.mtime_ns },
433
+ data: {
434
+ hash,
435
+ size: metadata.size,
436
+ mtime_ns: metadata.mtime_ns,
437
+ inode: metadata.inode,
438
+ },
375
439
  error: null,
376
440
  };
377
441
  } catch (err) {
@@ -450,6 +514,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
450
514
  hash: storedEntry.hash,
451
515
  size: metadata.size,
452
516
  mtime_ns: metadata.mtime_ns,
517
+ inode: metadata.inode,
453
518
  };
454
519
  unchanged.push(file);
455
520
  fastPathStats.hits++;
@@ -465,7 +530,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
465
530
  const contentResults = await Promise.all(
466
531
  needsContentRead.map(async ({ file, filePath, metadata, storedEntry }) => {
467
532
  try {
468
- const content = await fs.readFile(filePath, 'utf-8');
533
+ const content = await fs.readFile(filePath);
469
534
  const hash = hashContent(content);
470
535
  return { file, hash, metadata, storedEntry, error: null };
471
536
  } catch (err) {
@@ -485,6 +550,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
485
550
  hash,
486
551
  size: metadata.size,
487
552
  mtime_ns: metadata.mtime_ns,
553
+ inode: metadata.inode,
488
554
  };
489
555
 
490
556
  // Check if file actually changed (compare hashes)
@@ -644,7 +710,10 @@ export async function getPhaseProgress() {
644
710
  const currentFp = buildConfigFingerprint();
645
711
  if (data.configFingerprint?.provider !== currentFp.provider ||
646
712
  data.configFingerprint?.model !== currentFp.model ||
647
- data.configFingerprint?.dimension !== currentFp.dimension) {
713
+ data.configFingerprint?.dimension !== currentFp.dimension ||
714
+ data.configFingerprint?.hnswDimension !== currentFp.hnswDimension ||
715
+ data.configFingerprint?.hashAlgorithm !== currentFp.hashAlgorithm ||
716
+ data.configFingerprint?.version !== currentFp.version) {
648
717
  return null;
649
718
  }
650
719
  return data;
@@ -736,10 +805,10 @@ Config-Aware Cache Invalidation (Sweet Search v2.3):
736
805
  This prevents silent dimension mismatch corruption when switching
737
806
  between providers (e.g., Voyage -> Mistral).
738
807
 
739
- mtime/size Fast-Path Optimization (Sweet Search v2.3):
740
- Each file entry now stores { hash, size, mtime_ns } instead of just hash.
808
+ mtime/size/inode Fast-Path Optimization (Sweet Search v2.3):
809
+ Each file entry now stores { hash, size, mtime_ns, inode } instead of just hash.
741
810
  On incremental checks, fs.stat() is called first (~0.1ms per file).
742
- If (size, mtime_ns) match stored values, content read is skipped entirely.
811
+ If (size, mtime_ns, inode) match stored values, content read is skipped entirely.
743
812
 
744
813
  This provides 10-50x speedup for typical incremental checks when
745
814
  few or no files have changed. First run after upgrade reads all files
@@ -38,7 +38,6 @@ if (process.env.SWEET_SEARCH_UV_THREADPOOL_SIZE && !process.env.UV_THREADPOOL_SI
38
38
  }
39
39
 
40
40
  import { existsSync } from 'fs';
41
-
42
41
  import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
43
42
  import { applyPersistedLiModel } from '../infrastructure/init-config.js';
44
43
  import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
@@ -53,25 +52,22 @@ import {
53
52
  atomicSwapDatabase,
54
53
  readFilesFromStdin, discoverFiles,
55
54
  } from './indexer-utils.js';
56
-
57
55
  import {
58
56
  buildCodeGraph, createVectorSchema, ensureVectorSchema,
59
57
  buildInsertItems, insertVectors, pipelinedEmbedAndInsert,
60
58
  buildVectorIndex,
61
59
  } from './indexer-build.js';
62
-
63
60
  import {
64
61
  incrementalUpdateHNSW, buildHNSWIndex,
65
62
  buildLateInteractionIndex, buildQuantizedArtifactsPhase,
66
63
  } from './indexer-ann.js';
67
-
68
64
  import {
69
65
  runPhase,
70
66
  discoverFilesPhase, determineFilesToIndexPhase,
71
67
  buildCodeGraphWithHCGSPhase, buildVectorsAndArtifactsPhase,
72
68
  updateIncrementalStatePhase, printSummaryPhase,
73
69
  } from './indexer-phases.js';
74
-
70
+ import { establishEmptyBaseline } from './indexer-empty-baseline.js';
75
71
  // =============================================================================
76
72
  // CLI ARGUMENT PARSING
77
73
  // =============================================================================
@@ -275,6 +271,8 @@ Output:
275
271
  const { allFiles, stdinFiles, earlyExit: discoveryEarlyExit, exitReason: discoveryExitReason } = discoveryResult.result;
276
272
 
277
273
  if (discoveryEarlyExit) {
274
+ // Empty repo: write a coherent zero-row baseline (see indexer-empty-baseline.js).
275
+ if (!dryRun && discoveryExitReason === 'no_files') await establishEmptyBaseline();
278
276
  if (quiet) {
279
277
  console.log(JSON.stringify({ success: true, filesProcessed: 0, reason: discoveryExitReason }));
280
278
  }
@@ -363,6 +361,7 @@ Output:
363
361
  // PHASE 4: Vectors + HNSW + Artifacts (if not --graph-only)
364
362
  // =========================================================================
365
363
  let vectorStats = { chunks: 0, embeddings: 0 };
364
+ let sparseGramResult = null;
366
365
 
367
366
  if (!graphOnly) {
368
367
  const vectorsResult = await runPhase('Vectors + HNSW + Artifacts', buildVectorsAndArtifactsPhase, {
@@ -384,6 +383,7 @@ Output:
384
383
  }
385
384
 
386
385
  vectorStats = vectorsResult.result.vectorStats;
386
+ sparseGramResult = vectorsResult.result.sparseGramResult;
387
387
  } else if (hcgsPromise) {
388
388
  const hcgsResult = await hcgsPromise;
389
389
  if (hcgsResult && !hcgsResult.error) {
@@ -401,6 +401,7 @@ Output:
401
401
  allFiles,
402
402
  vectorStats,
403
403
  graphStats,
404
+ sparseGramResult,
404
405
  });
405
406
 
406
407
  // =========================================================================