sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Incremental Indexing Tracker v2.3
|
|
4
|
+
* Incremental Indexing Tracker v2.4
|
|
5
5
|
*
|
|
6
6
|
* Tracks file changes using content hashes to enable incremental reindexing.
|
|
7
7
|
* Only files that have changed since last index are reprocessed.
|
|
@@ -11,9 +11,14 @@
|
|
|
11
11
|
* - Forces full reindex when config fingerprint mismatches
|
|
12
12
|
* - Prevents silent dimension mismatch corruption
|
|
13
13
|
*
|
|
14
|
-
* Sweet Search v2.3: mtime/size fast-path optimization (Phase 0.3)
|
|
15
|
-
*
|
|
16
|
-
*
|
|
14
|
+
* Sweet Search v2.3: mtime/size/inode fast-path optimization (Phase 0.3)
|
|
15
|
+
*
|
|
16
|
+
* Sweet Search v2.4: xxHash3 content hashes
|
|
17
|
+
* - Uses the shared incremental hashing wrapper and records the hash
|
|
18
|
+
* algorithm in the config fingerprint.
|
|
19
|
+
* - SHA/xxHash state mismatches force a controlled full reindex.
|
|
20
|
+
* - Stores { hash, size, mtime_ns, inode } per file instead of just hash
|
|
21
|
+
* - Fast-path: skip content read if (size, mtime_ns, inode) match stored values
|
|
17
22
|
* - 10-50x speedup for typical incremental checks when few/no files changed
|
|
18
23
|
* - Backward compatible migration from v2.2 (hash-only format)
|
|
19
24
|
*
|
|
@@ -23,17 +28,17 @@
|
|
|
23
28
|
import fs from 'fs/promises';
|
|
24
29
|
import { existsSync, openSync, fsyncSync, closeSync } from 'fs';
|
|
25
30
|
import path from 'path';
|
|
26
|
-
import crypto from 'crypto';
|
|
27
31
|
import { DB_PATHS, EMBEDDING_CONFIG } from '../infrastructure/config/index.js';
|
|
32
|
+
import { contentHashSync, HASH_ALGORITHM } from '../incremental-indexing/infrastructure/hashing.mjs';
|
|
28
33
|
|
|
29
34
|
const STATE_PATH = DB_PATHS.merkle;
|
|
30
35
|
|
|
31
36
|
// =============================================================================
|
|
32
|
-
// CONFIG FINGERPRINT (Sweet Search v2.3)
|
|
37
|
+
// CONFIG FINGERPRINT (Sweet Search v2.4)
|
|
33
38
|
// Detects embedding provider/dimension changes that require full reindex
|
|
34
39
|
// =============================================================================
|
|
35
40
|
|
|
36
|
-
const STATE_VERSION = '2.
|
|
41
|
+
const STATE_VERSION = '2.4';
|
|
37
42
|
|
|
38
43
|
/**
|
|
39
44
|
* Build config fingerprint from current embedding configuration
|
|
@@ -48,6 +53,7 @@ function buildConfigFingerprint() {
|
|
|
48
53
|
// Quantization pipeline version — bump when changing the embedding pipeline
|
|
49
54
|
// to invalidate all existing indexes. v2 = int8 quantized embeddings.
|
|
50
55
|
pipelineVersion: 2,
|
|
56
|
+
hashAlgorithm: HASH_ALGORITHM,
|
|
51
57
|
version: STATE_VERSION,
|
|
52
58
|
};
|
|
53
59
|
}
|
|
@@ -122,6 +128,18 @@ function validateConfigFingerprint(storedFingerprint) {
|
|
|
122
128
|
};
|
|
123
129
|
}
|
|
124
130
|
|
|
131
|
+
if (storedFingerprint.hashAlgorithm !== current.hashAlgorithm) {
|
|
132
|
+
return {
|
|
133
|
+
valid: false,
|
|
134
|
+
reason: 'hash_algorithm_changed',
|
|
135
|
+
details: {
|
|
136
|
+
previous: storedFingerprint.hashAlgorithm ?? 'unknown',
|
|
137
|
+
current: current.hashAlgorithm,
|
|
138
|
+
message: `Content hash algorithm changed: ${storedFingerprint.hashAlgorithm ?? 'unknown'} -> ${current.hashAlgorithm}`,
|
|
139
|
+
},
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
|
|
125
143
|
// State version upgrade (may require reindex for new features)
|
|
126
144
|
if (storedFingerprint.version !== current.version) {
|
|
127
145
|
// Version 2.1 -> 2.2 is backward compatible, just add fingerprint
|
|
@@ -137,7 +155,7 @@ function validateConfigFingerprint(storedFingerprint) {
|
|
|
137
155
|
},
|
|
138
156
|
};
|
|
139
157
|
}
|
|
140
|
-
// Version 2.2 -> 2.3 is backward compatible (adds mtime/size fast-path)
|
|
158
|
+
// Version 2.2 -> 2.3 is backward compatible (adds mtime/size/inode fast-path)
|
|
141
159
|
// First run will read all files but store new format with metadata
|
|
142
160
|
if (storedFingerprint.version === '2.2' && current.version === '2.3') {
|
|
143
161
|
return {
|
|
@@ -147,21 +165,29 @@ function validateConfigFingerprint(storedFingerprint) {
|
|
|
147
165
|
details: {
|
|
148
166
|
previous: storedFingerprint.version,
|
|
149
167
|
current: current.version,
|
|
150
|
-
message: `State version upgraded: ${storedFingerprint.version} -> ${current.version} (mtime/size fast-path enabled)`,
|
|
168
|
+
message: `State version upgraded: ${storedFingerprint.version} -> ${current.version} (mtime/size/inode fast-path enabled)`,
|
|
151
169
|
},
|
|
152
170
|
};
|
|
153
171
|
}
|
|
154
|
-
|
|
172
|
+
return {
|
|
173
|
+
valid: false,
|
|
174
|
+
reason: 'state_version_changed',
|
|
175
|
+
details: {
|
|
176
|
+
previous: storedFingerprint.version,
|
|
177
|
+
current: current.version,
|
|
178
|
+
message: `State version changed: ${storedFingerprint.version} -> ${current.version}`,
|
|
179
|
+
},
|
|
180
|
+
};
|
|
155
181
|
}
|
|
156
182
|
|
|
157
183
|
return { valid: true };
|
|
158
184
|
}
|
|
159
185
|
|
|
160
186
|
/**
|
|
161
|
-
* Compute
|
|
187
|
+
* Compute the configured content hash of file content.
|
|
162
188
|
*/
|
|
163
189
|
function hashContent(content) {
|
|
164
|
-
return
|
|
190
|
+
return contentHashSync(content);
|
|
165
191
|
}
|
|
166
192
|
|
|
167
193
|
// =============================================================================
|
|
@@ -170,23 +196,37 @@ function hashContent(content) {
|
|
|
170
196
|
// =============================================================================
|
|
171
197
|
|
|
172
198
|
/**
|
|
173
|
-
* Get file metadata (size
|
|
199
|
+
* Get file metadata (size, mtime in nanoseconds, inode) via fs.stat().
|
|
200
|
+
*
|
|
201
|
+
* Per INCREMENTAL_INDEXING_PLAN.md § 9.1, the dirty-detection tuple is
|
|
202
|
+
* `(mtime_ns, size, inode)`. The naive `mtime > recorded` check misses a
|
|
203
|
+
* second write within the same FS resolution tick; the equality-on-tuple
|
|
204
|
+
* check plus inode detects atomic-rename-over-existing-path (vim swap
|
|
205
|
+
* write, JetBrains safe-write) which produces a new inode at the same
|
|
206
|
+
* path with identical mtime/size.
|
|
207
|
+
*
|
|
208
|
+
* 64-bit inodes from APFS/ZFS/XFS routinely exceed Number.MAX_SAFE_INTEGER;
|
|
209
|
+
* we therefore store inode as a JSON string (BigInt has no JSON type) and
|
|
210
|
+
* cast back via `BigInt(stored.inode)` for comparison. `mtime_ns` and `size`
|
|
211
|
+
* are stored the same way so the tuple has one type policy end to end.
|
|
212
|
+
*
|
|
174
213
|
* @param {string} filePath - Absolute path to file
|
|
175
|
-
* @returns {Promise<{size:
|
|
214
|
+
* @returns {Promise<{size: string, mtime_ns: string, inode: string}>}
|
|
176
215
|
*/
|
|
177
216
|
async function getFileMetadata(filePath) {
|
|
178
217
|
const stat = await fs.stat(filePath, { bigint: true });
|
|
179
218
|
return {
|
|
180
|
-
size:
|
|
181
|
-
mtime_ns: stat.mtimeNs.toString(),
|
|
219
|
+
size: stat.size.toString(),
|
|
220
|
+
mtime_ns: stat.mtimeNs.toString(),
|
|
221
|
+
inode: stat.ino.toString(),
|
|
182
222
|
};
|
|
183
223
|
}
|
|
184
224
|
|
|
185
225
|
/**
|
|
186
226
|
* Migrate legacy file entry (hash-only string) to new format
|
|
187
227
|
* Used for backward compatibility with v2.2 state files
|
|
188
|
-
* @param {string|Object} entry - Either a hash string (v2.2) or {hash, size, mtime_ns} object (v2.3)
|
|
189
|
-
* @returns {Object|null} - Returns {hash, size, mtime_ns} or null if entry needs full check
|
|
228
|
+
* @param {string|Object} entry - Either a hash string (v2.2) or {hash, size, mtime_ns, inode?} object (v2.3)
|
|
229
|
+
* @returns {Object|null} - Returns {hash, size, mtime_ns, inode?} or null if entry needs full check
|
|
190
230
|
*/
|
|
191
231
|
function migrateFileEntry(entry) {
|
|
192
232
|
// v2.3 format: already an object with hash, size, mtime_ns (and optionally inode)
|
|
@@ -201,10 +241,26 @@ function migrateFileEntry(entry) {
|
|
|
201
241
|
return null;
|
|
202
242
|
}
|
|
203
243
|
|
|
244
|
+
function statFieldToBigIntString(value) {
|
|
245
|
+
if (value === null || value === undefined) return null;
|
|
246
|
+
try {
|
|
247
|
+
return BigInt(value).toString();
|
|
248
|
+
} catch {
|
|
249
|
+
return null;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
204
253
|
/**
|
|
205
|
-
* Check if file metadata matches stored values (fast-path)
|
|
206
|
-
*
|
|
207
|
-
*
|
|
254
|
+
* Check if file metadata matches stored values (fast-path).
|
|
255
|
+
*
|
|
256
|
+
* Per INCREMENTAL_INDEXING_PLAN.md § 9.1 the comparison is the full
|
|
257
|
+
* `(mtime_ns, size, inode)` tuple. A stored entry without an inode is
|
|
258
|
+
* deliberately not fast-path eligible: the next run pays one content read
|
|
259
|
+
* and rewrites the complete tuple, which closes the atomic-rename hole for
|
|
260
|
+
* state files produced before inode persistence landed.
|
|
261
|
+
*
|
|
262
|
+
* @param {Object} stored - Stored entry {hash, size, mtime_ns, inode?}
|
|
263
|
+
* @param {Object} current - Current metadata {size, mtime_ns, inode}
|
|
208
264
|
* @returns {boolean} - True if metadata matches (file unchanged)
|
|
209
265
|
*/
|
|
210
266
|
function metadataMatches(stored, current) {
|
|
@@ -212,7 +268,10 @@ function metadataMatches(stored, current) {
|
|
|
212
268
|
if (stored.size === null || stored.mtime_ns === null) {
|
|
213
269
|
return false;
|
|
214
270
|
}
|
|
215
|
-
|
|
271
|
+
if (statFieldToBigIntString(stored.size) !== statFieldToBigIntString(current.size)) return false;
|
|
272
|
+
if (statFieldToBigIntString(stored.mtime_ns) !== statFieldToBigIntString(current.mtime_ns)) return false;
|
|
273
|
+
if (stored.inode == null || current.inode == null) return false;
|
|
274
|
+
return statFieldToBigIntString(stored.inode) === statFieldToBigIntString(current.inode);
|
|
216
275
|
}
|
|
217
276
|
|
|
218
277
|
/**
|
|
@@ -319,7 +378,7 @@ async function saveState(state) {
|
|
|
319
378
|
/**
|
|
320
379
|
* Determine which files need reindexing
|
|
321
380
|
*
|
|
322
|
-
* Sweet Search v2.3: mtime/size fast-path optimization
|
|
381
|
+
* Sweet Search v2.3: mtime/size/inode fast-path optimization
|
|
323
382
|
* - First: fs.stat() to get size, mtime, and inode (single syscall, ~0.1ms)
|
|
324
383
|
* - If metadata matches stored values: skip content read (fast-path)
|
|
325
384
|
* - If metadata differs: read content and compute hash (slow-path)
|
|
@@ -365,13 +424,18 @@ export async function getChangedFiles(allFiles, projectRoot) {
|
|
|
365
424
|
const filePath = path.join(projectRoot, file);
|
|
366
425
|
try {
|
|
367
426
|
const [content, metadata] = await Promise.all([
|
|
368
|
-
fs.readFile(filePath
|
|
427
|
+
fs.readFile(filePath),
|
|
369
428
|
getFileMetadata(filePath),
|
|
370
429
|
]);
|
|
371
430
|
const hash = hashContent(content);
|
|
372
431
|
return {
|
|
373
432
|
file,
|
|
374
|
-
data: {
|
|
433
|
+
data: {
|
|
434
|
+
hash,
|
|
435
|
+
size: metadata.size,
|
|
436
|
+
mtime_ns: metadata.mtime_ns,
|
|
437
|
+
inode: metadata.inode,
|
|
438
|
+
},
|
|
375
439
|
error: null,
|
|
376
440
|
};
|
|
377
441
|
} catch (err) {
|
|
@@ -450,6 +514,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
|
|
|
450
514
|
hash: storedEntry.hash,
|
|
451
515
|
size: metadata.size,
|
|
452
516
|
mtime_ns: metadata.mtime_ns,
|
|
517
|
+
inode: metadata.inode,
|
|
453
518
|
};
|
|
454
519
|
unchanged.push(file);
|
|
455
520
|
fastPathStats.hits++;
|
|
@@ -465,7 +530,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
|
|
|
465
530
|
const contentResults = await Promise.all(
|
|
466
531
|
needsContentRead.map(async ({ file, filePath, metadata, storedEntry }) => {
|
|
467
532
|
try {
|
|
468
|
-
const content = await fs.readFile(filePath
|
|
533
|
+
const content = await fs.readFile(filePath);
|
|
469
534
|
const hash = hashContent(content);
|
|
470
535
|
return { file, hash, metadata, storedEntry, error: null };
|
|
471
536
|
} catch (err) {
|
|
@@ -485,6 +550,7 @@ export async function getChangedFiles(allFiles, projectRoot) {
|
|
|
485
550
|
hash,
|
|
486
551
|
size: metadata.size,
|
|
487
552
|
mtime_ns: metadata.mtime_ns,
|
|
553
|
+
inode: metadata.inode,
|
|
488
554
|
};
|
|
489
555
|
|
|
490
556
|
// Check if file actually changed (compare hashes)
|
|
@@ -644,7 +710,10 @@ export async function getPhaseProgress() {
|
|
|
644
710
|
const currentFp = buildConfigFingerprint();
|
|
645
711
|
if (data.configFingerprint?.provider !== currentFp.provider ||
|
|
646
712
|
data.configFingerprint?.model !== currentFp.model ||
|
|
647
|
-
data.configFingerprint?.dimension !== currentFp.dimension
|
|
713
|
+
data.configFingerprint?.dimension !== currentFp.dimension ||
|
|
714
|
+
data.configFingerprint?.hnswDimension !== currentFp.hnswDimension ||
|
|
715
|
+
data.configFingerprint?.hashAlgorithm !== currentFp.hashAlgorithm ||
|
|
716
|
+
data.configFingerprint?.version !== currentFp.version) {
|
|
648
717
|
return null;
|
|
649
718
|
}
|
|
650
719
|
return data;
|
|
@@ -736,10 +805,10 @@ Config-Aware Cache Invalidation (Sweet Search v2.3):
|
|
|
736
805
|
This prevents silent dimension mismatch corruption when switching
|
|
737
806
|
between providers (e.g., Voyage -> Mistral).
|
|
738
807
|
|
|
739
|
-
mtime/size Fast-Path Optimization (Sweet Search v2.3):
|
|
740
|
-
Each file entry now stores { hash, size, mtime_ns } instead of just hash.
|
|
808
|
+
mtime/size/inode Fast-Path Optimization (Sweet Search v2.3):
|
|
809
|
+
Each file entry now stores { hash, size, mtime_ns, inode } instead of just hash.
|
|
741
810
|
On incremental checks, fs.stat() is called first (~0.1ms per file).
|
|
742
|
-
If (size, mtime_ns) match stored values, content read is skipped entirely.
|
|
811
|
+
If (size, mtime_ns, inode) match stored values, content read is skipped entirely.
|
|
743
812
|
|
|
744
813
|
This provides 10-50x speedup for typical incremental checks when
|
|
745
814
|
few or no files have changed. First run after upgrade reads all files
|
|
@@ -38,7 +38,6 @@ if (process.env.SWEET_SEARCH_UV_THREADPOOL_SIZE && !process.env.UV_THREADPOOL_SI
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
import { existsSync } from 'fs';
|
|
41
|
-
|
|
42
41
|
import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
|
|
43
42
|
import { applyPersistedLiModel } from '../infrastructure/init-config.js';
|
|
44
43
|
import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
|
|
@@ -53,25 +52,22 @@ import {
|
|
|
53
52
|
atomicSwapDatabase,
|
|
54
53
|
readFilesFromStdin, discoverFiles,
|
|
55
54
|
} from './indexer-utils.js';
|
|
56
|
-
|
|
57
55
|
import {
|
|
58
56
|
buildCodeGraph, createVectorSchema, ensureVectorSchema,
|
|
59
57
|
buildInsertItems, insertVectors, pipelinedEmbedAndInsert,
|
|
60
58
|
buildVectorIndex,
|
|
61
59
|
} from './indexer-build.js';
|
|
62
|
-
|
|
63
60
|
import {
|
|
64
61
|
incrementalUpdateHNSW, buildHNSWIndex,
|
|
65
62
|
buildLateInteractionIndex, buildQuantizedArtifactsPhase,
|
|
66
63
|
} from './indexer-ann.js';
|
|
67
|
-
|
|
68
64
|
import {
|
|
69
65
|
runPhase,
|
|
70
66
|
discoverFilesPhase, determineFilesToIndexPhase,
|
|
71
67
|
buildCodeGraphWithHCGSPhase, buildVectorsAndArtifactsPhase,
|
|
72
68
|
updateIncrementalStatePhase, printSummaryPhase,
|
|
73
69
|
} from './indexer-phases.js';
|
|
74
|
-
|
|
70
|
+
import { establishEmptyBaseline } from './indexer-empty-baseline.js';
|
|
75
71
|
// =============================================================================
|
|
76
72
|
// CLI ARGUMENT PARSING
|
|
77
73
|
// =============================================================================
|
|
@@ -275,6 +271,8 @@ Output:
|
|
|
275
271
|
const { allFiles, stdinFiles, earlyExit: discoveryEarlyExit, exitReason: discoveryExitReason } = discoveryResult.result;
|
|
276
272
|
|
|
277
273
|
if (discoveryEarlyExit) {
|
|
274
|
+
// Empty repo: write a coherent zero-row baseline (see indexer-empty-baseline.js).
|
|
275
|
+
if (!dryRun && discoveryExitReason === 'no_files') await establishEmptyBaseline();
|
|
278
276
|
if (quiet) {
|
|
279
277
|
console.log(JSON.stringify({ success: true, filesProcessed: 0, reason: discoveryExitReason }));
|
|
280
278
|
}
|
|
@@ -363,6 +361,7 @@ Output:
|
|
|
363
361
|
// PHASE 4: Vectors + HNSW + Artifacts (if not --graph-only)
|
|
364
362
|
// =========================================================================
|
|
365
363
|
let vectorStats = { chunks: 0, embeddings: 0 };
|
|
364
|
+
let sparseGramResult = null;
|
|
366
365
|
|
|
367
366
|
if (!graphOnly) {
|
|
368
367
|
const vectorsResult = await runPhase('Vectors + HNSW + Artifacts', buildVectorsAndArtifactsPhase, {
|
|
@@ -384,6 +383,7 @@ Output:
|
|
|
384
383
|
}
|
|
385
384
|
|
|
386
385
|
vectorStats = vectorsResult.result.vectorStats;
|
|
386
|
+
sparseGramResult = vectorsResult.result.sparseGramResult;
|
|
387
387
|
} else if (hcgsPromise) {
|
|
388
388
|
const hcgsResult = await hcgsPromise;
|
|
389
389
|
if (hcgsResult && !hcgsResult.error) {
|
|
@@ -401,6 +401,7 @@ Output:
|
|
|
401
401
|
allFiles,
|
|
402
402
|
vectorStats,
|
|
403
403
|
graphStats,
|
|
404
|
+
sparseGramResult,
|
|
404
405
|
});
|
|
405
406
|
|
|
406
407
|
// =========================================================================
|