sweet-search 2.5.1 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/core/cli.js +45 -0
  2. package/core/embedding/embedding-cache.js +90 -4
  3. package/core/embedding/embedding-service.js +27 -5
  4. package/core/graph/graph-expansion.js +215 -36
  5. package/core/graph/graph-extractor.js +196 -11
  6. package/core/graph/graph-search.js +395 -92
  7. package/core/graph/hcgs-generator.js +2 -1
  8. package/core/graph/index.js +2 -0
  9. package/core/graph/repo-map.js +28 -6
  10. package/core/graph/structural-answer-cues.js +168 -0
  11. package/core/graph/structural-callsite-hints.js +40 -0
  12. package/core/graph/structural-context-format.js +40 -0
  13. package/core/graph/structural-context.js +450 -0
  14. package/core/graph/structural-forward-push.js +156 -0
  15. package/core/graph/structural-header-context.js +19 -0
  16. package/core/graph/structural-importance.js +148 -0
  17. package/core/graph/structural-pagerank.js +197 -0
  18. package/core/graph/summary-manager.js +13 -9
  19. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  20. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  21. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  22. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  23. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  24. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  25. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  26. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  27. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  28. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  29. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  30. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  31. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  32. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  33. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  34. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  35. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  36. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  37. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  38. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  39. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  40. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  41. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  42. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  43. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  44. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  45. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  46. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  47. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  48. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  49. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  50. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  51. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  52. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  53. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  54. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  55. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  56. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  57. package/core/indexing/admission-policy.js +139 -0
  58. package/core/indexing/artifact-builder.js +29 -12
  59. package/core/indexing/ast-chunker.js +107 -30
  60. package/core/indexing/dedup/exemplar-selector.js +19 -1
  61. package/core/indexing/gitignore-filter.js +223 -0
  62. package/core/indexing/incremental-tracker.js +99 -30
  63. package/core/indexing/index-codebase-v21.js +37 -7
  64. package/core/indexing/index-maintainer.mjs +698 -6
  65. package/core/indexing/indexer-ann.js +99 -15
  66. package/core/indexing/indexer-build.js +158 -45
  67. package/core/indexing/indexer-empty-baseline.js +80 -0
  68. package/core/indexing/indexer-manifest.js +66 -0
  69. package/core/indexing/indexer-phases.js +56 -23
  70. package/core/indexing/indexer-sparse-gram.js +54 -13
  71. package/core/indexing/indexer-utils.js +26 -208
  72. package/core/indexing/indexing-file-policy.js +32 -7
  73. package/core/indexing/maintainer-launcher.mjs +137 -0
  74. package/core/indexing/merkle-tracker.js +251 -244
  75. package/core/indexing/model-pool.js +46 -5
  76. package/core/infrastructure/code-graph-repository.js +758 -6
  77. package/core/infrastructure/code-graph-visibility.js +157 -0
  78. package/core/infrastructure/codebase-repository.js +100 -13
  79. package/core/infrastructure/config/search.js +1 -1
  80. package/core/infrastructure/db-utils.js +118 -0
  81. package/core/infrastructure/dedup-hashing.js +10 -13
  82. package/core/infrastructure/hardware-capability.js +17 -7
  83. package/core/infrastructure/index.js +10 -2
  84. package/core/infrastructure/init-config.js +138 -0
  85. package/core/infrastructure/language-patterns/maps.js +4 -1
  86. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  87. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  88. package/core/infrastructure/language-patterns.js +69 -0
  89. package/core/infrastructure/model-registry.js +20 -0
  90. package/core/infrastructure/native-inference.js +7 -12
  91. package/core/infrastructure/native-resolver.js +52 -37
  92. package/core/infrastructure/native-sparse-gram.js +261 -20
  93. package/core/infrastructure/native-tokenizer.js +6 -15
  94. package/core/infrastructure/simd-distance.js +10 -16
  95. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  96. package/core/infrastructure/structural-alias-resolver.js +122 -0
  97. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  98. package/core/infrastructure/structural-context-repository.js +472 -0
  99. package/core/infrastructure/structural-context-utils.js +51 -0
  100. package/core/infrastructure/structural-graph-signals.js +121 -0
  101. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  102. package/core/infrastructure/structural-source-definitions.js +100 -0
  103. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  104. package/core/infrastructure/tree-sitter-provider.js +811 -37
  105. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  106. package/core/query/query-router.js +55 -5
  107. package/core/ranking/file-kind-ranking.js +2192 -15
  108. package/core/ranking/late-interaction-index.js +87 -12
  109. package/core/search/cli-decoration.js +290 -0
  110. package/core/search/context-expander.js +988 -78
  111. package/core/search/index.js +1 -0
  112. package/core/search/output-policy.js +275 -0
  113. package/core/search/search-anchor.js +499 -0
  114. package/core/search/search-boost.js +93 -1
  115. package/core/search/search-cli.js +61 -204
  116. package/core/search/search-hybrid.js +250 -10
  117. package/core/search/search-pattern-chunks.js +57 -8
  118. package/core/search/search-pattern-planner.js +68 -9
  119. package/core/search/search-pattern-prefilter.js +30 -10
  120. package/core/search/search-pattern-ripgrep.js +40 -4
  121. package/core/search/search-pattern-sparse-overlay.js +256 -0
  122. package/core/search/search-pattern.js +117 -29
  123. package/core/search/search-postprocess.js +479 -5
  124. package/core/search/search-read-semantic.js +277 -23
  125. package/core/search/search-read.js +82 -64
  126. package/core/search/search-reader-pin.js +71 -0
  127. package/core/search/search-rrf.js +279 -0
  128. package/core/search/search-semantic.js +110 -5
  129. package/core/search/search-server.js +273 -54
  130. package/core/search/search-trace.js +107 -0
  131. package/core/search/server-identity.js +93 -0
  132. package/core/search/session-daemon-prewarm.mjs +33 -10
  133. package/core/search/sweet-search.js +414 -9
  134. package/core/skills/sweet-index/SKILL.md +8 -6
  135. package/core/start-server.js +13 -2
  136. package/core/vector-store/binary-hnsw-index.js +194 -30
  137. package/core/vector-store/float-vector-store.js +96 -6
  138. package/core/vector-store/hnsw-index.js +220 -49
  139. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  140. package/eval/agent-read-workflows/bin/ss-find +15 -0
  141. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  142. package/eval/agent-read-workflows/bin/ss-read +14 -0
  143. package/eval/agent-read-workflows/bin/ss-search +18 -0
  144. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  145. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  146. package/mcp/read-tool.js +109 -0
  147. package/mcp/server.js +55 -15
  148. package/mcp/tool-handlers.js +14 -124
  149. package/mcp/trace-tool.js +81 -0
  150. package/package.json +25 -10
  151. package/scripts/hooks/intercept-read.mjs +55 -0
  152. package/scripts/hooks/remind-tools.mjs +40 -0
  153. package/scripts/init.js +698 -54
  154. package/scripts/inject-agent-instructions.js +431 -0
  155. package/scripts/install-prompt-reminders.js +188 -0
  156. package/scripts/install-tool-enforcement.js +220 -0
  157. package/scripts/smoke-test.js +12 -9
  158. package/scripts/uninstall.js +427 -23
  159. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,157 @@
1
+ import { readFileSync } from 'fs';
2
+ import { dirname, isAbsolute, join } from 'path';
3
+
4
/**
 * Load `reconcile-manifest.json` from the directory that contains `dbPath`.
 *
 * Best-effort: any failure while locating, reading, or parsing the file
 * yields `null`, as does a manifest whose `epoch` is not an integer.
 *
 * @param {string} dbPath - Path of the database the manifest sits next to
 * @returns {object|null} The parsed manifest, or `null` when unusable
 */
export function readAdjacentManifest(dbPath) {
  let parsed;
  try {
    const manifestFile = join(dirname(dbPath), 'reconcile-manifest.json');
    parsed = JSON.parse(readFileSync(manifestFile, 'utf8'));
  } catch {
    return null;
  }
  if (!Number.isInteger(parsed?.epoch)) return null;
  return parsed;
}
12
+
13
/**
 * Convenience accessor for the epoch recorded in the manifest next to `dbPath`.
 *
 * @param {string} dbPath - Path of the database the manifest sits next to
 * @returns {number|null} The manifest epoch, or `null` when no usable manifest was read
 */
export function readAdjacentManifestEpoch(dbPath) {
  const manifest = readAdjacentManifest(dbPath);
  if (manifest == null) return null;
  return manifest.epoch ?? null;
}
16
+
17
/**
 * Work out which code-graph database file a manifest points at.
 *
 * The manifest may name the file under `codeGraph.path` or, failing that,
 * `codeGraph.dbPath`. A relative descriptor is resolved against the directory
 * of `dbPath`; an absolute one is returned untouched. When there is no usable
 * (non-empty string) descriptor, `dbPath` itself is returned.
 *
 * @param {string} dbPath - Base database path
 * @param {object|null} [manifest] - Manifest to consult; read from disk when omitted
 * @returns {string} Path of the code-graph database to open
 */
export function resolveManifestCodeGraphPath(dbPath, manifest = readAdjacentManifest(dbPath)) {
  const section = manifest?.codeGraph;
  const descriptor = section?.path || section?.dbPath;
  if (typeof descriptor !== 'string' || descriptor === '') {
    return dbPath;
  }
  if (isAbsolute(descriptor)) {
    return descriptor;
  }
  return join(dirname(dbPath), descriptor);
}
22
+
23
/**
 * Report whether `table` exposes every column listed in `columns`.
 *
 * Column names come from `PRAGMA table_info`. The table name is interpolated
 * into the statement text, so it must be a trusted identifier.
 *
 * @param {object} db - Handle exposing `prepare(sql).all()`
 * @param {string} table - Table to inspect
 * @param {string[]} columns - Column names that must all be present
 * @returns {boolean} `true` when no requested column is missing
 */
function hasColumns(db, table, columns) {
  const present = new Set();
  for (const info of db.prepare(`PRAGMA table_info(${table})`).all()) {
    present.add(info.name);
  }
  for (const wanted of columns) {
    if (!present.has(wanted)) return false;
  }
  return true;
}
27
+
28
/**
 * Turn a table alias into a column prefix suitable for SQL fragments.
 *
 * A falsy alias produces an empty prefix. Otherwise the alias is stringified,
 * one trailing dot (if present) is dropped, and a single dot is appended —
 * unless nothing is left, in which case the prefix is empty.
 *
 * @param {string} [alias] - Table alias, with or without a trailing dot
 * @returns {string} `"<alias>."` or `""`
 */
export function sqlAliasPrefix(alias = '') {
  if (!alias) return '';
  let name = String(alias);
  if (name.endsWith('.')) {
    name = name.slice(0, -1);
  }
  if (name === '') return '';
  return `${name}.`;
}
33
+
34
/**
 * Build the visibility descriptor consumed by the `*VisibilitySql` and
 * `*VisibilityParams` helpers.
 *
 * @param {object} db - Handle used to probe the `entities` and `relationships` tables
 * @param {number|null|undefined} manifestEpoch - Epoch to filter by; anything that is not an integer is stored as `null`
 * @returns {{ manifestEpoch: number|null, entities: boolean, relationships: boolean }}
 *   `entities` / `relationships` are `true` when the respective table has both
 *   `epoch_written` and `epoch_retired` columns
 */
export function createCodeGraphVisibility(db, manifestEpoch) {
  const epochColumns = ['epoch_written', 'epoch_retired'];
  const epoch = Number.isInteger(manifestEpoch) ? manifestEpoch : null;
  const entities = hasColumns(db, 'entities', epochColumns);
  const relationships = hasColumns(db, 'relationships', epochColumns);
  return { manifestEpoch: epoch, entities, relationships };
}
41
+
42
/**
 * Produce the SQL predicate that selects visible rows of the entities table.
 *
 * Three shapes are possible:
 *  - no epoch columns on the table: only `stale_since IS NULL`;
 *  - epoch columns but no integer manifest epoch: not stale and not retired;
 *  - epoch columns and an integer manifest epoch: a three-placeholder
 *    predicate whose binds come from `entityVisibilityParams`.
 *
 * @param {{ manifestEpoch: number|null, entities: boolean }|null|undefined} visibility
 * @param {string} [alias] - Optional table alias used to qualify column names
 * @returns {string} SQL boolean expression (no leading `WHERE`/`AND`)
 */
export function entityVisibilitySql(visibility, alias = '') {
  const p = sqlAliasPrefix(alias);
  const hasEpochColumns = Boolean(visibility?.entities);
  if (!hasEpochColumns) {
    return `${p}stale_since IS NULL`;
  }
  if (!Number.isInteger(visibility.manifestEpoch)) {
    return `${p}stale_since IS NULL AND ${p}epoch_retired IS NULL`;
  }
  // One clause per line, joined with a newline.
  return [
    `(${p}epoch_written IS NULL OR ${p}epoch_written <= ?)`,
    `AND (${p}epoch_retired IS NULL OR ${p}epoch_retired > ?)`,
    `AND (${p}stale_since IS NULL OR (${p}epoch_retired IS NOT NULL AND ${p}epoch_retired > ?))`,
  ].join('\n');
}
52
+
53
/**
 * Bind parameters matching the placeholders emitted by `entityVisibilitySql`.
 *
 * @param {{ manifestEpoch: number|null, entities: boolean }|null|undefined} visibility
 * @returns {number[]} The manifest epoch three times when the entities table
 *   has epoch columns and the epoch is an integer; otherwise an empty array
 */
export function entityVisibilityParams(visibility) {
  const epoch = visibility?.manifestEpoch;
  const filtersByEpoch = Boolean(visibility?.entities) && Number.isInteger(epoch);
  return filtersByEpoch ? new Array(3).fill(epoch) : [];
}
57
+
58
/**
 * Produce the SQL predicate that selects visible rows of the relationships table.
 *
 * Without epoch columns on the table the predicate is the constant `1=1`.
 * With epoch columns but no integer manifest epoch, only un-retired rows are
 * visible. With both, a two-placeholder predicate is returned whose binds
 * come from `relationshipVisibilityParams`.
 *
 * @param {{ manifestEpoch: number|null, relationships: boolean }|null|undefined} visibility
 * @param {string} [alias] - Table alias used to qualify column names (defaults to `r`)
 * @returns {string} SQL boolean expression (no leading `WHERE`/`AND`)
 */
export function relationshipVisibilitySql(visibility, alias = 'r') {
  if (!visibility?.relationships) return '1=1';
  const p = sqlAliasPrefix(alias);
  if (!Number.isInteger(visibility.manifestEpoch)) {
    return `${p}epoch_retired IS NULL`;
  }
  // One clause per line, joined with a newline.
  return [
    `(${p}epoch_written IS NULL OR ${p}epoch_written <= ?)`,
    `AND (${p}epoch_retired IS NULL OR ${p}epoch_retired > ?)`,
  ].join('\n');
}
67
+
68
/**
 * Bind parameters matching the placeholders emitted by `relationshipVisibilitySql`.
 *
 * @param {{ manifestEpoch: number|null, relationships: boolean }|null|undefined} visibility
 * @returns {number[]} The manifest epoch twice when the relationships table
 *   has epoch columns and the epoch is an integer; otherwise an empty array
 */
export function relationshipVisibilityParams(visibility) {
  const epoch = visibility?.manifestEpoch;
  const filtersByEpoch = Boolean(visibility?.relationships) && Number.isInteger(epoch);
  return filtersByEpoch ? new Array(2).fill(epoch) : [];
}
72
+
73
+ export class CodeGraphReaderVisibility {
74
+ constructor(baseDbPath, opts = {}) {
75
+ this.baseDbPath = baseDbPath;
76
+ this.explicitManifestEpoch = Number.isInteger(opts.manifestEpoch);
77
+ this.manifestEpoch = this.explicitManifestEpoch ? opts.manifestEpoch : null;
78
+ const manifest = this.explicitManifestEpoch ? readAdjacentManifest(this.baseDbPath) : null;
79
+ this.dbPath = this.explicitManifestEpoch
80
+ ? resolveManifestCodeGraphPath(this.baseDbPath, manifest)
81
+ : baseDbPath;
82
+ this._visibility = null;
83
+ this._hasHcgsSummaryMetadata = null;
84
+ this._summaryVisibilityCache = new Map();
85
+ if (!this.explicitManifestEpoch) this.sync();
86
+ }
87
+
88
+ sync(onChange) {
89
+ if (this.explicitManifestEpoch) return false;
90
+ const manifest = readAdjacentManifest(this.baseDbPath);
91
+ const nextEpoch = Number.isInteger(manifest?.epoch) ? manifest.epoch : null;
92
+ const nextDbPath = resolveManifestCodeGraphPath(this.baseDbPath, manifest);
93
+ const changed = nextEpoch !== this.manifestEpoch || nextDbPath !== this.dbPath;
94
+ this.manifestEpoch = nextEpoch;
95
+ this.dbPath = nextDbPath;
96
+ if (changed) {
97
+ this.reset();
98
+ if (typeof onChange === 'function') onChange();
99
+ }
100
+ return changed;
101
+ }
102
+
103
+ reset() {
104
+ this._visibility = null;
105
+ this._hasHcgsSummaryMetadata = null;
106
+ this._summaryVisibilityCache = new Map();
107
+ }
108
+
109
+ state(db) {
110
+ if (!this._visibility) this._visibility = createCodeGraphVisibility(db, this.manifestEpoch);
111
+ return this._visibility;
112
+ }
113
+
114
+ entitySql(db, alias = '') { return entityVisibilitySql(this.state(db), alias); }
115
+ entityParams(db) { return entityVisibilityParams(this.state(db)); }
116
+ relationshipSql(db, alias = 'r') { return relationshipVisibilitySql(this.state(db), alias); }
117
+ relationshipParams(db) { return relationshipVisibilityParams(this.state(db)); }
118
+
119
+ hasSummarySidecar(db) {
120
+ if (
121
+ this._hasHcgsSummaryMetadata === null
122
+ || (this._hasHcgsSummaryMetadata === false && this.manifestEpoch === null)
123
+ ) {
124
+ this._hasHcgsSummaryMetadata = !!db.prepare(
125
+ "SELECT name FROM sqlite_master WHERE type='table' AND name='hcgs_summary_metadata'",
126
+ ).get();
127
+ }
128
+ return this._hasHcgsSummaryMetadata;
129
+ }
130
+
131
+ summaryVisible(db, entityId) {
132
+ if (!entityId || !this.hasSummarySidecar(db)) return true;
133
+ if (this.manifestEpoch === null) {
134
+ const row = db.prepare(`
135
+ SELECT epoch_retired
136
+ FROM hcgs_summary_metadata
137
+ WHERE entity_id = ?
138
+ `).get(String(entityId));
139
+ return row ? row.epoch_retired == null : false;
140
+ }
141
+ const key = `${entityId}:${this.manifestEpoch ?? 'live'}`;
142
+ if (this._summaryVisibilityCache.has(key)) return this._summaryVisibilityCache.get(key);
143
+ const row = db.prepare(`
144
+ SELECT epoch_written, epoch_retired
145
+ FROM hcgs_summary_metadata
146
+ WHERE entity_id = ?
147
+ `).get(String(entityId));
148
+ const visible = row
149
+ ? (Number.isInteger(this.manifestEpoch)
150
+ ? (row.epoch_written == null || row.epoch_written <= this.manifestEpoch)
151
+ && (row.epoch_retired == null || row.epoch_retired > this.manifestEpoch)
152
+ : row.epoch_retired == null)
153
+ : false;
154
+ this._summaryVisibilityCache.set(key, visible);
155
+ return visible;
156
+ }
157
+ }
@@ -6,16 +6,68 @@
6
6
  */
7
7
 
8
8
  import Database from 'better-sqlite3';
9
- import { applyReadPragmas } from './db-utils.js';
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+ import { applyReadPragmas, assertInClauseSize } from './db-utils.js';
12
+
13
/**
 * Read `reconcile-manifest.json` from the directory containing `dbPath`.
 *
 * Best-effort: a missing/unreadable file, invalid JSON, or a manifest whose
 * `epoch` is not an integer all produce `null`.
 *
 * @param {string} dbPath - Path of the database the manifest sits next to
 * @returns {object|null} Parsed manifest or `null`
 */
function readAdjacentManifest(dbPath) {
  let manifest = null;
  try {
    const directory = path.dirname(dbPath);
    const raw = fs.readFileSync(path.join(directory, 'reconcile-manifest.json'), 'utf8');
    manifest = JSON.parse(raw);
  } catch {
    return null;
  }
  if (Number.isInteger(manifest?.epoch)) {
    return manifest;
  }
  return null;
}
22
+
23
/**
 * Work out which vectors database file a manifest points at.
 *
 * Looks at `vectors.path`, then `vectors.dbPath`. A relative descriptor is
 * resolved against the directory of `dbPath`; an absolute one is returned
 * as-is. Without a usable (non-empty string) descriptor, `dbPath` is returned.
 *
 * @param {string} dbPath - Base database path
 * @param {object|null} [manifest] - Manifest to consult; read from disk when omitted
 * @returns {string} Path of the vectors database to open
 */
function resolveManifestVectorsPath(dbPath, manifest = readAdjacentManifest(dbPath)) {
  const section = manifest?.vectors;
  const descriptor = section?.path || section?.dbPath;
  if (typeof descriptor !== 'string' || descriptor === '') {
    return dbPath;
  }
  if (path.isAbsolute(descriptor)) {
    return descriptor;
  }
  return path.join(path.dirname(dbPath), descriptor);
}
10
28
 
11
29
  export class CodebaseRepository {
12
- constructor(dbPath) {
13
- this._dbPath = dbPath;
30
+ constructor(dbPath, options = {}) {
31
+ this._baseDbPath = dbPath;
32
+ this._explicitManifestEpoch = Number.isInteger(options.manifestEpoch);
33
+ this._manifestEpoch = this._explicitManifestEpoch ? options.manifestEpoch : null;
34
+ const manifest = this._explicitManifestEpoch ? readAdjacentManifest(this._baseDbPath) : null;
35
+ this._dbPath = this._explicitManifestEpoch
36
+ ? resolveManifestVectorsPath(this._baseDbPath, manifest)
37
+ : dbPath;
14
38
  this._db = null;
39
+ this._hasEpochVisibility = null;
40
+ if (!this._explicitManifestEpoch) {
41
+ this._syncAdjacentManifest();
42
+ }
43
+ }
44
+
45
+ _syncAdjacentManifest() {
46
+ if (this._explicitManifestEpoch) return false;
47
+ const manifest = readAdjacentManifest(this._baseDbPath);
48
+ const nextEpoch = Number.isInteger(manifest?.epoch) ? manifest.epoch : null;
49
+ const nextDbPath = resolveManifestVectorsPath(this._baseDbPath, manifest);
50
+ const changed = nextEpoch !== this._manifestEpoch || nextDbPath !== this._dbPath;
51
+ this._manifestEpoch = nextEpoch;
52
+ this._dbPath = nextDbPath;
53
+ if (changed) {
54
+ this.close();
55
+ }
56
+ return changed;
57
+ }
58
+
59
+ refreshManifestEpoch() {
60
+ this._syncAdjacentManifest();
61
+ return this._manifestEpoch;
62
+ }
63
+
64
+ getManifestEpoch() {
65
+ return this._manifestEpoch;
15
66
  }
16
67
 
17
68
  /** Lazy read-only connection with optimized pragmas. */
18
69
  _open() {
70
+ this._syncAdjacentManifest();
19
71
  if (!this._db) {
20
72
  this._db = new Database(this._dbPath, { readonly: true });
21
73
  applyReadPragmas(this._db);
@@ -23,13 +75,31 @@ export class CodebaseRepository {
23
75
  return this._db;
24
76
  }
25
77
 
78
+ _visibility(db) {
79
+ if (this._hasEpochVisibility === null) {
80
+ const cols = db.prepare('PRAGMA table_info(vectors)').all().map((c) => c.name);
81
+ this._hasEpochVisibility = cols.includes('epoch_written') && cols.includes('epoch_retired');
82
+ }
83
+ if (!this._hasEpochVisibility) return { sql: '', params: [] };
84
+ if (this._manifestEpoch !== null) {
85
+ return {
86
+ sql: '(epoch_written IS NULL OR epoch_written <= ?) AND (epoch_retired IS NULL OR epoch_retired > ?)',
87
+ params: [this._manifestEpoch, this._manifestEpoch],
88
+ };
89
+ }
90
+ return { sql: 'epoch_retired IS NULL', params: [] };
91
+ }
92
+
26
93
  /**
27
94
  * Iterate all vectors (for O(N) scan or chunk type map building).
28
95
  * Returns rows with: id, embedding (Buffer), text, metadata (string), file_path.
29
96
  */
30
97
  * iterateVectors() {
31
98
  const db = this._open();
32
- yield* db.prepare('SELECT id, embedding, text, metadata, file_path FROM vectors').iterate();
99
+ const visibility = this._visibility(db);
100
+ const where = visibility.sql ? ` WHERE ${visibility.sql}` : '';
101
+ yield* db.prepare(`SELECT id, embedding, text, metadata, file_path FROM vectors${where}`)
102
+ .iterate(...visibility.params);
33
103
  }
34
104
 
35
105
  /**
@@ -40,12 +110,15 @@ export class CodebaseRepository {
40
110
  getEmbeddingsByIds(ids) {
41
111
  const uniqueIds = [...new Set(ids)];
42
112
  if (uniqueIds.length === 0) return new Map();
113
+ assertInClauseSize(uniqueIds.length, 'CodebaseRepository.getEmbeddingsByIds');
43
114
 
44
115
  const db = this._open();
45
116
  const placeholders = uniqueIds.map(() => '?').join(',');
117
+ const visibility = this._visibility(db);
118
+ const visibilityClause = visibility.sql ? ` AND ${visibility.sql}` : '';
46
119
  const rows = db.prepare(
47
- `SELECT id, embedding FROM vectors WHERE id IN (${placeholders})`
48
- ).all(...uniqueIds);
120
+ `SELECT id, embedding FROM vectors WHERE id IN (${placeholders})${visibilityClause}`
121
+ ).all(...uniqueIds, ...visibility.params);
49
122
 
50
123
  const result = new Map();
51
124
  for (const row of rows) {
@@ -67,11 +140,14 @@ export class CodebaseRepository {
67
140
  getChunkTexts(ids) {
68
141
  if (!ids || ids.length === 0) return new Map();
69
142
  try {
143
+ assertInClauseSize(ids.length, 'CodebaseRepository.getChunkTexts');
70
144
  const db = this._open();
71
145
  const ph = ids.map(() => '?').join(',');
146
+ const visibility = this._visibility(db);
147
+ const visibilityClause = visibility.sql ? ` AND ${visibility.sql}` : '';
72
148
  const rows = db.prepare(
73
- `SELECT id, text FROM vectors WHERE id IN (${ph})`
74
- ).all(...ids);
149
+ `SELECT id, text FROM vectors WHERE id IN (${ph})${visibilityClause}`
150
+ ).all(...ids, ...visibility.params);
75
151
  return new Map(rows.map(r => [r.id, r.text]));
76
152
  } catch {
77
153
  return new Map();
@@ -91,9 +167,11 @@ export class CodebaseRepository {
91
167
  if (!filePath) return [];
92
168
  try {
93
169
  const db = this._open();
170
+ const visibility = this._visibility(db);
171
+ const visibilityClause = visibility.sql ? ` AND ${visibility.sql}` : '';
94
172
  return db.prepare(
95
- 'SELECT id, file_path, text, metadata FROM vectors WHERE file_path = ? ORDER BY id'
96
- ).all(filePath);
173
+ `SELECT id, file_path, text, metadata FROM vectors WHERE file_path = ?${visibilityClause} ORDER BY id`
174
+ ).all(filePath, ...visibility.params);
97
175
  } catch {
98
176
  return [];
99
177
  }
@@ -105,10 +183,14 @@ export class CodebaseRepository {
105
183
  * @returns {Array<{id, embedding: Buffer, text: string, metadata: string}>}
106
184
  */
107
185
  scanAllVectors() {
186
+ this._syncAdjacentManifest();
108
187
  const db = new Database(this._dbPath, { readonly: true });
109
188
  applyReadPragmas(db);
110
189
  try {
111
- return db.prepare('SELECT id, embedding, text, metadata FROM vectors').all();
190
+ const visibility = this._visibility(db);
191
+ const where = visibility.sql ? ` WHERE ${visibility.sql}` : '';
192
+ return db.prepare(`SELECT id, embedding, text, metadata FROM vectors${where}`)
193
+ .all(...visibility.params);
112
194
  } finally {
113
195
  db.close();
114
196
  }
@@ -128,19 +210,23 @@ export class CodebaseRepository {
128
210
  if (!clusterIds || clusterIds.length === 0) return [];
129
211
  const uniqueClusters = [...new Set(clusterIds)];
130
212
  const uniqueExclude = [...new Set(excludeIds)];
213
+ assertInClauseSize(uniqueClusters.length, 'CodebaseRepository.findSiblingsByClusterIds.clusters');
214
+ assertInClauseSize(uniqueExclude.length, 'CodebaseRepository.findSiblingsByClusterIds.exclude');
131
215
  const db = this._open();
132
216
 
133
217
  const clusterPh = uniqueClusters.map(() => '?').join(',');
134
218
  const excludePh = uniqueExclude.map(() => '?').join(',');
135
219
  const excludeClause = uniqueExclude.length > 0 ? ` AND id NOT IN (${excludePh})` : '';
220
+ const visibility = this._visibility(db);
221
+ const visibilityClause = visibility.sql ? ` AND ${visibility.sql}` : '';
136
222
 
137
223
  const sql = `
138
224
  SELECT id, file_path, text, metadata
139
225
  FROM vectors
140
- WHERE json_extract(metadata, '$.clusterId') IN (${clusterPh})${excludeClause}
226
+ WHERE json_extract(metadata, '$.clusterId') IN (${clusterPh})${excludeClause}${visibilityClause}
141
227
  `;
142
228
 
143
- return db.prepare(sql).all(...uniqueClusters, ...uniqueExclude);
229
+ return db.prepare(sql).all(...uniqueClusters, ...uniqueExclude, ...visibility.params);
144
230
  }
145
231
 
146
232
  close() {
@@ -148,5 +234,6 @@ export class CodebaseRepository {
148
234
  this._db.close();
149
235
  this._db = null;
150
236
  }
237
+ this._hasEpochVisibility = null;
151
238
  }
152
239
  }
@@ -61,7 +61,7 @@ export const FILE_PATTERNS = {
61
61
  '**/*.{rb,erb}', // Ruby
62
62
  '**/*.php', // PHP
63
63
  '**/*.{swift,m,mm}', // Apple
64
- '**/*.{lua,zig,nim,ex,exs}', // Other
64
+ '**/*.{lua,zig,nim,ex,exs,dart}', // Other
65
65
  '**/*.{sh,bash,zsh,fish,ps1}', // Shell
66
66
  '**/*.sql', // SQL
67
67
  '**/*.proto', // Protobuf
@@ -4,6 +4,19 @@
4
4
 
5
5
  import Database from 'better-sqlite3';
6
6
 
7
+ /**
8
+ * Maximum number of bound parameters in a single SQLite statement.
9
+ *
10
+ * SQLite's compile-time `SQLITE_MAX_VARIABLE_NUMBER` defaults to 32766 on builds
11
+ * since 3.32, but the historic floor is 999, and `better-sqlite3` does
12
+ * not expose `sqlite3_limit()` to raise it at runtime. Using 999 makes
13
+ * `chunkedIn()` safe against any SQLite version or fork the runtime may
14
+ * be vendoring, with no dependency on binding internals.
15
+ *
16
+ * Reference: https://www.sqlite.org/limits.html (LIMIT_VARIABLE_NUMBER).
17
+ */
18
+ export const SAFE_IN_CLAUSE_BATCH = 999;
19
+
7
20
  /**
8
21
  * Apply read-path PRAGMA optimizations to a read-only database connection.
9
22
  *
@@ -43,6 +56,111 @@ export function applyReadPragmas(db, options = {}) {
43
56
  }
44
57
  }
45
58
 
59
/**
 * Fail-fast guard for call sites that splat an array into a single
 * `IN (?,?,...)` clause and cannot easily be moved to `chunkedIn` (for
 * example UPDATE/DELETE statements with non-trivial templates, or queries
 * with several interpolated placeholder groups).
 *
 * Raises a descriptive error before SQLite gets the chance to reject the
 * statement with its opaque "too many SQL variables" message.
 *
 * @param {number} n - Length of the values array
 * @param {string} [siteLabel] - Caller identifier for the error message
 * @throws {TypeError} When `n` is not a finite, non-negative number
 * @throws {RangeError} When `n` exceeds `SAFE_IN_CLAUSE_BATCH`
 */
export function assertInClauseSize(n, siteLabel = 'IN clause') {
  const isValidCount = typeof n === 'number' && Number.isFinite(n) && n >= 0;
  if (!isValidCount) {
    throw new TypeError(`assertInClauseSize: invalid size ${String(n)}`);
  }
  if (n <= SAFE_IN_CLAUSE_BATCH) return;

  const message = [
    `${siteLabel} would bind ${n} parameters; SQLite IN(?,?,...) clauses `,
    `must be chunked at SAFE_IN_CLAUSE_BATCH=${SAFE_IN_CLAUSE_BATCH}. `,
    `Migrate this call to chunkedIn() in core/infrastructure/db-utils.js.`,
  ].join('');
  throw new RangeError(message);
}
82
+
83
/**
 * Execute a SELECT-style query with a dynamic `IN (?,?,...)` clause across
 * an arbitrarily large collection of values by splitting the binds into
 * batches. The SQL template must contain the literal token
 * `__IN_PLACEHOLDERS__` exactly once; it is replaced with the right number
 * of `?`s for each batch and the per-batch rows are concatenated.
 *
 * `values` may be an array or any other iterable / array-like (for example
 * a `Set` or a typed array); non-arrays are copied into an array once up
 * front. Empty or missing `values` yields an empty array without preparing a
 * statement.
 *
 * One statement is prepared per distinct batch length (at most two: full
 * batches and the final partial batch) and reused across batches.
 *
 * Rows are NOT re-sorted across batches — within a batch SQLite honours any
 * ORDER BY in the template, but batch boundaries break global order. Callers
 * that need globally ordered output must sort the result themselves.
 *
 * @template T
 * @param {import('better-sqlite3').Database} db - Open SQLite handle
 * @param {string} sqlTemplate - Must contain `__IN_PLACEHOLDERS__` once
 * @param {Iterable<unknown>|ArrayLike<unknown>} values - Values to bind in chunks
 * @param {number} [batchSize] - Maximum binds per statement; defaults to
 *   `SAFE_IN_CLAUSE_BATCH` and should not exceed it
 * @returns {T[]} Concatenated rows from each batch
 * @throws {TypeError} When `db` or `sqlTemplate` is invalid
 * @throws {RangeError} When `batchSize` is not a positive integer
 */
export function chunkedIn(db, sqlTemplate, values, batchSize = SAFE_IN_CLAUSE_BATCH) {
  if (!db || typeof db.prepare !== 'function') {
    throw new TypeError('chunkedIn: db must be a better-sqlite3 Database');
  }
  if (typeof sqlTemplate !== 'string' || !sqlTemplate.includes('__IN_PLACEHOLDERS__')) {
    throw new TypeError("chunkedIn: sqlTemplate must contain '__IN_PLACEHOLDERS__'");
  }
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`chunkedIn: batchSize must be a positive integer, got ${String(batchSize)}`);
  }
  if (!values) return [];

  // Materialize once: covers iterables that have no `.length` (e.g. Set) and
  // avoids re-copying the whole input on every batch.
  const items = Array.isArray(values) ? values : Array.from(values);
  if (items.length === 0) return [];

  // Every full batch shares the same SQL text, so cache by batch length.
  const statementsByLength = new Map();
  const out = [];
  for (let start = 0; start < items.length; start += batchSize) {
    const batch = items.slice(start, start + batchSize);
    let statement = statementsByLength.get(batch.length);
    if (!statement) {
      const placeholders = Array.from(batch, () => '?').join(',');
      statement = db.prepare(sqlTemplate.replace('__IN_PLACEHOLDERS__', placeholders));
      statementsByLength.set(batch.length, statement);
    }
    // Push row by row rather than spreading, so a very large result set
    // cannot overflow the argument list.
    for (const row of statement.all(...batch)) out.push(row);
  }
  return out;
}
128
+
129
/**
 * UPDATE/DELETE counterpart to `chunkedIn`. The SQL template must contain
 * the literal token `__IN_PLACEHOLDERS__` exactly once. All batches run
 * inside a single transaction, so a failure in any batch rolls the whole
 * operation back, and the total `changes` across batches is returned.
 *
 * Non-array iterables are copied into an array once before any work is
 * done; empty or missing `values` returns `{ changes: 0 }` without opening
 * a transaction. One statement is prepared per distinct batch length and
 * reused across batches.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} sqlTemplate - Must contain `__IN_PLACEHOLDERS__` once
 * @param {Iterable<unknown>} values - Values to bind in chunks
 * @param {number} [batchSize] - Maximum binds per statement; defaults to
 *   `SAFE_IN_CLAUSE_BATCH` and should not exceed it
 * @returns {{ changes: number }} Cumulative row-change count
 * @throws {TypeError} When `db` or `sqlTemplate` is invalid, or `values` is
 *   a non-array that cannot be iterated
 * @throws {RangeError} When `batchSize` is not a positive integer
 */
export function chunkedInExec(db, sqlTemplate, values, batchSize = SAFE_IN_CLAUSE_BATCH) {
  if (!db || typeof db.prepare !== 'function' || typeof db.transaction !== 'function') {
    throw new TypeError('chunkedInExec: db must be a better-sqlite3 Database');
  }
  if (typeof sqlTemplate !== 'string' || !sqlTemplate.includes('__IN_PLACEHOLDERS__')) {
    throw new TypeError("chunkedInExec: sqlTemplate must contain '__IN_PLACEHOLDERS__'");
  }
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`chunkedInExec: batchSize must be a positive integer, got ${String(batchSize)}`);
  }
  if (!values) return { changes: 0 };

  // Materialize before deciding whether there is work, so an empty Set does
  // not open a transaction for nothing.
  const items = Array.isArray(values) ? values : [...values];
  if (items.length === 0) return { changes: 0 };

  const runBatches = db.transaction((rows) => {
    // Every full batch shares the same SQL text, so cache by batch length.
    const statementsByLength = new Map();
    let changes = 0;
    for (let start = 0; start < rows.length; start += batchSize) {
      const batch = rows.slice(start, start + batchSize);
      let statement = statementsByLength.get(batch.length);
      if (!statement) {
        const placeholders = batch.map(() => '?').join(',');
        statement = db.prepare(sqlTemplate.replace('__IN_PLACEHOLDERS__', placeholders));
        statementsByLength.set(batch.length, statement);
      }
      changes += statement.run(...batch).changes || 0;
    }
    return changes;
  });

  return { changes: runBatches(items) };
}
163
+
46
164
  /**
47
165
  * Warm the graph database page cache with lightweight queries.
48
166
  * Opens an ephemeral connection, touches FTS5/relationship/summary pages, closes.
@@ -7,12 +7,9 @@
7
7
  * fall back to a no-op that treats every chunk as its own exemplar.
8
8
  */
9
9
 
10
- import { createRequire } from 'module';
11
- import { resolveNativeAddon } from './native-resolver.js';
10
+ import { loadNativeAddon } from './native-resolver.js';
12
11
  import { DEDUP_CONFIG } from './config/dedup.js';
13
12
 
14
- const require = createRequire(import.meta.url);
15
-
16
13
  let _addon = null;
17
14
  let _loadAttempted = false;
18
15
  let _loadError = null;
@@ -20,15 +17,15 @@ let _loadError = null;
20
17
  function loadAddon() {
21
18
  if (_loadAttempted) return _addon;
22
19
  _loadAttempted = true;
23
- const addonPath = resolveNativeAddon();
24
- if (!addonPath) {
25
- _loadError = new Error('dedup-hashing: native addon not resolved for this platform');
26
- return null;
27
- }
28
- try {
29
- _addon = require(addonPath);
30
- } catch (e) {
31
- _loadError = e;
20
+ // CUDA-preferred with CPU fallback (see loadNativeAddon): a CUDA addon that
21
+ // can't load on a no-GPU box falls back to the plain CPU addon.
22
+ const res = loadNativeAddon({
23
+ validate: (m) => typeof m.dedupFingerprintBatch === 'function' && typeof m.dedupCluster === 'function',
24
+ });
25
+ if (res) {
26
+ _addon = res.mod;
27
+ } else {
28
+ _loadError = new Error('dedup-hashing: native addon not available for this platform');
32
29
  _addon = null;
33
30
  }
34
31
  return _addon;
@@ -12,9 +12,10 @@
12
12
  * - `sysctl` is a cheap (~5 ms) one-shot call. The result is cached so
13
13
  * repeated consumers (init, native-inference, uninstall) all share one
14
14
  * detection. Hardware doesn't change at runtime.
15
- * - Never throws. Unknown hardware degrades to "candle-cpu fallback" —
16
- * this module is only advisory; absence of a capability is never an
17
- * error.
15
+ * - Never throws. Unknown / no-accelerator hardware degrades to the
16
+ * "ort-cpu" preference — the optimized ORT INT8 CPU path used for both
17
+ * indexing and queries. It never means "load candle on CPU". This
18
+ * module is only advisory; absence of a capability is never an error.
18
19
  * - Unknown new Apple chips (e.g. an M5 shipped after this file) are
19
20
  * admitted as cascade-eligible via the ">= 3" rule — we prefer
20
21
  * optimistic new-hardware behavior to silently refusing to try.
@@ -214,7 +215,10 @@ function probeAddonCudaAvailability() {
214
215
  * cudaReason — human string explaining eligible/not
215
216
  * candleGpuBackend — "metal" | "cuda" | null
216
217
  * inferenceBackendPreference — "coreml-cascade" | "candle-metal"
217
- * | "candle-cuda" | "candle-cpu"
218
+ * | "candle-cuda" | "ort-cpu"
219
+ * ("ort-cpu" = no usable accelerator →
220
+ * optimized ORT INT8 CPU for indexing
221
+ * and queries; candle is never armed)
218
222
  */
219
223
  export function detectHardwareCapability() {
220
224
  if (_cached) return _cached;
@@ -281,7 +285,7 @@ export function detectHardwareCapability() {
281
285
  // Candle GPU backend availability.
282
286
  // darwin-arm64 → metal (bundled with the darwin-arm64 native package)
283
287
  // linux-*-gnu + NVIDIA + cuda-enabled addon → cuda
284
- // everything else → null (falls through to candle CPU)
288
+ // everything else → null (falls through to ORT INT8 CPU)
285
289
  let candleGpuBackend = null;
286
290
  if (platform === 'darwin' && arch === 'arm64') {
287
291
  candleGpuBackend = 'metal';
@@ -289,10 +293,16 @@ export function detectHardwareCapability() {
289
293
  candleGpuBackend = 'cuda';
290
294
  }
291
295
 
292
- // Preference order: coreml-cascade > candle-metal > candle-cuda > candle-cpu.
296
+ // Preference order: coreml-cascade > candle-metal > candle-cuda > ort-cpu.
293
297
  // coreml-cascade and candle-cuda never co-exist on the same host
294
298
  // (one is darwin, the other is linux), so the ordering is orthogonal
295
299
  // in practice.
300
+ //
301
+ // The no-accelerator fallback is 'ort-cpu', NOT 'candle-cpu': a host with
302
+ // no usable Metal / CoreML / CUDA accelerator indexes (and queries) on the
303
+ // optimized ORT INT8 CPU path. candle/native FP32 on CPU is slower AND
304
+ // lives in a different embedding space than the ORT INT8 query encoder, so
305
+ // it must never be the fallback. See AGENTS.md + core/indexing/model-pool.js.
296
306
  let inferenceBackendPreference;
297
307
  if (coremlCascadeEligible) {
298
308
  inferenceBackendPreference = 'coreml-cascade';
@@ -301,7 +311,7 @@ export function detectHardwareCapability() {
301
311
  } else if (candleGpuBackend === 'cuda') {
302
312
  inferenceBackendPreference = 'candle-cuda';
303
313
  } else {
304
- inferenceBackendPreference = 'candle-cpu';
314
+ inferenceBackendPreference = 'ort-cpu';
305
315
  }
306
316
 
307
317
  _cached = Object.freeze({
@@ -8,7 +8,13 @@ export * from './config/index.js';
8
8
  export { default as config } from './config/index.js';
9
9
 
10
10
  // Database utilities
11
- export { applyReadPragmas } from './db-utils.js';
11
+ export {
12
+ applyReadPragmas,
13
+ SAFE_IN_CLAUSE_BATCH,
14
+ assertInClauseSize,
15
+ chunkedIn,
16
+ chunkedInExec,
17
+ } from './db-utils.js';
12
18
 
13
19
  // Repositories (DDD infrastructure layer)
14
20
  export { CodebaseRepository } from './codebase-repository.js';
@@ -16,7 +22,7 @@ export { CodeGraphRepository } from './code-graph-repository.js';
16
22
 
17
23
  // Model management
18
24
  export { fetchModel, fetchModelFile, resolveModelFile, computeFileHash, getModelCacheDir, isCacheValid } from './model-fetcher.js';
19
- export { MODEL_REGISTRY, getModelEntry, getModelsForProfile, getSkippedOptInModels, isModelEnabled } from './model-registry.js';
25
+ export { MODEL_REGISTRY, getModelEntry, getModelsForProfile, getSkippedOptInModels, isModelEnabled, isNativeAcceleratedModel } from './model-registry.js';
20
26
 
21
27
  // Native platform resolution
22
28
  export { resolveNativeAddon, resolveNativeBinary, getPlatformInfo } from './native-resolver.js';
@@ -82,6 +88,8 @@ export {
82
88
  loadInitConfig,
83
89
  writeInitConfig,
84
90
  readPersistedLiPolicy,
91
+ resolveRuntimeLiModel,
92
+ applyPersistedLiModel,
85
93
  } from './init-config.js';
86
94
 
87
95
  // Language analysis