codebase-context 1.6.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +417 -282
  3. package/dist/analyzers/angular/index.d.ts.map +1 -1
  4. package/dist/analyzers/angular/index.js +91 -40
  5. package/dist/analyzers/angular/index.js.map +1 -1
  6. package/dist/analyzers/generic/index.d.ts +1 -0
  7. package/dist/analyzers/generic/index.d.ts.map +1 -1
  8. package/dist/analyzers/generic/index.js +94 -14
  9. package/dist/analyzers/generic/index.js.map +1 -1
  10. package/dist/cli-formatters.d.ts +47 -0
  11. package/dist/cli-formatters.d.ts.map +1 -0
  12. package/dist/cli-formatters.js +803 -0
  13. package/dist/cli-formatters.js.map +1 -0
  14. package/dist/cli-memory.d.ts +5 -0
  15. package/dist/cli-memory.d.ts.map +1 -0
  16. package/dist/cli-memory.js +218 -0
  17. package/dist/cli-memory.js.map +1 -0
  18. package/dist/cli.d.ts +3 -1
  19. package/dist/cli.d.ts.map +1 -1
  20. package/dist/cli.js +317 -88
  21. package/dist/cli.js.map +1 -1
  22. package/dist/constants/codebase-context.d.ts +13 -0
  23. package/dist/constants/codebase-context.d.ts.map +1 -1
  24. package/dist/constants/codebase-context.js +13 -0
  25. package/dist/constants/codebase-context.js.map +1 -1
  26. package/dist/core/auto-refresh.d.ts +16 -0
  27. package/dist/core/auto-refresh.d.ts.map +1 -0
  28. package/dist/core/auto-refresh.js +25 -0
  29. package/dist/core/auto-refresh.js.map +1 -0
  30. package/dist/core/file-watcher.d.ts +15 -0
  31. package/dist/core/file-watcher.d.ts.map +1 -0
  32. package/dist/core/file-watcher.js +59 -0
  33. package/dist/core/file-watcher.js.map +1 -0
  34. package/dist/core/index-meta.d.ts +27 -0
  35. package/dist/core/index-meta.d.ts.map +1 -0
  36. package/dist/core/index-meta.js +212 -0
  37. package/dist/core/index-meta.js.map +1 -0
  38. package/dist/core/indexer.d.ts.map +1 -1
  39. package/dist/core/indexer.js +324 -26
  40. package/dist/core/indexer.js.map +1 -1
  41. package/dist/core/reranker.d.ts.map +1 -1
  42. package/dist/core/reranker.js +3 -0
  43. package/dist/core/reranker.js.map +1 -1
  44. package/dist/core/search-quality.js +2 -2
  45. package/dist/core/search-quality.js.map +1 -1
  46. package/dist/core/search.d.ts +1 -0
  47. package/dist/core/search.d.ts.map +1 -1
  48. package/dist/core/search.js +79 -11
  49. package/dist/core/search.js.map +1 -1
  50. package/dist/core/symbol-references.d.ts +20 -0
  51. package/dist/core/symbol-references.d.ts.map +1 -0
  52. package/dist/core/symbol-references.js +186 -0
  53. package/dist/core/symbol-references.js.map +1 -0
  54. package/dist/embeddings/index.d.ts +8 -0
  55. package/dist/embeddings/index.d.ts.map +1 -1
  56. package/dist/embeddings/index.js +17 -2
  57. package/dist/embeddings/index.js.map +1 -1
  58. package/dist/embeddings/openai.d.ts +1 -1
  59. package/dist/embeddings/openai.d.ts.map +1 -1
  60. package/dist/embeddings/openai.js +3 -1
  61. package/dist/embeddings/openai.js.map +1 -1
  62. package/dist/embeddings/transformers.d.ts +6 -0
  63. package/dist/embeddings/transformers.d.ts.map +1 -1
  64. package/dist/embeddings/transformers.js +12 -5
  65. package/dist/embeddings/transformers.js.map +1 -1
  66. package/dist/embeddings/types.d.ts +1 -0
  67. package/dist/embeddings/types.d.ts.map +1 -1
  68. package/dist/embeddings/types.js +7 -1
  69. package/dist/embeddings/types.js.map +1 -1
  70. package/dist/eval/harness.d.ts +5 -0
  71. package/dist/eval/harness.d.ts.map +1 -0
  72. package/dist/eval/harness.js +153 -0
  73. package/dist/eval/harness.js.map +1 -0
  74. package/dist/eval/types.d.ts +59 -0
  75. package/dist/eval/types.d.ts.map +1 -0
  76. package/dist/eval/types.js +2 -0
  77. package/dist/eval/types.js.map +1 -0
  78. package/dist/grammars/manifest.d.ts +26 -0
  79. package/dist/grammars/manifest.d.ts.map +1 -0
  80. package/dist/grammars/manifest.js +64 -0
  81. package/dist/grammars/manifest.js.map +1 -0
  82. package/dist/index.d.ts +16 -2
  83. package/dist/index.d.ts.map +1 -1
  84. package/dist/index.js +181 -1300
  85. package/dist/index.js.map +1 -1
  86. package/dist/patterns/semantics.d.ts +2 -1
  87. package/dist/patterns/semantics.d.ts.map +1 -1
  88. package/dist/patterns/semantics.js +0 -2
  89. package/dist/patterns/semantics.js.map +1 -1
  90. package/dist/preflight/evidence-lock.d.ts +6 -0
  91. package/dist/preflight/evidence-lock.d.ts.map +1 -1
  92. package/dist/preflight/evidence-lock.js +33 -1
  93. package/dist/preflight/evidence-lock.js.map +1 -1
  94. package/dist/storage/index.d.ts +4 -1
  95. package/dist/storage/index.d.ts.map +1 -1
  96. package/dist/storage/index.js +2 -2
  97. package/dist/storage/index.js.map +1 -1
  98. package/dist/storage/lancedb.d.ts +11 -1
  99. package/dist/storage/lancedb.d.ts.map +1 -1
  100. package/dist/storage/lancedb.js +45 -11
  101. package/dist/storage/lancedb.js.map +1 -1
  102. package/dist/storage/types.d.ts +4 -1
  103. package/dist/storage/types.d.ts.map +1 -1
  104. package/dist/storage/types.js.map +1 -1
  105. package/dist/tools/detect-circular-dependencies.d.ts +5 -0
  106. package/dist/tools/detect-circular-dependencies.d.ts.map +1 -0
  107. package/dist/tools/detect-circular-dependencies.js +117 -0
  108. package/dist/tools/detect-circular-dependencies.js.map +1 -0
  109. package/dist/tools/get-codebase-metadata.d.ts +5 -0
  110. package/dist/tools/get-codebase-metadata.d.ts.map +1 -0
  111. package/dist/tools/get-codebase-metadata.js +53 -0
  112. package/dist/tools/get-codebase-metadata.js.map +1 -0
  113. package/dist/tools/get-indexing-status.d.ts +5 -0
  114. package/dist/tools/get-indexing-status.d.ts.map +1 -0
  115. package/dist/tools/get-indexing-status.js +44 -0
  116. package/dist/tools/get-indexing-status.js.map +1 -0
  117. package/dist/tools/get-memory.d.ts +5 -0
  118. package/dist/tools/get-memory.d.ts.map +1 -0
  119. package/dist/tools/get-memory.js +89 -0
  120. package/dist/tools/get-memory.js.map +1 -0
  121. package/dist/tools/get-style-guide.d.ts +5 -0
  122. package/dist/tools/get-style-guide.d.ts.map +1 -0
  123. package/dist/tools/get-style-guide.js +151 -0
  124. package/dist/tools/get-style-guide.js.map +1 -0
  125. package/dist/tools/get-symbol-references.d.ts +5 -0
  126. package/dist/tools/get-symbol-references.d.ts.map +1 -0
  127. package/dist/tools/get-symbol-references.js +70 -0
  128. package/dist/tools/get-symbol-references.js.map +1 -0
  129. package/dist/tools/get-team-patterns.d.ts +5 -0
  130. package/dist/tools/get-team-patterns.d.ts.map +1 -0
  131. package/dist/tools/get-team-patterns.js +147 -0
  132. package/dist/tools/get-team-patterns.js.map +1 -0
  133. package/dist/tools/index.d.ts +6 -0
  134. package/dist/tools/index.d.ts.map +1 -0
  135. package/dist/tools/index.js +41 -0
  136. package/dist/tools/index.js.map +1 -0
  137. package/dist/tools/refresh-index.d.ts +5 -0
  138. package/dist/tools/refresh-index.d.ts.map +1 -0
  139. package/dist/tools/refresh-index.js +40 -0
  140. package/dist/tools/refresh-index.js.map +1 -0
  141. package/dist/tools/remember.d.ts +5 -0
  142. package/dist/tools/remember.d.ts.map +1 -0
  143. package/dist/tools/remember.js +101 -0
  144. package/dist/tools/remember.js.map +1 -0
  145. package/dist/tools/search-codebase.d.ts +5 -0
  146. package/dist/tools/search-codebase.d.ts.map +1 -0
  147. package/dist/tools/search-codebase.js +745 -0
  148. package/dist/tools/search-codebase.js.map +1 -0
  149. package/dist/tools/types.d.ts +223 -0
  150. package/dist/tools/types.d.ts.map +1 -0
  151. package/dist/tools/types.js +2 -0
  152. package/dist/tools/types.js.map +1 -0
  153. package/dist/types/index.d.ts +79 -11
  154. package/dist/types/index.d.ts.map +1 -1
  155. package/dist/types/index.js +0 -1
  156. package/dist/types/index.js.map +1 -1
  157. package/dist/utils/ast-chunker.d.ts +71 -0
  158. package/dist/utils/ast-chunker.d.ts.map +1 -0
  159. package/dist/utils/ast-chunker.js +453 -0
  160. package/dist/utils/ast-chunker.js.map +1 -0
  161. package/dist/utils/chunking.d.ts.map +1 -1
  162. package/dist/utils/chunking.js +10 -3
  163. package/dist/utils/chunking.js.map +1 -1
  164. package/dist/utils/language-detection.d.ts.map +1 -1
  165. package/dist/utils/language-detection.js +26 -1
  166. package/dist/utils/language-detection.js.map +1 -1
  167. package/dist/utils/tree-sitter.d.ts +28 -0
  168. package/dist/utils/tree-sitter.d.ts.map +1 -0
  169. package/dist/utils/tree-sitter.js +422 -0
  170. package/dist/utils/tree-sitter.js.map +1 -0
  171. package/dist/utils/usage-tracker.d.ts +30 -40
  172. package/dist/utils/usage-tracker.d.ts.map +1 -1
  173. package/dist/utils/usage-tracker.js +66 -8
  174. package/dist/utils/usage-tracker.js.map +1 -1
  175. package/docs/capabilities.md +183 -92
  176. package/docs/cli.md +196 -0
  177. package/grammars/.gitkeep +0 -0
  178. package/grammars/tree-sitter-c.wasm +0 -0
  179. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  180. package/grammars/tree-sitter-cpp.wasm +0 -0
  181. package/grammars/tree-sitter-go.wasm +0 -0
  182. package/grammars/tree-sitter-java.wasm +0 -0
  183. package/grammars/tree-sitter-javascript.wasm +0 -0
  184. package/grammars/tree-sitter-kotlin.wasm +0 -0
  185. package/grammars/tree-sitter-python.wasm +0 -0
  186. package/grammars/tree-sitter-rust.wasm +0 -0
  187. package/grammars/tree-sitter-tsx.wasm +0 -0
  188. package/grammars/tree-sitter-typescript.wasm +0 -0
  189. package/package.json +153 -157
@@ -2,20 +2,153 @@
2
2
  * Core Indexer - Orchestrates codebase indexing
3
3
  * Scans files, delegates to analyzers, creates embeddings, stores in vector DB
4
4
  */
5
- /* eslint-disable @typescript-eslint/no-explicit-any */
5
+ import { randomUUID } from 'crypto';
6
6
  import { promises as fs } from 'fs';
7
7
  import path from 'path';
8
8
  import { glob } from 'glob';
9
9
  import ignore from 'ignore';
10
10
  import { analyzerRegistry } from './analyzer-registry.js';
11
11
  import { isCodeFile, isBinaryFile } from '../utils/language-detection.js';
12
- import { getEmbeddingProvider, DEFAULT_MODEL } from '../embeddings/index.js';
12
+ import { getEmbeddingProvider, getConfiguredDimensions, DEFAULT_MODEL, parseEmbeddingProviderName } from '../embeddings/index.js';
13
13
  import { getStorageProvider } from '../storage/index.js';
14
14
  import { LibraryUsageTracker, PatternDetector, ImportGraph, InternalFileGraph } from '../utils/usage-tracker.js';
15
15
  import { mergeSmallChunks } from '../utils/chunking.js';
16
16
  import { getFileCommitDates } from '../utils/git-dates.js';
17
- import { CODEBASE_CONTEXT_DIRNAME, INDEXING_STATS_FILENAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
17
+ import { CODEBASE_CONTEXT_DIRNAME, INDEX_FORMAT_VERSION, INDEXING_STATS_FILENAME, INDEX_META_FILENAME, INDEX_META_VERSION, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, RELATIONSHIPS_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
18
+ const STAGING_DIRNAME = '.staging';
19
+ const PREVIOUS_DIRNAME = '.previous';
18
20
  import { computeFileHashes, readManifest, writeManifest, diffManifest } from './manifest.js';
21
+ import { readIndexMeta, checkEmbeddingMismatch } from './index-meta.js';
22
/**
 * Memoized tool version. Stays `null` until the first lookup has finished,
 * after which it holds either the package version or the string 'unknown'.
 */
let cachedToolVersion = null;
/**
 * Resolve the tool version from the package.json located two directories
 * above this module.
 *
 * The lookup is best-effort: a read or parse failure, or a `version` field
 * that is not a non-blank string, yields 'unknown'. The outcome is cached so
 * later calls return it without touching the filesystem again.
 *
 * @returns {Promise<string>} the package version, or 'unknown'
 */
async function getToolVersion() {
    if (!cachedToolVersion) {
        let resolved = 'unknown';
        try {
            const manifestUrl = new URL('../../package.json', import.meta.url);
            const { version } = JSON.parse(await fs.readFile(manifestUrl, 'utf-8'));
            if (typeof version === 'string' && version.trim()) {
                resolved = version;
            }
        }
        catch {
            // Best-effort — keep the 'unknown' fallback
        }
        cachedToolVersion = resolved;
    }
    return cachedToolVersion;
}
40
/**
 * Promote a fully built staging index to the active location using renames
 * only (Windows-safe: nothing is overwritten in place).
 *
 * Strategy:
 *   1. Move the current active artifacts into `.previous`.
 *   2. Move the staged artifacts into the active location. Index meta is
 *      promoted last so a reader never sees the new meta before the artifacts
 *      it describes are in place.
 *   3. Remove `.previous`, the staging directory, and the parent `.staging`
 *      directory when it is empty.
 *
 * Every completed rename is journaled. If any move in step 1 or step 2 fails,
 * the journal is undone in reverse order: staged artifacts go back to staging
 * and the previous artifacts go back to active, so the active location ends up
 * exactly as it was before the call. The original error is then rethrown.
 *
 * @param {string} contextDir - directory holding the active index artifacts
 * @param {string} stagingDir - directory holding the freshly built artifacts
 * @param {string} buildId - build identifier, used for logging only
 * @returns {Promise<void>}
 * @throws the error raised by the failing move, after the rollback attempt
 */
async function atomicSwapStagingToActive(contextDir, stagingDir, buildId) {
    const previousDir = path.join(contextDir, PREVIOUS_DIRNAME);
    // Plain-file artifacts other than index meta, which is ordered explicitly.
    const fileArtifacts = [
        KEYWORD_INDEX_FILENAME,
        INTELLIGENCE_FILENAME,
        MANIFEST_FILENAME,
        INDEXING_STATS_FILENAME,
        RELATIONSHIPS_FILENAME
    ];
    // Completed renames, oldest first; replayed backwards on failure.
    const journal = [];
    const moveIfExists = async (src, dest) => {
        try {
            await fs.rename(src, dest);
            journal.push({ src, dest });
        }
        catch (error) {
            // A missing source is fine; anything else is a real problem.
            if (error.code !== 'ENOENT') {
                throw error;
            }
        }
    };
    const moveDirIfExists = async (src, dest) => {
        let stat;
        try {
            stat = await fs.stat(src);
        }
        catch (error) {
            if (error.code !== 'ENOENT') {
                throw error;
            }
            return;
        }
        if (stat.isDirectory()) {
            await moveIfExists(src, dest);
        }
    };
    // A `.previous` left behind by an interrupted swap would make the vector
    // directory rename fail (non-empty destination), so start from a clean one.
    await cleanupDirectory(previousDir);
    await fs.mkdir(previousDir, { recursive: true });
    try {
        // Step 1: retire the active artifacts, meta first so the active index
        // stops advertising a build before its artifacts disappear.
        await moveIfExists(path.join(contextDir, INDEX_META_FILENAME), path.join(previousDir, INDEX_META_FILENAME));
        for (const name of fileArtifacts) {
            await moveIfExists(path.join(contextDir, name), path.join(previousDir, name));
        }
        await moveDirIfExists(path.join(contextDir, VECTOR_DB_DIRNAME), path.join(previousDir, VECTOR_DB_DIRNAME));
        // Step 2: promote the staged artifacts, meta last.
        for (const name of fileArtifacts) {
            await moveIfExists(path.join(stagingDir, name), path.join(contextDir, name));
        }
        await moveDirIfExists(path.join(stagingDir, VECTOR_DB_DIRNAME), path.join(contextDir, VECTOR_DB_DIRNAME));
        await moveIfExists(path.join(stagingDir, INDEX_META_FILENAME), path.join(contextDir, INDEX_META_FILENAME));
    }
    catch (swapError) {
        console.error('Atomic swap failed, attempting rollback:', swapError);
        // Undo every completed move, newest first. Keep going after a failed
        // undo so as much of the previous index as possible is restored.
        let rollbackError = null;
        for (const { src, dest } of [...journal].reverse()) {
            try {
                await fs.rename(dest, src);
            }
            catch (undoError) {
                if (!rollbackError) {
                    rollbackError = undoError;
                }
            }
        }
        if (rollbackError) {
            console.error('Rollback also failed:', rollbackError);
        }
        else {
            console.error('Rollback successful');
        }
        throw swapError;
    }
    // Step 3: clean up .previous and staging directories (best-effort)
    await cleanupDirectory(previousDir);
    await cleanupDirectory(stagingDir);
    // Also clean up the parent .staging/ directory if empty
    const stagingBase = path.join(contextDir, STAGING_DIRNAME);
    try {
        const remaining = await fs.readdir(stagingBase);
        if (remaining.length === 0) {
            await fs.rmdir(stagingBase);
        }
    }
    catch {
        // Directory doesn't exist or not empty - ignore
    }
    console.error(`Atomic swap complete: build ${buildId} now active`);
}
141
/**
 * Remove a directory tree, ignoring any failure.
 *
 * Deletion is recursive and forced, so a path that does not exist is not an
 * error. Any error raised by the removal is swallowed: callers use this for
 * housekeeping that must never abort the surrounding operation.
 *
 * @param {string} dirPath - directory to delete
 * @returns {Promise<void>}
 */
async function cleanupDirectory(dirPath) {
    const removalOptions = { recursive: true, force: true };
    try {
        await fs.rm(dirPath, removalOptions);
    }
    catch {
        // Best-effort: ignore cleanup failures
    }
}
19
152
  export class CodebaseIndexer {
20
153
  rootPath;
21
154
  config;
@@ -38,6 +171,13 @@ export class CodebaseIndexer {
38
171
  };
39
172
  }
40
173
  mergeConfig(userConfig) {
174
+ const defaultEmbeddingProvider = parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
175
+ // When provider=openai and EMBEDDING_MODEL is not set, DEFAULT_MODEL resolves to the
176
+ // transformers fallback (Xenova/bge-small-en-v1.5), which the OpenAI API rejects.
177
+ // Use a sane OpenAI default instead.
178
+ const defaultModel = defaultEmbeddingProvider === 'openai' && !process.env.EMBEDDING_MODEL
179
+ ? 'text-embedding-3-small'
180
+ : DEFAULT_MODEL;
41
181
  const defaultConfig = {
42
182
  analyzers: {
43
183
  angular: { enabled: true, priority: 100 },
@@ -45,8 +185,25 @@ export class CodebaseIndexer {
45
185
  vue: { enabled: false, priority: 90 },
46
186
  generic: { enabled: true, priority: 10 }
47
187
  },
48
- include: ['**/*.{ts,tsx,js,jsx,html,css,scss,sass,less}'],
49
- exclude: ['node_modules/**', 'dist/**', 'build/**', '.git/**', 'coverage/**'],
188
+ include: [
189
+ '**/*.{ts,tsx,js,jsx,mjs,cjs,mts,cts}',
190
+ '**/*.{html,htm,css,scss,sass,less}',
191
+ '**/*.{py,pyi,rb,php}',
192
+ '**/*.{java,kt,kts,scala,swift,cs}',
193
+ '**/*.{go,rs}',
194
+ '**/*.{c,cpp,cc,cxx,h,hpp}',
195
+ '**/*.{sh,bash,zsh,ps1}',
196
+ '**/*.{sql,graphql,gql}',
197
+ '**/*.{json,jsonc,yaml,yml,toml,xml}'
198
+ ],
199
+ exclude: [
200
+ 'node_modules/**',
201
+ 'dist/**',
202
+ 'build/**',
203
+ '.git/**',
204
+ 'coverage/**',
205
+ '.codebase-context/**'
206
+ ],
50
207
  respectGitignore: true,
51
208
  parsing: {
52
209
  maxFileSize: 1048576,
@@ -66,8 +223,8 @@ export class CodebaseIndexer {
66
223
  includeChangelogs: false
67
224
  },
68
225
  embedding: {
69
- provider: 'transformers',
70
- model: DEFAULT_MODEL,
226
+ provider: defaultEmbeddingProvider,
227
+ model: defaultModel,
71
228
  batchSize: 32
72
229
  },
73
230
  skipEmbedding: false,
@@ -115,7 +272,17 @@ export class CodebaseIndexer {
115
272
  errors: [],
116
273
  startedAt: new Date()
117
274
  };
275
+ let stagingDir = null;
118
276
  try {
277
+ // Ensure there is at least a generic fallback analyzer registered when the indexer
278
+ // is used directly (e.g. in tests or standalone scripts).
279
+ if (analyzerRegistry.getAll().length === 0) {
280
+ const { GenericAnalyzer } = await import('../analyzers/generic/index.js');
281
+ analyzerRegistry.register(new GenericAnalyzer());
282
+ }
283
+ const buildId = randomUUID();
284
+ const generatedAt = new Date().toISOString();
285
+ const toolVersion = await getToolVersion();
119
286
  // Phase 1: Scanning
120
287
  this.updateProgress('scanning', 0);
121
288
  let files = await this.scanFiles();
@@ -150,8 +317,26 @@ export class CodebaseIndexer {
150
317
  deleted: diff.deleted.length,
151
318
  unchanged: diff.unchanged.length
152
319
  };
320
+ // Check for embedding provider/model mismatch — forces full rebuild to avoid
321
+ // silent vector dimension mismatch when switching providers or models.
322
+ try {
323
+ const existingMeta = await readIndexMeta(this.rootPath);
324
+ const currentProvider = this.config.embedding?.provider ?? 'transformers';
325
+ const currentModel = this.config.embedding?.model ?? DEFAULT_MODEL;
326
+ if (checkEmbeddingMismatch(existingMeta, currentProvider, currentModel)) {
327
+ const stored = existingMeta.artifacts.vectorDb;
328
+ console.error(`Embedding provider/model changed (stored: ${stored.embeddingProvider}:${stored.embeddingModel}, current: ${currentProvider}:${currentModel}) — forcing full rebuild`);
329
+ diff = null;
330
+ }
331
+ }
332
+ catch {
333
+ // No meta yet or legacy index without embedding fields — proceed with incremental
334
+ }
153
335
  // Short-circuit: nothing changed
154
- if (diff.added.length === 0 && diff.changed.length === 0 && diff.deleted.length === 0) {
336
+ if (diff &&
337
+ diff.added.length === 0 &&
338
+ diff.changed.length === 0 &&
339
+ diff.deleted.length === 0) {
155
340
  console.error('No files changed - skipping re-index.');
156
341
  this.updateProgress('complete', 100);
157
342
  stats.duration = Date.now() - startTime;
@@ -177,7 +362,13 @@ export class CodebaseIndexer {
177
362
  }
178
363
  try {
179
364
  const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
180
- const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
365
+ const existing = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
366
+ const existingObj = existing;
367
+ const existingChunks = Array.isArray(existing)
368
+ ? existing
369
+ : existingObj && Array.isArray(existingObj.chunks)
370
+ ? existingObj.chunks
371
+ : null;
181
372
  if (Array.isArray(existingChunks)) {
182
373
  stats.totalChunks = existingChunks.length;
183
374
  if (stats.indexedFiles === 0) {
@@ -253,7 +444,7 @@ export class CodebaseIndexer {
253
444
  break;
254
445
  }
255
446
  }
256
- internalFileGraph.trackImport(file, resolvedPath, imp.imports);
447
+ internalFileGraph.trackImport(file, resolvedPath, imp.line || 1, imp.imports);
257
448
  }
258
449
  }
259
450
  // Track exports for unused export detection
@@ -285,7 +476,8 @@ export class CodebaseIndexer {
285
476
  // GENERIC PATTERN FORWARDING
286
477
  // Framework analyzers return detectedPatterns in metadata - we just forward them
287
478
  // This keeps the indexer framework-agnostic
288
- if (result.metadata?.detectedPatterns) {
479
+ if (result.metadata?.detectedPatterns &&
480
+ Array.isArray(result.metadata.detectedPatterns)) {
289
481
  for (const pattern of result.metadata.detectedPatterns) {
290
482
  // Try to extract a relevant snippet for the pattern
291
483
  // Ask analyzer registry for snippet pattern (framework-agnostic delegation)
@@ -297,7 +489,10 @@ export class CodebaseIndexer {
297
489
  }
298
490
  // Track file for Golden File scoring (framework-agnostic)
299
491
  // A golden file = file with patterns in ≥3 distinct categories
300
- const detectedPatterns = result.metadata?.detectedPatterns || [];
492
+ const rawPatterns = result.metadata?.detectedPatterns;
493
+ const detectedPatterns = Array.isArray(rawPatterns)
494
+ ? rawPatterns
495
+ : [];
301
496
  const uniqueCategories = new Set(detectedPatterns.map((p) => p.category));
302
497
  const patternScore = uniqueCategories.size;
303
498
  if (patternScore >= 3) {
@@ -305,7 +500,7 @@ export class CodebaseIndexer {
305
500
  for (const p of detectedPatterns) {
306
501
  patternFlags[`${p.category}:${p.name}`] = true;
307
502
  }
308
- patternDetector.trackGoldenFile(relPath, patternScore, patternFlags); // TODO: fix type;
503
+ patternDetector.trackGoldenFile(relPath, patternScore, patternFlags);
309
504
  }
310
505
  // Update component statistics
311
506
  for (const component of result.components) {
@@ -404,10 +599,22 @@ export class CodebaseIndexer {
404
599
  }
405
600
  // Phase 4: Storing
406
601
  this.updateProgress('storing', 75);
407
- await fs.mkdir(contextDir, { recursive: true });
602
+ // For full rebuilds, use staging directory for atomic swap
603
+ // For incremental, write directly to active location
604
+ const isFullRebuild = !diff;
605
+ let activeContextDir = contextDir;
606
+ if (isFullRebuild) {
607
+ // Create staging directory for atomic swap
608
+ const stagingBase = path.join(contextDir, STAGING_DIRNAME);
609
+ stagingDir = path.join(stagingBase, buildId);
610
+ await fs.mkdir(stagingDir, { recursive: true });
611
+ activeContextDir = stagingDir;
612
+ console.error(`Full rebuild: writing to staging ${stagingDir}`);
613
+ }
614
+ await fs.mkdir(activeContextDir, { recursive: true });
408
615
  if (!this.config.skipEmbedding) {
409
- const storagePath = path.join(contextDir, VECTOR_DB_DIRNAME);
410
- const storageProvider = await getStorageProvider({ path: storagePath });
616
+ const storagePath = path.join(activeContextDir, VECTOR_DB_DIRNAME);
617
+ const storageProvider = await getStorageProvider({ path: storagePath }, diff ? { expectedDimensions: getConfiguredDimensions(this.config.embedding) } : undefined);
411
618
  if (diff) {
412
619
  // Incremental: delete old chunks for changed + deleted files, then add new
413
620
  const filesToDelete = [...diff.changed, ...diff.deleted].map((rel) => path.join(this.rootPath, rel).replace(/\\/g, '/'));
@@ -424,19 +631,26 @@ export class CodebaseIndexer {
424
631
  `added ${chunksWithEmbeddings.length} new chunks`);
425
632
  }
426
633
  else {
427
- // Full: clear and re-store everything
428
- console.error(`Storing ${chunksToEmbed.length} chunks...`);
429
- await storageProvider.clear();
634
+ // Full rebuild: store to staging (no clear - fresh directory)
635
+ console.error(`Storing ${chunksToEmbed.length} chunks to staging...`);
430
636
  await storageProvider.store(chunksWithEmbeddings);
431
637
  }
432
638
  }
639
+ // Vector DB build marker (required for version gating)
640
+ // Write after semantic store step so marker reflects the latest DB state.
641
+ const vectorDir = path.join(activeContextDir, VECTOR_DB_DIRNAME);
642
+ await fs.mkdir(vectorDir, { recursive: true });
643
+ await fs.writeFile(path.join(vectorDir, 'index-build.json'), JSON.stringify({ buildId, formatVersion: INDEX_FORMAT_VERSION }));
433
644
  // Keyword index always uses ALL chunks (full regen)
434
- const indexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
645
+ const indexPath = path.join(activeContextDir, KEYWORD_INDEX_FILENAME);
435
646
  // Memory safety: cap keyword index too
436
647
  const keywordChunks = allChunks.length > MAX_CHUNKS ? allChunks.slice(0, MAX_CHUNKS) : allChunks;
437
- await fs.writeFile(indexPath, JSON.stringify(keywordChunks));
648
+ await fs.writeFile(indexPath, JSON.stringify({
649
+ header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
650
+ chunks: keywordChunks
651
+ }));
438
652
  // Save library usage and pattern stats (always full regen)
439
- const intelligencePath = path.join(contextDir, INTELLIGENCE_FILENAME);
653
+ const intelligencePath = path.join(activeContextDir, INTELLIGENCE_FILENAME);
440
654
  const libraryStats = libraryTracker.getStats();
441
655
  // Extract tsconfig paths for AI to understand import aliases
442
656
  let tsconfigPaths;
@@ -453,6 +667,7 @@ export class CodebaseIndexer {
453
667
  // No tsconfig.json or no paths defined
454
668
  }
455
669
  const intelligence = {
670
+ header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
456
671
  libraryUsage: libraryStats,
457
672
  patterns: patternDetector.getAllPatterns(),
458
673
  goldenFiles: patternDetector.getGoldenFiles(5),
@@ -465,23 +680,97 @@ export class CodebaseIndexer {
465
680
  },
466
681
  // Internal file graph for circular dependency and unused export detection
467
682
  internalFileGraph: internalFileGraph.toJSON(),
468
- generatedAt: new Date().toISOString()
683
+ generatedAt
469
684
  };
470
685
  await fs.writeFile(intelligencePath, JSON.stringify(intelligence, null, 2));
686
+ // Write relationships sidecar (versioned, for fast lookup)
687
+ const relationshipsPath = path.join(activeContextDir, RELATIONSHIPS_FILENAME);
688
+ const graphData = internalFileGraph.toJSON();
689
+ // Build reverse import map (importedBy)
690
+ const importedBy = {};
691
+ if (graphData.imports) {
692
+ for (const [file, deps] of Object.entries(graphData.imports)) {
693
+ for (const dep of deps) {
694
+ if (!importedBy[dep])
695
+ importedBy[dep] = [];
696
+ importedBy[dep].push(file);
697
+ }
698
+ }
699
+ }
700
+ // Build symbol export map (exportedBy)
701
+ const exportedBy = {};
702
+ if (graphData.exports) {
703
+ for (const [file, exps] of Object.entries(graphData.exports)) {
704
+ for (const exp of exps) {
705
+ if (exp.name && exp.name !== 'default') {
706
+ if (!exportedBy[exp.name])
707
+ exportedBy[exp.name] = [];
708
+ if (!exportedBy[exp.name].includes(file)) {
709
+ exportedBy[exp.name].push(file);
710
+ }
711
+ }
712
+ }
713
+ }
714
+ }
715
+ const relationships = {
716
+ header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
717
+ generatedAt,
718
+ graph: {
719
+ imports: graphData.imports || {},
720
+ ...(graphData.importDetails ? { importDetails: graphData.importDetails } : {}),
721
+ importedBy,
722
+ exports: graphData.exports || {}
723
+ },
724
+ symbols: {
725
+ exportedBy
726
+ },
727
+ stats: graphData.stats || internalFileGraph.getStats()
728
+ };
729
+ await fs.writeFile(relationshipsPath, JSON.stringify(relationships, null, 2));
471
730
  // Write manifest (both full and incremental)
731
+ // For full rebuild, write to staging; for incremental, write to active
732
+ const activeManifestPath = path.join(activeContextDir, MANIFEST_FILENAME);
472
733
  const manifest = {
473
734
  version: 1,
474
735
  generatedAt: new Date().toISOString(),
475
736
  files: currentHashes ?? (await computeFileHashes(files, this.rootPath))
476
737
  };
477
- await writeManifest(manifestPath, manifest);
738
+ await writeManifest(activeManifestPath, manifest);
478
739
  const persistedStats = {
479
740
  indexedFiles: stats.indexedFiles,
480
741
  totalChunks: stats.totalChunks,
481
742
  totalFiles: stats.totalFiles,
482
- generatedAt: new Date().toISOString()
743
+ generatedAt
483
744
  };
484
- await fs.writeFile(indexingStatsPath, JSON.stringify(persistedStats, null, 2));
745
+ const activeIndexingStatsPath = path.join(activeContextDir, INDEXING_STATS_FILENAME);
746
+ await fs.writeFile(activeIndexingStatsPath, JSON.stringify(persistedStats, null, 2));
747
+ // Index meta (authoritative) — write last so readers never observe meta pointing to missing artifacts.
748
+ const metaPath = path.join(activeContextDir, INDEX_META_FILENAME);
749
+ await fs.writeFile(metaPath, JSON.stringify({
750
+ metaVersion: INDEX_META_VERSION,
751
+ formatVersion: INDEX_FORMAT_VERSION,
752
+ buildId,
753
+ generatedAt,
754
+ toolVersion,
755
+ artifacts: {
756
+ keywordIndex: { path: KEYWORD_INDEX_FILENAME },
757
+ vectorDb: {
758
+ path: VECTOR_DB_DIRNAME,
759
+ provider: 'lancedb',
760
+ embeddingProvider: this.config.embedding?.provider ?? 'transformers',
761
+ embeddingModel: this.config.embedding?.model ?? DEFAULT_MODEL
762
+ },
763
+ intelligence: { path: INTELLIGENCE_FILENAME },
764
+ manifest: { path: MANIFEST_FILENAME },
765
+ indexingStats: { path: INDEXING_STATS_FILENAME },
766
+ relationships: { path: RELATIONSHIPS_FILENAME }
767
+ }
768
+ }, null, 2));
769
+ // Atomic swap for full rebuilds: move staging into active location
770
+ if (isFullRebuild && stagingDir) {
771
+ console.error('Performing atomic swap of staging to active...');
772
+ await atomicSwapStagingToActive(contextDir, stagingDir, buildId);
773
+ }
485
774
  // Phase 5: Complete
486
775
  this.updateProgress('complete', 100);
487
776
  stats.duration = Date.now() - startTime;
@@ -505,6 +794,11 @@ export class CodebaseIndexer {
505
794
  phase: this.progress.phase,
506
795
  timestamp: new Date()
507
796
  });
797
+ // Clean up staging directory on failure (best-effort)
798
+ if (stagingDir) {
799
+ console.error('Cleaning up staging directory after failure...');
800
+ await cleanupDirectory(stagingDir);
801
+ }
508
802
  throw error;
509
803
  }
510
804
  }
@@ -636,6 +930,10 @@ export class CodebaseIndexer {
636
930
  const intelligencePath = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME);
637
931
  const intelligenceContent = await fs.readFile(intelligencePath, 'utf-8');
638
932
  const intelligence = JSON.parse(intelligenceContent);
933
+ // Phase 06: ignore legacy intelligence files that lack a versioned header.
934
+ if (!intelligence || typeof intelligence !== 'object' || !intelligence.header) {
935
+ return metadata;
936
+ }
639
937
  metadata.customMetadata = {
640
938
  ...metadata.customMetadata,
641
939
  libraryUsage: intelligence.libraryUsage,