codebase-context 1.2.2 → 1.5.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/LICENSE +21 -21
  2. package/README.md +292 -87
  3. package/dist/analyzers/angular/index.d.ts +1 -1
  4. package/dist/analyzers/angular/index.d.ts.map +1 -1
  5. package/dist/analyzers/angular/index.js +298 -309
  6. package/dist/analyzers/angular/index.js.map +1 -1
  7. package/dist/analyzers/generic/index.d.ts +1 -2
  8. package/dist/analyzers/generic/index.d.ts.map +1 -1
  9. package/dist/analyzers/generic/index.js +93 -60
  10. package/dist/analyzers/generic/index.js.map +1 -1
  11. package/dist/constants/codebase-context.d.ts +8 -0
  12. package/dist/constants/codebase-context.d.ts.map +1 -0
  13. package/dist/constants/codebase-context.js +10 -0
  14. package/dist/constants/codebase-context.js.map +1 -0
  15. package/dist/constants/git-patterns.d.ts +12 -0
  16. package/dist/constants/git-patterns.d.ts.map +1 -0
  17. package/dist/constants/git-patterns.js +11 -0
  18. package/dist/constants/git-patterns.js.map +1 -0
  19. package/dist/core/analyzer-registry.d.ts.map +1 -1
  20. package/dist/core/analyzer-registry.js +8 -8
  21. package/dist/core/analyzer-registry.js.map +1 -1
  22. package/dist/core/indexer.d.ts +11 -1
  23. package/dist/core/indexer.d.ts.map +1 -1
  24. package/dist/core/indexer.js +359 -157
  25. package/dist/core/indexer.js.map +1 -1
  26. package/dist/core/manifest.d.ts +39 -0
  27. package/dist/core/manifest.d.ts.map +1 -0
  28. package/dist/core/manifest.js +86 -0
  29. package/dist/core/manifest.js.map +1 -0
  30. package/dist/core/search-quality.d.ts +10 -0
  31. package/dist/core/search-quality.d.ts.map +1 -0
  32. package/dist/core/search-quality.js +64 -0
  33. package/dist/core/search-quality.js.map +1 -0
  34. package/dist/core/search.d.ts +17 -1
  35. package/dist/core/search.d.ts.map +1 -1
  36. package/dist/core/search.js +303 -104
  37. package/dist/core/search.js.map +1 -1
  38. package/dist/embeddings/openai.d.ts.map +1 -1
  39. package/dist/embeddings/openai.js +2 -2
  40. package/dist/embeddings/openai.js.map +1 -1
  41. package/dist/embeddings/transformers.d.ts +1 -1
  42. package/dist/embeddings/transformers.d.ts.map +1 -1
  43. package/dist/embeddings/transformers.js +19 -15
  44. package/dist/embeddings/transformers.js.map +1 -1
  45. package/dist/embeddings/types.d.ts +1 -1
  46. package/dist/embeddings/types.d.ts.map +1 -1
  47. package/dist/embeddings/types.js +3 -3
  48. package/dist/embeddings/types.js.map +1 -1
  49. package/dist/errors/index.d.ts +8 -0
  50. package/dist/errors/index.d.ts.map +1 -0
  51. package/dist/errors/index.js +11 -0
  52. package/dist/errors/index.js.map +1 -0
  53. package/dist/index.d.ts +7 -29
  54. package/dist/index.d.ts.map +1 -1
  55. package/dist/index.js +1125 -362
  56. package/dist/index.js.map +1 -1
  57. package/dist/lib.d.ts +18 -18
  58. package/dist/lib.d.ts.map +1 -1
  59. package/dist/lib.js +23 -23
  60. package/dist/lib.js.map +1 -1
  61. package/dist/memory/git-memory.d.ts +9 -0
  62. package/dist/memory/git-memory.d.ts.map +1 -0
  63. package/dist/memory/git-memory.js +51 -0
  64. package/dist/memory/git-memory.js.map +1 -0
  65. package/dist/memory/store.d.ts +38 -0
  66. package/dist/memory/store.d.ts.map +1 -0
  67. package/dist/memory/store.js +136 -0
  68. package/dist/memory/store.js.map +1 -0
  69. package/dist/patterns/semantics.d.ts +4 -0
  70. package/dist/patterns/semantics.d.ts.map +1 -0
  71. package/dist/patterns/semantics.js +24 -0
  72. package/dist/patterns/semantics.js.map +1 -0
  73. package/dist/preflight/evidence-lock.d.ts +50 -0
  74. package/dist/preflight/evidence-lock.d.ts.map +1 -0
  75. package/dist/preflight/evidence-lock.js +130 -0
  76. package/dist/preflight/evidence-lock.js.map +1 -0
  77. package/dist/preflight/query-scope.d.ts +3 -0
  78. package/dist/preflight/query-scope.d.ts.map +1 -0
  79. package/dist/preflight/query-scope.js +40 -0
  80. package/dist/preflight/query-scope.js.map +1 -0
  81. package/dist/resources/uri.d.ts +5 -0
  82. package/dist/resources/uri.d.ts.map +1 -0
  83. package/dist/resources/uri.js +15 -0
  84. package/dist/resources/uri.js.map +1 -0
  85. package/dist/storage/lancedb.d.ts +1 -0
  86. package/dist/storage/lancedb.d.ts.map +1 -1
  87. package/dist/storage/lancedb.js +51 -34
  88. package/dist/storage/lancedb.js.map +1 -1
  89. package/dist/storage/types.d.ts +5 -0
  90. package/dist/storage/types.d.ts.map +1 -1
  91. package/dist/storage/types.js +2 -1
  92. package/dist/storage/types.js.map +1 -1
  93. package/dist/types/index.d.ts +47 -0
  94. package/dist/types/index.d.ts.map +1 -1
  95. package/dist/types/index.js +1 -0
  96. package/dist/types/index.js.map +1 -1
  97. package/dist/utils/chunking.d.ts.map +1 -1
  98. package/dist/utils/chunking.js +10 -9
  99. package/dist/utils/chunking.js.map +1 -1
  100. package/dist/utils/dependency-detection.d.ts +18 -0
  101. package/dist/utils/dependency-detection.d.ts.map +1 -0
  102. package/dist/utils/dependency-detection.js +102 -0
  103. package/dist/utils/dependency-detection.js.map +1 -0
  104. package/dist/utils/git-dates.d.ts +1 -0
  105. package/dist/utils/git-dates.d.ts.map +1 -1
  106. package/dist/utils/git-dates.js +23 -3
  107. package/dist/utils/git-dates.js.map +1 -1
  108. package/dist/utils/language-detection.d.ts.map +1 -1
  109. package/dist/utils/language-detection.js +69 -17
  110. package/dist/utils/language-detection.js.map +1 -1
  111. package/dist/utils/usage-tracker.d.ts +2 -2
  112. package/dist/utils/usage-tracker.d.ts.map +1 -1
  113. package/dist/utils/usage-tracker.js +67 -38
  114. package/dist/utils/usage-tracker.js.map +1 -1
  115. package/dist/utils/workspace-detection.d.ts +32 -0
  116. package/dist/utils/workspace-detection.d.ts.map +1 -0
  117. package/dist/utils/workspace-detection.js +107 -0
  118. package/dist/utils/workspace-detection.js.map +1 -0
  119. package/package.json +122 -97
  120. package/dist/core/file-watcher.d.ts +0 -63
  121. package/dist/core/file-watcher.d.ts.map +0 -1
  122. package/dist/core/file-watcher.js +0 -210
  123. package/dist/core/file-watcher.js.map +0 -1
  124. package/dist/utils/logger.d.ts +0 -36
  125. package/dist/utils/logger.d.ts.map +0 -1
  126. package/dist/utils/logger.js +0 -111
  127. package/dist/utils/logger.js.map +0 -1
  128. package/dist/utils/pattern-detector.d.ts +0 -41
  129. package/dist/utils/pattern-detector.d.ts.map +0 -1
  130. package/dist/utils/pattern-detector.js +0 -101
  131. package/dist/utils/pattern-detector.js.map +0 -1
@@ -2,33 +2,38 @@
2
2
  * Core Indexer - Orchestrates codebase indexing
3
3
  * Scans files, delegates to analyzers, creates embeddings, stores in vector DB
4
4
  */
5
- import { promises as fs } from "fs";
6
- import path from "path";
7
- import { glob } from "glob";
8
- import ignore from "ignore";
9
- import { analyzerRegistry } from "./analyzer-registry.js";
10
- import { isCodeFile, isBinaryFile } from "../utils/language-detection.js";
11
- import { getEmbeddingProvider, } from "../embeddings/index.js";
12
- import { getStorageProvider, } from "../storage/index.js";
13
- import { LibraryUsageTracker, PatternDetector, ImportGraph, InternalFileGraph } from "../utils/usage-tracker.js";
14
- import { getFileCommitDates } from "../utils/git-dates.js";
5
+ /* eslint-disable @typescript-eslint/no-explicit-any */
6
+ import { promises as fs } from 'fs';
7
+ import path from 'path';
8
+ import { glob } from 'glob';
9
+ import ignore from 'ignore';
10
+ import { analyzerRegistry } from './analyzer-registry.js';
11
+ import { isCodeFile, isBinaryFile } from '../utils/language-detection.js';
12
+ import { getEmbeddingProvider } from '../embeddings/index.js';
13
+ import { getStorageProvider } from '../storage/index.js';
14
+ import { LibraryUsageTracker, PatternDetector, ImportGraph, InternalFileGraph } from '../utils/usage-tracker.js';
15
+ import { getFileCommitDates } from '../utils/git-dates.js';
16
+ import { CODEBASE_CONTEXT_DIRNAME, INDEXING_STATS_FILENAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
17
+ import { computeFileHashes, readManifest, writeManifest, diffManifest } from './manifest.js';
15
18
  export class CodebaseIndexer {
16
19
  rootPath;
17
20
  config;
18
21
  progress;
19
22
  onProgressCallback;
23
+ incrementalOnly;
20
24
  constructor(options) {
21
25
  this.rootPath = path.resolve(options.rootPath);
22
26
  this.config = this.mergeConfig(options.config);
23
27
  this.onProgressCallback = options.onProgress;
28
+ this.incrementalOnly = options.incrementalOnly ?? false;
24
29
  this.progress = {
25
- phase: "initializing",
30
+ phase: 'initializing',
26
31
  percentage: 0,
27
32
  filesProcessed: 0,
28
33
  totalFiles: 0,
29
34
  chunksCreated: 0,
30
35
  errors: [],
31
- startedAt: new Date(),
36
+ startedAt: new Date()
32
37
  };
33
38
  }
34
39
  mergeConfig(userConfig) {
@@ -37,44 +42,38 @@ export class CodebaseIndexer {
37
42
  angular: { enabled: true, priority: 100 },
38
43
  react: { enabled: false, priority: 90 },
39
44
  vue: { enabled: false, priority: 90 },
40
- generic: { enabled: true, priority: 10 },
45
+ generic: { enabled: true, priority: 10 }
41
46
  },
42
- include: ["**/*.{ts,tsx,js,jsx,html,css,scss,sass,less}"],
43
- exclude: [
44
- "node_modules/**",
45
- "dist/**",
46
- "build/**",
47
- ".git/**",
48
- "coverage/**",
49
- ],
47
+ include: ['**/*.{ts,tsx,js,jsx,html,css,scss,sass,less}'],
48
+ exclude: ['node_modules/**', 'dist/**', 'build/**', '.git/**', 'coverage/**'],
50
49
  respectGitignore: true,
51
50
  parsing: {
52
51
  maxFileSize: 1048576, // 1MB
53
52
  chunkSize: 100,
54
53
  chunkOverlap: 10,
55
54
  parseTests: true,
56
- parseNodeModules: false,
55
+ parseNodeModules: false
57
56
  },
58
57
  styleGuides: {
59
58
  autoDetect: true,
60
- paths: ["STYLE_GUIDE.md", "docs/style-guide.md", "ARCHITECTURE.md"],
61
- parseMarkdown: true,
59
+ paths: ['STYLE_GUIDE.md', 'docs/style-guide.md', 'ARCHITECTURE.md'],
60
+ parseMarkdown: true
62
61
  },
63
62
  documentation: {
64
63
  autoDetect: true,
65
64
  includeReadmes: true,
66
- includeChangelogs: false,
65
+ includeChangelogs: false
67
66
  },
68
67
  embedding: {
69
- provider: "transformers",
70
- model: "Xenova/bge-base-en-v1.5",
71
- batchSize: 100,
68
+ provider: 'transformers',
69
+ model: 'Xenova/bge-small-en-v1.5',
70
+ batchSize: 100
72
71
  },
73
72
  skipEmbedding: false,
74
73
  storage: {
75
- provider: "lancedb",
76
- path: "./codebase-index",
77
- },
74
+ provider: 'lancedb',
75
+ path: './codebase-index'
76
+ }
78
77
  };
79
78
  return {
80
79
  ...defaultConfig,
@@ -84,10 +83,10 @@ export class CodebaseIndexer {
84
83
  styleGuides: { ...defaultConfig.styleGuides, ...userConfig?.styleGuides },
85
84
  documentation: {
86
85
  ...defaultConfig.documentation,
87
- ...userConfig?.documentation,
86
+ ...userConfig?.documentation
88
87
  },
89
88
  embedding: { ...defaultConfig.embedding, ...userConfig?.embedding },
90
- storage: { ...defaultConfig.storage, ...userConfig?.storage },
89
+ storage: { ...defaultConfig.storage, ...userConfig?.storage }
91
90
  };
92
91
  }
93
92
  async index() {
@@ -110,14 +109,14 @@ export class CodebaseIndexer {
110
109
  shared: 0,
111
110
  feature: 0,
112
111
  infrastructure: 0,
113
- unknown: 0,
112
+ unknown: 0
114
113
  },
115
114
  errors: [],
116
- startedAt: new Date(),
115
+ startedAt: new Date()
117
116
  };
118
117
  try {
119
118
  // Phase 1: Scanning
120
- this.updateProgress("scanning", 0);
119
+ this.updateProgress('scanning', 0);
121
120
  let files = await this.scanFiles();
122
121
  // Memory safety: limit total files to prevent heap exhaustion
123
122
  const MAX_FILES = 10000;
@@ -129,15 +128,92 @@ export class CodebaseIndexer {
129
128
  stats.totalFiles = files.length;
130
129
  this.progress.totalFiles = files.length;
131
130
  console.error(`Found ${files.length} files to index`);
131
+ // Phase 1b: Incremental diff (if incremental mode)
132
+ const contextDir = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME);
133
+ const manifestPath = path.join(contextDir, MANIFEST_FILENAME);
134
+ const indexingStatsPath = path.join(contextDir, INDEXING_STATS_FILENAME);
135
+ let diff = null;
136
+ let currentHashes = null;
137
+ let previousManifest = null;
138
+ if (this.incrementalOnly) {
139
+ this.updateProgress('scanning', 10);
140
+ console.error('Computing file hashes for incremental diff...');
141
+ currentHashes = await computeFileHashes(files, this.rootPath);
142
+ previousManifest = await readManifest(manifestPath);
143
+ diff = diffManifest(previousManifest, currentHashes);
144
+ console.error(`Incremental diff: ${diff.added.length} added, ${diff.changed.length} changed, ` +
145
+ `${diff.deleted.length} deleted, ${diff.unchanged.length} unchanged`);
146
+ stats.incremental = {
147
+ added: diff.added.length,
148
+ changed: diff.changed.length,
149
+ deleted: diff.deleted.length,
150
+ unchanged: diff.unchanged.length
151
+ };
152
+ // Short-circuit: nothing changed
153
+ if (diff.added.length === 0 && diff.changed.length === 0 && diff.deleted.length === 0) {
154
+ console.error('No files changed — skipping re-index.');
155
+ this.updateProgress('complete', 100);
156
+ stats.duration = Date.now() - startTime;
157
+ stats.completedAt = new Date();
158
+ let restoredFromPersistedStats = false;
159
+ try {
160
+ const persisted = JSON.parse(await fs.readFile(indexingStatsPath, 'utf-8'));
161
+ if (typeof persisted.indexedFiles === 'number' &&
162
+ typeof persisted.totalChunks === 'number' &&
163
+ typeof persisted.totalFiles === 'number') {
164
+ stats.indexedFiles = persisted.indexedFiles;
165
+ stats.totalChunks = persisted.totalChunks;
166
+ stats.totalFiles = persisted.totalFiles;
167
+ restoredFromPersistedStats = true;
168
+ }
169
+ }
170
+ catch {
171
+ // No persisted stats yet — fall back below
172
+ }
173
+ if (!restoredFromPersistedStats) {
174
+ if (previousManifest) {
175
+ stats.indexedFiles = Object.keys(previousManifest.files).length;
176
+ }
177
+ try {
178
+ const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
179
+ const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
180
+ if (Array.isArray(existingChunks)) {
181
+ stats.totalChunks = existingChunks.length;
182
+ if (stats.indexedFiles === 0) {
183
+ const uniqueFiles = new Set(existingChunks.map((c) => c.filePath));
184
+ stats.indexedFiles = uniqueFiles.size;
185
+ }
186
+ }
187
+ }
188
+ catch {
189
+ // Keyword index doesn't exist yet — keep best-known counts
190
+ }
191
+ }
192
+ stats.totalFiles = files.length;
193
+ return stats;
194
+ }
195
+ }
196
+ // Build the set of files that need analysis + embedding (incremental: only added/changed)
197
+ const filesToProcess = diff
198
+ ? files.filter((f) => {
199
+ const rel = path.relative(this.rootPath, f).replace(/\\/g, '/');
200
+ return diff.added.includes(rel) || diff.changed.includes(rel);
201
+ })
202
+ : files;
132
203
  // Phase 2: Analyzing & Parsing
133
- this.updateProgress("analyzing", 0);
204
+ // Intelligence tracking (patterns, libraries, import graph) runs on ALL files
205
+ // but embedding only runs on filesToProcess
206
+ this.updateProgress('analyzing', 0);
134
207
  const allChunks = [];
208
+ const changedChunks = []; // Only chunks from added/changed files
135
209
  const libraryTracker = new LibraryUsageTracker();
136
210
  const patternDetector = new PatternDetector();
137
211
  const importGraph = new ImportGraph();
138
212
  const internalFileGraph = new InternalFileGraph(this.rootPath);
139
213
  // Fetch git commit dates for pattern momentum analysis
140
214
  const fileDates = await getFileCommitDates(this.rootPath);
215
+ // When incremental, track which files need embedding
216
+ const filesToProcessSet = diff ? new Set(filesToProcess.map((f) => f)) : null;
141
217
  for (let i = 0; i < files.length; i++) {
142
218
  const file = files[i];
143
219
  this.progress.currentFile = file;
@@ -145,13 +221,17 @@ export class CodebaseIndexer {
145
221
  this.progress.percentage = Math.round(((i + 1) / files.length) * 100);
146
222
  try {
147
223
  // Normalize line endings to \n for consistent cross-platform output
148
- const rawContent = await fs.readFile(file, "utf-8");
149
- const content = rawContent.replace(/\r\n/g, "\n");
224
+ const rawContent = await fs.readFile(file, 'utf-8');
225
+ const content = rawContent.replace(/\r\n/g, '\n');
150
226
  const result = await analyzerRegistry.analyzeFile(file, content);
151
227
  if (result) {
228
+ const isFileChanged = !filesToProcessSet || filesToProcessSet.has(file);
152
229
  allChunks.push(...result.chunks);
230
+ if (isFileChanged) {
231
+ changedChunks.push(...result.chunks);
232
+ }
153
233
  stats.indexedFiles++;
154
- stats.totalLines += content.split("\n").length;
234
+ stats.totalLines += content.split('\n').length;
155
235
  // Track library usage AND import graph from imports
156
236
  for (const imp of result.imports) {
157
237
  libraryTracker.track(imp.source, file);
@@ -176,9 +256,9 @@ export class CodebaseIndexer {
176
256
  }
177
257
  // Track exports for unused export detection
178
258
  if (result.exports && result.exports.length > 0) {
179
- const fileExports = result.exports.map(exp => ({
259
+ const fileExports = result.exports.map((exp) => ({
180
260
  name: exp.name,
181
- type: exp.isDefault ? 'default' : exp.type || 'other',
261
+ type: exp.isDefault ? 'default' : exp.type || 'other'
182
262
  }));
183
263
  internalFileGraph.trackExports(file, fileExports);
184
264
  }
@@ -214,12 +294,12 @@ export class CodebaseIndexer {
214
294
  // Track file for Golden File scoring (framework-agnostic based on patterns)
215
295
  const detectedPatterns = result.metadata?.detectedPatterns || [];
216
296
  const hasPattern = (category, name) => detectedPatterns.some((p) => p.category === category && p.name === name);
217
- const patternScore = ((hasPattern('dependencyInjection', 'inject() function') ? 1 : 0) +
297
+ const patternScore = (hasPattern('dependencyInjection', 'inject() function') ? 1 : 0) +
218
298
  (hasPattern('stateManagement', 'Signals') ? 1 : 0) +
219
299
  (hasPattern('reactivity', 'Computed') ? 1 : 0) +
220
300
  (hasPattern('reactivity', 'Effect') ? 1 : 0) +
221
301
  (hasPattern('componentStyle', 'Standalone') ? 1 : 0) +
222
- (hasPattern('componentInputs', 'Signal-based inputs') ? 1 : 0));
302
+ (hasPattern('componentInputs', 'Signal-based inputs') ? 1 : 0);
223
303
  if (patternScore >= 3) {
224
304
  patternDetector.trackGoldenFile(relPath, patternScore, {
225
305
  inject: hasPattern('dependencyInjection', 'inject() function'),
@@ -227,7 +307,7 @@ export class CodebaseIndexer {
227
307
  computed: hasPattern('reactivity', 'Computed'),
228
308
  effect: hasPattern('reactivity', 'Effect'),
229
309
  standalone: hasPattern('componentStyle', 'Standalone'),
230
- signalInputs: hasPattern('componentInputs', 'Signal-based inputs'),
310
+ signalInputs: hasPattern('componentInputs', 'Signal-based inputs')
231
311
  });
232
312
  }
233
313
  // Update component statistics
@@ -250,8 +330,8 @@ export class CodebaseIndexer {
250
330
  stats.errors.push({
251
331
  filePath: file,
252
332
  error: error instanceof Error ? error.message : String(error),
253
- phase: "analyzing",
254
- timestamp: new Date(),
333
+ phase: 'analyzing',
334
+ timestamp: new Date()
255
335
  });
256
336
  }
257
337
  if (this.onProgressCallback) {
@@ -261,21 +341,24 @@ export class CodebaseIndexer {
261
341
  stats.totalChunks = allChunks.length;
262
342
  stats.avgChunkSize =
263
343
  allChunks.length > 0
264
- ? Math.round(allChunks.reduce((sum, c) => sum + c.content.length, 0) /
265
- allChunks.length)
344
+ ? Math.round(allChunks.reduce((sum, c) => sum + c.content.length, 0) / allChunks.length)
266
345
  : 0;
346
+ // Determine which chunks to embed: in incremental mode, only changed/added file chunks
347
+ const chunksForEmbedding = diff ? changedChunks : allChunks;
267
348
  // Memory safety: limit chunks to prevent embedding memory issues
268
349
  const MAX_CHUNKS = 5000;
269
- let chunksToEmbed = allChunks;
270
- if (allChunks.length > MAX_CHUNKS) {
271
- console.warn(`WARNING: ${allChunks.length} chunks exceed limit. Indexing first ${MAX_CHUNKS} chunks.`);
272
- chunksToEmbed = allChunks.slice(0, MAX_CHUNKS);
350
+ let chunksToEmbed = chunksForEmbedding;
351
+ if (chunksForEmbedding.length > MAX_CHUNKS) {
352
+ console.warn(`WARNING: ${chunksForEmbedding.length} chunks exceed limit. Indexing first ${MAX_CHUNKS} chunks.`);
353
+ chunksToEmbed = chunksForEmbedding.slice(0, MAX_CHUNKS);
273
354
  }
274
- // Phase 3: Embedding
275
- let chunksWithEmbeddings = [];
276
- if (!this.config.skipEmbedding) {
277
- this.updateProgress("embedding", 50);
278
- console.error(`Creating embeddings for ${chunksToEmbed.length} chunks...`);
355
+ // Phase 3: Embedding (only changed/added chunks in incremental mode)
356
+ const chunksWithEmbeddings = [];
357
+ if (!this.config.skipEmbedding && chunksToEmbed.length > 0) {
358
+ this.updateProgress('embedding', 50);
359
+ console.error(`Creating embeddings for ${chunksToEmbed.length} chunks` +
360
+ (diff ? ` (${allChunks.length} total, ${chunksToEmbed.length} changed)` : '') +
361
+ '...');
279
362
  // Initialize embedding provider
280
363
  const embeddingProvider = await getEmbeddingProvider(this.config.embedding);
281
364
  // Generate embeddings for all chunks
@@ -291,56 +374,77 @@ export class CodebaseIndexer {
291
374
  if (chunk.componentType) {
292
375
  parts.unshift(`Type: ${chunk.componentType}`);
293
376
  }
294
- return parts.join("\n");
377
+ return parts.join('\n');
295
378
  });
296
379
  const embeddings = await embeddingProvider.embedBatch(texts);
297
380
  for (let j = 0; j < batch.length; j++) {
298
381
  chunksWithEmbeddings.push({
299
382
  ...batch[j],
300
- embedding: embeddings[j],
383
+ embedding: embeddings[j]
301
384
  });
302
385
  }
303
386
  // Update progress
304
387
  const embeddingProgress = 50 + Math.round((i / chunksToEmbed.length) * 25);
305
- this.updateProgress("embedding", embeddingProgress);
306
- if ((i + batchSize) % 100 === 0 ||
307
- i + batchSize >= chunksToEmbed.length) {
388
+ this.updateProgress('embedding', embeddingProgress);
389
+ if ((i + batchSize) % 100 === 0 || i + batchSize >= chunksToEmbed.length) {
308
390
  console.error(`Embedded ${Math.min(i + batchSize, chunksToEmbed.length)}/${chunksToEmbed.length} chunks`);
309
391
  }
310
392
  }
311
393
  }
312
- else {
313
- console.error("Skipping embedding generation (skipEmbedding=true)");
394
+ else if (this.config.skipEmbedding) {
395
+ console.error('Skipping embedding generation (skipEmbedding=true)');
396
+ }
397
+ else if (chunksToEmbed.length === 0 && diff) {
398
+ console.error('No chunks to embed (all unchanged)');
314
399
  }
315
400
  // Phase 4: Storing
316
- this.updateProgress("storing", 75);
401
+ this.updateProgress('storing', 75);
402
+ await fs.mkdir(contextDir, { recursive: true });
317
403
  if (!this.config.skipEmbedding) {
318
- console.error(`Storing ${chunksToEmbed.length} chunks...`);
319
- // Store in LanceDB for vector search
320
- const storagePath = path.join(this.rootPath, ".codebase-index");
404
+ const storagePath = path.join(contextDir, VECTOR_DB_DIRNAME);
321
405
  const storageProvider = await getStorageProvider({ path: storagePath });
322
- await storageProvider.clear(); // Clear existing index
323
- await storageProvider.store(chunksWithEmbeddings);
406
+ if (diff) {
407
+ // Incremental: delete old chunks for changed + deleted files, then add new
408
+ const filesToDelete = [...diff.changed, ...diff.deleted].map((rel) => path.join(this.rootPath, rel).replace(/\\/g, '/'));
409
+ // Also try with OS-native separators for matching
410
+ const filePathsForDelete = [...diff.changed, ...diff.deleted].map((rel) => path.resolve(this.rootPath, rel));
411
+ const allDeletePaths = [...new Set([...filesToDelete, ...filePathsForDelete])];
412
+ if (allDeletePaths.length > 0) {
413
+ await storageProvider.deleteByFilePaths(allDeletePaths);
414
+ }
415
+ if (chunksWithEmbeddings.length > 0) {
416
+ await storageProvider.store(chunksWithEmbeddings);
417
+ }
418
+ console.error(`Incremental store: deleted chunks for ${diff.changed.length + diff.deleted.length} files, ` +
419
+ `added ${chunksWithEmbeddings.length} new chunks`);
420
+ }
421
+ else {
422
+ // Full: clear and re-store everything
423
+ console.error(`Storing ${chunksToEmbed.length} chunks...`);
424
+ await storageProvider.clear();
425
+ await storageProvider.store(chunksWithEmbeddings);
426
+ }
324
427
  }
325
- // Also save JSON for keyword search (Fuse.js) - use chunksToEmbed for consistency
326
- const indexPath = path.join(this.rootPath, ".codebase-index.json");
327
- // Write without pretty-printing to save memory
328
- await fs.writeFile(indexPath, JSON.stringify(chunksToEmbed));
329
- // Save library usage and pattern stats
330
- const intelligencePath = path.join(this.rootPath, ".codebase-intelligence.json");
428
+ // Keyword index always uses ALL chunks (full regen)
429
+ const indexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
430
+ // Memory safety: cap keyword index too
431
+ const keywordChunks = allChunks.length > MAX_CHUNKS ? allChunks.slice(0, MAX_CHUNKS) : allChunks;
432
+ await fs.writeFile(indexPath, JSON.stringify(keywordChunks));
433
+ // Save library usage and pattern stats (always full regen)
434
+ const intelligencePath = path.join(contextDir, INTELLIGENCE_FILENAME);
331
435
  const libraryStats = libraryTracker.getStats();
332
436
  // Extract tsconfig paths for AI to understand import aliases
333
437
  let tsconfigPaths;
334
438
  try {
335
- const tsconfigPath = path.join(this.rootPath, "tsconfig.json");
336
- const tsconfigContent = await fs.readFile(tsconfigPath, "utf-8");
439
+ const tsconfigPath = path.join(this.rootPath, 'tsconfig.json');
440
+ const tsconfigContent = await fs.readFile(tsconfigPath, 'utf-8');
337
441
  const tsconfig = JSON.parse(tsconfigContent);
338
442
  if (tsconfig.compilerOptions?.paths) {
339
443
  tsconfigPaths = tsconfig.compilerOptions.paths;
340
444
  console.error(`Found ${Object.keys(tsconfigPaths).length} path aliases in tsconfig.json`);
341
445
  }
342
446
  }
343
- catch (error) {
447
+ catch (_error) {
344
448
  // No tsconfig.json or no paths defined
345
449
  }
346
450
  const intelligence = {
@@ -352,57 +456,84 @@ export class CodebaseIndexer {
352
456
  tsconfigPaths,
353
457
  importGraph: {
354
458
  usages: importGraph.getAllUsages(),
355
- topUsed: importGraph.getTopUsed(30),
459
+ topUsed: importGraph.getTopUsed(30)
356
460
  },
357
461
  // Internal file graph for circular dependency and unused export detection
358
462
  internalFileGraph: internalFileGraph.toJSON(),
359
- generatedAt: new Date().toISOString(),
463
+ generatedAt: new Date().toISOString()
360
464
  };
361
465
  await fs.writeFile(intelligencePath, JSON.stringify(intelligence, null, 2));
466
+ // Write manifest (both full and incremental)
467
+ const manifest = {
468
+ version: 1,
469
+ generatedAt: new Date().toISOString(),
470
+ files: currentHashes ?? (await computeFileHashes(files, this.rootPath))
471
+ };
472
+ await writeManifest(manifestPath, manifest);
473
+ const persistedStats = {
474
+ indexedFiles: stats.indexedFiles,
475
+ totalChunks: stats.totalChunks,
476
+ totalFiles: stats.totalFiles,
477
+ generatedAt: new Date().toISOString()
478
+ };
479
+ await fs.writeFile(indexingStatsPath, JSON.stringify(persistedStats, null, 2));
362
480
  // Phase 5: Complete
363
- this.updateProgress("complete", 100);
481
+ this.updateProgress('complete', 100);
364
482
  stats.duration = Date.now() - startTime;
365
483
  stats.completedAt = new Date();
366
- console.error(`Indexing complete in ${stats.duration}ms`);
367
- console.error(`Indexed ${stats.indexedFiles} files, ${stats.totalChunks} chunks`);
484
+ if (diff) {
485
+ console.error(`Incremental indexing complete in ${stats.duration}ms ` +
486
+ `(${diff.added.length} added, ${diff.changed.length} changed, ` +
487
+ `${diff.deleted.length} deleted, ${diff.unchanged.length} unchanged)`);
488
+ }
489
+ else {
490
+ console.error(`Indexing complete in ${stats.duration}ms`);
491
+ console.error(`Indexed ${stats.indexedFiles} files, ${stats.totalChunks} chunks`);
492
+ }
368
493
  return stats;
369
494
  }
370
495
  catch (error) {
371
- this.progress.phase = "error";
496
+ this.progress.phase = 'error';
372
497
  stats.errors.push({
373
498
  filePath: this.rootPath,
374
499
  error: error instanceof Error ? error.message : String(error),
375
500
  phase: this.progress.phase,
376
- timestamp: new Date(),
501
+ timestamp: new Date()
377
502
  });
378
503
  throw error;
379
504
  }
380
505
  }
381
506
  async scanFiles() {
382
507
  const files = [];
508
+ const seen = new Set();
383
509
  // Read .gitignore if respecting it
384
510
  let ig = null;
385
511
  if (this.config.respectGitignore) {
386
512
  try {
387
- const gitignorePath = path.join(this.rootPath, ".gitignore");
388
- const gitignoreContent = await fs.readFile(gitignorePath, "utf-8");
513
+ const gitignorePath = path.join(this.rootPath, '.gitignore');
514
+ const gitignoreContent = await fs.readFile(gitignorePath, 'utf-8');
389
515
  ig = ignore.default().add(gitignoreContent);
390
516
  }
391
- catch (error) {
517
+ catch (_error) {
392
518
  // No .gitignore or couldn't read it
393
519
  }
394
520
  }
395
521
  // Scan with glob
396
- const includePatterns = this.config.include || ["**/*"];
522
+ const includePatterns = this.config.include || ['**/*'];
397
523
  const excludePatterns = this.config.exclude || [];
398
524
  for (const pattern of includePatterns) {
399
525
  const matches = await glob(pattern, {
400
526
  cwd: this.rootPath,
401
527
  absolute: true,
402
528
  ignore: excludePatterns,
403
- nodir: true,
529
+ nodir: true
404
530
  });
405
531
  for (const file of matches) {
532
+ const normalizedFile = file.replace(/\\/g, '/');
533
+ if (seen.has(normalizedFile)) {
534
+ continue;
535
+ }
536
+ seen.add(normalizedFile);
406
537
  const relativePath = path.relative(this.rootPath, file);
407
538
  // Check gitignore
408
539
  if (ig && ig.ignores(relativePath)) {
@@ -420,7 +551,7 @@ export class CodebaseIndexer {
420
551
  continue;
421
552
  }
422
553
  }
423
- catch (error) {
554
+ catch (_error) {
424
555
  continue;
425
556
  }
426
557
  files.push(file);
@@ -436,76 +567,147 @@ export class CodebaseIndexer {
436
567
  }
437
568
  }
438
569
  async detectMetadata() {
439
- // Try to use the most specific analyzer for metadata detection
440
- const primaryAnalyzer = analyzerRegistry.getAll()[0]; // Highest priority
441
- let metadata;
442
- if (primaryAnalyzer) {
443
- metadata = await primaryAnalyzer.detectCodebaseMetadata(this.rootPath);
444
- }
445
- else {
446
- // Fallback metadata
447
- metadata = {
448
- name: path.basename(this.rootPath),
449
- rootPath: this.rootPath,
450
- languages: [],
451
- dependencies: [],
452
- architecture: {
453
- type: "mixed",
454
- layers: {
455
- presentation: 0,
456
- business: 0,
457
- data: 0,
458
- state: 0,
459
- core: 0,
460
- shared: 0,
461
- feature: 0,
462
- infrastructure: 0,
463
- unknown: 0,
464
- },
465
- patterns: [],
466
- },
467
- styleGuides: [],
468
- documentation: [],
469
- projectStructure: {
470
- type: "single-app",
471
- },
472
- statistics: {
473
- totalFiles: 0,
474
- totalLines: 0,
475
- totalComponents: 0,
476
- componentsByType: {},
477
- componentsByLayer: {
478
- presentation: 0,
479
- business: 0,
480
- data: 0,
481
- state: 0,
482
- core: 0,
483
- shared: 0,
484
- feature: 0,
485
- infrastructure: 0,
486
- unknown: 0,
487
- },
570
+ // Get all registered analyzers (sorted by priority, highest first)
571
+ const analyzers = analyzerRegistry.getAll();
572
+ // Start with base metadata template
573
+ let metadata = {
574
+ name: path.basename(this.rootPath),
575
+ rootPath: this.rootPath,
576
+ languages: [],
577
+ dependencies: [],
578
+ architecture: {
579
+ type: 'mixed',
580
+ layers: {
581
+ presentation: 0,
582
+ business: 0,
583
+ data: 0,
584
+ state: 0,
585
+ core: 0,
586
+ shared: 0,
587
+ feature: 0,
588
+ infrastructure: 0,
589
+ unknown: 0
488
590
  },
489
- customMetadata: {},
490
- };
591
+ patterns: []
592
+ },
593
+ styleGuides: [],
594
+ documentation: [],
595
+ projectStructure: {
596
+ type: 'single-app'
597
+ },
598
+ statistics: {
599
+ totalFiles: 0,
600
+ totalLines: 0,
601
+ totalComponents: 0,
602
+ componentsByType: {},
603
+ componentsByLayer: {
604
+ presentation: 0,
605
+ business: 0,
606
+ data: 0,
607
+ state: 0,
608
+ core: 0,
609
+ shared: 0,
610
+ feature: 0,
611
+ infrastructure: 0,
612
+ unknown: 0
613
+ }
614
+ },
615
+ customMetadata: {}
616
+ };
617
+ // Loop through all analyzers (highest priority first) and merge their metadata
618
+ // Higher priority analyzers' values win on conflicts
619
+ for (const analyzer of analyzers) {
620
+ try {
621
+ const analyzerMeta = await analyzer.detectCodebaseMetadata(this.rootPath);
622
+ metadata = this.mergeMetadata(metadata, analyzerMeta);
623
+ }
624
+ catch (error) {
625
+ // Analyzer failed, continue with next
626
+ console.warn(`Analyzer ${analyzer.name} failed to detect metadata:`, error);
627
+ }
491
628
  }
492
629
  // Load intelligence data if available
493
630
  try {
494
- const intelligencePath = path.join(this.rootPath, ".codebase-intelligence.json");
495
- const intelligenceContent = await fs.readFile(intelligencePath, "utf-8");
631
+ const intelligencePath = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME);
632
+ const intelligenceContent = await fs.readFile(intelligencePath, 'utf-8');
496
633
  const intelligence = JSON.parse(intelligenceContent);
497
634
  metadata.customMetadata = {
498
635
  ...metadata.customMetadata,
499
636
  libraryUsage: intelligence.libraryUsage,
500
637
  patterns: intelligence.patterns,
501
- intelligenceGeneratedAt: intelligence.generatedAt,
638
+ intelligenceGeneratedAt: intelligence.generatedAt
502
639
  };
503
640
  }
504
- catch (error) {
641
+ catch (_error) {
505
642
  // Intelligence file doesn't exist yet (indexing not run)
506
643
  }
507
644
  return metadata;
508
645
  }
646
+ /**
647
+ * Merge two CodebaseMetadata objects.
648
+ * The 'incoming' metadata takes precedence for non-empty values.
649
+ */
650
+ mergeMetadata(base, incoming) {
651
+ return {
652
+ name: incoming.name || base.name,
653
+ rootPath: incoming.rootPath || base.rootPath,
654
+ languages: [...new Set([...base.languages, ...incoming.languages])], // Merge and deduplicate
655
+ dependencies: this.mergeDependencies(base.dependencies, incoming.dependencies),
656
+ framework: incoming.framework || base.framework, // Framework from higher priority analyzer wins
657
+ architecture: {
658
+ type: incoming.architecture?.type || base.architecture.type,
659
+ layers: this.mergeLayers(base.architecture.layers, incoming.architecture?.layers),
660
+ patterns: [
661
+ ...new Set([
662
+ ...(base.architecture.patterns || []),
663
+ ...(incoming.architecture?.patterns || [])
664
+ ])
665
+ ] // Merge and deduplicate
666
+ },
667
+ styleGuides: [...new Set([...base.styleGuides, ...incoming.styleGuides])], // Merge and deduplicate
668
+ documentation: [...new Set([...base.documentation, ...incoming.documentation])], // Merge and deduplicate
669
+ projectStructure: incoming.projectStructure?.type !== 'single-app'
670
+ ? incoming.projectStructure
671
+ : base.projectStructure,
672
+ statistics: this.mergeStatistics(base.statistics, incoming.statistics),
673
+ customMetadata: { ...base.customMetadata, ...incoming.customMetadata }
674
+ };
675
+ }
676
+ mergeDependencies(base, incoming) {
677
+ const seen = new Set(base.map((d) => d.name));
678
+ const result = [...base];
679
+ for (const dep of incoming) {
680
+ if (!seen.has(dep.name)) {
681
+ result.push(dep);
682
+ seen.add(dep.name);
683
+ }
684
+ }
685
+ return result;
686
+ }
687
+ mergeLayers(base, incoming) {
688
+ if (!incoming)
689
+ return base;
690
+ return {
691
+ presentation: Math.max(base.presentation || 0, incoming.presentation || 0),
692
+ business: Math.max(base.business || 0, incoming.business || 0),
693
+ data: Math.max(base.data || 0, incoming.data || 0),
694
+ state: Math.max(base.state || 0, incoming.state || 0),
695
+ core: Math.max(base.core || 0, incoming.core || 0),
696
+ shared: Math.max(base.shared || 0, incoming.shared || 0),
697
+ feature: Math.max(base.feature || 0, incoming.feature || 0),
698
+ infrastructure: Math.max(base.infrastructure || 0, incoming.infrastructure || 0),
699
+ unknown: Math.max(base.unknown || 0, incoming.unknown || 0)
700
+ };
701
+ }
702
+ mergeStatistics(base, incoming) {
703
+ return {
704
+ totalFiles: Math.max(base.totalFiles || 0, incoming.totalFiles || 0),
705
+ totalLines: Math.max(base.totalLines || 0, incoming.totalLines || 0),
706
+ totalComponents: Math.max(base.totalComponents || 0, incoming.totalComponents || 0),
707
+ componentsByType: { ...base.componentsByType, ...incoming.componentsByType },
708
+ componentsByLayer: this.mergeLayers(base.componentsByLayer, incoming.componentsByLayer)
709
+ };
710
+ }
509
711
  /**
510
712
  * Get regex pattern for extracting code snippets based on pattern category and name
511
713
  * This maps abstract pattern names to actual code patterns
@@ -514,24 +716,24 @@ export class CodebaseIndexer {
514
716
  const patterns = {
515
717
  dependencyInjection: {
516
718
  'inject() function': /\binject\s*[<(]/,
517
- 'Constructor injection': /constructor\s*\(/,
719
+ 'Constructor injection': /constructor\s*\(/
518
720
  },
519
721
  stateManagement: {
520
- 'RxJS': /BehaviorSubject|ReplaySubject|Subject|Observable/,
521
- 'Signals': /\bsignal\s*[<(]/,
722
+ RxJS: /BehaviorSubject|ReplaySubject|Subject|Observable/,
723
+ Signals: /\bsignal\s*[<(]/
522
724
  },
523
725
  reactivity: {
524
- 'Effect': /\beffect\s*\(/,
525
- 'Computed': /\bcomputed\s*[<(]/,
726
+ Effect: /\beffect\s*\(/,
727
+ Computed: /\bcomputed\s*[<(]/
526
728
  },
527
729
  componentStyle: {
528
- 'Standalone': /standalone\s*:\s*true/,
529
- 'NgModule-based': /@(?:Component|Directive|Pipe)\s*\(/,
730
+ Standalone: /standalone\s*:\s*true/,
731
+ 'NgModule-based': /@(?:Component|Directive|Pipe)\s*\(/
530
732
  },
531
733
  componentInputs: {
532
734
  'Signal-based inputs': /\binput\s*[<(]/,
533
- 'Decorator-based @Input': /@Input\(\)/,
534
- },
735
+ 'Decorator-based @Input': /@Input\(\)/
736
+ }
535
737
  };
536
738
  return patterns[category]?.[name] || null;
537
739
  }