codebase-context 1.6.2 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +417 -282
- package/dist/analyzers/angular/index.d.ts.map +1 -1
- package/dist/analyzers/angular/index.js +91 -40
- package/dist/analyzers/angular/index.js.map +1 -1
- package/dist/analyzers/generic/index.d.ts +1 -0
- package/dist/analyzers/generic/index.d.ts.map +1 -1
- package/dist/analyzers/generic/index.js +94 -14
- package/dist/analyzers/generic/index.js.map +1 -1
- package/dist/cli-formatters.d.ts +47 -0
- package/dist/cli-formatters.d.ts.map +1 -0
- package/dist/cli-formatters.js +803 -0
- package/dist/cli-formatters.js.map +1 -0
- package/dist/cli-memory.d.ts +5 -0
- package/dist/cli-memory.d.ts.map +1 -0
- package/dist/cli-memory.js +218 -0
- package/dist/cli-memory.js.map +1 -0
- package/dist/cli.d.ts +3 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +317 -88
- package/dist/cli.js.map +1 -1
- package/dist/constants/codebase-context.d.ts +13 -0
- package/dist/constants/codebase-context.d.ts.map +1 -1
- package/dist/constants/codebase-context.js +13 -0
- package/dist/constants/codebase-context.js.map +1 -1
- package/dist/core/auto-refresh.d.ts +16 -0
- package/dist/core/auto-refresh.d.ts.map +1 -0
- package/dist/core/auto-refresh.js +25 -0
- package/dist/core/auto-refresh.js.map +1 -0
- package/dist/core/file-watcher.d.ts +15 -0
- package/dist/core/file-watcher.d.ts.map +1 -0
- package/dist/core/file-watcher.js +59 -0
- package/dist/core/file-watcher.js.map +1 -0
- package/dist/core/index-meta.d.ts +27 -0
- package/dist/core/index-meta.d.ts.map +1 -0
- package/dist/core/index-meta.js +212 -0
- package/dist/core/index-meta.js.map +1 -0
- package/dist/core/indexer.d.ts.map +1 -1
- package/dist/core/indexer.js +324 -26
- package/dist/core/indexer.js.map +1 -1
- package/dist/core/reranker.d.ts.map +1 -1
- package/dist/core/reranker.js +3 -0
- package/dist/core/reranker.js.map +1 -1
- package/dist/core/search-quality.js +2 -2
- package/dist/core/search-quality.js.map +1 -1
- package/dist/core/search.d.ts +1 -0
- package/dist/core/search.d.ts.map +1 -1
- package/dist/core/search.js +79 -11
- package/dist/core/search.js.map +1 -1
- package/dist/core/symbol-references.d.ts +20 -0
- package/dist/core/symbol-references.d.ts.map +1 -0
- package/dist/core/symbol-references.js +186 -0
- package/dist/core/symbol-references.js.map +1 -0
- package/dist/embeddings/index.d.ts +8 -0
- package/dist/embeddings/index.d.ts.map +1 -1
- package/dist/embeddings/index.js +17 -2
- package/dist/embeddings/index.js.map +1 -1
- package/dist/embeddings/openai.d.ts +1 -1
- package/dist/embeddings/openai.d.ts.map +1 -1
- package/dist/embeddings/openai.js +3 -1
- package/dist/embeddings/openai.js.map +1 -1
- package/dist/embeddings/transformers.d.ts +6 -0
- package/dist/embeddings/transformers.d.ts.map +1 -1
- package/dist/embeddings/transformers.js +12 -5
- package/dist/embeddings/transformers.js.map +1 -1
- package/dist/embeddings/types.d.ts +1 -0
- package/dist/embeddings/types.d.ts.map +1 -1
- package/dist/embeddings/types.js +7 -1
- package/dist/embeddings/types.js.map +1 -1
- package/dist/eval/harness.d.ts +5 -0
- package/dist/eval/harness.d.ts.map +1 -0
- package/dist/eval/harness.js +153 -0
- package/dist/eval/harness.js.map +1 -0
- package/dist/eval/types.d.ts +59 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +2 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/grammars/manifest.d.ts +26 -0
- package/dist/grammars/manifest.d.ts.map +1 -0
- package/dist/grammars/manifest.js +64 -0
- package/dist/grammars/manifest.js.map +1 -0
- package/dist/index.d.ts +16 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +181 -1300
- package/dist/index.js.map +1 -1
- package/dist/patterns/semantics.d.ts +2 -1
- package/dist/patterns/semantics.d.ts.map +1 -1
- package/dist/patterns/semantics.js +0 -2
- package/dist/patterns/semantics.js.map +1 -1
- package/dist/preflight/evidence-lock.d.ts +6 -0
- package/dist/preflight/evidence-lock.d.ts.map +1 -1
- package/dist/preflight/evidence-lock.js +33 -1
- package/dist/preflight/evidence-lock.js.map +1 -1
- package/dist/storage/index.d.ts +4 -1
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +2 -2
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/lancedb.d.ts +11 -1
- package/dist/storage/lancedb.d.ts.map +1 -1
- package/dist/storage/lancedb.js +45 -11
- package/dist/storage/lancedb.js.map +1 -1
- package/dist/storage/types.d.ts +4 -1
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/types.js.map +1 -1
- package/dist/tools/detect-circular-dependencies.d.ts +5 -0
- package/dist/tools/detect-circular-dependencies.d.ts.map +1 -0
- package/dist/tools/detect-circular-dependencies.js +117 -0
- package/dist/tools/detect-circular-dependencies.js.map +1 -0
- package/dist/tools/get-codebase-metadata.d.ts +5 -0
- package/dist/tools/get-codebase-metadata.d.ts.map +1 -0
- package/dist/tools/get-codebase-metadata.js +53 -0
- package/dist/tools/get-codebase-metadata.js.map +1 -0
- package/dist/tools/get-indexing-status.d.ts +5 -0
- package/dist/tools/get-indexing-status.d.ts.map +1 -0
- package/dist/tools/get-indexing-status.js +44 -0
- package/dist/tools/get-indexing-status.js.map +1 -0
- package/dist/tools/get-memory.d.ts +5 -0
- package/dist/tools/get-memory.d.ts.map +1 -0
- package/dist/tools/get-memory.js +89 -0
- package/dist/tools/get-memory.js.map +1 -0
- package/dist/tools/get-style-guide.d.ts +5 -0
- package/dist/tools/get-style-guide.d.ts.map +1 -0
- package/dist/tools/get-style-guide.js +151 -0
- package/dist/tools/get-style-guide.js.map +1 -0
- package/dist/tools/get-symbol-references.d.ts +5 -0
- package/dist/tools/get-symbol-references.d.ts.map +1 -0
- package/dist/tools/get-symbol-references.js +70 -0
- package/dist/tools/get-symbol-references.js.map +1 -0
- package/dist/tools/get-team-patterns.d.ts +5 -0
- package/dist/tools/get-team-patterns.d.ts.map +1 -0
- package/dist/tools/get-team-patterns.js +147 -0
- package/dist/tools/get-team-patterns.js.map +1 -0
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +41 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/refresh-index.d.ts +5 -0
- package/dist/tools/refresh-index.d.ts.map +1 -0
- package/dist/tools/refresh-index.js +40 -0
- package/dist/tools/refresh-index.js.map +1 -0
- package/dist/tools/remember.d.ts +5 -0
- package/dist/tools/remember.d.ts.map +1 -0
- package/dist/tools/remember.js +101 -0
- package/dist/tools/remember.js.map +1 -0
- package/dist/tools/search-codebase.d.ts +5 -0
- package/dist/tools/search-codebase.d.ts.map +1 -0
- package/dist/tools/search-codebase.js +745 -0
- package/dist/tools/search-codebase.js.map +1 -0
- package/dist/tools/types.d.ts +223 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/dist/tools/types.js +2 -0
- package/dist/tools/types.js.map +1 -0
- package/dist/types/index.d.ts +79 -11
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +0 -1
- package/dist/types/index.js.map +1 -1
- package/dist/utils/ast-chunker.d.ts +71 -0
- package/dist/utils/ast-chunker.d.ts.map +1 -0
- package/dist/utils/ast-chunker.js +453 -0
- package/dist/utils/ast-chunker.js.map +1 -0
- package/dist/utils/chunking.d.ts.map +1 -1
- package/dist/utils/chunking.js +10 -3
- package/dist/utils/chunking.js.map +1 -1
- package/dist/utils/language-detection.d.ts.map +1 -1
- package/dist/utils/language-detection.js +26 -1
- package/dist/utils/language-detection.js.map +1 -1
- package/dist/utils/tree-sitter.d.ts +28 -0
- package/dist/utils/tree-sitter.d.ts.map +1 -0
- package/dist/utils/tree-sitter.js +422 -0
- package/dist/utils/tree-sitter.js.map +1 -0
- package/dist/utils/usage-tracker.d.ts +30 -40
- package/dist/utils/usage-tracker.d.ts.map +1 -1
- package/dist/utils/usage-tracker.js +66 -8
- package/dist/utils/usage-tracker.js.map +1 -1
- package/docs/capabilities.md +183 -92
- package/docs/cli.md +196 -0
- package/grammars/.gitkeep +0 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +153 -157
package/dist/core/indexer.js
CHANGED
|
@@ -2,20 +2,153 @@
|
|
|
2
2
|
* Core Indexer - Orchestrates codebase indexing
|
|
3
3
|
* Scans files, delegates to analyzers, creates embeddings, stores in vector DB
|
|
4
4
|
*/
|
|
5
|
-
|
|
5
|
+
import { randomUUID } from 'crypto';
|
|
6
6
|
import { promises as fs } from 'fs';
|
|
7
7
|
import path from 'path';
|
|
8
8
|
import { glob } from 'glob';
|
|
9
9
|
import ignore from 'ignore';
|
|
10
10
|
import { analyzerRegistry } from './analyzer-registry.js';
|
|
11
11
|
import { isCodeFile, isBinaryFile } from '../utils/language-detection.js';
|
|
12
|
-
import { getEmbeddingProvider, DEFAULT_MODEL } from '../embeddings/index.js';
|
|
12
|
+
import { getEmbeddingProvider, getConfiguredDimensions, DEFAULT_MODEL, parseEmbeddingProviderName } from '../embeddings/index.js';
|
|
13
13
|
import { getStorageProvider } from '../storage/index.js';
|
|
14
14
|
import { LibraryUsageTracker, PatternDetector, ImportGraph, InternalFileGraph } from '../utils/usage-tracker.js';
|
|
15
15
|
import { mergeSmallChunks } from '../utils/chunking.js';
|
|
16
16
|
import { getFileCommitDates } from '../utils/git-dates.js';
|
|
17
|
-
import { CODEBASE_CONTEXT_DIRNAME, INDEXING_STATS_FILENAME, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
|
|
17
|
+
import { CODEBASE_CONTEXT_DIRNAME, INDEX_FORMAT_VERSION, INDEXING_STATS_FILENAME, INDEX_META_FILENAME, INDEX_META_VERSION, INTELLIGENCE_FILENAME, KEYWORD_INDEX_FILENAME, MANIFEST_FILENAME, RELATIONSHIPS_FILENAME, VECTOR_DB_DIRNAME } from '../constants/codebase-context.js';
|
|
18
|
+
const STAGING_DIRNAME = '.staging';
|
|
19
|
+
const PREVIOUS_DIRNAME = '.previous';
|
|
18
20
|
import { computeFileHashes, readManifest, writeManifest, diffManifest } from './manifest.js';
|
|
21
|
+
import { readIndexMeta, checkEmbeddingMismatch } from './index-meta.js';
|
|
22
|
+
// Process-wide memo of the resolved tool version; null until the first lookup completes.
let cachedToolVersion = null;
/**
 * Resolve the version of this package from its package.json.
 *
 * The file is located relative to this module (`../../package.json`) and read at most
 * until a value has been cached; subsequent calls return the cached string.
 *
 * @returns {Promise<string>} The trimmed `version` field, or `'unknown'` when the file
 *   cannot be read or parsed, or when `version` is missing, not a string, or blank.
 */
async function getToolVersion() {
    if (cachedToolVersion) {
        return cachedToolVersion;
    }
    let resolvedVersion = 'unknown';
    try {
        const pkgRaw = await fs.readFile(new URL('../../package.json', import.meta.url), 'utf-8');
        const pkg = JSON.parse(pkgRaw);
        if (typeof pkg.version === 'string') {
            // Cache the same trimmed value that is validated, so stray whitespace in
            // package.json never leaks into persisted index metadata.
            const trimmed = pkg.version.trim();
            if (trimmed) {
                resolvedVersion = trimmed;
            }
        }
    }
    catch {
        // Best-effort: an unreadable or malformed package.json falls back to 'unknown'.
    }
    cachedToolVersion = resolvedVersion;
    return cachedToolVersion;
}
|
|
40
|
+
/**
 * Promote a fully written staging build to the active index location.
 *
 * This is a sequence of renames rather than a single atomic operation, so the order is
 * chosen around the index meta file, which the indexer treats as authoritative and
 * writes last:
 *   1. Retire the active build into `.previous`, meta file FIRST.
 *   2. Promote the staging build into the active location, meta file LAST.
 *   3. Best-effort cleanup of `.previous` and the staging directories.
 * With this ordering an active meta file is only present while the artifacts it
 * describes are in place.
 *
 * If step 1 or step 2 fails, anything already promoted from staging is removed and the
 * retired build is restored from `.previous` (again meta last); the original error is
 * then rethrown. A failing rollback is logged but does not replace the original error.
 *
 * @param {string} contextDir Directory holding the active index artifacts.
 * @param {string} stagingDir Directory holding the freshly built artifacts.
 * @param {string} buildId Identifier of the build being promoted (used for logging only).
 * @returns {Promise<void>}
 * @throws Rethrows the filesystem error that interrupted the swap.
 */
async function atomicSwapStagingToActive(contextDir, stagingDir, buildId) {
    const previousDir = path.join(contextDir, PREVIOUS_DIRNAME);
    // Every file artifact except the meta file, which is always moved separately.
    const dataFileNames = [
        KEYWORD_INDEX_FILENAME,
        INTELLIGENCE_FILENAME,
        MANIFEST_FILENAME,
        INDEXING_STATS_FILENAME,
        RELATIONSHIPS_FILENAME
    ];
    // Rename a file; a missing source is fine. Resolves to true when something was moved.
    const moveIfExists = async (src, dest) => {
        try {
            await fs.rename(src, dest);
            return true;
        }
        catch (error) {
            if (error.code !== 'ENOENT') {
                throw error;
            }
            return false;
        }
    };
    // Rename a directory; a missing source or a non-directory is skipped.
    const moveDirIfExists = async (src, dest) => {
        try {
            const stat = await fs.stat(src);
            if (!stat.isDirectory()) {
                return false;
            }
            await fs.rename(src, dest);
            return true;
        }
        catch (error) {
            if (error.code !== 'ENOENT') {
                throw error;
            }
            return false;
        }
    };
    // Move all non-meta artifacts (files, then the vector DB directory), recording
    // every destination that was actually populated in `moved`.
    const moveData = async (fromDir, toDir, moved) => {
        for (const name of dataFileNames) {
            const dest = path.join(toDir, name);
            if (await moveIfExists(path.join(fromDir, name), dest)) {
                moved.push(dest);
            }
        }
        const vectorDest = path.join(toDir, VECTOR_DB_DIRNAME);
        if (await moveDirIfExists(path.join(fromDir, VECTOR_DB_DIRNAME), vectorDest)) {
            moved.push(vectorDest);
        }
    };
    const moveMeta = async (fromDir, toDir, moved) => {
        const dest = path.join(toDir, INDEX_META_FILENAME);
        if (await moveIfExists(path.join(fromDir, INDEX_META_FILENAME), dest)) {
            moved.push(dest);
        }
    };
    // Start from an empty .previous: leftovers from an earlier failed swap would make the
    // vector directory rename fail, because a directory cannot be renamed onto a
    // non-empty one. Nothing has been touched yet, so a failure here needs no rollback.
    await fs.rm(previousDir, { recursive: true, force: true });
    await fs.mkdir(previousDir, { recursive: true });
    // Active paths populated from staging; removed again if the swap has to roll back.
    const promoted = [];
    try {
        // Step 1: retire the active build. Meta goes first so readers stop trusting the
        // old build before its artifacts start disappearing.
        await moveMeta(contextDir, previousDir, []);
        await moveData(contextDir, previousDir, []);
        // Step 2: promote the staging build. Meta goes last so it never points at
        // artifacts that have not arrived yet.
        await moveData(stagingDir, contextDir, promoted);
        await moveMeta(stagingDir, contextDir, promoted);
    }
    catch (swapError) {
        console.error('Atomic swap failed, attempting rollback:', swapError);
        try {
            // Drop partially promoted artifacts so the restore below cannot collide with
            // them and the active location does not end up mixing two builds.
            for (const promotedPath of promoted) {
                await fs.rm(promotedPath, { recursive: true, force: true });
            }
            await moveData(previousDir, contextDir, []);
            await moveMeta(previousDir, contextDir, []);
            console.error('Rollback successful');
        }
        catch (rollbackError) {
            console.error('Rollback also failed:', rollbackError);
        }
        throw swapError;
    }
    // Step 3: cleanup. Kept outside the try block so a cleanup problem can never be
    // mistaken for a failed swap and trigger a rollback of a good build.
    await cleanupDirectory(previousDir);
    await cleanupDirectory(stagingDir);
    // Also remove the parent .staging/ directory once no other build is using it.
    const stagingBase = path.join(contextDir, STAGING_DIRNAME);
    try {
        const remaining = await fs.readdir(stagingBase);
        if (remaining.length === 0) {
            await fs.rmdir(stagingBase);
        }
    }
    catch {
        // Directory doesn't exist or not empty - ignore
    }
    console.error(`Atomic swap complete: build ${buildId} now active`);
}
|
|
141
|
+
/**
 * Best-effort removal of a directory and everything inside it.
 *
 * A missing directory is not an error (`force: true`). Transient failures are retried a
 * few times before giving up, because a file that was just written or is briefly held
 * open by another handle can make the first attempt fail. Any remaining failure is
 * deliberately ignored: callers use this for temporary staging/backup directories and
 * must not fail because of leftovers.
 *
 * @param {string} dirPath Directory to remove.
 * @returns {Promise<void>} Always resolves; never rejects.
 */
async function cleanupDirectory(dirPath) {
    try {
        await fs.rm(dirPath, {
            recursive: true,
            force: true,
            // Retried by Node on EBUSY, EMFILE, ENFILE, ENOTEMPTY and EPERM, with a
            // linear backoff of retryDelay milliseconds per attempt.
            maxRetries: 3,
            retryDelay: 100
        });
    }
    catch {
        // Best-effort: ignore cleanup failures
    }
}
|
|
19
152
|
export class CodebaseIndexer {
|
|
20
153
|
rootPath;
|
|
21
154
|
config;
|
|
@@ -38,6 +171,13 @@ export class CodebaseIndexer {
|
|
|
38
171
|
};
|
|
39
172
|
}
|
|
40
173
|
mergeConfig(userConfig) {
|
|
174
|
+
const defaultEmbeddingProvider = parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
|
|
175
|
+
// When provider=openai and EMBEDDING_MODEL is not set, DEFAULT_MODEL resolves to the
|
|
176
|
+
// transformers fallback (Xenova/bge-small-en-v1.5), which the OpenAI API rejects.
|
|
177
|
+
// Use a sane OpenAI default instead.
|
|
178
|
+
const defaultModel = defaultEmbeddingProvider === 'openai' && !process.env.EMBEDDING_MODEL
|
|
179
|
+
? 'text-embedding-3-small'
|
|
180
|
+
: DEFAULT_MODEL;
|
|
41
181
|
const defaultConfig = {
|
|
42
182
|
analyzers: {
|
|
43
183
|
angular: { enabled: true, priority: 100 },
|
|
@@ -45,8 +185,25 @@ export class CodebaseIndexer {
|
|
|
45
185
|
vue: { enabled: false, priority: 90 },
|
|
46
186
|
generic: { enabled: true, priority: 10 }
|
|
47
187
|
},
|
|
48
|
-
include: [
|
|
49
|
-
|
|
188
|
+
include: [
|
|
189
|
+
'**/*.{ts,tsx,js,jsx,mjs,cjs,mts,cts}',
|
|
190
|
+
'**/*.{html,htm,css,scss,sass,less}',
|
|
191
|
+
'**/*.{py,pyi,rb,php}',
|
|
192
|
+
'**/*.{java,kt,kts,scala,swift,cs}',
|
|
193
|
+
'**/*.{go,rs}',
|
|
194
|
+
'**/*.{c,cpp,cc,cxx,h,hpp}',
|
|
195
|
+
'**/*.{sh,bash,zsh,ps1}',
|
|
196
|
+
'**/*.{sql,graphql,gql}',
|
|
197
|
+
'**/*.{json,jsonc,yaml,yml,toml,xml}'
|
|
198
|
+
],
|
|
199
|
+
exclude: [
|
|
200
|
+
'node_modules/**',
|
|
201
|
+
'dist/**',
|
|
202
|
+
'build/**',
|
|
203
|
+
'.git/**',
|
|
204
|
+
'coverage/**',
|
|
205
|
+
'.codebase-context/**'
|
|
206
|
+
],
|
|
50
207
|
respectGitignore: true,
|
|
51
208
|
parsing: {
|
|
52
209
|
maxFileSize: 1048576,
|
|
@@ -66,8 +223,8 @@ export class CodebaseIndexer {
|
|
|
66
223
|
includeChangelogs: false
|
|
67
224
|
},
|
|
68
225
|
embedding: {
|
|
69
|
-
provider:
|
|
70
|
-
model:
|
|
226
|
+
provider: defaultEmbeddingProvider,
|
|
227
|
+
model: defaultModel,
|
|
71
228
|
batchSize: 32
|
|
72
229
|
},
|
|
73
230
|
skipEmbedding: false,
|
|
@@ -115,7 +272,17 @@ export class CodebaseIndexer {
|
|
|
115
272
|
errors: [],
|
|
116
273
|
startedAt: new Date()
|
|
117
274
|
};
|
|
275
|
+
let stagingDir = null;
|
|
118
276
|
try {
|
|
277
|
+
// Ensure there is at least a generic fallback analyzer registered when the indexer
|
|
278
|
+
// is used directly (e.g. in tests or standalone scripts).
|
|
279
|
+
if (analyzerRegistry.getAll().length === 0) {
|
|
280
|
+
const { GenericAnalyzer } = await import('../analyzers/generic/index.js');
|
|
281
|
+
analyzerRegistry.register(new GenericAnalyzer());
|
|
282
|
+
}
|
|
283
|
+
const buildId = randomUUID();
|
|
284
|
+
const generatedAt = new Date().toISOString();
|
|
285
|
+
const toolVersion = await getToolVersion();
|
|
119
286
|
// Phase 1: Scanning
|
|
120
287
|
this.updateProgress('scanning', 0);
|
|
121
288
|
let files = await this.scanFiles();
|
|
@@ -150,8 +317,26 @@ export class CodebaseIndexer {
|
|
|
150
317
|
deleted: diff.deleted.length,
|
|
151
318
|
unchanged: diff.unchanged.length
|
|
152
319
|
};
|
|
320
|
+
// Check for embedding provider/model mismatch — forces full rebuild to avoid
|
|
321
|
+
// silent vector dimension mismatch when switching providers or models.
|
|
322
|
+
try {
|
|
323
|
+
const existingMeta = await readIndexMeta(this.rootPath);
|
|
324
|
+
const currentProvider = this.config.embedding?.provider ?? 'transformers';
|
|
325
|
+
const currentModel = this.config.embedding?.model ?? DEFAULT_MODEL;
|
|
326
|
+
if (checkEmbeddingMismatch(existingMeta, currentProvider, currentModel)) {
|
|
327
|
+
const stored = existingMeta.artifacts.vectorDb;
|
|
328
|
+
console.error(`Embedding provider/model changed (stored: ${stored.embeddingProvider}:${stored.embeddingModel}, current: ${currentProvider}:${currentModel}) — forcing full rebuild`);
|
|
329
|
+
diff = null;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
catch {
|
|
333
|
+
// No meta yet or legacy index without embedding fields — proceed with incremental
|
|
334
|
+
}
|
|
153
335
|
// Short-circuit: nothing changed
|
|
154
|
-
if (diff
|
|
336
|
+
if (diff &&
|
|
337
|
+
diff.added.length === 0 &&
|
|
338
|
+
diff.changed.length === 0 &&
|
|
339
|
+
diff.deleted.length === 0) {
|
|
155
340
|
console.error('No files changed - skipping re-index.');
|
|
156
341
|
this.updateProgress('complete', 100);
|
|
157
342
|
stats.duration = Date.now() - startTime;
|
|
@@ -177,7 +362,13 @@ export class CodebaseIndexer {
|
|
|
177
362
|
}
|
|
178
363
|
try {
|
|
179
364
|
const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
|
|
180
|
-
const
|
|
365
|
+
const existing = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
|
|
366
|
+
const existingObj = existing;
|
|
367
|
+
const existingChunks = Array.isArray(existing)
|
|
368
|
+
? existing
|
|
369
|
+
: existingObj && Array.isArray(existingObj.chunks)
|
|
370
|
+
? existingObj.chunks
|
|
371
|
+
: null;
|
|
181
372
|
if (Array.isArray(existingChunks)) {
|
|
182
373
|
stats.totalChunks = existingChunks.length;
|
|
183
374
|
if (stats.indexedFiles === 0) {
|
|
@@ -253,7 +444,7 @@ export class CodebaseIndexer {
|
|
|
253
444
|
break;
|
|
254
445
|
}
|
|
255
446
|
}
|
|
256
|
-
internalFileGraph.trackImport(file, resolvedPath, imp.imports);
|
|
447
|
+
internalFileGraph.trackImport(file, resolvedPath, imp.line || 1, imp.imports);
|
|
257
448
|
}
|
|
258
449
|
}
|
|
259
450
|
// Track exports for unused export detection
|
|
@@ -285,7 +476,8 @@ export class CodebaseIndexer {
|
|
|
285
476
|
// GENERIC PATTERN FORWARDING
|
|
286
477
|
// Framework analyzers return detectedPatterns in metadata - we just forward them
|
|
287
478
|
// This keeps the indexer framework-agnostic
|
|
288
|
-
if (result.metadata?.detectedPatterns
|
|
479
|
+
if (result.metadata?.detectedPatterns &&
|
|
480
|
+
Array.isArray(result.metadata.detectedPatterns)) {
|
|
289
481
|
for (const pattern of result.metadata.detectedPatterns) {
|
|
290
482
|
// Try to extract a relevant snippet for the pattern
|
|
291
483
|
// Ask analyzer registry for snippet pattern (framework-agnostic delegation)
|
|
@@ -297,7 +489,10 @@ export class CodebaseIndexer {
|
|
|
297
489
|
}
|
|
298
490
|
// Track file for Golden File scoring (framework-agnostic)
|
|
299
491
|
// A golden file = file with patterns in ≥3 distinct categories
|
|
300
|
-
const
|
|
492
|
+
const rawPatterns = result.metadata?.detectedPatterns;
|
|
493
|
+
const detectedPatterns = Array.isArray(rawPatterns)
|
|
494
|
+
? rawPatterns
|
|
495
|
+
: [];
|
|
301
496
|
const uniqueCategories = new Set(detectedPatterns.map((p) => p.category));
|
|
302
497
|
const patternScore = uniqueCategories.size;
|
|
303
498
|
if (patternScore >= 3) {
|
|
@@ -305,7 +500,7 @@ export class CodebaseIndexer {
|
|
|
305
500
|
for (const p of detectedPatterns) {
|
|
306
501
|
patternFlags[`${p.category}:${p.name}`] = true;
|
|
307
502
|
}
|
|
308
|
-
patternDetector.trackGoldenFile(relPath, patternScore, patternFlags);
|
|
503
|
+
patternDetector.trackGoldenFile(relPath, patternScore, patternFlags);
|
|
309
504
|
}
|
|
310
505
|
// Update component statistics
|
|
311
506
|
for (const component of result.components) {
|
|
@@ -404,10 +599,22 @@ export class CodebaseIndexer {
|
|
|
404
599
|
}
|
|
405
600
|
// Phase 4: Storing
|
|
406
601
|
this.updateProgress('storing', 75);
|
|
407
|
-
|
|
602
|
+
// For full rebuilds, use staging directory for atomic swap
|
|
603
|
+
// For incremental, write directly to active location
|
|
604
|
+
const isFullRebuild = !diff;
|
|
605
|
+
let activeContextDir = contextDir;
|
|
606
|
+
if (isFullRebuild) {
|
|
607
|
+
// Create staging directory for atomic swap
|
|
608
|
+
const stagingBase = path.join(contextDir, STAGING_DIRNAME);
|
|
609
|
+
stagingDir = path.join(stagingBase, buildId);
|
|
610
|
+
await fs.mkdir(stagingDir, { recursive: true });
|
|
611
|
+
activeContextDir = stagingDir;
|
|
612
|
+
console.error(`Full rebuild: writing to staging ${stagingDir}`);
|
|
613
|
+
}
|
|
614
|
+
await fs.mkdir(activeContextDir, { recursive: true });
|
|
408
615
|
if (!this.config.skipEmbedding) {
|
|
409
|
-
const storagePath = path.join(
|
|
410
|
-
const storageProvider = await getStorageProvider({ path: storagePath });
|
|
616
|
+
const storagePath = path.join(activeContextDir, VECTOR_DB_DIRNAME);
|
|
617
|
+
const storageProvider = await getStorageProvider({ path: storagePath }, diff ? { expectedDimensions: getConfiguredDimensions(this.config.embedding) } : undefined);
|
|
411
618
|
if (diff) {
|
|
412
619
|
// Incremental: delete old chunks for changed + deleted files, then add new
|
|
413
620
|
const filesToDelete = [...diff.changed, ...diff.deleted].map((rel) => path.join(this.rootPath, rel).replace(/\\/g, '/'));
|
|
@@ -424,19 +631,26 @@ export class CodebaseIndexer {
|
|
|
424
631
|
`added ${chunksWithEmbeddings.length} new chunks`);
|
|
425
632
|
}
|
|
426
633
|
else {
|
|
427
|
-
// Full: clear
|
|
428
|
-
console.error(`Storing ${chunksToEmbed.length} chunks...`);
|
|
429
|
-
await storageProvider.clear();
|
|
634
|
+
// Full rebuild: store to staging (no clear - fresh directory)
|
|
635
|
+
console.error(`Storing ${chunksToEmbed.length} chunks to staging...`);
|
|
430
636
|
await storageProvider.store(chunksWithEmbeddings);
|
|
431
637
|
}
|
|
432
638
|
}
|
|
639
|
+
// Vector DB build marker (required for version gating)
|
|
640
|
+
// Write after semantic store step so marker reflects the latest DB state.
|
|
641
|
+
const vectorDir = path.join(activeContextDir, VECTOR_DB_DIRNAME);
|
|
642
|
+
await fs.mkdir(vectorDir, { recursive: true });
|
|
643
|
+
await fs.writeFile(path.join(vectorDir, 'index-build.json'), JSON.stringify({ buildId, formatVersion: INDEX_FORMAT_VERSION }));
|
|
433
644
|
// Keyword index always uses ALL chunks (full regen)
|
|
434
|
-
const indexPath = path.join(
|
|
645
|
+
const indexPath = path.join(activeContextDir, KEYWORD_INDEX_FILENAME);
|
|
435
646
|
// Memory safety: cap keyword index too
|
|
436
647
|
const keywordChunks = allChunks.length > MAX_CHUNKS ? allChunks.slice(0, MAX_CHUNKS) : allChunks;
|
|
437
|
-
await fs.writeFile(indexPath, JSON.stringify(
|
|
648
|
+
await fs.writeFile(indexPath, JSON.stringify({
|
|
649
|
+
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
|
|
650
|
+
chunks: keywordChunks
|
|
651
|
+
}));
|
|
438
652
|
// Save library usage and pattern stats (always full regen)
|
|
439
|
-
const intelligencePath = path.join(
|
|
653
|
+
const intelligencePath = path.join(activeContextDir, INTELLIGENCE_FILENAME);
|
|
440
654
|
const libraryStats = libraryTracker.getStats();
|
|
441
655
|
// Extract tsconfig paths for AI to understand import aliases
|
|
442
656
|
let tsconfigPaths;
|
|
@@ -453,6 +667,7 @@ export class CodebaseIndexer {
|
|
|
453
667
|
// No tsconfig.json or no paths defined
|
|
454
668
|
}
|
|
455
669
|
const intelligence = {
|
|
670
|
+
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
|
|
456
671
|
libraryUsage: libraryStats,
|
|
457
672
|
patterns: patternDetector.getAllPatterns(),
|
|
458
673
|
goldenFiles: patternDetector.getGoldenFiles(5),
|
|
@@ -465,23 +680,97 @@ export class CodebaseIndexer {
|
|
|
465
680
|
},
|
|
466
681
|
// Internal file graph for circular dependency and unused export detection
|
|
467
682
|
internalFileGraph: internalFileGraph.toJSON(),
|
|
468
|
-
generatedAt
|
|
683
|
+
generatedAt
|
|
469
684
|
};
|
|
470
685
|
await fs.writeFile(intelligencePath, JSON.stringify(intelligence, null, 2));
|
|
686
|
+
// Write relationships sidecar (versioned, for fast lookup)
|
|
687
|
+
const relationshipsPath = path.join(activeContextDir, RELATIONSHIPS_FILENAME);
|
|
688
|
+
const graphData = internalFileGraph.toJSON();
|
|
689
|
+
// Build reverse import map (importedBy)
|
|
690
|
+
const importedBy = {};
|
|
691
|
+
if (graphData.imports) {
|
|
692
|
+
for (const [file, deps] of Object.entries(graphData.imports)) {
|
|
693
|
+
for (const dep of deps) {
|
|
694
|
+
if (!importedBy[dep])
|
|
695
|
+
importedBy[dep] = [];
|
|
696
|
+
importedBy[dep].push(file);
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
// Build symbol export map (exportedBy)
|
|
701
|
+
const exportedBy = {};
|
|
702
|
+
if (graphData.exports) {
|
|
703
|
+
for (const [file, exps] of Object.entries(graphData.exports)) {
|
|
704
|
+
for (const exp of exps) {
|
|
705
|
+
if (exp.name && exp.name !== 'default') {
|
|
706
|
+
if (!exportedBy[exp.name])
|
|
707
|
+
exportedBy[exp.name] = [];
|
|
708
|
+
if (!exportedBy[exp.name].includes(file)) {
|
|
709
|
+
exportedBy[exp.name].push(file);
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
const relationships = {
|
|
716
|
+
header: { buildId, formatVersion: INDEX_FORMAT_VERSION },
|
|
717
|
+
generatedAt,
|
|
718
|
+
graph: {
|
|
719
|
+
imports: graphData.imports || {},
|
|
720
|
+
...(graphData.importDetails ? { importDetails: graphData.importDetails } : {}),
|
|
721
|
+
importedBy,
|
|
722
|
+
exports: graphData.exports || {}
|
|
723
|
+
},
|
|
724
|
+
symbols: {
|
|
725
|
+
exportedBy
|
|
726
|
+
},
|
|
727
|
+
stats: graphData.stats || internalFileGraph.getStats()
|
|
728
|
+
};
|
|
729
|
+
await fs.writeFile(relationshipsPath, JSON.stringify(relationships, null, 2));
|
|
471
730
|
// Write manifest (both full and incremental)
|
|
731
|
+
// For full rebuild, write to staging; for incremental, write to active
|
|
732
|
+
const activeManifestPath = path.join(activeContextDir, MANIFEST_FILENAME);
|
|
472
733
|
const manifest = {
|
|
473
734
|
version: 1,
|
|
474
735
|
generatedAt: new Date().toISOString(),
|
|
475
736
|
files: currentHashes ?? (await computeFileHashes(files, this.rootPath))
|
|
476
737
|
};
|
|
477
|
-
await writeManifest(
|
|
738
|
+
await writeManifest(activeManifestPath, manifest);
|
|
478
739
|
const persistedStats = {
|
|
479
740
|
indexedFiles: stats.indexedFiles,
|
|
480
741
|
totalChunks: stats.totalChunks,
|
|
481
742
|
totalFiles: stats.totalFiles,
|
|
482
|
-
generatedAt
|
|
743
|
+
generatedAt
|
|
483
744
|
};
|
|
484
|
-
|
|
745
|
+
const activeIndexingStatsPath = path.join(activeContextDir, INDEXING_STATS_FILENAME);
|
|
746
|
+
await fs.writeFile(activeIndexingStatsPath, JSON.stringify(persistedStats, null, 2));
|
|
747
|
+
// Index meta (authoritative) — write last so readers never observe meta pointing to missing artifacts.
|
|
748
|
+
const metaPath = path.join(activeContextDir, INDEX_META_FILENAME);
|
|
749
|
+
await fs.writeFile(metaPath, JSON.stringify({
|
|
750
|
+
metaVersion: INDEX_META_VERSION,
|
|
751
|
+
formatVersion: INDEX_FORMAT_VERSION,
|
|
752
|
+
buildId,
|
|
753
|
+
generatedAt,
|
|
754
|
+
toolVersion,
|
|
755
|
+
artifacts: {
|
|
756
|
+
keywordIndex: { path: KEYWORD_INDEX_FILENAME },
|
|
757
|
+
vectorDb: {
|
|
758
|
+
path: VECTOR_DB_DIRNAME,
|
|
759
|
+
provider: 'lancedb',
|
|
760
|
+
embeddingProvider: this.config.embedding?.provider ?? 'transformers',
|
|
761
|
+
embeddingModel: this.config.embedding?.model ?? DEFAULT_MODEL
|
|
762
|
+
},
|
|
763
|
+
intelligence: { path: INTELLIGENCE_FILENAME },
|
|
764
|
+
manifest: { path: MANIFEST_FILENAME },
|
|
765
|
+
indexingStats: { path: INDEXING_STATS_FILENAME },
|
|
766
|
+
relationships: { path: RELATIONSHIPS_FILENAME }
|
|
767
|
+
}
|
|
768
|
+
}, null, 2));
|
|
769
|
+
// Atomic swap for full rebuilds: move staging into active location
|
|
770
|
+
if (isFullRebuild && stagingDir) {
|
|
771
|
+
console.error('Performing atomic swap of staging to active...');
|
|
772
|
+
await atomicSwapStagingToActive(contextDir, stagingDir, buildId);
|
|
773
|
+
}
|
|
485
774
|
// Phase 5: Complete
|
|
486
775
|
this.updateProgress('complete', 100);
|
|
487
776
|
stats.duration = Date.now() - startTime;
|
|
@@ -505,6 +794,11 @@ export class CodebaseIndexer {
|
|
|
505
794
|
phase: this.progress.phase,
|
|
506
795
|
timestamp: new Date()
|
|
507
796
|
});
|
|
797
|
+
// Clean up staging directory on failure (best-effort)
|
|
798
|
+
if (stagingDir) {
|
|
799
|
+
console.error('Cleaning up staging directory after failure...');
|
|
800
|
+
await cleanupDirectory(stagingDir);
|
|
801
|
+
}
|
|
508
802
|
throw error;
|
|
509
803
|
}
|
|
510
804
|
}
|
|
@@ -636,6 +930,10 @@ export class CodebaseIndexer {
|
|
|
636
930
|
const intelligencePath = path.join(this.rootPath, CODEBASE_CONTEXT_DIRNAME, INTELLIGENCE_FILENAME);
|
|
637
931
|
const intelligenceContent = await fs.readFile(intelligencePath, 'utf-8');
|
|
638
932
|
const intelligence = JSON.parse(intelligenceContent);
|
|
933
|
+
// Phase 06: ignore legacy intelligence files that lack a versioned header.
|
|
934
|
+
if (!intelligence || typeof intelligence !== 'object' || !intelligence.header) {
|
|
935
|
+
return metadata;
|
|
936
|
+
}
|
|
639
937
|
metadata.customMetadata = {
|
|
640
938
|
...metadata.customMetadata,
|
|
641
939
|
libraryUsage: intelligence.libraryUsage,
|