codevault 1.7.3 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -5
- package/dist/chunking/semantic-chunker.d.ts.map +1 -1
- package/dist/chunking/semantic-chunker.js +20 -7
- package/dist/chunking/semantic-chunker.js.map +1 -1
- package/dist/cli/commands/index-cmd.d.ts +3 -0
- package/dist/cli/commands/index-cmd.d.ts.map +1 -0
- package/dist/cli/commands/index-cmd.js +118 -0
- package/dist/cli/commands/index-cmd.js.map +1 -0
- package/dist/cli/commands/info-cmd.d.ts +3 -0
- package/dist/cli/commands/info-cmd.d.ts.map +1 -0
- package/dist/cli/commands/info-cmd.js +47 -0
- package/dist/cli/commands/info-cmd.js.map +1 -0
- package/dist/cli/commands/mcp-cmd.d.ts +3 -0
- package/dist/cli/commands/mcp-cmd.d.ts.map +1 -0
- package/dist/cli/commands/mcp-cmd.js +33 -0
- package/dist/cli/commands/mcp-cmd.js.map +1 -0
- package/dist/cli/commands/search-cmd.d.ts +3 -0
- package/dist/cli/commands/search-cmd.d.ts.map +1 -0
- package/dist/cli/commands/search-cmd.js +52 -0
- package/dist/cli/commands/search-cmd.js.map +1 -0
- package/dist/cli/commands/search-with-code-cmd.d.ts +3 -0
- package/dist/cli/commands/search-with-code-cmd.d.ts.map +1 -0
- package/dist/cli/commands/search-with-code-cmd.js +74 -0
- package/dist/cli/commands/search-with-code-cmd.js.map +1 -0
- package/dist/cli/commands/update-cmd.d.ts +3 -0
- package/dist/cli/commands/update-cmd.d.ts.map +1 -0
- package/dist/cli/commands/update-cmd.js +24 -0
- package/dist/cli/commands/update-cmd.js.map +1 -0
- package/dist/cli/commands/watch-cmd.d.ts +3 -0
- package/dist/cli/commands/watch-cmd.d.ts.map +1 -0
- package/dist/cli/commands/watch-cmd.js +47 -0
- package/dist/cli/commands/watch-cmd.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +47 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +4 -398
- package/dist/cli.js.map +1 -1
- package/dist/codemap/io.d.ts +1 -0
- package/dist/codemap/io.d.ts.map +1 -1
- package/dist/codemap/io.js.map +1 -1
- package/dist/config/constants.d.ts +11 -0
- package/dist/config/constants.d.ts.map +1 -1
- package/dist/config/constants.js +9 -0
- package/dist/config/constants.js.map +1 -1
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +9 -2
- package/dist/config/loader.js.map +1 -1
- package/dist/core/IndexerEngine.d.ts +9 -17
- package/dist/core/IndexerEngine.d.ts.map +1 -1
- package/dist/core/IndexerEngine.js +72 -347
- package/dist/core/IndexerEngine.js.map +1 -1
- package/dist/core/SearchService.d.ts +13 -13
- package/dist/core/SearchService.d.ts.map +1 -1
- package/dist/core/SearchService.js +133 -325
- package/dist/core/SearchService.js.map +1 -1
- package/dist/core/batch-indexer.d.ts +5 -0
- package/dist/core/batch-indexer.d.ts.map +1 -1
- package/dist/core/batch-indexer.js +22 -23
- package/dist/core/batch-indexer.js.map +1 -1
- package/dist/core/indexer.d.ts +13 -0
- package/dist/core/indexer.d.ts.map +1 -1
- package/dist/core/indexer.js +13 -0
- package/dist/core/indexer.js.map +1 -1
- package/dist/core/indexing/FileProcessor.d.ts +42 -0
- package/dist/core/indexing/FileProcessor.d.ts.map +1 -0
- package/dist/core/indexing/FileProcessor.js +245 -0
- package/dist/core/indexing/FileProcessor.js.map +1 -0
- package/dist/core/indexing/IndexContext.d.ts +42 -0
- package/dist/core/indexing/IndexContext.d.ts.map +1 -0
- package/dist/core/indexing/IndexContext.js +133 -0
- package/dist/core/indexing/IndexContext.js.map +1 -0
- package/dist/core/indexing/IndexFinalizationStage.d.ts +39 -0
- package/dist/core/indexing/IndexFinalizationStage.d.ts.map +1 -0
- package/dist/core/indexing/IndexFinalizationStage.js +114 -0
- package/dist/core/indexing/IndexFinalizationStage.js.map +1 -0
- package/dist/core/indexing/IndexState.d.ts +50 -0
- package/dist/core/indexing/IndexState.d.ts.map +1 -0
- package/dist/core/indexing/IndexState.js +66 -0
- package/dist/core/indexing/IndexState.js.map +1 -0
- package/dist/core/indexing/chunk-pipeline.d.ts +39 -3
- package/dist/core/indexing/chunk-pipeline.d.ts.map +1 -1
- package/dist/core/indexing/chunk-pipeline.js +59 -24
- package/dist/core/indexing/chunk-pipeline.js.map +1 -1
- package/dist/core/search/CandidateRetriever.d.ts +51 -0
- package/dist/core/search/CandidateRetriever.d.ts.map +1 -0
- package/dist/core/search/CandidateRetriever.js +119 -0
- package/dist/core/search/CandidateRetriever.js.map +1 -0
- package/dist/core/search/HybridFusion.d.ts +89 -0
- package/dist/core/search/HybridFusion.d.ts.map +1 -0
- package/dist/core/search/HybridFusion.js +263 -0
- package/dist/core/search/HybridFusion.js.map +1 -0
- package/dist/core/search/ResultMapper.d.ts +31 -0
- package/dist/core/search/ResultMapper.d.ts.map +1 -0
- package/dist/core/search/ResultMapper.js +131 -0
- package/dist/core/search/ResultMapper.js.map +1 -0
- package/dist/core/search/SearchContextManager.d.ts +69 -0
- package/dist/core/search/SearchContextManager.d.ts.map +1 -0
- package/dist/core/search/SearchContextManager.js +139 -0
- package/dist/core/search/SearchContextManager.js.map +1 -0
- package/dist/core/search.d.ts +31 -0
- package/dist/core/search.d.ts.map +1 -1
- package/dist/core/search.js +33 -0
- package/dist/core/search.js.map +1 -1
- package/dist/core/types.d.ts +13 -0
- package/dist/core/types.d.ts.map +1 -1
- package/dist/database/db.d.ts +36 -18
- package/dist/database/db.d.ts.map +1 -1
- package/dist/database/db.js +151 -24
- package/dist/database/db.js.map +1 -1
- package/dist/indexer/ChangeQueue.d.ts +67 -0
- package/dist/indexer/ChangeQueue.d.ts.map +1 -0
- package/dist/indexer/ChangeQueue.js +182 -0
- package/dist/indexer/ChangeQueue.js.map +1 -0
- package/dist/indexer/ProviderManager.d.ts +33 -0
- package/dist/indexer/ProviderManager.d.ts.map +1 -0
- package/dist/indexer/ProviderManager.js +75 -0
- package/dist/indexer/ProviderManager.js.map +1 -0
- package/dist/indexer/WatchService.d.ts +68 -0
- package/dist/indexer/WatchService.d.ts.map +1 -0
- package/dist/indexer/WatchService.js +155 -0
- package/dist/indexer/WatchService.js.map +1 -0
- package/dist/indexer/merkle.d.ts +5 -0
- package/dist/indexer/merkle.d.ts.map +1 -1
- package/dist/indexer/merkle.js +36 -12
- package/dist/indexer/merkle.js.map +1 -1
- package/dist/indexer/update.d.ts +14 -2
- package/dist/indexer/update.d.ts.map +1 -1
- package/dist/indexer/update.js +11 -0
- package/dist/indexer/update.js.map +1 -1
- package/dist/indexer/watch.d.ts +15 -20
- package/dist/indexer/watch.d.ts.map +1 -1
- package/dist/indexer/watch.js +14 -223
- package/dist/indexer/watch.js.map +1 -1
- package/dist/mcp/handlers/project.d.ts.map +1 -1
- package/dist/mcp/handlers/project.js +17 -1
- package/dist/mcp/handlers/project.js.map +1 -1
- package/dist/mcp-server.d.ts +6 -0
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +63 -2
- package/dist/mcp-server.js.map +1 -1
- package/dist/providers/base.d.ts +3 -0
- package/dist/providers/base.d.ts.map +1 -1
- package/dist/providers/base.js.map +1 -1
- package/dist/providers/index.d.ts +7 -0
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +15 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/mock.d.ts +16 -0
- package/dist/providers/mock.d.ts.map +1 -0
- package/dist/providers/mock.js +46 -0
- package/dist/providers/mock.js.map +1 -0
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +4 -8
- package/dist/providers/openai.js.map +1 -1
- package/dist/ranking/api-reranker.d.ts.map +1 -1
- package/dist/ranking/api-reranker.js +9 -5
- package/dist/ranking/api-reranker.js.map +1 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +21 -1
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/scope.d.ts +1 -1
- package/dist/search/scope.d.ts.map +1 -1
- package/dist/search/scope.js +3 -3
- package/dist/search/scope.js.map +1 -1
- package/dist/storage/encrypted-chunks.d.ts +7 -1
- package/dist/storage/encrypted-chunks.d.ts.map +1 -1
- package/dist/storage/encrypted-chunks.js +187 -75
- package/dist/storage/encrypted-chunks.js.map +1 -1
- package/dist/symbols/extract.d.ts +2 -1
- package/dist/symbols/extract.d.ts.map +1 -1
- package/dist/symbols/extract.js +1 -1
- package/dist/symbols/extract.js.map +1 -1
- package/dist/symbols/graph.d.ts.map +1 -1
- package/dist/symbols/graph.js.map +1 -1
- package/dist/synthesis/conversational-synthesizer.d.ts.map +1 -1
- package/dist/synthesis/conversational-synthesizer.js +68 -76
- package/dist/synthesis/conversational-synthesizer.js.map +1 -1
- package/dist/synthesis/prompt-builder.d.ts +2 -0
- package/dist/synthesis/prompt-builder.d.ts.map +1 -1
- package/dist/synthesis/prompt-builder.js +80 -81
- package/dist/synthesis/prompt-builder.js.map +1 -1
- package/dist/synthesis/synthesizer.d.ts +11 -0
- package/dist/synthesis/synthesizer.d.ts.map +1 -1
- package/dist/synthesis/synthesizer.js +29 -1
- package/dist/synthesis/synthesizer.js.map +1 -1
- package/dist/tests/helpers/test-repo.d.ts +7 -0
- package/dist/tests/helpers/test-repo.d.ts.map +1 -0
- package/dist/tests/helpers/test-repo.js +21 -0
- package/dist/tests/helpers/test-repo.js.map +1 -0
- package/dist/tests/integration/index-search.integration.test.d.ts +2 -0
- package/dist/tests/integration/index-search.integration.test.d.ts.map +1 -0
- package/dist/tests/integration/index-search.integration.test.js +163 -0
- package/dist/tests/integration/index-search.integration.test.js.map +1 -0
- package/dist/tests/simple-lru.test.js +22 -10
- package/dist/tests/simple-lru.test.js.map +1 -1
- package/dist/utils/cli-ui.d.ts.map +1 -1
- package/dist/utils/cli-ui.js +9 -3
- package/dist/utils/cli-ui.js.map +1 -1
- package/dist/utils/logger.d.ts +4 -1
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/path-helpers.d.ts.map +1 -1
- package/dist/utils/path-helpers.js +17 -1
- package/dist/utils/path-helpers.js.map +1 -1
- package/dist/utils/scan-patterns.d.ts.map +1 -1
- package/dist/utils/scan-patterns.js +1 -2
- package/dist/utils/scan-patterns.js.map +1 -1
- package/dist/utils/simple-lru.d.ts +21 -2
- package/dist/utils/simple-lru.d.ts.map +1 -1
- package/dist/utils/simple-lru.js +126 -19
- package/dist/utils/simple-lru.js.map +1 -1
- package/package.json +13 -4
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { createEmbeddingProvider, getModelProfile, getSizeLimits } from '../../providers/index.js';
|
|
4
|
+
import { BATCH_SIZE } from '../../providers/base.js';
|
|
5
|
+
import { readCodemap } from '../../codemap/io.js';
|
|
6
|
+
import { loadMerkle, cloneMerkle } from '../../indexer/merkle.js';
|
|
7
|
+
import { resolveEncryptionPreference } from '../../storage/encrypted-chunks.js';
|
|
8
|
+
import { Database, initDatabase } from '../../database/db.js';
|
|
9
|
+
import { BatchEmbeddingProcessor } from '../batch-indexer.js';
|
|
10
|
+
import { logger } from '../../utils/logger.js';
|
|
11
|
+
import { resolveProviderContext } from '../../config/resolver.js';
|
|
12
|
+
/**
 * IndexContext assembles everything an indexing run needs before files are processed:
 * - validates that the repository path is an existing directory
 * - creates (or reuses) the embedding provider and looks up its model profile and size limits
 * - initializes the database and warns when stored embeddings do not match the provider
 * - loads the codemap and merkle tree
 * - creates the batch embedding processor
 */
export class IndexContext {
    /**
     * Prepare the context for an indexing run.
     *
     * @param {object} [options] Indexing options; every field is optional.
     * @param {string} [options.repoPath='.'] Repository location, resolved with path.resolve.
     * @param {string} [options.provider='auto'] Provider selector handed to createEmbeddingProvider.
     * @param {*} [options.changedFiles=null] Any value other than null flags the run as a partial update.
     * @param {object|null} [options.embeddingProviderOverride=null] Ready-made provider; when given it is
     *   used as-is and init() is not called on it.
     * @param {*} [options.encryptMode] Passed to resolveEncryptionPreference as `mode`.
     * @returns {Promise<object>} Paths, provider details, the loaded codemap/merkle state, an open
     *   Database handle (`db`) and the batch processor. This method does not close `db`.
     * @throws {Error} When the resolved repository path is missing or is not a directory.
     */
    static async prepare(options = {}) {
        const {
            repoPath = '.',
            provider = 'auto',
            changedFiles = null,
            embeddingProviderOverride = null,
            encryptMode = undefined
        } = options;
        const repo = path.resolve(repoPath);

        // Validate repository: it must exist AND be a directory. A plain existence
        // check would let a regular file through and fail later with an unrelated error.
        let repoStats;
        try {
            repoStats = await fs.promises.stat(repo);
        }
        catch {
            throw new Error(`Directory ${repo} does not exist`);
        }
        if (!repoStats.isDirectory()) {
            throw new Error(`Path ${repo} is not a directory`);
        }

        // Setup provider context and embedding provider
        const providerContext = resolveProviderContext(repo);
        const providerInstance = embeddingProviderOverride ||
            createEmbeddingProvider(provider, providerContext.embedding);
        // An override is used exactly as supplied; only providers created here are initialized.
        if (!embeddingProviderOverride && providerInstance.init) {
            await providerInstance.init();
        }

        // Get provider and model information
        const providerName = providerInstance.getName();
        const modelName = providerInstance.getModelName ? providerInstance.getModelName() : null;
        // Fall back to the provider name when the provider reports no model name.
        const modelProfile = await getModelProfile(providerName, modelName || providerName);
        const limits = getSizeLimits(modelProfile);
        const dimensions = providerInstance.getDimensions();

        // Log configuration
        if (!process.env.CODEVAULT_QUIET) {
            logger.info(`Chunking Configuration`, {
                provider: providerName,
                model: modelName,
                dimensions,
                mode: limits.unit
            });
        }

        // Initialize database
        await initDatabase(dimensions, repo);

        // Setup paths
        const codemapPath = path.join(repo, 'codevault.codemap.json');
        const chunkDir = path.join(repo, '.codevault/chunks');
        const dbPath = path.join(repo, '.codevault/codevault.db');

        // Check for dimension mismatches
        await IndexContext.checkDimensionMismatch(dbPath, providerInstance);

        // Setup encryption
        const encryptionPreference = resolveEncryptionPreference({
            mode: encryptMode,
            logger: console
        });

        // Load existing state; updatedMerkle is a clone so the loaded tree stays untouched here.
        const codemap = readCodemap(codemapPath);
        const merkle = loadMerkle(repo);
        const updatedMerkle = cloneMerkle(merkle);

        // Create database connection
        const db = new Database(dbPath);

        // Create batch processor
        const batchProcessor = new BatchEmbeddingProcessor(providerInstance, db, BATCH_SIZE);
        const isPartialUpdate = changedFiles !== null;

        return {
            repo,
            repoPath,
            provider,
            providerInstance,
            providerName,
            modelName,
            modelProfile,
            limits,
            codemapPath,
            chunkDir,
            dbPath,
            encryptionPreference,
            codemap,
            merkle,
            updatedMerkle,
            db,
            batchProcessor,
            isPartialUpdate
        };
    }

    /**
     * Check if there's a dimension or provider mismatch and warn user.
     *
     * Returns immediately when the database file cannot be accessed. Otherwise it opens
     * the database, reads the stored provider/dimension rows and, when any row differs
     * from the current provider name or dimension count, logs a warning and waits two
     * seconds before continuing. Errors raised while querying are logged at debug level
     * and not rethrown; the temporary connection is closed in all cases.
     *
     * @param {string} dbPath Path of the database file.
     * @param {object} embeddingProvider Provider exposing getName() and getDimensions().
     * @returns {Promise<void>}
     */
    static async checkDimensionMismatch(dbPath, embeddingProvider) {
        try {
            await fs.promises.access(dbPath);
        }
        catch {
            return; // DB doesn't exist yet
        }
        const db = new Database(dbPath);
        try {
            const existingDimensions = await db.getExistingDimensions();
            if (existingDimensions.length > 0) {
                const currentProvider = embeddingProvider.getName();
                const currentDimensions = embeddingProvider.getDimensions();
                const hasMismatch = existingDimensions.some(row => row.embedding_provider !== currentProvider ||
                    row.embedding_dimensions !== currentDimensions);
                if (hasMismatch) {
                    logger.warn('Dimension/Provider Mismatch Detected!', {
                        existing: existingDimensions,
                        current: { provider: currentProvider, dimensions: currentDimensions },
                        recommendation: 'Full re-index recommended'
                    });
                    // Pause for two seconds after the warning before indexing continues.
                    await new Promise(resolve => setTimeout(resolve, 2000));
                }
            }
        }
        catch (error) {
            logger.debug('Migration check encountered an error (continuing)', {
                error: error instanceof Error ? error.message : String(error)
            });
        }
        finally {
            db.close();
        }
    }
}
|
|
133
|
+
//# sourceMappingURL=IndexContext.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexContext.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexContext.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,uBAAuB,EAAE,eAAe,EAAE,aAAa,EAA0B,MAAM,0BAA0B,CAAC;AAC3H,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAgB,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,UAAU,EAAE,WAAW,EAAmB,MAAM,yBAAyB,CAAC;AACnF,OAAO,EAAE,2BAA2B,EAAE,MAAM,mCAAmC,CAAC;AAChF,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAwBlE;;;;;;;GAOG;AACH,MAAM,OAAO,YAAY;IACvB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAA4B;QAC/C,MAAM,EACJ,QAAQ,GAAG,GAAG,EACd,QAAQ,GAAG,MAAM,EACjB,YAAY,GAAG,IAAI,EACnB,yBAAyB,GAAG,IAAI,EAChC,WAAW,GAAG,SAAS,EACxB,GAAG,OAAO,CAAC;QAEZ,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,6BAA6B;QAC7B,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,aAAa,IAAI,iBAAiB,CAAC,CAAC;QACtD,CAAC;QAED,gDAAgD;QAChD,MAAM,eAAe,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;QACrD,MAAM,gBAAgB,GAAG,yBAAyB;YAChD,uBAAuB,CAAC,QAAQ,EAAE,eAAe,CAAC,SAAS,CAAC,CAAC;QAE/D,IAAI,CAAC,yBAAyB,IAAI,gBAAgB,CAAC,IAAI,EAAE,CAAC;YACxD,MAAM,gBAAgB,CAAC,IAAI,EAAE,CAAC;QAChC,CAAC;QAED,qCAAqC;QACrC,MAAM,YAAY,GAAG,gBAAgB,CAAC,OAAO,EAAE,CAAC;QAChD,MAAM,SAAS,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACzF,MAAM,YAAY,GAAG,MAAM,eAAe,CAAC,YAAY,EAAE,SAAS,IAAI,YAAY,CAAC,CAAC;QACpF,MAAM,MAAM,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;QAE3C,oBAAoB;QACpB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE;gBACpC,QAAQ,EAAE,YAAY;gBACtB,KAAK,EAAE,SAAS;gBAChB,UAAU,EAAE,gBAAgB,CAAC,aAAa,EAAE;gBAC5C,IAAI,EAAE,MAAM,CAAC,IAAI;aAClB,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,MAAM,YAAY,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,IAAI,CAAC,CAAC;QAE3D,cAAc;QACd,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;QA
C9D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,yBAAyB,CAAC,CAAC;QAE1D,iCAAiC;QACjC,MAAM,YAAY,CAAC,sBAAsB,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;QAEpE,mBAAmB;QACnB,MAAM,oBAAoB,GAAG,2BAA2B,CAAC;YACvD,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,OAAO;SAChB,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAE1C,6BAA6B;QAC7B,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEhC,yBAAyB;QACzB,MAAM,cAAc,GAAG,IAAI,uBAAuB,CAAC,gBAAgB,EAAE,EAAE,EAAE,UAAU,CAAC,CAAC;QAErF,MAAM,eAAe,GAAG,YAAY,KAAK,IAAI,CAAC;QAE9C,OAAO;YACL,IAAI;YACJ,QAAQ;YACR,QAAQ;YACR,gBAAgB;YAChB,YAAY;YACZ,SAAS;YACT,YAAY;YACZ,MAAM;YACN,WAAW;YACX,QAAQ;YACR,MAAM;YACN,oBAAoB;YACpB,OAAO;YACP,MAAM;YACN,aAAa;YACb,EAAE;YACF,cAAc;YACd,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,KAAK,CAAC,sBAAsB,CACzC,MAAc,EACd,iBAAoC;QAEpC,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,uBAAuB;QACjC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,kBAAkB,GAAG,MAAM,EAAE,CAAC,qBAAqB,EAAE,CAAC;YAE5D,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,eAAe,GAAG,iBAAiB,CAAC,OAAO,EAAE,CAAC;gBACpD,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,aAAa,EAAE,CAAC;gBAE5D,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CACzC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,kBAAkB,KAAK,eAAe;oBAC1C,GAAG,CAAC,oBAAoB,KAAK,iBAAiB,CACtD,CAAC;gBAEF,IAAI,WAAW,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,CAAC,uCAAuC,EAAE;wBACnD,QAAQ,EAAE,kBAAkB;wBAC5B,OAAO,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE,UAAU,EAAE,iBAAiB,EAAE;wBACrE,cAAc,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;oBAEH,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC1D,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,mDAAmD,EAAE;gBAChE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;QACL,CAAC;gBAAS,CAAC;YACT,EAAE,CAAC,
KAAK,EAAE,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { IndexContextData } from './IndexContext.js';
|
|
2
|
+
import type { IndexState } from './IndexState.js';
|
|
3
|
+
import type { IndexProjectResult } from '../types.js';
|
|
4
|
+
/**
|
|
5
|
+
* IndexFinalizationStage handles the finalization of the indexing process:
|
|
6
|
+
* - Flushing batch processor
|
|
7
|
+
* - Saving merkle tree
|
|
8
|
+
* - Building symbol graph
|
|
9
|
+
* - Writing codemap
|
|
10
|
+
* - Building result object
|
|
11
|
+
* - Cleaning up resources
|
|
12
|
+
*/
|
|
13
|
+
export declare class IndexFinalizationStage {
|
|
14
|
+
private context; // context object supplied to the constructor
|
|
15
|
+
private state; // IndexState supplied to the constructor; errors are recorded on it
|
|
16
|
+
private onProgress; // optional progress callback, or null
|
|
17
|
+
constructor(context: IndexContextData, state: IndexState, onProgress: ((event: any) => void) | null); // stores the three arguments unchanged
|
|
18
|
+
/**
|
|
19
|
+
* Finalize the indexing process: reports { type: 'finalizing' } to onProgress when one is set, flushes the batch processor, saves the merkle tree when state.merkleDirty is set, attaches the symbol graph, writes the codemap, then logs statistics and resolves with the result object. cleanup() runs in a finally block, so it also runs when a step throws.
|
|
20
|
+
*/
|
|
21
|
+
finalize(): Promise<IndexProjectResult>;
|
|
22
|
+
/**
|
|
23
|
+
* Flush the batch processor if the context has one. A failure is recorded on the state as a 'finalize_error' entry and is not rethrown.
|
|
24
|
+
*/
|
|
25
|
+
private flushBatchProcessor;
|
|
26
|
+
/**
|
|
27
|
+
* Log chunking and processing statistics via logger.info; nothing is logged when the CODEVAULT_QUIET environment variable is set.
|
|
28
|
+
*/
|
|
29
|
+
private logStatistics;
|
|
30
|
+
/**
|
|
31
|
+
* Build the final result object: success is always true, totalChunks is the number of keys in the codemap, and tokenStats is included only when context.modelProfile.useTokens is truthy.
|
|
32
|
+
*/
|
|
33
|
+
private buildResult;
|
|
34
|
+
/**
|
|
35
|
+
* Clean up resources: closes context.db when present. A failure is recorded on the state as a 'db_close_error' entry and is not rethrown.
|
|
36
|
+
*/
|
|
37
|
+
private cleanup;
|
|
38
|
+
}
|
|
39
|
+
//# sourceMappingURL=IndexFinalizationStage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexFinalizationStage.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/IndexFinalizationStage.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEtD;;;;;;;;GAQG;AACH,qBAAa,sBAAsB;IAE/B,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,KAAK;IACb,OAAO,CAAC,UAAU;gBAFV,OAAO,EAAE,gBAAgB,EACzB,KAAK,EAAE,UAAU,EACjB,UAAU,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,KAAK,IAAI,CAAC,GAAG,IAAI;IAGnD;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,kBAAkB,CAAC;IAiC7C;;OAEG;YACW,mBAAmB;IAajC;;OAEG;IACH,OAAO,CAAC,aAAa;IAUrB;;OAEG;IACH,OAAO,CAAC,WAAW;IAYnB;;OAEG;YACW,OAAO;CAatB"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { saveMerkle } from '../../indexer/merkle.js';
|
|
2
|
+
import { writeCodemap } from '../../codemap/io.js';
|
|
3
|
+
import { attachSymbolGraphToCodemap } from '../../symbols/graph.js';
|
|
4
|
+
import { getTokenCountStats } from '../../chunking/token-counter.js';
|
|
5
|
+
import { logger } from '../../utils/logger.js';
|
|
6
|
+
/**
 * IndexFinalizationStage handles the finalization of the indexing process:
 * - Flushing batch processor
 * - Saving merkle tree
 * - Building symbol graph
 * - Writing codemap
 * - Building result object
 * - Cleaning up resources
 */
export class IndexFinalizationStage {
    context;
    state;
    onProgress;

    /**
     * @param {object} context Prepared indexing context; this class reads `batchProcessor`,
     *   `db`, `repo`, `codemapPath`, `providerInstance` and `modelProfile` from it.
     * @param {object} state Mutable run state; this class reads `codemap`, `updatedMerkle`,
     *   `merkleDirty`, `processedChunks`, `errors`, `chunkingStats` and calls `addError`.
     * @param {?function(object): void} onProgress Optional progress callback, or null.
     */
    constructor(context, state, onProgress) {
        this.context = context;
        this.state = state;
        this.onProgress = onProgress;
    }

    /**
     * Finalize the indexing process.
     *
     * Steps run in order: progress notification, batch flush, merkle save (only when
     * dirty), symbol graph + codemap write, statistics logging, result construction.
     * The database connection is closed in a finally block, so it is released even
     * when one of the steps throws.
     *
     * @returns {Promise<object>} The result object produced by buildResult().
     */
    async finalize() {
        try {
            // Notify progress
            if (this.onProgress) {
                this.onProgress({ type: 'finalizing' });
            }
            // Flush any remaining embeddings
            await this.flushBatchProcessor();
            // Save merkle tree if modified
            if (this.state.merkleDirty) {
                saveMerkle(this.context.repo, this.state.updatedMerkle);
            }
            // Build symbol graph and write codemap
            attachSymbolGraphToCodemap(this.state.codemap);
            // writeCodemap's return value replaces the in-memory codemap.
            this.state.codemap = writeCodemap(this.context.codemapPath, this.state.codemap);
            // Get token statistics
            const tokenStats = getTokenCountStats();
            // Log statistics
            this.logStatistics();
            // Build result
            return this.buildResult(tokenStats);
        }
        finally {
            // Clean up resources
            await this.cleanup();
        }
    }

    /**
     * Flush the batch processor, if the context has one.
     *
     * A failure is recorded on the state as a `finalize_error` entry and is not
     * rethrown, so the remaining finalization steps still run.
     *
     * @returns {Promise<void>}
     */
    async flushBatchProcessor() {
        try {
            if (this.context.batchProcessor) {
                await this.context.batchProcessor.flush();
            }
        }
        catch (error) {
            this.state.addError({
                type: 'finalize_error',
                // Anything can be thrown; non-Error values have no usable `.message`.
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }

    /**
     * Log chunking and processing statistics unless CODEVAULT_QUIET is set.
     */
    logStatistics() {
        if (!process.env.CODEVAULT_QUIET) {
            logger.info('Chunking Statistics', {
                stats: { ...this.state.chunkingStats },
                processedChunks: this.state.processedChunks,
                totalChunks: Object.keys(this.state.codemap).length
            });
        }
    }

    /**
     * Build the final result object.
     *
     * @param {*} tokenStats Token statistics; included in the result only when the
     *   model profile has `useTokens` set.
     * @returns {object} Result with `success` (always true), `processedChunks`,
     *   `totalChunks` (number of codemap keys), `provider`, `errors`,
     *   `chunkingStats` and `tokenStats`.
     */
    buildResult(tokenStats) {
        return {
            success: true,
            processedChunks: this.state.processedChunks,
            totalChunks: Object.keys(this.state.codemap).length,
            provider: this.context.providerInstance.getName(),
            // Same array instance as the state, so errors recorded during cleanup() are visible too.
            errors: this.state.errors,
            chunkingStats: this.state.chunkingStats,
            tokenStats: this.context.modelProfile.useTokens ? tokenStats : undefined
        };
    }

    /**
     * Clean up resources: close the database connection when one is present.
     *
     * A failure is recorded on the state as a `db_close_error` entry and is not
     * rethrown; this runs inside finalize()'s finally block, where a throw would
     * replace the result or the original error.
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
        // Close database connection
        try {
            if (this.context.db) {
                this.context.db.close();
            }
        }
        catch (error) {
            this.state.addError({
                type: 'db_close_error',
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
}
|
|
114
|
+
//# sourceMappingURL=IndexFinalizationStage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexFinalizationStage.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexFinalizationStage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAK/C;;;;;;;;GAQG;AACH,MAAM,OAAO,sBAAsB;IAEvB;IACA;IACA;IAHV,YACU,OAAyB,EACzB,KAAiB,EACjB,UAAyC;QAFzC,YAAO,GAAP,OAAO,CAAkB;QACzB,UAAK,GAAL,KAAK,CAAY;QACjB,eAAU,GAAV,UAAU,CAA+B;IAChD,CAAC;IAEJ;;OAEG;IACH,KAAK,CAAC,QAAQ;QACZ,IAAI,CAAC;YACH,kBAAkB;YAClB,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBACpB,IAAI,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;YAC1C,CAAC;YAED,iCAAiC;YACjC,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAEjC,+BAA+B;YAC/B,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAC1D,CAAC;YAED,uCAAuC;YACvC,0BAA0B,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/C,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEhF,uBAAuB;YACvB,MAAM,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAExC,iBAAiB;YACjB,IAAI,CAAC,aAAa,EAAE,CAAC;YAErB,eAAe;YACf,OAAO,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QACtC,CAAC;gBAAS,CAAC;YACT,qBAAqB;YACrB,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,mBAAmB;QAC/B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAChC,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;gBAClB,IAAI,EAAE,gBAAgB;gBACtB,KAAK,EAAG,KAAe,CAAC,OAAO;aAChC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa;QACnB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,qBAAqB,EAAE;gBACjC,KAAK,EAAE,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,EAAE;gBACtC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;gBAC3C,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM;aACpD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OA
EG;IACK,WAAW,CAAC,UAAe;QACjC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;YAC3C,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM;YACnD,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,OAAO,EAAE;YACjD,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM;YACzB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa;YACvC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;SACzE,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,OAAO;QACnB,4BAA4B;QAC5B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC;gBACpB,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;gBAClB,IAAI,EAAE,gBAAgB;gBACtB,KAAK,EAAG,KAAe,CAAC,OAAO;aAChC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { Codemap } from '../../codemap/io.js';
|
|
2
|
+
import type { MerkleTree } from '../../indexer/merkle.js';
|
|
3
|
+
import type { ChunkingStats } from '../types.js';
|
|
4
|
+
/**
|
|
5
|
+
* IndexState tracks mutable state during the indexing process
|
|
6
|
+
*/
|
|
7
|
+
export declare class IndexState {
|
|
8
|
+
codemap: Codemap; // codemap passed to the constructor
|
|
9
|
+
updatedMerkle: MerkleTree; // merkle tree passed to the constructor
|
|
10
|
+
merkleDirty: boolean; // starts false; set to true by markMerkleDirty()
|
|
11
|
+
indexMutated: boolean; // starts false; set to true by markIndexMutated()
|
|
12
|
+
processedChunks: number; // starts at 0; incremented by incrementProcessedChunks()
|
|
13
|
+
errors: any[]; // starts empty; addError() appends to it
|
|
14
|
+
chunkingStats: ChunkingStats; // all counters start at 0
|
|
15
|
+
constructor(codemap: Codemap, updatedMerkle: MerkleTree);
|
|
16
|
+
/**
|
|
17
|
+
* Add an error to the error list. The entry is appended as given; no validation or copying is performed.
|
|
18
|
+
*/
|
|
19
|
+
addError(error: {
|
|
20
|
+
type: string;
|
|
21
|
+
file?: string;
|
|
22
|
+
chunkId?: string;
|
|
23
|
+
error: string;
|
|
24
|
+
}): void;
|
|
25
|
+
/**
|
|
26
|
+
* Mark merkle tree as modified (sets merkleDirty to true).
|
|
27
|
+
*/
|
|
28
|
+
markMerkleDirty(): void;
|
|
29
|
+
/**
|
|
30
|
+
* Mark index as modified (sets indexMutated to true).
|
|
31
|
+
*/
|
|
32
|
+
markIndexMutated(): void;
|
|
33
|
+
/**
|
|
34
|
+
* Increment processed chunk counter by one.
|
|
35
|
+
*/
|
|
36
|
+
incrementProcessedChunks(): void;
|
|
37
|
+
/**
|
|
38
|
+
* Update chunking stats: replaces chunkingStats with a new object in which the given fields override the current ones; fields not supplied keep their current values.
|
|
39
|
+
*/
|
|
40
|
+
updateChunkingStats(stats: Partial<ChunkingStats>): void;
|
|
41
|
+
/**
|
|
42
|
+
* Get current error count (length of the errors array).
|
|
43
|
+
*/
|
|
44
|
+
getErrorCount(): number;
|
|
45
|
+
/**
|
|
46
|
+
* Check if index was modified: true when either indexMutated or merkleDirty is set.
|
|
47
|
+
*/
|
|
48
|
+
wasModified(): boolean;
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=IndexState.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexState.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/IndexState.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjD;;GAEG;AACH,qBAAa,UAAU;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,UAAU,CAAC;IAC1B,WAAW,UAAS;IACpB,YAAY,UAAS;IACrB,eAAe,SAAK;IACpB,MAAM,EAAE,GAAG,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAO1B;gBAEU,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,UAAU;IAKvD;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAIvF;;OAEG;IACH,eAAe,IAAI,IAAI;IAIvB;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAIxB;;OAEG;IACH,wBAAwB,IAAI,IAAI;IAIhC;;OAEG;IACH,mBAAmB,CAAC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,IAAI;IAIxD;;OAEG;IACH,aAAa,IAAI,MAAM;IAIvB;;OAEG;IACH,WAAW,IAAI,OAAO;CAGvB"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
 * Mutable bookkeeping for one indexing run: the codemap and merkle tree being
 * updated, dirty flags, a processed-chunk counter, collected errors and
 * chunking counters.
 */
export class IndexState {
    /**
     * @param {object} codemap Codemap that the run updates.
     * @param {object} updatedMerkle Merkle tree that the run updates.
     */
    constructor(codemap, updatedMerkle) {
        // Properties are created in the same order as the public declaration.
        this.codemap = codemap;
        this.updatedMerkle = updatedMerkle;
        this.merkleDirty = false;
        this.indexMutated = false;
        this.processedChunks = 0;
        this.errors = [];
        this.chunkingStats = {
            totalNodes: 0,
            skippedSmall: 0,
            subdivided: 0,
            statementFallback: 0,
            normalChunks: 0,
            mergedSmall: 0
        };
    }

    /**
     * Append an entry to the collected errors.
     *
     * @param {object} error Error record; stored as given.
     */
    addError(error) {
        const { errors } = this;
        errors.push(error);
    }

    /** Flag the merkle tree as changed. */
    markMerkleDirty() {
        this.merkleDirty = true;
    }

    /** Flag the index as changed. */
    markIndexMutated() {
        this.indexMutated = true;
    }

    /** Count one more processed chunk. */
    incrementProcessedChunks() {
        ++this.processedChunks;
    }

    /**
     * Overlay the supplied counters on the current chunking stats.
     *
     * @param {object} stats Partial set of counters; omitted fields keep their value.
     */
    updateChunkingStats(stats) {
        const previous = this.chunkingStats;
        const merged = { ...previous, ...stats };
        this.chunkingStats = merged;
    }

    /**
     * @returns {number} How many errors have been collected so far.
     */
    getErrorCount() {
        const { length } = this.errors;
        return length;
    }

    /**
     * @returns {boolean} Whether the index or the merkle tree was flagged as changed.
     */
    wasModified() {
        const changed = this.indexMutated || this.merkleDirty;
        return changed;
    }
}
|
|
66
|
+
//# sourceMappingURL=IndexState.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexState.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexState.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,MAAM,OAAO,UAAU;IACrB,OAAO,CAAU;IACjB,aAAa,CAAa;IAC1B,WAAW,GAAG,KAAK,CAAC;IACpB,YAAY,GAAG,KAAK,CAAC;IACrB,eAAe,GAAG,CAAC,CAAC;IACpB,MAAM,GAAU,EAAE,CAAC;IACnB,aAAa,GAAkB;QAC7B,UAAU,EAAE,CAAC;QACb,YAAY,EAAE,CAAC;QACf,UAAU,EAAE,CAAC;QACb,iBAAiB,EAAE,CAAC;QACpB,YAAY,EAAE,CAAC;QACf,WAAW,EAAE,CAAC;KACf,CAAC;IAEF,YAAY,OAAgB,EAAE,aAAyB;QACrD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,KAAuE;QAC9E,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,eAAe;QACb,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,wBAAwB;QACtB,IAAI,CAAC,eAAe,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,mBAAmB,CAAC,KAA6B;QAC/C,IAAI,CAAC,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,GAAG,KAAK,EAAE,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAC/C,CAAC;CACF"}
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import Parser from 'tree-sitter';
|
|
2
|
+
import { type NodeGroup } from '../../chunking/file-grouper.js';
|
|
3
|
+
import type { TreeSitterNode } from '../../types/ast.js';
|
|
1
4
|
import type { LanguageRule } from '../../languages/rules.js';
|
|
2
5
|
import type { ModelProfile } from '../../providers/base.js';
|
|
3
6
|
type SizeLimits = {
|
|
@@ -7,6 +10,10 @@ type SizeLimits = {
|
|
|
7
10
|
overlap: number;
|
|
8
11
|
unit: string;
|
|
9
12
|
};
|
|
13
|
+
export interface OversizedChunk {
|
|
14
|
+
code: string;
|
|
15
|
+
part: number;
|
|
16
|
+
}
|
|
10
17
|
interface ExistingChunks {
|
|
11
18
|
staleChunkIds: Set<string>;
|
|
12
19
|
existingChunks: Map<string, any>;
|
|
@@ -26,12 +33,41 @@ interface EmbedStoreParams {
|
|
|
26
33
|
contextInfo: any;
|
|
27
34
|
symbolData: any;
|
|
28
35
|
}
|
|
29
|
-
|
|
36
|
+
/**
|
|
37
|
+
* Collects candidate AST nodes for chunking using a reusable parser instance.
|
|
38
|
+
*/
|
|
39
|
+
export declare class ASTTraverser {
|
|
30
40
|
private parser;
|
|
41
|
+
constructor(parser?: Parser);
|
|
42
|
+
collectNodesForFile(source: string, rule: LanguageRule): TreeSitterNode[];
|
|
43
|
+
private buildTree;
|
|
44
|
+
}
|
|
45
|
+
export interface OverlapStrategy {
|
|
46
|
+
split(node: TreeSitterNode, source: string, limits: SizeLimits, profile: ModelProfile): Promise<OversizedChunk[]>;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Default overlap strategy that falls back to statement-level chunking with 20% overlap.
|
|
50
|
+
*/
|
|
51
|
+
export declare class StatementOverlapStrategy implements OverlapStrategy {
|
|
52
|
+
split(node: TreeSitterNode, source: string, limits: SizeLimits, profile: ModelProfile): Promise<OversizedChunk[]>;
|
|
53
|
+
}
|
|
54
|
+
export declare class ChunkGrouper {
|
|
55
|
+
groupNodes(nodes: TreeSitterNode[], source: string, profile: ModelProfile, rule: LanguageRule): Promise<NodeGroup[]>;
|
|
56
|
+
}
|
|
57
|
+
export interface ChunkPipelineDependencies {
|
|
58
|
+
traverser?: ASTTraverser;
|
|
59
|
+
chunkGrouper?: ChunkGrouper;
|
|
60
|
+
overlapStrategy?: OverlapStrategy;
|
|
61
|
+
}
|
|
62
|
+
export declare class ChunkPipeline {
|
|
31
63
|
private processedNodes;
|
|
32
|
-
|
|
64
|
+
private traverser;
|
|
65
|
+
private chunkGrouper;
|
|
66
|
+
private overlapStrategy;
|
|
67
|
+
constructor(deps?: ChunkPipelineDependencies);
|
|
33
68
|
collectNodesForFile(source: string, rule: LanguageRule): Promise<import("tree-sitter").SyntaxNode[]>;
|
|
34
|
-
|
|
69
|
+
groupNodes(nodes: TreeSitterNode[], source: string, profile: ModelProfile, rule: LanguageRule): Promise<NodeGroup[]>;
|
|
70
|
+
processGroups(nodeGroups: NodeGroup[], source: string, rule: LanguageRule, limits: SizeLimits, modelProfile: ModelProfile, rel: string, existing: ExistingChunks, chunkMerkleHashes: string[], onProgress: any, embedAndStore: (params: EmbedStoreParams) => Promise<void>, chunkingStats: any): Promise<void>;
|
|
35
71
|
private yieldChunk;
|
|
36
72
|
private processChunk;
|
|
37
73
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunk-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"chunk-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"AAGA,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,OAAO,EAA8C,KAAK,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAY5G,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAE5D,KAAK,UAAU,GAAG;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,UAAU,cAAc;IACtB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3B,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,UAAU,gBAAgB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,GAAG,CAAC;IACvB,kBAAkB,EAAE,GAAG,EAAE,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,GAAG,CAAC;IACjB,UAAU,EAAE,GAAG,CAAC;CACjB;AAED;;GAEG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,MAAM;IAI3B,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,cAAc,EAAE;IAkDzE,OAAO,CAAC,SAAS;CAWlB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;CACnH;AAED;;GAEG;AACH,qBAAa,wBAAyB,YAAW,eAAe;IACxD,KAAK,CACT,IAAI,EAAE,cAAc,EACpB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,UAAU,EAClB,OAAO,EAAE,YAAY,GACpB,OAAO,CAAC,cAAc,EAAE,CAAC;CAO7B;AAED,qBAAa,YAAY;IACjB,UAAU,CACd,KAAK,EAAE,cAAc,EAAE,EACvB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,YAAY,EACrB,IAAI,EAAE,YAAY,GACjB,OAAO,CAAC,SAAS,EAAE,CAAC;CAGxB;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,CAAC,EAAE,YAAY,CAAC;IACzB,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,cAAc,CAAqB;IAC3C,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,eAAe,CAAkB;gBAE7B,IAAI,GAAE,yBAA8
B;IAM1C,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY;IAItD,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY;IAI7F,aAAa,CACjB,UAAU,EAAE,SAAS,EAAE,EACvB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,YAAY,EAC1B,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,cAAc,EACxB,iBAAiB,EAAE,MAAM,EAAE,EAC3B,UAAU,EAAE,GAAG,EACf,aAAa,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,OAAO,CAAC,IAAI,CAAC,EAC1D,aAAa,EAAE,GAAG,GACjB,OAAO,CAAC,IAAI,CAAC;YAgCF,UAAU;YAgHV,YAAY;CAmF3B"}
|
|
@@ -1,33 +1,24 @@
|
|
|
1
|
-
import Parser from 'tree-sitter';
|
|
2
1
|
import crypto from 'crypto';
|
|
2
|
+
import Parser from 'tree-sitter';
|
|
3
3
|
import { analyzeNodeForChunking, batchAnalyzeNodes, yieldStatementChunks } from '../../chunking/semantic-chunker.js';
|
|
4
|
-
import { createCombinedChunk } from '../../chunking/file-grouper.js';
|
|
4
|
+
import { groupNodesForChunking, createCombinedChunk } from '../../chunking/file-grouper.js';
|
|
5
5
|
import { extractSymbolMetadata } from '../../symbols/extract.js';
|
|
6
6
|
import { extractSymbolName } from '../symbol-extractor.js';
|
|
7
7
|
import { extractCodevaultMetadata, extractSemanticTags, extractImportantVariables, extractDocComments, generateEnhancedEmbeddingText } from '../metadata.js';
|
|
8
8
|
import { computeFastHash } from '../../indexer/merkle.js';
|
|
9
9
|
import { SIZE_THRESHOLD, CHUNK_SIZE } from '../../config/constants.js';
|
|
10
|
-
|
|
10
|
+
/**
|
|
11
|
+
* Collects candidate AST nodes for chunking using a reusable parser instance.
|
|
12
|
+
*/
|
|
13
|
+
export class ASTTraverser {
|
|
11
14
|
parser;
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
this.parser = new Parser();
|
|
15
|
+
constructor(parser) {
|
|
16
|
+
this.parser = parser ?? new Parser();
|
|
15
17
|
}
|
|
16
|
-
|
|
18
|
+
collectNodesForFile(source, rule) {
|
|
17
19
|
this.parser.setLanguage(rule.ts);
|
|
18
|
-
|
|
19
|
-
if (
|
|
20
|
-
tree = this.parser.parse((index) => {
|
|
21
|
-
if (index < source.length) {
|
|
22
|
-
return source.slice(index, Math.min(index + CHUNK_SIZE, source.length));
|
|
23
|
-
}
|
|
24
|
-
return null;
|
|
25
|
-
});
|
|
26
|
-
}
|
|
27
|
-
else {
|
|
28
|
-
tree = this.parser.parse(source);
|
|
29
|
-
}
|
|
30
|
-
if (!tree || !tree.rootNode) {
|
|
20
|
+
const tree = this.buildTree(source);
|
|
21
|
+
if (!tree?.rootNode) {
|
|
31
22
|
throw new Error('Failed to create syntax tree');
|
|
32
23
|
}
|
|
33
24
|
const collectedNodes = [];
|
|
@@ -68,6 +59,51 @@ export class ChunkPipeline {
|
|
|
68
59
|
collectNodes(tree.rootNode);
|
|
69
60
|
return collectedNodes;
|
|
70
61
|
}
|
|
62
|
+
buildTree(source) {
|
|
63
|
+
if (source.length > SIZE_THRESHOLD) {
|
|
64
|
+
return this.parser.parse((index) => {
|
|
65
|
+
if (index < source.length) {
|
|
66
|
+
return source.slice(index, Math.min(index + CHUNK_SIZE, source.length));
|
|
67
|
+
}
|
|
68
|
+
return null;
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
return this.parser.parse(source);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Default overlap strategy that falls back to statement-level chunking with 20% overlap.
|
|
76
|
+
*/
|
|
77
|
+
export class StatementOverlapStrategy {
|
|
78
|
+
async split(node, source, limits, profile) {
|
|
79
|
+
const statementChunks = await yieldStatementChunks(node, source, limits.max, limits.overlap, profile);
|
|
80
|
+
return statementChunks.map((chunk, index) => ({
|
|
81
|
+
code: chunk.code,
|
|
82
|
+
part: index + 1
|
|
83
|
+
}));
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
export class ChunkGrouper {
|
|
87
|
+
async groupNodes(nodes, source, profile, rule) {
|
|
88
|
+
return groupNodesForChunking(nodes, source, profile, rule);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
export class ChunkPipeline {
|
|
92
|
+
processedNodes = new Set();
|
|
93
|
+
traverser;
|
|
94
|
+
chunkGrouper;
|
|
95
|
+
overlapStrategy;
|
|
96
|
+
constructor(deps = {}) {
|
|
97
|
+
this.traverser = deps.traverser ?? new ASTTraverser();
|
|
98
|
+
this.chunkGrouper = deps.chunkGrouper ?? new ChunkGrouper();
|
|
99
|
+
this.overlapStrategy = deps.overlapStrategy ?? new StatementOverlapStrategy();
|
|
100
|
+
}
|
|
101
|
+
async collectNodesForFile(source, rule) {
|
|
102
|
+
return this.traverser.collectNodesForFile(source, rule);
|
|
103
|
+
}
|
|
104
|
+
async groupNodes(nodes, source, profile, rule) {
|
|
105
|
+
return this.chunkGrouper.groupNodes(nodes, source, profile, rule);
|
|
106
|
+
}
|
|
71
107
|
async processGroups(nodeGroups, source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats) {
|
|
72
108
|
this.processedNodes = new Set();
|
|
73
109
|
for (const nodeGroup of nodeGroups) {
|
|
@@ -139,10 +175,9 @@ export class ChunkPipeline {
|
|
|
139
175
|
}
|
|
140
176
|
else if (analysis.size > limits.max) {
|
|
141
177
|
chunkingStats.statementFallback++;
|
|
142
|
-
const
|
|
143
|
-
for (
|
|
144
|
-
|
|
145
|
-
await this.processChunk(node, stmtChunk.code, `${i + 1}`, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
|
|
178
|
+
const oversizedChunks = await this.overlapStrategy.split(node, source, limits, modelProfile);
|
|
179
|
+
for (const stmtChunk of oversizedChunks) {
|
|
180
|
+
await this.processChunk(node, stmtChunk.code, `${stmtChunk.part}`, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
|
|
146
181
|
}
|
|
147
182
|
return;
|
|
148
183
|
}
|