codevault 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunking/file-grouper.d.ts +39 -0
- package/dist/chunking/file-grouper.d.ts.map +1 -0
- package/dist/chunking/file-grouper.js +164 -0
- package/dist/chunking/file-grouper.js.map +1 -0
- package/dist/chunking/semantic-chunker.d.ts +37 -0
- package/dist/chunking/semantic-chunker.d.ts.map +1 -0
- package/dist/chunking/semantic-chunker.js +157 -0
- package/dist/chunking/semantic-chunker.js.map +1 -0
- package/dist/chunking/token-counter.d.ts +28 -0
- package/dist/chunking/token-counter.d.ts.map +1 -0
- package/dist/chunking/token-counter.js +178 -0
- package/dist/chunking/token-counter.js.map +1 -0
- package/dist/cli/commands/ask-cmd.d.ts +3 -0
- package/dist/cli/commands/ask-cmd.d.ts.map +1 -0
- package/dist/cli/commands/ask-cmd.js +130 -0
- package/dist/cli/commands/ask-cmd.js.map +1 -0
- package/dist/cli/commands/chat-cmd.d.ts +3 -0
- package/dist/cli/commands/chat-cmd.d.ts.map +1 -0
- package/dist/cli/commands/chat-cmd.js +194 -0
- package/dist/cli/commands/chat-cmd.js.map +1 -0
- package/dist/cli/commands/config-cmd.d.ts +3 -0
- package/dist/cli/commands/config-cmd.d.ts.map +1 -0
- package/dist/cli/commands/config-cmd.js +245 -0
- package/dist/cli/commands/config-cmd.js.map +1 -0
- package/dist/cli/commands/context.d.ts +3 -0
- package/dist/cli/commands/context.d.ts.map +1 -0
- package/dist/cli/commands/context.js +98 -0
- package/dist/cli/commands/context.js.map +1 -0
- package/dist/cli/commands/interactive-config.d.ts +2 -0
- package/dist/cli/commands/interactive-config.d.ts.map +1 -0
- package/dist/cli/commands/interactive-config.js +274 -0
- package/dist/cli/commands/interactive-config.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +401 -0
- package/dist/cli.js.map +1 -0
- package/dist/codemap/io.d.ts +6 -0
- package/dist/codemap/io.d.ts.map +1 -0
- package/dist/codemap/io.js +46 -0
- package/dist/codemap/io.js.map +1 -0
- package/dist/config/apply-env.d.ts +23 -0
- package/dist/config/apply-env.d.ts.map +1 -0
- package/dist/config/apply-env.js +89 -0
- package/dist/config/apply-env.js.map +1 -0
- package/dist/config/constants.d.ts +326 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +214 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/config/loader.d.ts +57 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +287 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/resolver.d.ts +30 -0
- package/dist/config/resolver.d.ts.map +1 -0
- package/dist/config/resolver.js +29 -0
- package/dist/config/resolver.js.map +1 -0
- package/dist/config/types.d.ts +46 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/context/packs.d.ts +33 -0
- package/dist/context/packs.d.ts.map +1 -0
- package/dist/context/packs.js +180 -0
- package/dist/context/packs.js.map +1 -0
- package/dist/core/IndexerEngine.d.ts +24 -0
- package/dist/core/IndexerEngine.d.ts.map +1 -0
- package/dist/core/IndexerEngine.js +372 -0
- package/dist/core/IndexerEngine.js.map +1 -0
- package/dist/core/SearchService.d.ts +25 -0
- package/dist/core/SearchService.d.ts.map +1 -0
- package/dist/core/SearchService.js +455 -0
- package/dist/core/SearchService.js.map +1 -0
- package/dist/core/batch-indexer.d.ts +56 -0
- package/dist/core/batch-indexer.d.ts.map +1 -0
- package/dist/core/batch-indexer.js +192 -0
- package/dist/core/batch-indexer.js.map +1 -0
- package/dist/core/indexer.d.ts +3 -0
- package/dist/core/indexer.d.ts.map +1 -0
- package/dist/core/indexer.js +6 -0
- package/dist/core/indexer.js.map +1 -0
- package/dist/core/indexing/chunk-pipeline.d.ts +39 -0
- package/dist/core/indexing/chunk-pipeline.d.ts.map +1 -0
- package/dist/core/indexing/chunk-pipeline.js +210 -0
- package/dist/core/indexing/chunk-pipeline.js.map +1 -0
- package/dist/core/indexing/file-scanner.d.ts +11 -0
- package/dist/core/indexing/file-scanner.d.ts.map +1 -0
- package/dist/core/indexing/file-scanner.js +49 -0
- package/dist/core/indexing/file-scanner.js.map +1 -0
- package/dist/core/metadata.d.ts +19 -0
- package/dist/core/metadata.d.ts.map +1 -0
- package/dist/core/metadata.js +161 -0
- package/dist/core/metadata.js.map +1 -0
- package/dist/core/search.d.ts +7 -0
- package/dist/core/search.d.ts.map +1 -0
- package/dist/core/search.js +16 -0
- package/dist/core/search.js.map +1 -0
- package/dist/core/symbol-extractor.d.ts +3 -0
- package/dist/core/symbol-extractor.d.ts.map +1 -0
- package/dist/core/symbol-extractor.js +78 -0
- package/dist/core/symbol-extractor.js.map +1 -0
- package/dist/core/types.d.ts +104 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +2 -0
- package/dist/core/types.js.map +1 -0
- package/dist/database/db.d.ts +101 -0
- package/dist/database/db.d.ts.map +1 -0
- package/dist/database/db.js +326 -0
- package/dist/database/db.js.map +1 -0
- package/dist/indexer/merkle.d.ts +13 -0
- package/dist/indexer/merkle.d.ts.map +1 -0
- package/dist/indexer/merkle.js +86 -0
- package/dist/indexer/merkle.js.map +1 -0
- package/dist/indexer/update.d.ts +19 -0
- package/dist/indexer/update.d.ts.map +1 -0
- package/dist/indexer/update.js +40 -0
- package/dist/indexer/update.js.map +1 -0
- package/dist/indexer/watch.d.ts +21 -0
- package/dist/indexer/watch.d.ts.map +1 -0
- package/dist/indexer/watch.js +224 -0
- package/dist/indexer/watch.js.map +1 -0
- package/dist/languages/rules.d.ts +11 -0
- package/dist/languages/rules.d.ts.map +1 -0
- package/dist/languages/rules.js +371 -0
- package/dist/languages/rules.js.map +1 -0
- package/dist/languages/tree-sitter-loader.d.ts +27 -0
- package/dist/languages/tree-sitter-loader.d.ts.map +1 -0
- package/dist/languages/tree-sitter-loader.js +76 -0
- package/dist/languages/tree-sitter-loader.js.map +1 -0
- package/dist/mcp/handlers/context.d.ts +15 -0
- package/dist/mcp/handlers/context.d.ts.map +1 -0
- package/dist/mcp/handlers/context.js +31 -0
- package/dist/mcp/handlers/context.js.map +1 -0
- package/dist/mcp/handlers/index.d.ts +5 -0
- package/dist/mcp/handlers/index.d.ts.map +1 -0
- package/dist/mcp/handlers/index.js +5 -0
- package/dist/mcp/handlers/index.js.map +1 -0
- package/dist/mcp/handlers/project.d.ts +41 -0
- package/dist/mcp/handlers/project.d.ts.map +1 -0
- package/dist/mcp/handlers/project.js +76 -0
- package/dist/mcp/handlers/project.js.map +1 -0
- package/dist/mcp/handlers/search.d.ts +27 -0
- package/dist/mcp/handlers/search.d.ts.map +1 -0
- package/dist/mcp/handlers/search.js +108 -0
- package/dist/mcp/handlers/search.js.map +1 -0
- package/dist/mcp/handlers/synthesis.d.ts +15 -0
- package/dist/mcp/handlers/synthesis.d.ts.map +1 -0
- package/dist/mcp/handlers/synthesis.js +58 -0
- package/dist/mcp/handlers/synthesis.js.map +1 -0
- package/dist/mcp/schemas.d.ts +166 -0
- package/dist/mcp/schemas.d.ts.map +1 -0
- package/dist/mcp/schemas.js +159 -0
- package/dist/mcp/schemas.js.map +1 -0
- package/dist/mcp/tools/ask-codebase.d.ts +85 -0
- package/dist/mcp/tools/ask-codebase.d.ts.map +1 -0
- package/dist/mcp/tools/ask-codebase.js +125 -0
- package/dist/mcp/tools/ask-codebase.js.map +1 -0
- package/dist/mcp/tools/use-context-pack.d.ts +57 -0
- package/dist/mcp/tools/use-context-pack.d.ts.map +1 -0
- package/dist/mcp/tools/use-context-pack.js +91 -0
- package/dist/mcp/tools/use-context-pack.js.map +1 -0
- package/dist/mcp-server.d.ts +13 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +263 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/providers/base.d.ts +39 -0
- package/dist/providers/base.d.ts.map +1 -0
- package/dist/providers/base.js +198 -0
- package/dist/providers/base.js.map +1 -0
- package/dist/providers/chat-llm.d.ts +35 -0
- package/dist/providers/chat-llm.d.ts.map +1 -0
- package/dist/providers/chat-llm.js +98 -0
- package/dist/providers/chat-llm.js.map +1 -0
- package/dist/providers/index.d.ts +6 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +12 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai.d.ts +18 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +132 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/token-counter.d.ts +2 -0
- package/dist/providers/token-counter.d.ts.map +1 -0
- package/dist/providers/token-counter.js +18 -0
- package/dist/providers/token-counter.js.map +1 -0
- package/dist/ranking/api-reranker.d.ts +18 -0
- package/dist/ranking/api-reranker.d.ts.map +1 -0
- package/dist/ranking/api-reranker.js +137 -0
- package/dist/ranking/api-reranker.js.map +1 -0
- package/dist/ranking/symbol-boost.d.ts +15 -0
- package/dist/ranking/symbol-boost.d.ts.map +1 -0
- package/dist/ranking/symbol-boost.js +175 -0
- package/dist/ranking/symbol-boost.js.map +1 -0
- package/dist/search/bm25.d.ts +17 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +56 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/hybrid.d.ts +21 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +50 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/scope.d.ts +5 -0
- package/dist/search/scope.d.ts.map +1 -0
- package/dist/search/scope.js +107 -0
- package/dist/search/scope.js.map +1 -0
- package/dist/storage/encrypted-chunks.d.ts +40 -0
- package/dist/storage/encrypted-chunks.d.ts.map +1 -0
- package/dist/storage/encrypted-chunks.js +238 -0
- package/dist/storage/encrypted-chunks.js.map +1 -0
- package/dist/symbols/extract.d.ts +15 -0
- package/dist/symbols/extract.d.ts.map +1 -0
- package/dist/symbols/extract.js +208 -0
- package/dist/symbols/extract.js.map +1 -0
- package/dist/symbols/graph.d.ts +3 -0
- package/dist/symbols/graph.d.ts.map +1 -0
- package/dist/symbols/graph.js +89 -0
- package/dist/symbols/graph.js.map +1 -0
- package/dist/synthesis/conversational-synthesizer.d.ts +61 -0
- package/dist/synthesis/conversational-synthesizer.d.ts.map +1 -0
- package/dist/synthesis/conversational-synthesizer.js +289 -0
- package/dist/synthesis/conversational-synthesizer.js.map +1 -0
- package/dist/synthesis/markdown-formatter.d.ts +13 -0
- package/dist/synthesis/markdown-formatter.d.ts.map +1 -0
- package/dist/synthesis/markdown-formatter.js +104 -0
- package/dist/synthesis/markdown-formatter.js.map +1 -0
- package/dist/synthesis/prompt-builder.d.ts +21 -0
- package/dist/synthesis/prompt-builder.d.ts.map +1 -0
- package/dist/synthesis/prompt-builder.js +129 -0
- package/dist/synthesis/prompt-builder.js.map +1 -0
- package/dist/synthesis/synthesizer.d.ts +30 -0
- package/dist/synthesis/synthesizer.d.ts.map +1 -0
- package/dist/synthesis/synthesizer.js +213 -0
- package/dist/synthesis/synthesizer.js.map +1 -0
- package/dist/tests/rate-limiter.test.d.ts +2 -0
- package/dist/tests/rate-limiter.test.d.ts.map +1 -0
- package/dist/tests/rate-limiter.test.js +11 -0
- package/dist/tests/rate-limiter.test.js.map +1 -0
- package/dist/tests/search-normalization.test.d.ts +2 -0
- package/dist/tests/search-normalization.test.d.ts.map +1 -0
- package/dist/tests/search-normalization.test.js +9 -0
- package/dist/tests/search-normalization.test.js.map +1 -0
- package/dist/tests/semantic-chunker.test.d.ts +2 -0
- package/dist/tests/semantic-chunker.test.d.ts.map +1 -0
- package/dist/tests/semantic-chunker.test.js +48 -0
- package/dist/tests/semantic-chunker.test.js.map +1 -0
- package/dist/tests/simple-lru.test.d.ts +2 -0
- package/dist/tests/simple-lru.test.d.ts.map +1 -0
- package/dist/tests/simple-lru.test.js +21 -0
- package/dist/tests/simple-lru.test.js.map +1 -0
- package/dist/tests/symbol-boost.test.d.ts +2 -0
- package/dist/tests/symbol-boost.test.d.ts.map +1 -0
- package/dist/tests/symbol-boost.test.js +21 -0
- package/dist/tests/symbol-boost.test.js.map +1 -0
- package/dist/types/ast.d.ts +3 -0
- package/dist/types/ast.d.ts.map +1 -0
- package/dist/types/ast.js +2 -0
- package/dist/types/ast.js.map +1 -0
- package/dist/types/codemap.d.ts +58 -0
- package/dist/types/codemap.d.ts.map +1 -0
- package/dist/types/codemap.js +224 -0
- package/dist/types/codemap.js.map +1 -0
- package/dist/types/context-pack.d.ts +47 -0
- package/dist/types/context-pack.d.ts.map +1 -0
- package/dist/types/context-pack.js +44 -0
- package/dist/types/context-pack.js.map +1 -0
- package/dist/types/search.d.ts +15 -0
- package/dist/types/search.d.ts.map +1 -0
- package/dist/types/search.js +11 -0
- package/dist/types/search.js.map +1 -0
- package/dist/utils/cli-ui.d.ts +44 -0
- package/dist/utils/cli-ui.d.ts.map +1 -0
- package/dist/utils/cli-ui.js +139 -0
- package/dist/utils/cli-ui.js.map +1 -0
- package/dist/utils/indexer-with-progress.d.ts +10 -0
- package/dist/utils/indexer-with-progress.d.ts.map +1 -0
- package/dist/utils/indexer-with-progress.js +43 -0
- package/dist/utils/indexer-with-progress.js.map +1 -0
- package/dist/utils/logger.d.ts +55 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +121 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/mutex.d.ts +63 -0
- package/dist/utils/mutex.d.ts.map +1 -0
- package/dist/utils/mutex.js +123 -0
- package/dist/utils/mutex.js.map +1 -0
- package/dist/utils/path-helpers.d.ts +27 -0
- package/dist/utils/path-helpers.d.ts.map +1 -0
- package/dist/utils/path-helpers.js +55 -0
- package/dist/utils/path-helpers.js.map +1 -0
- package/dist/utils/rate-limiter.d.ts +34 -0
- package/dist/utils/rate-limiter.d.ts.map +1 -0
- package/dist/utils/rate-limiter.js +178 -0
- package/dist/utils/rate-limiter.js.map +1 -0
- package/dist/utils/scan-patterns.d.ts +5 -0
- package/dist/utils/scan-patterns.d.ts.map +1 -0
- package/dist/utils/scan-patterns.js +29 -0
- package/dist/utils/scan-patterns.js.map +1 -0
- package/dist/utils/simple-lru.d.ts +10 -0
- package/dist/utils/simple-lru.d.ts.map +1 -0
- package/dist/utils/simple-lru.js +38 -0
- package/dist/utils/simple-lru.js.map +1 -0
- package/package.json +3 -2
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { BATCH_SIZE } from '../providers/base.js';
|
|
2
|
+
import { Mutex } from '../utils/mutex.js';
|
|
3
|
+
import { log } from '../utils/logger.js';
|
|
4
|
+
// Upper bound on rate-limit retries: processBatchWithRetry only backs off and retries while retryCount is below this value, then falls back to per-chunk processing.
const MAX_BATCH_RETRIES = 3;
|
|
5
|
+
// Base backoff delay in milliseconds; the wait before each retry is this value multiplied by 2^retryCount.
const INITIAL_RETRY_DELAY_MS = 1000;
|
|
6
|
+
/**
 * Heuristically decide whether a thrown value represents provider rate limiting.
 *
 * A value qualifies when its `status` or `statusCode` is 429, or when its
 * message (or its string form, if it has no message) mentions a rate limit,
 * "too many requests", or the standalone number 429.
 *
 * Matching is case-insensitive so the standard HTTP reason phrase
 * "Too Many Requests" is recognised, and the message is coerced to a string
 * so a non-string `message` cannot make this helper throw inside a catch block.
 *
 * @param error Any thrown value (Error, plain object, string, null, ...).
 * @returns true when the failure looks like a rate-limit response.
 */
function isRateLimitError(error) {
    if (error?.status === 429 || error?.statusCode === 429) {
        return true;
    }
    const message = String(error?.message || error).toLowerCase();
    return (message.includes('rate limit') ||
        message.includes('too many requests') ||
        // Word boundaries keep unrelated numbers such as "14290 tokens" from matching.
        /\b429\b/.test(message));
}
|
|
15
|
+
/**
 * Heuristically decide whether a thrown value means the request was too big.
 *
 * A value qualifies when its `status` or `statusCode` is 413, or when its
 * message (or its string form, if it has no message) mentions "too large",
 * "payload", "request size" or "token limit".
 *
 * Matching is case-insensitive so the standard HTTP reason phrase
 * "Payload Too Large" is recognised, and the message is coerced to a string
 * so a non-string `message` cannot make this helper throw inside a catch block.
 *
 * @param error Any thrown value (Error, plain object, string, null, ...).
 * @returns true when the failure looks like an oversized-request response.
 */
function isBatchSizeError(error) {
    if (error?.status === 413 || error?.statusCode === 413) {
        return true;
    }
    const message = String(error?.message || error).toLowerCase();
    return ['too large', 'payload', 'request size', 'token limit']
        .some(marker => message.includes(marker));
}
|
|
23
|
+
/**
 * Queues chunks and stores their embeddings in batches.
 *
 * Chunks accumulate in `batch`; once `batchSize` is reached (or `flush()` is
 * called) the queue is embedded with a single `generateEmbeddings` call and
 * written through `db.insertChunk` inside `db.transaction`. Failures are
 * handled by splitting oversized batches, backing off on rate limits, and
 * finally falling back to one-chunk-at-a-time processing.
 */
export class BatchEmbeddingProcessor {
    embeddingProvider;
    db;
    batch = [];
    batchSize;
    mutex = new Mutex();
    /**
     * @param embeddingProvider Object providing generateEmbeddings(texts),
     *   generateEmbedding(text), getName(), getDimensions() and an optional
     *   `rateLimiter` with an execute(fn) method.
     * @param db Object providing transaction(fn) and insertChunk(record).
     * @param batchSize Queue length at which addChunk triggers processing.
     */
    constructor(embeddingProvider, db, batchSize = BATCH_SIZE) {
        this.embeddingProvider = embeddingProvider;
        this.db = db;
        this.batchSize = batchSize;
    }
    /**
     * Add a chunk to the batch queue, processing the queue once it reaches
     * the configured batch size.
     */
    async addChunk(chunk) {
        this.batch.push(chunk);
        if (this.batch.length >= this.batchSize) {
            await this.processBatch();
        }
    }
    /**
     * Process any remaining chunks in the batch.
     */
    async flush() {
        await this.processBatch();
    }
    /**
     * Process the current batch (with mutex protection).
     *
     * The queue is detached before the first await so that chunks added while
     * the embedding request is in flight land in a fresh queue instead of
     * being stored without an embedding and then discarded. If processing
     * throws, the detached chunks are put back in front of the queue so they
     * remain pending, as they did before.
     */
    async processBatch() {
        await this.mutex.runExclusive(async () => {
            if (this.batch.length === 0) {
                return;
            }
            const pending = this.batch;
            this.batch = [];
            try {
                await this.processBatchWithRetry(pending, 0);
            }
            catch (error) {
                this.batch = pending.concat(this.batch);
                throw error;
            }
        });
    }
    /**
     * Process a batch with error-type-specific recovery.
     *
     * - Batch-size errors on a batch of more than one chunk: split in half and
     *   process each half (retry counter reset).
     * - Rate-limit errors: wait INITIAL_RETRY_DELAY_MS * 2^retryCount and retry,
     *   up to MAX_BATCH_RETRIES times.
     * - Anything else, or retries exhausted: process chunks individually.
     *
     * @param currentBatch Chunks to embed and store.
     * @param retryCount Number of rate-limit retries already made for this batch.
     */
    async processBatchWithRetry(currentBatch, retryCount) {
        try {
            await this.processBatchInternal(currentBatch);
        }
        catch (error) {
            if (isBatchSizeError(error) && currentBatch.length > 1) {
                log.warn(`Batch size too large (${currentBatch.length} chunks), splitting and retrying`);
                const mid = Math.floor(currentBatch.length / 2);
                await this.processBatchWithRetry(currentBatch.slice(0, mid), 0);
                await this.processBatchWithRetry(currentBatch.slice(mid), 0);
                return;
            }
            if (isRateLimitError(error) && retryCount < MAX_BATCH_RETRIES) {
                const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, retryCount);
                log.warn(`Rate limit hit, retrying batch in ${delay}ms (attempt ${retryCount + 1}/${MAX_BATCH_RETRIES})`);
                await new Promise(resolve => setTimeout(resolve, delay));
                await this.processBatchWithRetry(currentBatch, retryCount + 1);
                return;
            }
            // Other errors or max retries reached - fall back to individual processing
            log.error(`Batch processing failed for ${currentBatch.length} chunks`, error);
            log.warn('Falling back to individual processing (this will be slower)');
            await this.fallbackToIndividualProcessing(currentBatch);
        }
    }
    /**
     * Embed a batch with one provider call and store the results inside a
     * database transaction.
     *
     * @throws Error when the provider does not return exactly one embedding
     *   per chunk; the caller then falls back to individual processing rather
     *   than storing chunks with a missing embedding.
     */
    async processBatchInternal(batch) {
        if (batch.length === 0)
            return;
        const texts = batch.map(chunk => chunk.enhancedEmbeddingText);
        // Log batching activity at debug to reduce noise in normal runs
        log.debug(`Processing batch of ${texts.length} chunks`);
        // Generate embeddings in batch (single API call for all), going through
        // the provider's rate limiter when it has one.
        const generate = async () => this.embeddingProvider.generateEmbeddings(texts);
        const embeddings = this.embeddingProvider.rateLimiter
            ? await this.embeddingProvider.rateLimiter.execute(generate)
            : await generate();
        if (embeddings?.length !== texts.length) {
            // Counts are deliberately kept out of the message: the error
            // classifiers match on message text, including digits.
            throw new Error('Embedding provider returned a different number of embeddings than chunks submitted');
        }
        log.debug(`Batch complete (${texts.length} embeddings generated)`);
        const records = batch.map((chunk, index) => this.toChunkRecord(chunk, embeddings[index]));
        await this.db.transaction(() => {
            for (const record of records) {
                this.db.insertChunk(record);
            }
        });
    }
    /**
     * Fall back to processing chunks individually.
     *
     * Every chunk is attempted even if earlier ones fail; failures are logged
     * and counted.
     *
     * @throws Error only when every chunk in a non-empty batch failed.
     */
    async fallbackToIndividualProcessing(batch) {
        const errors = [];
        for (const chunk of batch) {
            try {
                // NOTE(review): unlike the batch path, this call is not routed through
                // embeddingProvider.rateLimiter - confirm whether generateEmbedding throttles itself.
                const embedding = await this.embeddingProvider.generateEmbedding(chunk.enhancedEmbeddingText);
                this.db.insertChunk(this.toChunkRecord(chunk, embedding));
            }
            catch (individualError) {
                log.error(`Failed to process chunk ${chunk.chunkId}`, individualError);
                errors.push({
                    chunkId: chunk.chunkId,
                    error: individualError
                });
            }
        }
        // Report errors but don't throw to allow indexing to continue
        if (errors.length > 0) {
            log.warn(`${errors.length}/${batch.length} chunks failed in fallback processing`);
            if (errors.length === batch.length) {
                // Only throw if ALL chunks failed. The thrown value may not be an
                // Error, so read its message defensively.
                const firstError = errors[0].error;
                const reason = firstError?.message ?? String(firstError);
                throw new Error(`All ${errors.length} chunks failed to process: ${reason}`);
            }
        }
    }
    /**
     * Build the record passed to db.insertChunk for one chunk and its embedding.
     */
    toChunkRecord(chunk, embedding) {
        const params = chunk.params;
        return {
            id: chunk.chunkId,
            file_path: params.rel,
            symbol: params.symbol,
            sha: params.sha,
            lang: params.lang,
            chunk_type: params.chunkType,
            embedding,
            embedding_provider: this.embeddingProvider.getName(),
            embedding_dimensions: this.embeddingProvider.getDimensions(),
            codevault_tags: params.codevaultMetadata.tags,
            codevault_intent: params.codevaultMetadata.intent,
            codevault_description: params.codevaultMetadata.description,
            doc_comments: params.docComments,
            variables_used: params.importantVariables,
            context_info: params.contextInfo
        };
    }
    /**
     * Get the number of chunks currently queued, for monitoring.
     */
    getBatchCount() {
        return this.batch.length;
    }
}
|
|
192
|
+
//# sourceMappingURL=batch-indexer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch-indexer.js","sourceRoot":"","sources":["../../src/core/batch-indexer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAGlD,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAEzC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,sBAAsB,GAAG,IAAI,CAAC;AAEpC,SAAS,gBAAgB,CAAC,KAAU;IAClC,MAAM,OAAO,GAAG,KAAK,EAAE,OAAO,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IAChD,OAAO,CACL,KAAK,EAAE,MAAM,KAAK,GAAG;QACrB,KAAK,EAAE,UAAU,KAAK,GAAG;QACzB,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;QAC9B,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;QAC9B,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAC;QACrC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CACxB,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAU;IAClC,MAAM,OAAO,GAAG,KAAK,EAAE,OAAO,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IAChD,OAAO,CACL,KAAK,EAAE,MAAM,KAAK,GAAG;QACrB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;QAC7B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;QAC3B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC;QAChC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAChC,CAAC;AACJ,CAAC;AAmBD,MAAM,OAAO,uBAAuB;IAMxB;IACA;IANF,KAAK,GAAmB,EAAE,CAAC;IAC3B,SAAS,CAAS;IAClB,KAAK,GAAG,IAAI,KAAK,EAAE,CAAC;IAE5B,YACU,iBAAoC,EACpC,EAAY,EACpB,YAAoB,UAAU;QAFtB,sBAAiB,GAAjB,iBAAiB,CAAmB;QACpC,OAAE,GAAF,EAAE,CAAU;QAGpB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAmB;QAChC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAEvB,8CAA8C;QAC9C,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACxC,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC5B,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,IAAI,EAAE;YACvC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,MAAM,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;YAClB,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY;QACxB,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,IAAI,EAAE;YACvC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,MAAM,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;YAClB,CAAC;QACH,CAAC
,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,qBAAqB,CAAC,YAA4B,EAAE,UAAkB;QAClF,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,oBAAoB,CAAC,YAAY,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,2CAA2C;YAC3C,IAAI,gBAAgB,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvD,4CAA4C;gBAC5C,GAAG,CAAC,IAAI,CAAC,yBAAyB,YAAY,CAAC,MAAM,kCAAkC,CAAC,CAAC;gBACzF,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAChD,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAC7C,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAE3C,kCAAkC;gBAClC,MAAM,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;gBAC/C,MAAM,IAAI,CAAC,qBAAqB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;gBAChD,OAAO;YACT,CAAC;iBAAM,IAAI,gBAAgB,CAAC,KAAK,CAAC,IAAI,UAAU,GAAG,iBAAiB,EAAE,CAAC;gBACrE,yCAAyC;gBACzC,MAAM,KAAK,GAAG,sBAAsB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;gBAC/D,GAAG,CAAC,IAAI,CAAC,qCAAqC,KAAK,eAAe,UAAU,GAAG,CAAC,IAAI,iBAAiB,GAAG,CAAC,CAAC;gBAC1G,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBAEzD,MAAM,IAAI,CAAC,qBAAqB,CAAC,YAAY,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;gBAC/D,OAAO;YACT,CAAC;YAED,2EAA2E;YAC3E,GAAG,CAAC,KAAK,CAAC,+BAA+B,YAAY,CAAC,MAAM,SAAS,EAAE,KAAK,CAAC,CAAC;YAC9E,GAAG,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;YAExE,MAAM,IAAI,CAAC,8BAA8B,CAAC,YAAY,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,oBAAoB,CAAC,KAAqB;QACtD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAE/B,8BAA8B;QAC9B,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAE9D,gEAAgE;QAChE,GAAG,CAAC,KAAK,CAAC,uBAAuB,KAAK,CAAC,MAAM,SAAS,CAAC,CAAC;QAExD,yDAAyD;QACzD,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE,CAAC,IAAI,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAC9E,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW;YACnD,CAAC,CAAC,MAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC;YAC5D,CAAC,CAAC,MAAM,QAAQ,EAAE,CAAC;QAErB,GAAG,CAAC,KAAK,CAAC,mBAAmB,KAAK,CAAC,MAAM,wBAAwB,CAAC,CAAC;QAEnE,wDAAwD;QACxD,MAAM,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAC7B,KAAK,IAAI,CAAC,GAAG,CAAC
,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACvB,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAEhC,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC;oBAClB,EAAE,EAAE,KAAK,CAAC,OAAO;oBACjB,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG;oBAC3B,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM;oBAC3B,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG;oBACrB,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI;oBACvB,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,SAAS;oBAClC,SAAS;oBACT,kBAAkB,EAAE,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE;oBACpD,oBAAoB,EAAE,IAAI,CAAC,iBAAiB,CAAC,aAAa,EAAE;oBAC5D,cAAc,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI;oBACnD,gBAAgB,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,MAAM;oBACvD,qBAAqB,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,WAAW;oBACjE,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;oBACtC,cAAc,EAAE,KAAK,CAAC,MAAM,CAAC,kBAAkB;oBAC/C,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;iBACvC,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,8BAA8B,CAAC,KAAqB;QAChE,uEAAuE;QACvE,MAAM,MAAM,GAA6C,EAAE,CAAC;QAE5D,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,iBAAiB,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBAE9F,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC;oBAClB,EAAE,EAAE,KAAK,CAAC,OAAO;oBACjB,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG;oBAC3B,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM;oBAC3B,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,GAAG;oBACrB,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI;oBACvB,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,SAAS;oBAClC,SAAS;oBACT,kBAAkB,EAAE,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE;oBACpD,oBAAoB,EAAE,IAAI,CAAC,iBAAiB,CAAC,aAAa,EAAE;oBAC5D,cAAc,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI;oBACnD,gBAAgB,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,MAAM;oBACvD,qBAAqB,EAAE,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,WAAW;oBACjE,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;oBACtC,cAAc,EAAE,KAAK,CAAC,MAAM,CAAC,kBAAkB;oBAC/C,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,WAAW;iBACvC,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,eAAe,EAAE,CAAC;gBACzB,GAAG,CAAC,KAAK,CAAC,2BAA2B,KAAK,CAAC,OAAO,EAAE,EAAE,eAAe,CAAC,CAAC;gBACvE,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO,EAAE,KAAK,CAAC,OAAO;oBAC
tB,KAAK,EAAE,eAAwB;iBAChC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,8DAA8D;QAC9D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,GAAG,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,uCAAuC,CAAC,CAAC;YAClF,IAAI,MAAM,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBACnC,kCAAkC;gBAClC,MAAM,IAAI,KAAK,CAAC,OAAO,MAAM,CAAC,MAAM,8BAA8B,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/F,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAC3B,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../src/core/indexer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAE1E,wBAAsB,YAAY,CAAC,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAGjG"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../src/core/indexer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,UAA+B,EAAE;IAChE,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,OAAO,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;AAChC,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { LanguageRule } from '../../languages/rules.js';
|
|
2
|
+
import type { ModelProfile } from '../../providers/base.js';
|
|
3
|
+
/**
 * Numeric size limits accepted by `ChunkPipeline.processGroups` as its `limits` argument.
 * NOTE(review): the meaning of each bound and the allowed values of `unit` are not
 * visible in this declaration file - confirm against the chunking implementation.
 */
type SizeLimits = {
|
|
4
|
+
optimal: number;
|
|
5
|
+
min: number;
|
|
6
|
+
max: number;
|
|
7
|
+
overlap: number;
|
|
8
|
+
unit: string;
|
|
9
|
+
};
|
|
10
|
+
/**
 * Previously indexed chunk state accepted by `ChunkPipeline.processGroups` as its `existing` argument:
 * a set of chunk ids and a string-keyed map of chunk data.
 * NOTE(review): presumably ids are removed from `staleChunkIds` as chunks are re-seen and
 * `existingChunks` is keyed by chunk id - confirm against the pipeline implementation.
 */
interface ExistingChunks {
|
|
11
|
+
staleChunkIds: Set<string>;
|
|
12
|
+
existingChunks: Map<string, any>;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Argument object passed to the `embedAndStore` callback that `ChunkPipeline.processGroups` receives.
 * It carries the chunk's code, the text to embed, identifying fields (chunk id, sha, language,
 * relative path, symbol, chunk type) and extracted metadata.
 * NOTE(review): the shapes of the `any`-typed fields are not visible here - confirm against the callers.
 */
interface EmbedStoreParams {
|
|
15
|
+
code: string;
|
|
16
|
+
enhancedEmbeddingText: string;
|
|
17
|
+
chunkId: string;
|
|
18
|
+
sha: string;
|
|
19
|
+
lang: string;
|
|
20
|
+
rel: string;
|
|
21
|
+
symbol: string;
|
|
22
|
+
chunkType: string;
|
|
23
|
+
codevaultMetadata: any;
|
|
24
|
+
importantVariables: any[];
|
|
25
|
+
docComments: string | null;
|
|
26
|
+
contextInfo: any;
|
|
27
|
+
symbolData: any;
|
|
28
|
+
}
|
|
29
|
+
/**
 * Declaration of the chunk pipeline class.
 *
 * `collectNodesForFile` takes a file's source text and its language rule and resolves to
 * tree-sitter syntax nodes. `processGroups` takes node groups plus size limits, model profile,
 * relative path, existing-chunk state, a Merkle hash list, a progress callback, an
 * `embedAndStore` callback and a stats object, and resolves to nothing.
 * NOTE(review): whether `processGroups` mutates `chunkMerkleHashes`, `existing` or
 * `chunkingStats` cannot be determined from this declaration - confirm against the implementation.
 */
export declare class ChunkPipeline {
|
|
30
|
+
private parser;
|
|
31
|
+
private processedNodes;
|
|
32
|
+
constructor();
|
|
33
|
+
collectNodesForFile(source: string, rule: LanguageRule): Promise<import("tree-sitter").SyntaxNode[]>;
|
|
34
|
+
processGroups(nodeGroups: any[], source: string, rule: LanguageRule, limits: SizeLimits, modelProfile: ModelProfile, rel: string, existing: ExistingChunks, chunkMerkleHashes: string[], onProgress: any, embedAndStore: (params: EmbedStoreParams) => Promise<void>, chunkingStats: any): Promise<void>;
|
|
35
|
+
private yieldChunk;
|
|
36
|
+
private processChunk;
|
|
37
|
+
}
|
|
38
|
+
export {};
|
|
39
|
+
//# sourceMappingURL=chunk-pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAE5D,KAAK,UAAU,GAAG;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,UAAU,cAAc;IACtB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3B,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,UAAU,gBAAgB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,GAAG,CAAC;IACvB,kBAAkB,EAAE,GAAG,EAAE,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,GAAG,CAAC;IACjB,UAAU,EAAE,GAAG,CAAC;CACjB;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,cAAc,CAAqB;;IAMrC,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY;IA6DtD,aAAa,CACjB,UAAU,EAAE,GAAG,EAAE,EACjB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,YAAY,EAC1B,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,cAAc,EACxB,iBAAiB,EAAE,MAAM,EAAE,EAC3B,UAAU,EAAE,GAAG,EACf,aAAa,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,OAAO,CAAC,IAAI,CAAC,EAC1D,aAAa,EAAE,GAAG,GACjB,OAAO,CAAC,IAAI,CAAC;YAgCF,UAAU;YAqGV,YAAY;CAmF3B"}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import Parser from 'tree-sitter';
|
|
2
|
+
import crypto from 'crypto';
|
|
3
|
+
import { analyzeNodeForChunking, batchAnalyzeNodes, yieldStatementChunks } from '../../chunking/semantic-chunker.js';
|
|
4
|
+
import { createCombinedChunk } from '../../chunking/file-grouper.js';
|
|
5
|
+
import { extractSymbolMetadata } from '../../symbols/extract.js';
|
|
6
|
+
import { extractSymbolName } from '../symbol-extractor.js';
|
|
7
|
+
import { extractCodevaultMetadata, extractSemanticTags, extractImportantVariables, extractDocComments, generateEnhancedEmbeddingText } from '../metadata.js';
|
|
8
|
+
import { computeFastHash } from '../../indexer/merkle.js';
|
|
9
|
+
import { SIZE_THRESHOLD, CHUNK_SIZE } from '../../config/constants.js';
|
|
10
|
+
/**
 * Parses a source file with tree-sitter, selects the syntax nodes a language rule cares about,
 * and converts them into chunks that are handed to a caller-supplied embed-and-store callback.
 */
export class ChunkPipeline {
    parser;
    // Ids of subdivision candidates seen during the current processGroups run.
    // Written by yieldChunk; nothing inside this class reads it back.
    processedNodes = new Set();
    constructor() {
        this.parser = new Parser();
    }
    /**
     * Parses `source` using the grammar in `rule.ts` and returns every node whose type is listed
     * in `rule.nodeTypes`, parents before their children.
     *
     * @param source full text of the file
     * @param rule language rule providing the grammar (`ts`) and the wanted node types
     * @returns the collected syntax nodes
     * @throws Error when the parser produces no tree or no root node
     */
    async collectNodesForFile(source, rule) {
        this.parser.setLanguage(rule.ts);
        // Sources above SIZE_THRESHOLD are fed to the parser in CHUNK_SIZE slices through a
        // callback; smaller sources are passed as one string.
        const tree = source.length > SIZE_THRESHOLD
            ? this.parser.parse((offset) => (offset < source.length
                ? source.slice(offset, Math.min(offset + CHUNK_SIZE, source.length))
                : null))
            : this.parser.parse(source);
        if (!tree || !tree.rootNode) {
            throw new Error('Failed to create syntax tree');
        }
        const wrappedDeclarationTypes = ['function_declaration', 'class_declaration', 'method_definition'];
        const matches = [];
        const visit = (current) => {
            const children = [];
            for (let index = 0; index < current.childCount; index++) {
                const child = current.child(index);
                if (child) {
                    children.push(child);
                }
            }
            const isExport = current.type === 'export_statement';
            // An export that directly wraps a declaration is never collected itself; the
            // declaration underneath is picked up while visiting the children.
            const wrapsDeclaration = isExport
                && children.some((child) => wrappedDeclarationTypes.includes(child.type));
            if (!wrapsDeclaration && rule.nodeTypes.includes(current.type)) {
                matches.push(current);
                if (isExport) {
                    // A collected export statement is treated as a leaf.
                    return;
                }
            }
            children.forEach((child) => visit(child));
        };
        visit(tree.rootNode);
        return matches;
    }
    /**
     * Processes node groups in order. A group holding exactly one node is routed through
     * yieldChunk; any other group is merged by createCombinedChunk and emitted as a single chunk
     * (groups for which no combined chunk is produced are ignored).
     *
     * Resets `processedNodes` at the start of every call and mutates `chunkingStats`,
     * `chunkMerkleHashes` and `existing.staleChunkIds` in place.
     */
    async processGroups(nodeGroups, source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats) {
        this.processedNodes = new Set();
        for (const group of nodeGroups) {
            if (group.nodes.length === 1) {
                await this.yieldChunk(group.nodes[0], source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
                continue;
            }
            const combined = createCombinedChunk(group, source);
            if (!combined) {
                continue;
            }
            const groupSize = group.nodes.length;
            chunkingStats.totalNodes += groupSize;
            chunkingStats.fileGrouped = (chunkingStats.fileGrouped || 0) + 1;
            chunkingStats.functionsGrouped = (chunkingStats.functionsGrouped || 0) + groupSize;
            await this.processChunk(combined.node, combined.code, `group_${groupSize}funcs`, null, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
        }
    }
    /**
     * Decides how a single node becomes chunks, based on the size analysis of the node:
     *  - a nested node (non-null `parentNode`) below `limits.min` is skipped;
     *  - a node flagged for subdivision with at least one candidate is broken into its
     *    candidates, with undersized candidates merged into one chunk when worthwhile;
     *  - a node above `limits.max` is split into statement chunks;
     *  - anything else is emitted whole.
     */
    async yieldChunk(node, source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats, parentNode = null) {
        chunkingStats.totalNodes++;
        const analysis = await analyzeNodeForChunking(node, source, rule, modelProfile);
        const belowMinimum = analysis.size < limits.min;
        if (belowMinimum && parentNode !== null) {
            chunkingStats.skippedSmall++;
            return;
        }
        // Every chunk emitted from here shares the same trailing arguments.
        const emit = (chunkNode, chunkCode, chunkSuffix) => this.processChunk(chunkNode, chunkCode, chunkSuffix, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
        if (analysis.needsSubdivision && analysis.subdivisionCandidates.length > 0) {
            chunkingStats.subdivided++;
            const candidateAnalyses = await batchAnalyzeNodes(analysis.subdivisionCandidates, source, rule, modelProfile, true);
            const undersized = [];
            for (const candidate of candidateAnalyses) {
                const candidateNode = candidate.node;
                if (candidateNode.id !== undefined) {
                    this.processedNodes.add(candidateNode.id);
                }
                if (candidate.size < limits.min) {
                    // Too small on its own: hold it back for a possible merge below.
                    undersized.push({
                        node: candidateNode,
                        code: source.slice(candidateNode.startIndex, candidateNode.endIndex),
                        size: candidate.size
                    });
                }
                else {
                    await this.yieldChunk(candidateNode, source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats, node);
                }
            }
            if (undersized.length === 0) {
                return;
            }
            const combinedSize = undersized.reduce((total, entry) => total + entry.size, 0);
            const worthMerging = combinedSize >= limits.min || undersized.length >= 3;
            if (!worthMerging) {
                chunkingStats.skippedSmall += undersized.length;
                return;
            }
            const mergedCode = undersized.map((entry) => entry.code).join('\n\n');
            // NOTE(review): object spread copies only own enumerable properties of `node`;
            // confirm the fields read downstream by the symbol/metadata helpers survive it.
            const mergedNode = {
                ...node,
                type: `${node.type}_merged`,
                startIndex: undersized[0].node.startIndex,
                endIndex: undersized[undersized.length - 1].node.endIndex
            };
            const mergedSuffix = `small_methods_${undersized.length}`;
            chunkingStats.mergedSmall++;
            await emit(mergedNode, mergedCode, mergedSuffix);
            return;
        }
        if (analysis.size > limits.max) {
            chunkingStats.statementFallback++;
            const pieces = await yieldStatementChunks(node, source, limits.max, limits.overlap, modelProfile);
            let partNumber = 0;
            for (const piece of pieces) {
                partNumber += 1;
                // Parts are numbered from 1 and share the original node.
                await emit(node, piece.code, `${partNumber}`);
            }
            return;
        }
        chunkingStats.normalChunks++;
        await emit(node, source.slice(node.startIndex, node.endIndex), null);
    }
    /**
     * Builds the metadata for one chunk and stores it unless a chunk with the same id exists.
     *
     * The chunk id is `<rel>:<symbol>:<first 8 hex chars of the SHA-1 of code>`; when `suffix` is
     * truthy the symbol becomes `<symbol>_part<suffix>`. Nodes for which no symbol name can be
     * extracted are dropped silently. In every other case the id is removed from
     * `existing.staleChunkIds` and the chunk's fast hash is appended to `chunkMerkleHashes`;
     * `embedAndStore` and `onProgress` only run for ids missing from `existing.existingChunks`.
     */
    async processChunk(node, code, suffix, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats) {
        const baseSymbol = extractSymbolName(node, source);
        if (!baseSymbol) {
            return;
        }
        const symbol = suffix ? `${baseSymbol}_part${suffix}` : baseSymbol;
        const docComments = extractDocComments(source, node, rule);
        const codevaultMetadata = extractCodevaultMetadata(docComments);
        const automaticTags = extractSemanticTags(rel, symbol, code);
        // Union of declared and automatic tags, first occurrence wins the position.
        codevaultMetadata.tags = Array.from(new Set([...codevaultMetadata.tags, ...automaticTags]));
        const importantVariables = extractImportantVariables(node, source, rule);
        const symbolData = extractSymbolMetadata({ node, source, symbol });
        const enhancedEmbeddingText = generateEnhancedEmbeddingText(code, codevaultMetadata, importantVariables, docComments);
        let chunkType = 'function';
        if (node.type.includes('class')) {
            chunkType = 'class';
        }
        else if (node.type.includes('method')) {
            chunkType = 'method';
        }
        // 1-based line number of the character offset within `source`.
        const lineAt = (offset) => source.slice(0, offset).split('\n').length;
        const contextInfo = {
            nodeType: node.type,
            startLine: lineAt(node.startIndex),
            endLine: lineAt(node.endIndex),
            codeLength: code.length,
            hasDocumentation: Boolean(docComments),
            variableCount: importantVariables.length,
            isSubdivision: Boolean(suffix),
            hasParentContext: Boolean(parentNode)
        };
        const sha = crypto.createHash('sha1').update(code).digest('hex');
        const chunkId = `${rel}:${symbol}:${sha.substring(0, 8)}`;
        const chunkMerkleHash = await computeFastHash(code);
        const alreadyIndexed = existing.existingChunks.has(chunkId);
        if (!alreadyIndexed) {
            await embedAndStore({
                code,
                enhancedEmbeddingText,
                chunkId,
                sha,
                lang: rule.lang,
                rel,
                symbol,
                chunkType,
                codevaultMetadata,
                importantVariables,
                docComments,
                contextInfo,
                symbolData
            });
        }
        existing.staleChunkIds.delete(chunkId);
        chunkMerkleHashes.push(chunkMerkleHash);
        if (!alreadyIndexed && onProgress) {
            onProgress({ type: 'chunk_processed', file: rel, symbol, chunkId });
        }
    }
}
|
|
210
|
+
//# sourceMappingURL=chunk-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunk-pipeline.js","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAG5B,OAAO,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,MAAM,oCAAoC,CAAC;AACrH,OAAO,EAAyB,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AAC5F,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,EACL,wBAAwB,EACxB,mBAAmB,EACnB,yBAAyB,EACzB,kBAAkB,EAClB,6BAA6B,EAC9B,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAkCvE,MAAM,OAAO,aAAa;IAChB,MAAM,CAAS;IACf,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;IAE3C;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,mBAAmB,CAAC,MAAc,EAAE,IAAkB;QAC1D,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjC,IAAI,IAAI,CAAC;QACT,IAAI,MAAM,CAAC,MAAM,GAAG,cAAc,EAAE,CAAC;YACnC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAa,EAAE,EAAE;gBACzC,IAAI,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;oBAC1B,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;gBAC1E,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,cAAc,GAAqB,EAAE,CAAC;QAC5C,MAAM,YAAY,GAAG,CAAC,IAAoB,EAAE,EAAE;YAC3C,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;gBACtC,IAAI,cAAc,GAAG,KAAK,CAAC;gBAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;oBACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;oBAC5B,IAAI,KAAK,IAAI,CAAC,sBAAsB,EAAE,mBAAmB,EAAE,mBAAmB,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;wBACrG,cAAc,GAAG,IAAI,CAAC;wBACtB,MAAM;oBACR,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC1D,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC1B,OAAO;gBACT,CAAC;gB
AED,IAAI,cAAc,EAAE,CAAC;oBACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;wBACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;wBAC5B,IAAI,KAAK,EAAE,CAAC;4BACV,YAAY,CAAC,KAAK,CAAC,CAAC;wBACtB,CAAC;oBACH,CAAC;oBACD,OAAO;gBACT,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC5B,IAAI,KAAK,EAAE,CAAC;oBACV,YAAY,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5B,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,aAAa,CACjB,UAAiB,EACjB,MAAc,EACd,IAAkB,EAClB,MAAkB,EAClB,YAA0B,EAC1B,GAAW,EACX,QAAwB,EACxB,iBAA2B,EAC3B,UAAe,EACf,aAA0D,EAC1D,aAAkB;QAElB,IAAI,CAAC,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;QAExC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,SAAS,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjC,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,EAAE,QAAQ,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;YAC5J,CAAC;iBAAM,CAAC;gBACN,MAAM,aAAa,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;gBAC7D,IAAI,aAAa,EAAE,CAAC;oBAClB,aAAa,CAAC,UAAU,IAAI,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;oBACnD,aAAa,CAAC,WAAW,GAAG,CAAC,aAAa,CAAC,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;oBACjE,aAAa,CAAC,gBAAgB,GAAG,CAAC,aAAa,CAAC,gBAAgB,IAAI,CAAC,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;oBAEhG,MAAM,IAAI,CAAC,YAAY,CACrB,aAAa,CAAC,IAAI,EAClB,aAAa,CAAC,IAAI,EAClB,SAAS,SAAS,CAAC,KAAK,CAAC,MAAM,OAAO,EACtC,IAAI,EACJ,MAAM,EACN,GAAG,EACH,IAAI,EACJ,QAAQ,EACR,iBAAiB,EACjB,UAAU,EACV,aAAa,EACb,aAAa,CACd,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU,CACpB,IAAoB,EACpB,MAAc,EACd,IAAkB,EAClB,MAAkB,EAClB,YAA0B,EAC1B,GAAW,EACX,QAAwB,EACxB,iBAA2B,EAC3B,UAAe,EACf,aAA0D,EAC1D,aAAkB,EAClB,aAAoC,IAAI;QAE1C,aAAa,CAAC,UAAU,EAAE,CAAC;QAE3B,MAAM,QAAQ,GAAG,MAAM,sBAAs
B,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,CAAC,CAAC;QAEhF,IAAI,QAAQ,CAAC,IAAI,GAAG,MAAM,CAAC,GAAG,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;YACtD,aAAa,CAAC,YAAY,EAAE,CAAC;YAC7B,OAAO;QACT,CAAC;QAED,IAAI,QAAQ,CAAC,gBAAgB,IAAI,QAAQ,CAAC,qBAAqB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3E,aAAa,CAAC,UAAU,EAAE,CAAC;YAE3B,MAAM,WAAW,GAAG,MAAM,iBAAiB,CACzC,QAAQ,CAAC,qBAAqB,EAC9B,MAAM,EACN,IAAI,EACJ,YAAY,EACZ,IAAI,CACL,CAAC;YAEF,MAAM,WAAW,GAAU,EAAE,CAAC;YAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5C,MAAM,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;gBACnC,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC;gBAEjC,IAAI,WAAW,CAAC,IAAI,GAAG,MAAM,CAAC,GAAG,EAAE,CAAC;oBAClC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;oBACnE,WAAW,CAAC,IAAI,CAAC;wBACf,IAAI,EAAE,OAAO;wBACb,IAAI,EAAE,OAAO;wBACb,IAAI,EAAE,WAAW,CAAC,IAAI;qBACvB,CAAC,CAAC;oBACH,IAAI,OAAO,CAAC,EAAE,KAAK,SAAS,EAAE,CAAC;wBAC7B,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;oBACtC,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,IAAI,OAAO,CAAC,EAAE,KAAK,SAAS,EAAE,CAAC;wBAC7B,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;oBACtC,CAAC;oBACD,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,EAAE,QAAQ,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,EAAE,IAAI,CAAC,CAAC;gBACvJ,CAAC;YACH,CAAC;YAED,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,cAAc,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,CAAM,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;gBAEpF,IAAI,cAAc,IAAI,MAAM,CAAC,GAAG,IAAI,WAAW,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;oBAC5D,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBACpE,MAAM,UAAU,GAAmB;wBACjC,GAAG,IAAI;wBACP,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,SAAS;wBAC3B,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU;wBAC1C,QAAQ,EAAE,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ;qBAC5D,CAAC;oBACF,MAAM,MAAM,GAAG,iBAAiB,WAAW,CAAC,MAAM,EAAE,CAAC;oBAErD,aAAa,CAAC,WAAW,EAAE,CAAC;oBAC5B,MAAM,IAAI,CAAC,
YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;gBAChK,CAAC;qBAAM,CAAC;oBACN,aAAa,CAAC,YAAY,IAAI,WAAW,CAAC,MAAM,CAAC;gBACnD,CAAC;YACH,CAAC;YAED,OAAO;QACT,CAAC;aAAM,IAAI,QAAQ,CAAC,IAAI,GAAG,MAAM,CAAC,GAAG,EAAE,CAAC;YACtC,aAAa,CAAC,iBAAiB,EAAE,CAAC;YAClC,MAAM,eAAe,GAAG,MAAM,oBAAoB,CAChD,IAAI,EACJ,MAAM,EACN,MAAM,CAAC,GAAG,EACV,MAAM,CAAC,OAAO,EACd,YAAY,CACb,CAAC;YAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChD,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;gBACrC,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;YAClK,CAAC;YACD,OAAO;QACT,CAAC;QAED,aAAa,CAAC,YAAY,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,iBAAiB,EAAE,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;IAClJ,CAAC;IAEO,KAAK,CAAC,YAAY,CACtB,IAAoB,EACpB,IAAY,EACZ,MAAqB,EACrB,UAAiC,EACjC,MAAc,EACd,GAAW,EACX,IAAkB,EAClB,QAAwB,EACxB,iBAA2B,EAC3B,UAAe,EACf,aAA0D,EAC1D,aAAkB;QAEpB,IAAI,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAC7C,IAAI,CAAC,MAAM;YAAE,OAAO;QAEpB,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,GAAG,GAAG,MAAM,QAAQ,MAAM,EAAE,CAAC;QACrC,CAAC;QAED,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;QAC3D,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,WAAW,CAAC,CAAC;QAChE,MAAM,aAAa,GAAG,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,iBAAiB,CAAC,IAAI,EAAE,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QAC5E,iBAAiB,CAAC,IAAI,GAAG,OAAO,CAAC;QAEjC,MAAM,kBAAkB,GAAG,yBAAyB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;QACzE,MAAM,UAAU,GAAG,qBAAqB,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAEnE,MAAM,qBAAqB,GAAG,6BAA6B,CACzD,IAAI,EACJ,iBAAiB,EACjB,kBAAkB,EAClB,WAAW,CACZ,CAAC;QAEF,MAAM,SAAS,GAAG
,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvD,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC;QAEvD,MAAM,WAAW,GAAG;YAClB,QAAQ,EAAE,IAAI,CAAC,IAAI;YACnB,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;YAC9D,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;YAC1D,UAAU,EAAE,IAAI,CAAC,MAAM;YACvB,gBAAgB,EAAE,CAAC,CAAC,WAAW;YAC/B,aAAa,EAAE,kBAAkB,CAAC,MAAM;YACxC,aAAa,EAAE,CAAC,CAAC,MAAM;YACvB,gBAAgB,EAAE,CAAC,CAAC,UAAU;SAC/B,CAAC;QAEF,MAAM,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACjE,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,MAAM,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAC1D,MAAM,eAAe,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACzC,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACvC,iBAAiB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACxC,OAAO;QACT,CAAC;QAED,MAAM,aAAa,CAAC;YAClB,IAAI;YACJ,qBAAqB;YACrB,OAAO;YACP,GAAG;YACH,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,GAAG;YACH,MAAM;YACN,SAAS;YACT,iBAAiB;YACjB,kBAAkB;YAClB,WAAW;YACX,WAAW;YACX,UAAU;SACX,CAAC,CAAC;QAEH,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACvC,iBAAiB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACxC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;QACtE,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Outcome of `FileScanner.scan`.
 * `files` holds the candidate relative paths that were accessible on disk; `toDelete` holds the
 * candidate paths that could not be accessed (presumably so the caller can drop them from the
 * index — confirm against the caller).
 */
export interface ScanResult {
|
|
2
|
+
files: string[];
|
|
3
|
+
toDelete: string[];
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Lightweight helper responsible only for discovering files eligible for indexing.
|
|
7
|
+
*/
|
|
8
|
+
export declare class FileScanner {
|
|
9
|
+
/**
 * Lists indexable files under `repo`.
 * Pass `null` for `normalizedChanged` to glob the whole repository for supported language
 * extensions; pass an array of repo-relative paths to restrict the scan to those entries
 * (entries with unsupported extensions are ignored).
 */
scan(repo: string, normalizedChanged: string[] | null): Promise<ScanResult>;
|
|
10
|
+
}
|
|
11
|
+
//# sourceMappingURL=file-scanner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-scanner.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/file-scanner.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;GAEG;AACH,qBAAa,WAAW;IAChB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,iBAAiB,EAAE,MAAM,EAAE,GAAG,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC;CAyClF"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import fg from 'fast-glob';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import { LANG_RULES, getSupportedLanguageExtensions } from '../../languages/rules.js';
|
|
5
|
+
import { DEFAULT_SCAN_IGNORES } from '../../utils/scan-patterns.js';
|
|
6
|
+
/**
 * Lightweight helper responsible only for discovering files eligible for indexing.
 */
export class FileScanner {
    /**
     * Determines which files should be indexed and which known paths no longer exist.
     *
     * @param repo repository root; candidate paths are resolved relative to it
     * @param normalizedChanged `null` to glob the whole repository for supported language
     *   extensions, or a list of repo-relative paths to consider (entries whose extension has
     *   no entry in LANG_RULES are ignored)
     * @returns `files`: distinct candidates that are accessible on disk, in first-seen order;
     *   `toDelete`: distinct candidates that could not be accessed, in first-seen order
     */
    async scan(repo, normalizedChanged) {
        let candidates;
        if (normalizedChanged === null) {
            // Glob patterns are only needed for a full repository scan.
            const languagePatterns = getSupportedLanguageExtensions().map(ext => `**/*${ext}`);
            candidates = await fg(languagePatterns, {
                cwd: repo,
                absolute: false,
                followSymbolicLinks: false,
                ignore: DEFAULT_SCAN_IGNORES,
                onlyFiles: true,
                dot: false
            });
        }
        else {
            candidates = normalizedChanged.filter(rel => {
                const ext = path.extname(rel).toLowerCase();
                return !!LANG_RULES[ext];
            });
        }
        const uniqueFiles = [];
        const toDelete = [];
        const seenFiles = new Set();
        for (const rel of candidates) {
            if (!rel || seenFiles.has(rel)) {
                continue;
            }
            // Mark the path as seen before probing the disk so a missing path that is listed
            // more than once is reported in toDelete only once.
            seenFiles.add(rel);
            try {
                await fs.promises.access(path.join(repo, rel));
                uniqueFiles.push(rel);
            }
            catch {
                // Inaccessible (typically removed) paths are reported instead of failing the scan.
                toDelete.push(rel);
            }
        }
        return { files: uniqueFiles, toDelete };
    }
}
|
|
49
|
+
//# sourceMappingURL=file-scanner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-scanner.js","sourceRoot":"","sources":["../../../src/core/indexing/file-scanner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,WAAW,CAAC;AAC3B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,UAAU,EAAE,8BAA8B,EAAE,MAAM,0BAA0B,CAAC;AACtF,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AAOpE;;GAEG;AACH,MAAM,OAAO,WAAW;IACtB,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,iBAAkC;QACzD,MAAM,gBAAgB,GAAG,8BAA8B,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;QACnF,IAAI,KAAK,GAAa,EAAE,CAAC;QAEzB,IAAI,iBAAiB,KAAK,IAAI,EAAE,CAAC;YAC/B,KAAK,GAAG,MAAM,EAAE,CAAC,gBAAgB,EAAE;gBACjC,GAAG,EAAE,IAAI;gBACT,QAAQ,EAAE,KAAK;gBACf,mBAAmB,EAAE,KAAK;gBAC1B,MAAM,EAAE,oBAAoB;gBAC5B,SAAS,EAAE,IAAI;gBACf,GAAG,EAAE,KAAK;aACX,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,iBAAiB,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;gBACrC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;gBAC5C,OAAO,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,WAAW,GAAa,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;QAEpC,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,IAAI,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/B,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YACrC,IAAI,CAAC;gBACH,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBAClC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACnB,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxB,CAAC;YAAC,MAAM,CAAC;gBACP,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;IAC1C,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { LanguageRule } from '../languages/rules.js';
|
|
2
|
+
import type { TreeSitterNode } from '../types/ast.js';
|
|
3
|
+
/**
 * Metadata record produced by `extractCodevaultMetadata` from a doc-comment string.
 * `intent` and `description` are `null` when absent.
 * NOTE(review): the comment syntax these fields are parsed from is defined in the
 * implementation, which is not visible in this declaration file.
 */
export interface CodevaultMetadata {
|
|
4
|
+
tags: string[];
|
|
5
|
+
intent: string | null;
|
|
6
|
+
description: string | null;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Derives a `CodevaultMetadata` record from a doc-comment string; `null` is accepted for
 * symbols without a comment.
 * NOTE(review): parsing rules are not visible from this declaration — see the implementation.
 */
export declare function extractCodevaultMetadata(commentText: string | null): CodevaultMetadata;
|
|
9
|
+
/**
 * Returns tag strings computed from a file path, an optional symbol name and the chunk's code.
 * NOTE(review): the tagging heuristics are not visible from this declaration.
 */
export declare function extractSemanticTags(filePath: string, symbolName: string | null, code: string): string[];
|
|
10
|
+
/**
 * One entry returned by `extractImportantVariables`; all three fields are strings.
 * NOTE(review): presumably `type` is the declaration kind and `value` the initializer text —
 * confirm against the implementation.
 */
interface ImportantVariable {
|
|
11
|
+
type: string;
|
|
12
|
+
name: string;
|
|
13
|
+
value: string;
|
|
14
|
+
}
|
|
15
|
+
/**
 * Returns the `ImportantVariable` entries found for `node` within `source`, using `rule` for
 * language-specific details.
 * NOTE(review): which variables count as "important" is decided by the implementation.
 */
export declare function extractImportantVariables(node: TreeSitterNode, source: string, rule: LanguageRule): ImportantVariable[];
|
|
16
|
+
/**
 * Returns the documentation comment text associated with `node` in `source`, or `null`
 * (presumably when the node has no doc comment — confirm against the implementation).
 */
export declare function extractDocComments(source: string, node: TreeSitterNode, rule: LanguageRule): string | null;
|
|
17
|
+
/**
 * Produces a single string from a chunk's code, its metadata, its important variables and its
 * optional doc comments.
 * NOTE(review): presumably this is the text sent to the embedding model in place of the raw
 * code; the exact layout is defined by the implementation.
 */
export declare function generateEnhancedEmbeddingText(code: string, metadata: CodevaultMetadata, variables: ImportantVariable[], docComments: string | null): string;
|
|
18
|
+
export {};
|
|
19
|
+
//# sourceMappingURL=metadata.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/core/metadata.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEtD,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,wBAAgB,wBAAwB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,GAAG,iBAAiB,CAyBtF;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAgEvG;AAED,UAAU,iBAAiB;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,iBAAiB,EAAE,CA0BvH;AAkCD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,YAAY,GAAG,MAAM,GAAG,IAAI,CAY1G;AAED,wBAAgB,6BAA6B,CAC3C,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,iBAAiB,EAAE,EAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,GACzB,MAAM,CAyBR"}
|