viberag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +219 -0
- package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
- package/dist/cli/__tests__/mcp-setup.test.js +597 -0
- package/dist/cli/app.d.ts +2 -0
- package/dist/cli/app.js +238 -0
- package/dist/cli/commands/handlers.d.ts +57 -0
- package/dist/cli/commands/handlers.js +231 -0
- package/dist/cli/commands/index.d.ts +2 -0
- package/dist/cli/commands/index.js +2 -0
- package/dist/cli/commands/mcp-setup.d.ts +107 -0
- package/dist/cli/commands/mcp-setup.js +509 -0
- package/dist/cli/commands/useRagCommands.d.ts +23 -0
- package/dist/cli/commands/useRagCommands.js +180 -0
- package/dist/cli/components/CleanWizard.d.ts +17 -0
- package/dist/cli/components/CleanWizard.js +169 -0
- package/dist/cli/components/InitWizard.d.ts +20 -0
- package/dist/cli/components/InitWizard.js +370 -0
- package/dist/cli/components/McpSetupWizard.d.ts +37 -0
- package/dist/cli/components/McpSetupWizard.js +387 -0
- package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
- package/dist/cli/components/SearchResultsDisplay.js +130 -0
- package/dist/cli/components/WelcomeBanner.d.ts +10 -0
- package/dist/cli/components/WelcomeBanner.js +26 -0
- package/dist/cli/components/index.d.ts +1 -0
- package/dist/cli/components/index.js +1 -0
- package/dist/cli/data/mcp-editors.d.ts +80 -0
- package/dist/cli/data/mcp-editors.js +270 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +26 -0
- package/dist/cli-bundle.cjs +5269 -0
- package/dist/common/commands/terminalSetup.d.ts +2 -0
- package/dist/common/commands/terminalSetup.js +144 -0
- package/dist/common/components/CommandSuggestions.d.ts +9 -0
- package/dist/common/components/CommandSuggestions.js +20 -0
- package/dist/common/components/StaticWithResize.d.ts +23 -0
- package/dist/common/components/StaticWithResize.js +62 -0
- package/dist/common/components/StatusBar.d.ts +8 -0
- package/dist/common/components/StatusBar.js +64 -0
- package/dist/common/components/TextInput.d.ts +12 -0
- package/dist/common/components/TextInput.js +239 -0
- package/dist/common/components/index.d.ts +3 -0
- package/dist/common/components/index.js +3 -0
- package/dist/common/hooks/index.d.ts +4 -0
- package/dist/common/hooks/index.js +4 -0
- package/dist/common/hooks/useCommandHistory.d.ts +7 -0
- package/dist/common/hooks/useCommandHistory.js +51 -0
- package/dist/common/hooks/useCtrlC.d.ts +9 -0
- package/dist/common/hooks/useCtrlC.js +40 -0
- package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
- package/dist/common/hooks/useKittyKeyboard.js +26 -0
- package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
- package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
- package/dist/common/hooks/useTerminalResize.d.ts +28 -0
- package/dist/common/hooks/useTerminalResize.js +51 -0
- package/dist/common/hooks/useTextBuffer.d.ts +13 -0
- package/dist/common/hooks/useTextBuffer.js +165 -0
- package/dist/common/index.d.ts +13 -0
- package/dist/common/index.js +17 -0
- package/dist/common/types.d.ts +162 -0
- package/dist/common/types.js +1 -0
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.js +66 -0
- package/dist/mcp/server.d.ts +25 -0
- package/dist/mcp/server.js +837 -0
- package/dist/mcp/watcher.d.ts +86 -0
- package/dist/mcp/watcher.js +334 -0
- package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
- package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
- package/dist/rag/__tests__/helpers.d.ts +30 -0
- package/dist/rag/__tests__/helpers.js +67 -0
- package/dist/rag/__tests__/merkle.test.d.ts +5 -0
- package/dist/rag/__tests__/merkle.test.js +161 -0
- package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
- package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
- package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
- package/dist/rag/__tests__/multi-language.test.js +535 -0
- package/dist/rag/__tests__/rag.test.d.ts +10 -0
- package/dist/rag/__tests__/rag.test.js +311 -0
- package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
- package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
- package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
- package/dist/rag/__tests__/search-filters.test.js +250 -0
- package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
- package/dist/rag/__tests__/search-modes.test.js +133 -0
- package/dist/rag/config/index.d.ts +61 -0
- package/dist/rag/config/index.js +111 -0
- package/dist/rag/constants.d.ts +41 -0
- package/dist/rag/constants.js +57 -0
- package/dist/rag/embeddings/fastembed.d.ts +62 -0
- package/dist/rag/embeddings/fastembed.js +124 -0
- package/dist/rag/embeddings/gemini.d.ts +26 -0
- package/dist/rag/embeddings/gemini.js +116 -0
- package/dist/rag/embeddings/index.d.ts +10 -0
- package/dist/rag/embeddings/index.js +9 -0
- package/dist/rag/embeddings/local-4b.d.ts +28 -0
- package/dist/rag/embeddings/local-4b.js +51 -0
- package/dist/rag/embeddings/local.d.ts +29 -0
- package/dist/rag/embeddings/local.js +119 -0
- package/dist/rag/embeddings/mistral.d.ts +22 -0
- package/dist/rag/embeddings/mistral.js +85 -0
- package/dist/rag/embeddings/openai.d.ts +22 -0
- package/dist/rag/embeddings/openai.js +85 -0
- package/dist/rag/embeddings/types.d.ts +37 -0
- package/dist/rag/embeddings/types.js +1 -0
- package/dist/rag/gitignore/index.d.ts +57 -0
- package/dist/rag/gitignore/index.js +178 -0
- package/dist/rag/index.d.ts +15 -0
- package/dist/rag/index.js +25 -0
- package/dist/rag/indexer/chunker.d.ts +129 -0
- package/dist/rag/indexer/chunker.js +1352 -0
- package/dist/rag/indexer/index.d.ts +6 -0
- package/dist/rag/indexer/index.js +6 -0
- package/dist/rag/indexer/indexer.d.ts +73 -0
- package/dist/rag/indexer/indexer.js +356 -0
- package/dist/rag/indexer/types.d.ts +68 -0
- package/dist/rag/indexer/types.js +47 -0
- package/dist/rag/logger/index.d.ts +20 -0
- package/dist/rag/logger/index.js +75 -0
- package/dist/rag/manifest/index.d.ts +50 -0
- package/dist/rag/manifest/index.js +97 -0
- package/dist/rag/merkle/diff.d.ts +26 -0
- package/dist/rag/merkle/diff.js +95 -0
- package/dist/rag/merkle/hash.d.ts +34 -0
- package/dist/rag/merkle/hash.js +165 -0
- package/dist/rag/merkle/index.d.ts +68 -0
- package/dist/rag/merkle/index.js +298 -0
- package/dist/rag/merkle/node.d.ts +51 -0
- package/dist/rag/merkle/node.js +69 -0
- package/dist/rag/search/filters.d.ts +21 -0
- package/dist/rag/search/filters.js +100 -0
- package/dist/rag/search/fts.d.ts +32 -0
- package/dist/rag/search/fts.js +61 -0
- package/dist/rag/search/hybrid.d.ts +17 -0
- package/dist/rag/search/hybrid.js +58 -0
- package/dist/rag/search/index.d.ts +89 -0
- package/dist/rag/search/index.js +367 -0
- package/dist/rag/search/types.d.ts +130 -0
- package/dist/rag/search/types.js +4 -0
- package/dist/rag/search/vector.d.ts +25 -0
- package/dist/rag/search/vector.js +44 -0
- package/dist/rag/storage/index.d.ts +92 -0
- package/dist/rag/storage/index.js +287 -0
- package/dist/rag/storage/lancedb-native.d.ts +7 -0
- package/dist/rag/storage/lancedb-native.js +10 -0
- package/dist/rag/storage/schema.d.ts +23 -0
- package/dist/rag/storage/schema.js +50 -0
- package/dist/rag/storage/types.d.ts +100 -0
- package/dist/rag/storage/types.js +68 -0
- package/package.json +67 -0
- package/scripts/check-node-version.js +37 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indexer module for code chunking and indexing.
|
|
3
|
+
*/
|
|
4
|
+
export { Chunker } from './chunker.js';
|
|
5
|
+
export { Indexer, type IndexOptions } from './indexer.js';
|
|
6
|
+
export { createEmptyIndexStats, type Chunk, type ChunkType, type IndexStats, type ProgressCallback, type SupportedLanguage, } from './types.js';
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indexer - Orchestrates the full indexing pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline:
|
|
5
|
+
* 1. Load previous Merkle tree from manifest
|
|
6
|
+
* 2. Build current Merkle tree from filesystem
|
|
7
|
+
* 3. Compare trees → get new/modified/deleted files
|
|
8
|
+
* 4. Delete chunks for deleted files
|
|
9
|
+
* 5. For new/modified files:
|
|
10
|
+
* - Chunk with tree-sitter
|
|
11
|
+
* - Compute embeddings (with cache lookup)
|
|
12
|
+
* - Upsert to LanceDB
|
|
13
|
+
* 6. Save updated manifest
|
|
14
|
+
*/
|
|
15
|
+
import type { Logger } from '../logger/index.js';
|
|
16
|
+
import { type IndexStats, type ProgressCallback } from './types.js';
|
|
17
|
+
/**
 * Settings accepted by `Indexer.index()`.
 */
export interface IndexOptions {
    /** When true, reindex every file instead of only those the Merkle tree diff reports as changed. */
    force?: boolean;
    /** Receives progress updates so a UI can display them. */
    progressCallback?: ProgressCallback;
}
|
|
26
|
+
/**
 * Declaration of the Indexer, which runs the indexing pipeline
 * for one project root.
 */
export declare class Indexer {
    private readonly projectRoot;
    private config;
    private storage;
    private chunker;
    private embeddings;
    private logger;
    /**
     * @param projectRoot - Root directory of the project to index.
     * @param logger - Optional logger that receives the indexer's messages.
     */
    constructor(projectRoot: string, logger?: Logger);
    /**
     * Execute one indexing run and resolve with its statistics.
     */
    index(options?: IndexOptions): Promise<IndexStats>;
    /**
     * Build a diff in which every file counts as new (used for a forced reindex).
     */
    private createForceDiff;
    /**
     * Walk a serialized Merkle tree node recursively, gathering every file path.
     */
    private collectAllFilesFromSerialized;
    /**
     * Handle one batch of files: read, chunk, embed, and assemble CodeChunks.
     */
    private processFileBatch;
    /**
     * Map an embedding provider identifier to a human-friendly name.
     */
    private getProviderDisplayName;
    /**
     * Set up every component the pipeline needs.
     */
    private initialize;
    /**
     * Instantiate the embedding provider selected by the config.
     */
    private createEmbeddingProvider;
    /**
     * Write a message to the logger, if one was supplied.
     */
    private log;
    /**
     * Release all resources held by the indexer.
     */
    close(): void;
}
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indexer - Orchestrates the full indexing pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline:
|
|
5
|
+
* 1. Load previous Merkle tree from manifest
|
|
6
|
+
* 2. Build current Merkle tree from filesystem
|
|
7
|
+
* 3. Compare trees → get new/modified/deleted files
|
|
8
|
+
* 4. Delete chunks for deleted files
|
|
9
|
+
* 5. For new/modified files:
|
|
10
|
+
* - Chunk with tree-sitter
|
|
11
|
+
* - Compute embeddings (with cache lookup)
|
|
12
|
+
* - Upsert to LanceDB
|
|
13
|
+
* 6. Save updated manifest
|
|
14
|
+
*/
|
|
15
|
+
import fs from 'node:fs/promises';
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
import { loadConfig } from '../config/index.js';
|
|
18
|
+
import { GeminiEmbeddingProvider, Local4BEmbeddingProvider, LocalEmbeddingProvider, MistralEmbeddingProvider, OpenAIEmbeddingProvider, } from '../embeddings/index.js';
|
|
19
|
+
import { loadManifest, saveManifest, manifestExists, createEmptyManifest, updateManifestStats, updateManifestTree, } from '../manifest/index.js';
|
|
20
|
+
import { MerkleTree } from '../merkle/index.js';
|
|
21
|
+
import { Storage } from '../storage/index.js';
|
|
22
|
+
import { Chunker } from './chunker.js';
|
|
23
|
+
import { createEmptyIndexStats, } from './types.js';
|
|
24
|
+
/**
 * Indexer - runs the indexing pipeline for a single project root.
 *
 * One run loads the previous manifest, builds the current Merkle tree,
 * diffs the two (or treats every file as new when forced), removes the
 * chunks of deleted and modified files, chunks and embeds the new and
 * modified files, writes the resulting chunks to storage, and finally
 * saves the updated manifest.
 */
export class Indexer {
    /**
     * @param {string} projectRoot - Root directory of the project to index.
     * @param {object} [logger] - Optional logger exposing `debug`, `info`,
     *   `warn` and `error` methods; when omitted, nothing is logged.
     */
    constructor(projectRoot, logger) {
        // Define every instance field up front as a plain, writable data
        // property; components stay null until initialize() creates them.
        const fieldDefaults = [
            ['projectRoot', undefined],
            ['config', null],
            ['storage', null],
            ['chunker', null],
            ['embeddings', null],
            ['logger', null],
        ];
        for (const [field, value] of fieldDefaults) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
        this.projectRoot = projectRoot;
        this.logger = logger ?? null;
    }
    /**
     * Run the indexing pipeline.
     *
     * @param {object} [options]
     * @param {boolean} [options.force=false] - Reset the chunks table and
     *   reindex every file instead of only the changed ones.
     * @param {Function} [options.progressCallback] - Called as
     *   `(current, total, stage)` while the run progresses.
     * @returns {Promise<object>} Statistics describing the run.
     * @throws Rethrows any error raised by the pipeline after logging it.
     */
    async index(options = {}) {
        const stats = createEmptyIndexStats();
        const { force = false, progressCallback } = options;
        try {
            // Initialize components
            await this.initialize(progressCallback);
            const config = this.config;
            const storage = this.storage;
            const chunker = this.chunker;
            const embeddings = this.embeddings;
            // 1. Load previous manifest and Merkle tree
            this.log('info', 'Loading manifest');
            let manifest = (await manifestExists(this.projectRoot))
                ? await loadManifest(this.projectRoot)
                : createEmptyManifest();
            const previousTree = manifest.tree
                ? MerkleTree.fromJSON(manifest.tree)
                : MerkleTree.empty();
            // 2. Build current Merkle tree from filesystem
            this.log('info', 'Building Merkle tree');
            progressCallback?.(0, 100, 'Scanning files');
            const currentTree = await MerkleTree.build(this.projectRoot, config.extensions, config.excludePatterns, previousTree);
            stats.filesScanned = currentTree.buildStats.filesScanned;
            this.log('info', `Scanned ${stats.filesScanned} files, indexed ${currentTree.fileCount}`);
            // 3. Compare trees to get diff
            const diff = force
                ? this.createForceDiff(currentTree)
                : previousTree.compare(currentTree);
            stats.filesNew = diff.new.length;
            stats.filesModified = diff.modified.length;
            stats.filesDeleted = diff.deleted.length;
            this.log('info', `Changes: ${diff.new.length} new, ${diff.modified.length} modified, ${diff.deleted.length} deleted`);
            // Short-circuit if no changes
            if (!diff.hasChanges && !force) {
                this.log('info', 'No changes detected');
                return stats;
            }
            // 4. Handle force reindex - drop and recreate table to avoid schema issues
            if (force) {
                this.log('info', 'Force reindex: resetting chunks table');
                await storage.resetChunksTable();
            }
            // 5. Delete chunks for deleted files
            if (diff.deleted.length > 0) {
                this.log('info', `Deleting chunks for ${diff.deleted.length} files`);
                stats.chunksDeleted = await storage.deleteChunksByFilepaths(diff.deleted);
            }
            // 6. Process new and modified files
            const filesToProcess = [...diff.new, ...diff.modified];
            const totalFiles = filesToProcess.length;
            if (totalFiles > 0) {
                this.log('info', `Processing ${totalFiles} files`);
                // First, delete existing chunks for modified files
                // (not needed when forced: the table was just reset).
                if (diff.modified.length > 0 && !force) {
                    const deletedCount = await storage.deleteChunksByFilepaths(diff.modified);
                    stats.chunksDeleted += deletedCount;
                }
                // Process files in batches
                const batchSize = 10;
                for (let i = 0; i < filesToProcess.length; i += batchSize) {
                    const batch = filesToProcess.slice(i, i + batchSize);
                    const batchChunks = await this.processFileBatch(batch, chunker, embeddings, storage, stats);
                    if (batchChunks.length > 0) {
                        // Use addChunks after table reset to avoid schema mismatch,
                        // upsertChunks for normal incremental updates
                        if (force) {
                            await storage.addChunks(batchChunks);
                        }
                        else {
                            await storage.upsertChunks(batchChunks);
                        }
                        stats.chunksAdded += batchChunks.length;
                    }
                    const progress = Math.round(((i + batch.length) / totalFiles) * 100);
                    progressCallback?.(i + batch.length, totalFiles, 'Indexing files');
                    this.log('debug', `Progress: ${progress}% (${i + batch.length}/${totalFiles})`);
                }
            }
            // 7. Update manifest with new tree and stats
            const chunkCount = await storage.countChunks();
            manifest = updateManifestTree(manifest, currentTree.toJSON());
            manifest = updateManifestStats(manifest, {
                totalFiles: currentTree.fileCount,
                totalChunks: chunkCount,
            });
            await saveManifest(this.projectRoot, manifest);
            this.log('info', `Index complete: ${stats.chunksAdded} chunks added, ${stats.chunksDeleted} deleted`);
            return stats;
        }
        catch (error) {
            this.log('error', 'Indexing failed', error);
            throw error;
        }
    }
    /**
     * Create a diff that treats all files as new (for force reindex).
     *
     * @param {object} tree - Tree whose `toJSON()` yields the serialized root node.
     * @returns {{new: string[], modified: string[], deleted: string[], hasChanges: boolean}}
     */
    createForceDiff(tree) {
        const allFiles = [];
        const serialized = tree.toJSON();
        this.collectAllFilesFromSerialized(serialized, allFiles);
        return {
            new: allFiles,
            modified: [],
            deleted: [],
            hasChanges: allFiles.length > 0,
        };
    }
    /**
     * Recursively collect all file paths from a serialized Merkle tree node.
     *
     * @param {object|null|undefined} node - Serialized node; a node of type
     *   'file' contributes its `path`, any other node is descended through
     *   its `children` values when present.
     * @param {string[]} files - Output array the paths are appended to.
     */
    collectAllFilesFromSerialized(node, files) {
        if (!node) {
            return;
        }
        if (node.type === 'file') {
            files.push(node.path);
        }
        else if (node.children) {
            for (const child of Object.values(node.children)) {
                this.collectAllFilesFromSerialized(child, files);
            }
        }
    }
    /**
     * Process a batch of files: read, chunk, embed, and prepare CodeChunks.
     *
     * A file that fails at any step is logged at warn level (with the error)
     * and skipped, so one bad file does not abort the batch.
     *
     * @param {string[]} filepaths - Paths relative to the project root.
     * @param {object} chunker - Provides `chunkFile(filepath, content, maxSize)`.
     * @param {object} embeddings - Provides `embed(texts)`.
     * @param {object} storage - Provides the embedding cache lookups and writes.
     * @param {object} stats - Run statistics; the embedding counters are updated in place.
     * @returns {Promise<object[]>} Chunk records ready to be written to storage.
     */
    async processFileBatch(filepaths, chunker, embeddings, storage, stats) {
        const allChunks = [];
        for (const filepath of filepaths) {
            try {
                const absolutePath = path.join(this.projectRoot, filepath);
                const content = await fs.readFile(absolutePath, 'utf-8');
                const fileHash = (await import('../merkle/hash.js')).computeStringHash(content);
                // Chunk the file (with size limits from config)
                const chunks = await chunker.chunkFile(filepath, content, this.config.chunkMaxSize);
                // Check embedding cache for each chunk
                const contentHashes = chunks.map(c => c.contentHash);
                const cachedEmbeddings = await storage.getCachedEmbeddings(contentHashes);
                // Compute embeddings for cache misses
                const missingChunks = chunks.filter(c => !cachedEmbeddings.has(c.contentHash));
                if (missingChunks.length > 0) {
                    // Embed contextHeader + text for semantic relevance
                    const texts = missingChunks.map(c => c.contextHeader ? `${c.contextHeader}\n${c.text}` : c.text);
                    const newEmbeddings = await embeddings.embed(texts);
                    stats.embeddingsComputed += missingChunks.length;
                    // Cache the new embeddings
                    const cacheEntries = missingChunks.map((chunk, i) => ({
                        contentHash: chunk.contentHash,
                        vector: newEmbeddings[i],
                        createdAt: new Date().toISOString(),
                    }));
                    await storage.cacheEmbeddings(cacheEntries);
                    // Add to cachedEmbeddings map so the lookup below finds them
                    missingChunks.forEach((chunk, i) => {
                        cachedEmbeddings.set(chunk.contentHash, newEmbeddings[i]);
                    });
                }
                stats.embeddingsCached += chunks.length - missingChunks.length;
                // Build CodeChunk objects
                const filename = path.basename(filepath);
                const extension = path.extname(filepath);
                for (const chunk of chunks) {
                    const vector = cachedEmbeddings.get(chunk.contentHash);
                    allChunks.push({
                        id: `${filepath}:${chunk.startLine}`,
                        vector,
                        text: chunk.text,
                        contentHash: chunk.contentHash,
                        filepath,
                        filename,
                        extension,
                        type: chunk.type,
                        name: chunk.name,
                        startLine: chunk.startLine,
                        endLine: chunk.endLine,
                        fileHash,
                        // New metadata fields from schema v2
                        signature: chunk.signature,
                        docstring: chunk.docstring,
                        isExported: chunk.isExported,
                        decoratorNames: chunk.decoratorNames,
                    });
                }
            }
            catch (error) {
                this.log('warn', `Failed to process file: ${filepath}`, error);
                // Continue with other files
            }
        }
        return allChunks;
    }
    /**
     * Get a friendly name for the embedding provider.
     *
     * @param {string} provider - Provider identifier from the config.
     * @returns {string} Display name; unknown identifiers are returned unchanged.
     */
    getProviderDisplayName(provider) {
        switch (provider) {
            case 'local':
                return 'Qwen3-0.6B';
            case 'gemini':
                return 'Gemini';
            case 'mistral':
                return 'Mistral';
            case 'openai':
                return 'OpenAI';
            default:
                return provider;
        }
    }
    /**
     * Initialize all components: config, storage, chunker and embeddings.
     *
     * @param {Function} [progressCallback] - Called as `(current, total, stage)`;
     *   stages without a measurable total are reported as `(0, 0, stage)`.
     */
    async initialize(progressCallback) {
        // Load config
        this.config = await loadConfig(this.projectRoot);
        const providerName = this.getProviderDisplayName(this.config.embeddingProvider);
        // Initialize storage
        progressCallback?.(0, 0, 'Connecting to database');
        this.storage = new Storage(this.projectRoot, this.config.embeddingDimensions);
        await this.storage.connect();
        // Initialize chunker (loads tree-sitter parsers)
        progressCallback?.(0, 0, 'Loading parsers');
        this.chunker = new Chunker();
        await this.chunker.initialize();
        // Initialize embeddings based on provider type
        // For local models, this may download the model on first run.
        // 'local-4b' is created as a local provider in createEmbeddingProvider,
        // so it gets the same "loading model" treatment as 'local'.
        const isLocal = this.config.embeddingProvider === 'local' ||
            this.config.embeddingProvider === 'local-4b';
        progressCallback?.(0, 0, isLocal
            ? `Loading ${providerName} model`
            : `Connecting to ${providerName}`);
        this.embeddings = this.createEmbeddingProvider(this.config);
        // Pass model progress to the UI for local models
        await this.embeddings.initialize(isLocal && progressCallback
            ? (status, progress, _message) => {
                if (status === 'downloading') {
                    // Use the same fallback for the number and the label so
                    // a missing progress value never renders as "undefined%".
                    const percent = progress ?? 0;
                    progressCallback(percent, 100, `Downloading ${providerName} (${percent}%)`);
                }
                else if (status === 'loading') {
                    progressCallback(0, 0, `Loading ${providerName} model`);
                }
            }
            : undefined);
        this.log('info', 'Indexer initialized');
    }
    /**
     * Create the appropriate embedding provider based on config.
     *
     * @param {object} config - Loaded config; reads `embeddingProvider` and `apiKey`.
     * @returns {object} The provider instance.
     * @throws {Error} When `config.embeddingProvider` is not a known identifier.
     */
    createEmbeddingProvider(config) {
        const apiKey = config.apiKey;
        switch (config.embeddingProvider) {
            case 'local':
                return new LocalEmbeddingProvider();
            case 'local-4b':
                return new Local4BEmbeddingProvider();
            case 'gemini':
                return new GeminiEmbeddingProvider(apiKey);
            case 'mistral':
                return new MistralEmbeddingProvider(apiKey);
            case 'openai':
                return new OpenAIEmbeddingProvider(apiKey);
            default:
                throw new Error(`Unknown embedding provider: ${config.embeddingProvider}`);
        }
    }
    /**
     * Log a message under the 'Indexer' component; a no-op without a logger.
     *
     * @param {string} level - Name of the logger method to call
     *   ('debug', 'info', 'warn' or 'error').
     * @param {string} message - Message text.
     * @param {unknown} [error] - Optional error or data forwarded to the
     *   logger at every level, so warnings keep their failure cause.
     */
    log(level, message, error) {
        if (!this.logger) {
            return;
        }
        this.logger[level]('Indexer', message, error);
    }
    /**
     * Close all resources. Components that were never initialized are skipped.
     */
    close() {
        this.storage?.close();
        this.chunker?.close();
        this.embeddings?.close();
        this.log('info', 'Indexer closed');
    }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Kinds of code chunk that tree-sitter extraction produces.
 */
export type ChunkType =
    | 'function'
    | 'class'
    | 'method'
    | 'module';
|
|
5
|
+
/**
 * A raw piece of code taken from a file, before any embedding is attached.
 */
export interface Chunk {
    /** Source code text of the chunk. */
    text: string;
    /** Header prepended for embedding (e.g., "// File: path.ts, Class: Foo"). */
    contextHeader: string;
    /** Kind of chunk. */
    type: ChunkType;
    /** Name of the symbol (empty for module chunks). */
    name: string;
    /** First line of the chunk (1-indexed). */
    startLine: number;
    /** Last line of the chunk (1-indexed). */
    endLine: number;
    /** SHA256 hash of contextHeader + text. */
    contentHash: string;
    /** Signature line of the function/method (null for module chunks). */
    signature: string | null;
    /** Documentation extracted for the symbol (JSDoc, docstring, etc.). */
    docstring: string | null;
    /** True when the symbol carries an export modifier. */
    isExported: boolean;
    /** Decorator/annotation names joined by commas (null if none). */
    decoratorNames: string | null;
}
|
|
32
|
+
/**
 * Languages that tree-sitter parsing supports.
 */
export type SupportedLanguage =
    | 'python'
    | 'javascript'
    | 'typescript'
    | 'tsx'
    | 'go'
    | 'rust'
    | 'java'
    | 'csharp'
    | 'dart'
    | 'swift'
    | 'kotlin'
    | 'php';
|
|
36
|
+
/**
 * Lookup table from file extension to the language used for it.
 */
export declare const EXTENSION_TO_LANGUAGE: Record<string, SupportedLanguage>;
|
|
40
|
+
/**
 * Counters collected during an indexing operation.
 */
export interface IndexStats {
    /** How many files were scanned. */
    filesScanned: number;
    /** How many new files were indexed. */
    filesNew: number;
    /** How many modified files were re-indexed. */
    filesModified: number;
    /** How many deleted files were removed from the index. */
    filesDeleted: number;
    /** How many chunks were added. */
    chunksAdded: number;
    /** How many chunks were deleted. */
    chunksDeleted: number;
    /** How many embeddings had to be computed (cache miss). */
    embeddingsComputed: number;
    /** How many embeddings were served from the cache. */
    embeddingsCached: number;
}
|
|
61
|
+
/**
 * Callback through which indexing operations report progress.
 *
 * @param current - Amount of work completed so far.
 * @param total - Total amount of work for the current stage.
 * @param stage - Human-readable name of the current stage.
 */
export type ProgressCallback = (
    current: number,
    total: number,
    stage: string,
) => void;
|
|
65
|
+
/**
 * Build an IndexStats object for a run that has not counted anything yet.
 */
export declare function createEmptyIndexStats(): IndexStats;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Lookup table from file extension (including the leading dot) to the
 * language identifier used for files with that extension.
 *
 * The table is derived from a per-language list of extensions so each
 * language appears exactly once.
 */
export const EXTENSION_TO_LANGUAGE = Object.fromEntries([
    // JavaScript/TypeScript family
    ['javascript', ['.js', '.mjs', '.cjs']],
    ['typescript', ['.ts', '.mts', '.cts']],
    ['tsx', ['.tsx']],
    // Python
    ['python', ['.py']],
    // Go
    ['go', ['.go']],
    // Rust
    ['rust', ['.rs']],
    // Java
    ['java', ['.java']],
    // C#
    ['csharp', ['.cs']],
    // Dart
    ['dart', ['.dart']],
    // Swift
    ['swift', ['.swift']],
    // Kotlin
    ['kotlin', ['.kt', '.kts']],
    // PHP
    ['php', ['.php']],
].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language])));
|
|
33
|
+
/**
 * Build a fresh statistics object in which every counter starts at zero.
 *
 * @returns {object} A new object on every call, safe to mutate.
 */
export function createEmptyIndexStats() {
    const counterNames = [
        'filesScanned',
        'filesNew',
        'filesModified',
        'filesDeleted',
        'chunksAdded',
        'chunksDeleted',
        'embeddingsComputed',
        'embeddingsCached',
    ];
    return Object.fromEntries(counterNames.map((counter) => [counter, 0]));
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Severity levels a log entry can carry.
 */
export type LogLevel =
    | 'debug'
    | 'info'
    | 'warn'
    | 'error';
|
|
2
|
+
/**
 * Logging interface: one method per level, each taking the name of the
 * emitting component, a message, and an optional payload.
 */
export interface Logger {
    /** Record a debug-level entry, optionally with structured data. */
    debug(component: string, message: string, data?: object): void;
    /** Record an info-level entry, optionally with structured data. */
    info(component: string, message: string, data?: object): void;
    /** Record a warn-level entry, optionally with structured data. */
    warn(component: string, message: string, data?: object): void;
    /** Record an error-level entry, optionally with the causing Error. */
    error(component: string, message: string, error?: Error): void;
}
|
|
8
|
+
/**
 * Resolve the path of today's log file for the given project.
 */
export declare function getLogPath(projectRoot: string): string;
|
|
12
|
+
/**
 * Build a logger that appends to daily log files.
 * The files live in the .viberag/logs/ directory.
 */
export declare function createLogger(projectRoot: string): Logger;
|
|
17
|
+
/**
 * Build a logger whose methods do nothing, for tests or when logging is disabled.
 */
export declare function createNullLogger(): Logger;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { getLogsDir } from '../constants.js';
|
|
4
|
+
/**
 * Resolve the path of today's log file.
 *
 * The file name is the date part (YYYY-MM-DD) of the current time's ISO
 * string, which is expressed in UTC, followed by `.log`.
 *
 * @param {string} projectRoot - Project root passed to `getLogsDir`.
 * @returns {string} Path of the log file inside the logs directory.
 */
export function getLogPath(projectRoot) {
    const directory = getLogsDir(projectRoot);
    // Everything before the 'T' of an ISO timestamp is the calendar date.
    const [today] = new Date().toISOString().split('T');
    return path.join(directory, `${today}.log`);
}
|
|
12
|
+
/**
 * Format a single log entry as text.
 *
 * The first line is `[<ISO timestamp>] [<LEVEL>] <component>: <message>`,
 * with the upper-cased level padded to five characters. A truthy `extra`
 * is appended on the following lines: an Error contributes its message
 * and, when present, its stack; any other value is JSON-encoded.
 *
 * Formatting never throws because of `extra`: data that JSON.stringify
 * rejects (cyclic structures, BigInt values) is replaced by a placeholder
 * so that a log call cannot crash its caller.
 *
 * @param {string} level - Log level name.
 * @param {string} component - Name of the component emitting the entry.
 * @param {string} message - Message text.
 * @param {unknown} [extra] - Optional Error or structured data.
 * @returns {string} The formatted entry, without a trailing newline.
 */
function formatEntry(level, component, message, extra) {
    const timestamp = new Date().toISOString();
    const levelStr = level.toUpperCase().padEnd(5);
    let entry = `[${timestamp}] [${levelStr}] ${component}: ${message}`;
    if (!extra) {
        return entry;
    }
    if (extra instanceof Error) {
        entry += `\n Error: ${extra.message}`;
        if (extra.stack) {
            entry += `\n Stack: ${extra.stack}`;
        }
        return entry;
    }
    let serialized;
    try {
        serialized = JSON.stringify(extra);
    }
    catch {
        // JSON.stringify throws on cyclic data and BigInt; keep the entry
        // and note what could not be encoded instead of propagating.
        serialized = `[unserializable ${Object.prototype.toString.call(extra)}]`;
    }
    entry += `\n ${serialized}`;
    return entry;
}
|
|
32
|
+
/**
 * Build a logger that appends entries to daily log files.
 * The files live in the project's logs directory (.viberag/logs/), which
 * is created on the first write rather than when the logger is built.
 *
 * @param {string} projectRoot - Project root used to locate the logs directory.
 * @returns {object} Logger with `debug`, `info`, `warn` and `error` methods,
 *   each taking `(component, message, extra)`.
 */
export function createLogger(projectRoot) {
    const logsDir = getLogsDir(projectRoot);
    let dirReady = false;
    // Format the entry, make sure the directory exists (first write only),
    // then append to the file for the current date.
    const append = (level, component, message, extra) => {
        const entry = formatEntry(level, component, message, extra);
        if (!dirReady) {
            fs.mkdirSync(logsDir, { recursive: true });
            dirReady = true;
        }
        // The path is resolved per write so entries follow date rollover.
        fs.appendFileSync(getLogPath(projectRoot), entry + '\n');
    };
    return {
        debug: (component, message, data) => append('debug', component, message, data),
        info: (component, message, data) => append('info', component, message, data),
        warn: (component, message, data) => append('warn', component, message, data),
        error: (component, message, error) => append('error', component, message, error),
    };
}
|
|
65
|
+
/**
 * Build a logger whose methods accept any arguments and do nothing,
 * for tests or when logging is disabled.
 *
 * @returns {object} Logger with no-op `debug`, `info`, `warn` and `error` methods.
 */
export function createNullLogger() {
    const levels = ['debug', 'info', 'warn', 'error'];
    return Object.fromEntries(levels.map((level) => [level, () => { }]));
}
|