@sharc-code/mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -0
- package/dist/backend-client.d.ts +251 -0
- package/dist/backend-client.d.ts.map +1 -0
- package/dist/backend-client.js +269 -0
- package/dist/backend-client.js.map +1 -0
- package/dist/backend-handlers.d.ts +243 -0
- package/dist/backend-handlers.d.ts.map +1 -0
- package/dist/backend-handlers.js +1453 -0
- package/dist/backend-handlers.js.map +1 -0
- package/dist/config.d.ts +47 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +94 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +344 -0
- package/dist/index.js.map +1 -0
- package/dist/project-detector.d.ts +42 -0
- package/dist/project-detector.d.ts.map +1 -0
- package/dist/project-detector.js +135 -0
- package/dist/project-detector.js.map +1 -0
- package/dist/utils/env-manager.d.ts +19 -0
- package/dist/utils/env-manager.d.ts.map +1 -0
- package/dist/utils/env-manager.js +99 -0
- package/dist/utils/env-manager.js.map +1 -0
- package/dist/utils.d.ts +10 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +27 -0
- package/dist/utils.js.map +1 -0
- package/dist/watcher/file-watcher.d.ts +64 -0
- package/dist/watcher/file-watcher.d.ts.map +1 -0
- package/dist/watcher/file-watcher.js +263 -0
- package/dist/watcher/file-watcher.js.map +1 -0
- package/dist/watcher/incremental-indexer.d.ts +68 -0
- package/dist/watcher/incremental-indexer.d.ts.map +1 -0
- package/dist/watcher/incremental-indexer.js +254 -0
- package/dist/watcher/incremental-indexer.js.map +1 -0
- package/dist/watcher/index.d.ts +10 -0
- package/dist/watcher/index.d.ts.map +1 -0
- package/dist/watcher/index.js +10 -0
- package/dist/watcher/index.js.map +1 -0
- package/dist/watcher/processing-queue.d.ts +79 -0
- package/dist/watcher/processing-queue.d.ts.map +1 -0
- package/dist/watcher/processing-queue.js +150 -0
- package/dist/watcher/processing-queue.js.map +1 -0
- package/dist/watcher/syntax-guard.d.ts +59 -0
- package/dist/watcher/syntax-guard.d.ts.map +1 -0
- package/dist/watcher/syntax-guard.js +136 -0
- package/dist/watcher/syntax-guard.js.map +1 -0
- package/package.json +52 -0
|
@@ -0,0 +1,1453 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Backend-aware Tool Handlers
|
|
3
|
+
* These handlers use the SHARC backend for embedding, search, and sync operations.
|
|
4
|
+
* File scanning and chunking still happen locally.
|
|
5
|
+
* Snapshot/status data is persisted in the backend database.
|
|
6
|
+
*/
|
|
7
|
+
import * as fs from "fs/promises";
|
|
8
|
+
import * as fsSync from "fs";
|
|
9
|
+
import * as path from "path";
|
|
10
|
+
import * as crypto from "crypto";
|
|
11
|
+
import { ensureAbsolutePath, truncateContent, trackCodebasePath } from "./utils.js";
|
|
12
|
+
// Debug logging to file (since MCP stdout may not be visible in Claude Code)
const DEBUG_LOG_FILE = path.join(process.env.TEMP || process.env.TMP || '/tmp', 'sharc-mcp-debug.log');
/**
 * Append a timestamped message to the debug log file and echo it to stderr.
 * Never throws: a failed log write must not take down the MCP server.
 * @param {string} message - text to log.
 */
function debugLog(message) {
    const timestamp = new Date().toISOString();
    const line = `[${timestamp}] ${message}\n`;
    try {
        fsSync.appendFileSync(DEBUG_LOG_FILE, line);
    }
    catch {
        // Ignore file write errors
    }
    // Echo to stderr, not stdout: this is an MCP stdio server, and anything
    // written to stdout is parsed by the client as JSON-RPC protocol traffic.
    console.error(message);
}
|
|
25
|
+
import { BackendClient, } from "./backend-client.js";
|
|
26
|
+
// Import splitters from @sharc-code/splitter package
|
|
27
|
+
import { AstCodeSplitter, LangChainCodeSplitter } from "@sharc-code/splitter";
|
|
28
|
+
// Import file watcher components
|
|
29
|
+
import { FileWatcherService, IncrementalIndexer, SyntaxGuard } from "./watcher/index.js";
|
|
30
|
+
/**
 * Tiered chunking strategy: each tier maps a family of file extensions to
 * chunk-size/overlap settings and a splitter choice (AST vs LangChain).
 */
const CHUNK_TIERS = [
    {
        // Tier 1: Code (AST-supported) - semantic units, no overlap needed
        name: 'code',
        extensions: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py', '.pyw', '.java', '.go', '.rs', '.cs', '.cpp', '.hpp', '.cc', '.cxx', '.c', '.h', '.scala'],
        maxSize: 3500,
        overlap: 0,
        useAst: true,
    },
    {
        // Tier 2: Documentation - smaller chunks, overlap helps with prose flow
        name: 'docs',
        extensions: ['.md', '.mdx', '.rst', '.txt'],
        maxSize: 1500,
        overlap: 150,
        useAst: false,
    },
    {
        // Tier 3: Config/Data - structured data, moderate overlap
        name: 'config',
        extensions: ['.json', '.yaml', '.yml', '.toml', '.xml', '.env.example', '.ini', '.cfg'],
        maxSize: 1500,
        overlap: 100,
        useAst: false,
    },
    {
        // Tier 4: Other code (LangChain fallback) - languages not supported by AST
        name: 'other',
        extensions: ['.rb', '.php', '.swift', '.kt', '.kts', '.vue', '.svelte', '.html', '.css', '.scss', '.less', '.sql'],
        maxSize: 1500,
        overlap: 100,
        useAst: false,
    },
];
// Reverse index built once at module load: file extension -> owning tier.
const EXTENSION_TO_TIER = new Map(CHUNK_TIERS.flatMap((tier) => tier.extensions.map((ext) => [ext, tier])));
// Fallback tier applied to extensions not claimed by any tier above.
const DEFAULT_TIER = {
    name: 'unknown',
    extensions: [],
    maxSize: 1500,
    overlap: 100,
    useAst: false,
};
/**
 * Resolve the chunking tier for a file extension (case-insensitive).
 * @param {string} ext - extension including the leading dot, e.g. '.ts'.
 * @returns {object} matching tier, or DEFAULT_TIER for unknown extensions.
 */
function getTierForExtension(ext) {
    return EXTENSION_TO_TIER.get(ext.toLowerCase()) ?? DEFAULT_TIER;
}
// Every extension supported by at least one tier.
const DEFAULT_FILE_EXTENSIONS = CHUNK_TIERS.flatMap((tier) => tier.extensions);
// Export for use by startup-sync and project-detector
export { CHUNK_TIERS, DEFAULT_FILE_EXTENSIONS, getTierForExtension };
// Cache TTL for snapshot data (5 seconds to allow quick status updates during indexing)
const SNAPSHOT_CACHE_TTL = 5000;
|
|
88
|
+
export class BackendToolHandlers {
|
|
89
|
+
    /**
     * @param {object} config - server configuration. Must provide
     *   `sharcBackendUrl` and `sharcApiKey`; this class only supports
     *   backend mode and refuses to construct without them.
     * @throws {Error} when either backend setting is missing.
     */
    constructor(config) {
        // Local cache for snapshot data to reduce API calls
        this.cachedSnapshot = null;
        this.cacheTimestamp = 0;
        // File watcher for incremental indexing
        this.fileWatcherService = null;
        this.incrementalIndexers = new Map();
        // Splitter cache for reuse across chunking operations
        this.splitterCache = new Map();
        // Validate before constructing the client so a half-initialized
        // instance never escapes the constructor.
        if (!config.sharcBackendUrl || !config.sharcApiKey) {
            throw new Error('SHARC_BACKEND_URL and SHARC_API_KEY are required for backend mode');
        }
        this.backendClient = new BackendClient(config.sharcBackendUrl, config.sharcApiKey);
        this.currentWorkspace = process.cwd();
        this.config = config;
        this.syntaxGuard = new SyntaxGuard();
        // NOTE(review): console.log writes to stdout; on an MCP stdio
        // transport this may interleave with protocol traffic — confirm.
        console.log(`[BackendHandlers] Initialized with backend URL: ${config.sharcBackendUrl}`);
    }
|
|
107
|
+
// ==================== Snapshot Cache Methods ====================
|
|
108
|
+
/**
|
|
109
|
+
* Load snapshot from backend with caching
|
|
110
|
+
*/
|
|
111
|
+
async getSnapshot(forceRefresh = false) {
|
|
112
|
+
const now = Date.now();
|
|
113
|
+
if (!forceRefresh && this.cachedSnapshot && (now - this.cacheTimestamp) < SNAPSHOT_CACHE_TTL) {
|
|
114
|
+
return this.cachedSnapshot;
|
|
115
|
+
}
|
|
116
|
+
try {
|
|
117
|
+
this.cachedSnapshot = await this.backendClient.loadSnapshot();
|
|
118
|
+
this.cacheTimestamp = now;
|
|
119
|
+
return this.cachedSnapshot;
|
|
120
|
+
}
|
|
121
|
+
catch (error) {
|
|
122
|
+
console.error('[BackendHandlers] Failed to load snapshot from backend:', error);
|
|
123
|
+
// Return empty snapshot on error
|
|
124
|
+
return {
|
|
125
|
+
formatVersion: 'v2',
|
|
126
|
+
codebases: {},
|
|
127
|
+
lastUpdated: new Date().toISOString(),
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
    /**
     * Invalidate local cache to force refresh on next access.
     * Zeroing the timestamp guarantees the TTL comparison in getSnapshot()
     * fails, so the next call always hits the backend.
     */
    invalidateCache() {
        this.cacheTimestamp = 0;
    }
|
|
137
|
+
/**
|
|
138
|
+
* Get list of indexed codebases
|
|
139
|
+
*/
|
|
140
|
+
async getIndexedCodebases() {
|
|
141
|
+
const snapshot = await this.getSnapshot();
|
|
142
|
+
return Object.entries(snapshot.codebases)
|
|
143
|
+
.filter(([_, info]) => info.status === 'indexed')
|
|
144
|
+
.map(([path]) => path);
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Get list of codebases currently being indexed
|
|
148
|
+
*/
|
|
149
|
+
async getIndexingCodebases() {
|
|
150
|
+
const snapshot = await this.getSnapshot();
|
|
151
|
+
return Object.entries(snapshot.codebases)
|
|
152
|
+
.filter(([_, info]) => info.status === 'indexing')
|
|
153
|
+
.map(([path]) => path);
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Get codebase status
|
|
157
|
+
*/
|
|
158
|
+
async getCodebaseStatus(codebasePath) {
|
|
159
|
+
const snapshot = await this.getSnapshot();
|
|
160
|
+
const info = snapshot.codebases[codebasePath];
|
|
161
|
+
if (!info)
|
|
162
|
+
return 'not_found';
|
|
163
|
+
return info.status;
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Get complete codebase info
|
|
167
|
+
*/
|
|
168
|
+
async getCodebaseInfo(codebasePath) {
|
|
169
|
+
const snapshot = await this.getSnapshot();
|
|
170
|
+
return snapshot.codebases[codebasePath] || null;
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Get indexing progress for a codebase
|
|
174
|
+
*/
|
|
175
|
+
async getIndexingProgress(codebasePath) {
|
|
176
|
+
const snapshot = await this.getSnapshot();
|
|
177
|
+
const info = snapshot.codebases[codebasePath];
|
|
178
|
+
if (info && info.status === 'indexing') {
|
|
179
|
+
return info.indexingPercentage;
|
|
180
|
+
}
|
|
181
|
+
return undefined;
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Set codebase to indexing status
|
|
185
|
+
*/
|
|
186
|
+
async setCodebaseIndexing(codebasePath, progress) {
|
|
187
|
+
const info = {
|
|
188
|
+
status: 'indexing',
|
|
189
|
+
indexingPercentage: progress,
|
|
190
|
+
lastUpdated: new Date().toISOString(),
|
|
191
|
+
};
|
|
192
|
+
await this.backendClient.updateCodebaseInfo(codebasePath, info);
|
|
193
|
+
this.invalidateCache();
|
|
194
|
+
}
|
|
195
|
+
/**
|
|
196
|
+
* Set codebase to indexed status
|
|
197
|
+
*/
|
|
198
|
+
async setCodebaseIndexed(codebasePath, stats) {
|
|
199
|
+
const info = {
|
|
200
|
+
status: 'indexed',
|
|
201
|
+
indexedFiles: stats.indexedFiles,
|
|
202
|
+
totalChunks: stats.totalChunks,
|
|
203
|
+
indexStatus: stats.status,
|
|
204
|
+
lastUpdated: new Date().toISOString(),
|
|
205
|
+
};
|
|
206
|
+
await this.backendClient.updateCodebaseInfo(codebasePath, info);
|
|
207
|
+
this.invalidateCache();
|
|
208
|
+
}
|
|
209
|
+
/**
|
|
210
|
+
* Set codebase to failed status
|
|
211
|
+
*/
|
|
212
|
+
async setCodebaseIndexFailed(codebasePath, errorMessage, lastAttemptedPercentage) {
|
|
213
|
+
const info = {
|
|
214
|
+
status: 'indexfailed',
|
|
215
|
+
errorMessage,
|
|
216
|
+
lastAttemptedPercentage,
|
|
217
|
+
lastUpdated: new Date().toISOString(),
|
|
218
|
+
};
|
|
219
|
+
await this.backendClient.updateCodebaseInfo(codebasePath, info);
|
|
220
|
+
this.invalidateCache();
|
|
221
|
+
}
|
|
222
|
+
    /**
     * Remove codebase from snapshot completely.
     * Delegates the delete to the backend, then invalidates the local cache
     * so the next getSnapshot() reflects the removal.
     * @param {string} codebasePath
     */
    async removeCodebaseFromSnapshot(codebasePath) {
        await this.backendClient.removeCodebaseFromSnapshot(codebasePath);
        this.invalidateCache();
    }
|
|
229
|
+
/**
|
|
230
|
+
* Generate a collection name from a codebase path
|
|
231
|
+
* Must match the backend's expectation (URL-safe, unique per path)
|
|
232
|
+
* Uses SHA-256 for better collision resistance
|
|
233
|
+
*/
|
|
234
|
+
generateCollectionName(codebasePath) {
|
|
235
|
+
const hash = crypto.createHash('sha256').update(codebasePath).digest('hex').substring(0, 12);
|
|
236
|
+
const baseName = path.basename(codebasePath).replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
|
|
237
|
+
return `hybrid_code_chunks_${baseName}_${hash}`;
|
|
238
|
+
}
|
|
239
|
+
    /**
     * Sync indexed codebases from backend collections.
     * Lists the backend's actual vector collections and removes any snapshot
     * entry marked 'indexed' whose derived collection name no longer exists
     * there (e.g. deleted server-side). Best-effort: any error is logged and
     * swallowed so startup sync cannot crash the server.
     */
    async syncIndexedCodebasesFromBackend() {
        try {
            console.log(`[SYNC-BACKEND] 🔄 Syncing indexed codebases from backend...`);
            const response = await this.backendClient.listRepos();
            console.log(`[SYNC-BACKEND] 📋 Found ${response.count} collections in backend`);
            // Get snapshot codebases and compare with backend collections
            const snapshot = await this.getSnapshot(true); // Force refresh
            // Only entries already marked 'indexed' are candidates for removal.
            const snapshotCodebases = Object.entries(snapshot.codebases)
                .filter(([_, info]) => info.status === 'indexed')
                .map(([path]) => path);
            const backendCollections = new Set(response.repos.map(r => r.name));
            // Remove snapshot entries whose collections don't exist in backend
            for (const codebasePath of snapshotCodebases) {
                const collectionName = this.generateCollectionName(codebasePath);
                if (!backendCollections.has(collectionName)) {
                    await this.removeCodebaseFromSnapshot(codebasePath);
                    console.log(`[SYNC-BACKEND] ➖ Removed stale codebase (collection not in backend): ${codebasePath}`);
                }
            }
            console.log(`[SYNC-BACKEND] ✅ Backend sync completed`);
        }
        catch (error) {
            console.error(`[SYNC-BACKEND] ❌ Error syncing from backend:`, error.message || error);
        }
    }
|
|
268
|
+
// ==================== File Watcher Methods ====================
|
|
269
|
+
/**
|
|
270
|
+
* Initialize the file watcher service (lazy initialization)
|
|
271
|
+
*/
|
|
272
|
+
initializeFileWatcher() {
|
|
273
|
+
if (!this.fileWatcherService) {
|
|
274
|
+
// Create the watcher with our change processor
|
|
275
|
+
this.fileWatcherService = new FileWatcherService((change) => this.processFileChange(change), {
|
|
276
|
+
debounceMs: parseInt(process.env.SHARC_WATCH_DEBOUNCE_MS || '2000', 10),
|
|
277
|
+
verbose: process.env.DEBUG === 'true' || process.env.LOG_LEVEL === 'verbose',
|
|
278
|
+
});
|
|
279
|
+
console.log('[Watcher] File watcher service initialized');
|
|
280
|
+
}
|
|
281
|
+
return this.fileWatcherService;
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Process a single file change (used by FileWatcherService)
|
|
285
|
+
*/
|
|
286
|
+
async processFileChange(change) {
|
|
287
|
+
// Find the incremental indexer for this file
|
|
288
|
+
for (const [codebasePath, indexer] of this.incrementalIndexers.entries()) {
|
|
289
|
+
// Check if this file belongs to this codebase
|
|
290
|
+
const absoluteCodebase = path.resolve(codebasePath);
|
|
291
|
+
const absoluteFile = path.resolve(change.absolutePath);
|
|
292
|
+
if (absoluteFile.startsWith(absoluteCodebase)) {
|
|
293
|
+
return indexer.processChange(change);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
// No indexer found for this file
|
|
297
|
+
console.warn(`[Watcher] No indexer found for file: ${change.absolutePath}`);
|
|
298
|
+
return { success: false, chunksDeleted: 0, chunksIndexed: 0, error: 'No indexer found' };
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Create an incremental indexer for a codebase
|
|
302
|
+
*/
|
|
303
|
+
createIncrementalIndexer(codebasePath, collectionName) {
|
|
304
|
+
// Create splitter cache for chunking
|
|
305
|
+
const splitterCache = new Map();
|
|
306
|
+
const getSplitterForTier = (tier) => {
|
|
307
|
+
const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
|
|
308
|
+
if (!splitterCache.has(cacheKey)) {
|
|
309
|
+
if (tier.useAst) {
|
|
310
|
+
splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
|
|
311
|
+
}
|
|
312
|
+
else {
|
|
313
|
+
splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
return splitterCache.get(cacheKey);
|
|
317
|
+
};
|
|
318
|
+
const indexer = new IncrementalIndexer({
|
|
319
|
+
codebasePath,
|
|
320
|
+
collectionName,
|
|
321
|
+
backendClient: this.backendClient,
|
|
322
|
+
getLanguageFromExtension: (ext) => this.getLanguageFromExtension(ext),
|
|
323
|
+
checkSyntax: (content, language) => {
|
|
324
|
+
return this.syntaxGuard.checkSyntax(content, language);
|
|
325
|
+
},
|
|
326
|
+
chunkFile: async (content, language, relativePath) => {
|
|
327
|
+
const ext = path.extname(relativePath).toLowerCase();
|
|
328
|
+
const tier = getTierForExtension(ext);
|
|
329
|
+
const splitter = getSplitterForTier(tier);
|
|
330
|
+
const chunks = await splitter.split(content, language, relativePath);
|
|
331
|
+
const indexChunks = [];
|
|
332
|
+
for (const chunk of chunks) {
|
|
333
|
+
if (!chunk.content || chunk.content.trim().length === 0) {
|
|
334
|
+
continue;
|
|
335
|
+
}
|
|
336
|
+
// Add file path context for non-code tiers
|
|
337
|
+
let finalContent = chunk.content;
|
|
338
|
+
if (!tier.useAst && tier.name !== 'code') {
|
|
339
|
+
const pathParts = relativePath.replace(/\\/g, '/').split('/');
|
|
340
|
+
const shortPath = pathParts.slice(-2).join('/');
|
|
341
|
+
const commentPrefix = language === 'python' ? '#' : '//';
|
|
342
|
+
finalContent = `${commentPrefix} File: ${shortPath}\n${chunk.content}`;
|
|
343
|
+
}
|
|
344
|
+
indexChunks.push({
|
|
345
|
+
content: finalContent,
|
|
346
|
+
relativePath,
|
|
347
|
+
startLine: chunk.metadata.startLine,
|
|
348
|
+
endLine: chunk.metadata.endLine,
|
|
349
|
+
language,
|
|
350
|
+
metadata: { codebasePath },
|
|
351
|
+
});
|
|
352
|
+
}
|
|
353
|
+
return indexChunks;
|
|
354
|
+
},
|
|
355
|
+
});
|
|
356
|
+
return indexer;
|
|
357
|
+
}
|
|
358
|
+
/**
|
|
359
|
+
* Start watching a codebase for file changes
|
|
360
|
+
*/
|
|
361
|
+
startWatching(codebasePath, collectionName) {
|
|
362
|
+
const normalizedPath = path.resolve(codebasePath);
|
|
363
|
+
// Initialize watcher service if needed
|
|
364
|
+
const watcher = this.initializeFileWatcher();
|
|
365
|
+
// Check if already watching
|
|
366
|
+
if (watcher.isWatching(normalizedPath)) {
|
|
367
|
+
console.log(`[Watcher] Already watching: ${normalizedPath}`);
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
370
|
+
// Create incremental indexer for this codebase
|
|
371
|
+
const indexer = this.createIncrementalIndexer(normalizedPath, collectionName);
|
|
372
|
+
this.incrementalIndexers.set(normalizedPath, indexer);
|
|
373
|
+
// Start watching
|
|
374
|
+
watcher.watchCodebase(normalizedPath, collectionName);
|
|
375
|
+
}
|
|
376
|
+
/**
|
|
377
|
+
* Stop watching a codebase
|
|
378
|
+
*/
|
|
379
|
+
async stopWatching(codebasePath) {
|
|
380
|
+
const normalizedPath = path.resolve(codebasePath);
|
|
381
|
+
if (this.fileWatcherService) {
|
|
382
|
+
await this.fileWatcherService.unwatchCodebase(normalizedPath);
|
|
383
|
+
}
|
|
384
|
+
// Clean up indexer
|
|
385
|
+
this.incrementalIndexers.delete(normalizedPath);
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Get list of watched codebases
|
|
389
|
+
*/
|
|
390
|
+
getWatchedCodebases() {
|
|
391
|
+
if (!this.fileWatcherService) {
|
|
392
|
+
return [];
|
|
393
|
+
}
|
|
394
|
+
return this.fileWatcherService.getWatchedCodebases();
|
|
395
|
+
}
|
|
396
|
+
/**
|
|
397
|
+
* Check if a codebase is being watched
|
|
398
|
+
*/
|
|
399
|
+
isWatching(codebasePath) {
|
|
400
|
+
if (!this.fileWatcherService) {
|
|
401
|
+
return false;
|
|
402
|
+
}
|
|
403
|
+
return this.fileWatcherService.isWatching(codebasePath);
|
|
404
|
+
}
|
|
405
|
+
    /**
     * Graceful shutdown of all watchers and cleanup resources.
     * Stops the file watcher service (if it was ever started), drops all
     * per-codebase incremental indexers, and clears the splitter cache so
     * parser instances can be garbage collected.
     */
    async shutdown() {
        if (this.fileWatcherService) {
            await this.fileWatcherService.shutdown();
            this.fileWatcherService = null;
        }
        this.incrementalIndexers.clear();
        // Clear splitter cache to release any native resources (tree-sitter parsers)
        this.splitterCache.clear();
        console.log('[BackendHandlers] Shutdown complete');
    }
|
|
418
|
+
// ==================== Splitter Helper ====================
|
|
419
|
+
/**
|
|
420
|
+
* Get a splitter for a given tier (cached for reuse)
|
|
421
|
+
*/
|
|
422
|
+
getSplitterForTier(tier) {
|
|
423
|
+
const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
|
|
424
|
+
if (!this.splitterCache.has(cacheKey)) {
|
|
425
|
+
if (tier.useAst) {
|
|
426
|
+
this.splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
|
|
427
|
+
}
|
|
428
|
+
else {
|
|
429
|
+
this.splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
return this.splitterCache.get(cacheKey);
|
|
433
|
+
}
|
|
434
|
+
/**
|
|
435
|
+
* Check if a path exists (async replacement for fs.existsSync)
|
|
436
|
+
*/
|
|
437
|
+
async pathExists(filePath) {
|
|
438
|
+
try {
|
|
439
|
+
await fs.access(filePath);
|
|
440
|
+
return true;
|
|
441
|
+
}
|
|
442
|
+
catch {
|
|
443
|
+
return false;
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
/**
|
|
447
|
+
* Scan and chunk files from a codebase directory
|
|
448
|
+
* Uses async file I/O to avoid blocking the event loop
|
|
449
|
+
*/
|
|
450
|
+
async scanAndChunkFiles(codebasePath, customExtensions = [], ignorePatterns = [], onProgress) {
|
|
451
|
+
const allChunks = [];
|
|
452
|
+
// Get all supported extensions
|
|
453
|
+
const extensions = new Set([...DEFAULT_FILE_EXTENSIONS, ...customExtensions]);
|
|
454
|
+
console.log(`[CHUNK] Scanning with extensions: ${[...extensions].join(', ')}`);
|
|
455
|
+
// Find all files recursively using async operations
|
|
456
|
+
const files = [];
|
|
457
|
+
const scanDir = async (dir) => {
|
|
458
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
459
|
+
for (const entry of entries) {
|
|
460
|
+
const fullPath = path.join(dir, entry.name);
|
|
461
|
+
const relativePath = path.relative(codebasePath, fullPath);
|
|
462
|
+
// Skip ignored patterns
|
|
463
|
+
if (this.shouldIgnore(relativePath, ignorePatterns)) {
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
466
|
+
if (entry.isDirectory()) {
|
|
467
|
+
// Skip hidden directories and common ignore dirs
|
|
468
|
+
if (!entry.name.startsWith('.') &&
|
|
469
|
+
!['node_modules', 'dist', 'build', '__pycache__', 'venv', '.git'].includes(entry.name)) {
|
|
470
|
+
await scanDir(fullPath);
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
else if (entry.isFile()) {
|
|
474
|
+
const ext = path.extname(entry.name).toLowerCase();
|
|
475
|
+
if (extensions.has(ext)) {
|
|
476
|
+
files.push(fullPath);
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
};
|
|
481
|
+
await scanDir(codebasePath);
|
|
482
|
+
console.log(`[CHUNK] Found ${files.length} files to process`);
|
|
483
|
+
// Create splitters for each tier (cached for reuse)
|
|
484
|
+
const splitterCache = new Map();
|
|
485
|
+
const getSplitterForTier = (tier) => {
|
|
486
|
+
const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
|
|
487
|
+
if (!splitterCache.has(cacheKey)) {
|
|
488
|
+
if (tier.useAst) {
|
|
489
|
+
splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
492
|
+
splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
return splitterCache.get(cacheKey);
|
|
496
|
+
};
|
|
497
|
+
// Track tier statistics
|
|
498
|
+
const tierStats = {};
|
|
499
|
+
for (let i = 0; i < files.length; i++) {
|
|
500
|
+
const filePath = files[i];
|
|
501
|
+
const relativePath = path.relative(codebasePath, filePath);
|
|
502
|
+
try {
|
|
503
|
+
// Check file size before reading
|
|
504
|
+
const stat = await fs.stat(filePath);
|
|
505
|
+
if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
|
|
506
|
+
console.warn(`[CHUNK] Skipping large file (${(stat.size / 1024 / 1024).toFixed(1)}MB): ${relativePath}`);
|
|
507
|
+
continue;
|
|
508
|
+
}
|
|
509
|
+
const content = await fs.readFile(filePath, 'utf-8');
|
|
510
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
511
|
+
const language = this.getLanguageFromExtension(ext);
|
|
512
|
+
// Get tier-specific settings for this file type
|
|
513
|
+
const tier = getTierForExtension(ext);
|
|
514
|
+
const splitter = getSplitterForTier(tier);
|
|
515
|
+
// Split the file into chunks
|
|
516
|
+
const chunks = await splitter.split(content, language, relativePath);
|
|
517
|
+
// Track tier stats
|
|
518
|
+
if (!tierStats[tier.name]) {
|
|
519
|
+
tierStats[tier.name] = { files: 0, chunks: 0 };
|
|
520
|
+
}
|
|
521
|
+
tierStats[tier.name].files++;
|
|
522
|
+
for (const chunk of chunks) {
|
|
523
|
+
// Skip empty or whitespace-only chunks
|
|
524
|
+
if (!chunk.content || chunk.content.trim().length === 0) {
|
|
525
|
+
console.warn(`[CHUNK] Skipping empty chunk from ${relativePath}:${chunk.metadata.startLine}`);
|
|
526
|
+
continue;
|
|
527
|
+
}
|
|
528
|
+
// Add file path context for non-code tiers (docs, config, other)
|
|
529
|
+
let finalContent = chunk.content;
|
|
530
|
+
if (!tier.useAst && tier.name !== 'code') {
|
|
531
|
+
// Shorten path to last 2 segments
|
|
532
|
+
const pathParts = relativePath.replace(/\\/g, '/').split('/');
|
|
533
|
+
const shortPath = pathParts.slice(-2).join('/');
|
|
534
|
+
const commentPrefix = language === 'python' ? '#' : '//';
|
|
535
|
+
finalContent = `${commentPrefix} File: ${shortPath}\n${chunk.content}`;
|
|
536
|
+
}
|
|
537
|
+
allChunks.push({
|
|
538
|
+
content: finalContent,
|
|
539
|
+
relativePath,
|
|
540
|
+
startLine: chunk.metadata.startLine,
|
|
541
|
+
endLine: chunk.metadata.endLine,
|
|
542
|
+
language,
|
|
543
|
+
metadata: {
|
|
544
|
+
codebasePath,
|
|
545
|
+
},
|
|
546
|
+
});
|
|
547
|
+
tierStats[tier.name].chunks++;
|
|
548
|
+
}
|
|
549
|
+
// Report progress
|
|
550
|
+
if (onProgress) {
|
|
551
|
+
const percentage = ((i + 1) / files.length) * 100;
|
|
552
|
+
onProgress({
|
|
553
|
+
phase: 'chunking',
|
|
554
|
+
percentage,
|
|
555
|
+
current: i + 1,
|
|
556
|
+
total: files.length,
|
|
557
|
+
});
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
catch (error) {
|
|
561
|
+
console.warn(`[CHUNK] Error processing ${relativePath}: ${error.message}`);
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
// Log tier statistics
|
|
565
|
+
console.log(`[CHUNK] Generated ${allChunks.length} chunks from ${files.length} files`);
|
|
566
|
+
for (const [tierName, stats] of Object.entries(tierStats)) {
|
|
567
|
+
console.log(`[CHUNK] ${tierName}: ${stats.files} files → ${stats.chunks} chunks`);
|
|
568
|
+
}
|
|
569
|
+
return allChunks;
|
|
570
|
+
}
|
|
571
|
+
shouldIgnore(relativePath, ignorePatterns) {
|
|
572
|
+
const defaultIgnore = [
|
|
573
|
+
'node_modules', '.git', 'dist', 'build', '__pycache__',
|
|
574
|
+
'venv', '.env', '.vscode', '.idea', 'coverage',
|
|
575
|
+
];
|
|
576
|
+
const allPatterns = [...defaultIgnore, ...ignorePatterns];
|
|
577
|
+
for (const pattern of allPatterns) {
|
|
578
|
+
if (relativePath.includes(pattern)) {
|
|
579
|
+
return true;
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
return false;
|
|
583
|
+
}
|
|
584
|
+
    /**
     * Generate SHA-256 hashes for all supported files in the codebase.
     * Recursively walks the tree, skipping hidden entries, ignored patterns
     * (see shouldIgnore), unsupported extensions, oversized files, and
     * anything unreadable. Used for Merkle-style change detection between
     * indexing runs (see compareFileHashes).
     * @param {string} rootDir - codebase root to walk.
     * @param {string[]} [ignorePatterns] - extra ignore patterns.
     * @returns {Promise<Map<string, string>>} relative path -> hex content hash.
     */
    async generateFileHashes(rootDir, ignorePatterns = []) {
        const hashes = new Map();
        const supportedExtensions = new Set(DEFAULT_FILE_EXTENSIONS);
        const scanDir = async (dir) => {
            let entries;
            try {
                entries = await fs.readdir(dir, { withFileTypes: true });
            }
            catch {
                return; // Can't read directory, skip
            }
            for (const entry of entries) {
                const fullPath = path.join(dir, entry.name);
                const relativePath = path.relative(rootDir, fullPath);
                // Skip hidden files/dirs and ignored patterns
                if (entry.name.startsWith('.') || this.shouldIgnore(relativePath, ignorePatterns)) {
                    continue;
                }
                if (entry.isDirectory()) {
                    await scanDir(fullPath);
                }
                else if (entry.isFile()) {
                    const ext = path.extname(entry.name).toLowerCase();
                    if (supportedExtensions.has(ext)) {
                        try {
                            // Size check first so huge files are never read into memory
                            const stat = await fs.stat(fullPath);
                            if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
                                continue; // Skip large files
                            }
                            const content = await fs.readFile(fullPath, 'utf-8');
                            const hash = crypto.createHash('sha256').update(content).digest('hex');
                            hashes.set(relativePath, hash);
                        }
                        catch {
                            // Can't read file, skip
                        }
                    }
                }
            }
        };
        await scanDir(rootDir);
        return hashes;
    }
|
|
630
|
+
/**
|
|
631
|
+
* Compare old and new file hashes to detect changes
|
|
632
|
+
*/
|
|
633
|
+
compareFileHashes(oldHashes, newHashes) {
|
|
634
|
+
const result = { added: [], modified: [], removed: [] };
|
|
635
|
+
// If no old hashes, all files are "added" (first index)
|
|
636
|
+
if (!oldHashes || oldHashes.length === 0) {
|
|
637
|
+
result.added = Array.from(newHashes.keys());
|
|
638
|
+
return result;
|
|
639
|
+
}
|
|
640
|
+
const oldMap = new Map(oldHashes);
|
|
641
|
+
// Find added and modified files
|
|
642
|
+
for (const [filePath, hash] of newHashes) {
|
|
643
|
+
if (!oldMap.has(filePath)) {
|
|
644
|
+
result.added.push(filePath);
|
|
645
|
+
}
|
|
646
|
+
else if (oldMap.get(filePath) !== hash) {
|
|
647
|
+
result.modified.push(filePath);
|
|
648
|
+
}
|
|
649
|
+
oldMap.delete(filePath);
|
|
650
|
+
}
|
|
651
|
+
// Remaining in oldMap are removed files
|
|
652
|
+
result.removed = Array.from(oldMap.keys());
|
|
653
|
+
return result;
|
|
654
|
+
}
|
|
655
|
+
/**
 * Apply incremental changes to the index (Merkle diff).
 * Only indexes added/modified files, deletes removed file vectors.
 *
 * @param {string} codebasePath - Absolute root of the codebase being synced.
 * @param {string} collectionName - Backend collection the vectors live in.
 * @param {{added: string[], modified: string[], removed: string[]}} changes
 *   Relative file paths, as produced by compareFileHashes.
 * @returns {Promise<{chunksIndexed: number, chunksDeleted: number, filesProcessed: number}>}
 *   NOTE: when only removals happen, chunksDeleted counts removed FILES
 *   (not chunks) — see the batch-delete branch below.
 * @throws Re-throws any error from the final indexChunks call; deletion
 *   failures are logged and swallowed (best-effort).
 */
async applyIncrementalChanges(codebasePath, collectionName, changes,
// TODO: Implement ignore pattern filtering for incremental indexing
// Currently ignorePatterns is accepted but not applied during incremental updates
_ignorePatterns = []) {
    let chunksIndexed = 0;
    let chunksDeleted = 0;
    // Delete vectors for removed files (batch delete). Best-effort: a
    // failure here is logged but does not abort the rest of the sync.
    if (changes.removed.length > 0) {
        try {
            await this.backendClient.deleteFileVectorsBatch(collectionName, changes.removed);
            // File count, not chunk count — the batch API does not report
            // how many vectors were actually dropped.
            chunksDeleted = changes.removed.length;
            debugLog(`[MerkleDiff] Deleted vectors for ${changes.removed.length} removed files`);
        }
        catch (e) {
            debugLog(`[MerkleDiff] Could not delete vectors: ${e}`);
        }
    }
    // Process added and modified files together; they are chunked the same way.
    const filesToIndex = [...changes.added, ...changes.modified];
    if (filesToIndex.length === 0) {
        return { chunksIndexed, chunksDeleted, filesProcessed: 0 };
    }
    debugLog(`[MerkleDiff] Processing ${filesToIndex.length} files (${changes.added.length} added, ${changes.modified.length} modified)`);
    const allChunks = [];
    const deleteFirst = changes.modified; // Delete old chunks for modified files
    for (const relativePath of filesToIndex) {
        const absolutePath = path.join(codebasePath, relativePath);
        // Path traversal protection - ensure resolved path is within codebase
        // (a relative path like '../x' would otherwise escape the root).
        const resolvedPath = path.resolve(absolutePath);
        const resolvedCodebase = path.resolve(codebasePath);
        if (!resolvedPath.startsWith(resolvedCodebase + path.sep) && resolvedPath !== resolvedCodebase) {
            console.warn(`[SECURITY] Path traversal blocked: ${relativePath}`);
            continue;
        }
        try {
            // Check file size before reading to avoid loading huge files into memory.
            const stat = await fs.stat(absolutePath);
            if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
                console.warn(`[CHUNK] Skipping large file (${(stat.size / 1024 / 1024).toFixed(1)}MB): ${relativePath}`);
                continue;
            }
            const content = await fs.readFile(absolutePath, 'utf-8');
            const ext = path.extname(relativePath).toLowerCase();
            const language = this.getLanguageFromExtension(ext);
            // Chunk the file using tiered strategy (tier picks splitter + sizes).
            const tier = getTierForExtension(ext);
            const chunks = await this.chunkFileWithTier(content, language, relativePath, codebasePath, tier);
            allChunks.push(...chunks);
        }
        catch {
            // File might have been deleted between hashing and reading, skip.
        }
    }
    // Index all chunks in one batch request.
    if (allChunks.length > 0) {
        try {
            debugLog(`[MerkleDiff] Indexing ${allChunks.length} chunks...`);
            const response = await this.backendClient.indexChunks({
                collection: collectionName,
                chunks: allChunks,
                createIfNotExists: true,
                finalize: false, // Don't rebuild HNSW for incremental updates
                deleteFirst,
            });
            chunksIndexed = response.indexed;
            chunksDeleted += response.deleted || 0;
            debugLog(`[MerkleDiff] Indexed ${chunksIndexed} chunks, deleted ${response.deleted || 0} for modified files`);
        }
        catch (error) {
            // Indexing failure is fatal for this sync: the caller falls back
            // to a full re-index rather than saving a stale snapshot.
            debugLog(`[MerkleDiff] Error indexing chunks: ${error.message || error}`);
            throw error;
        }
    }
    return { chunksIndexed, chunksDeleted, filesProcessed: filesToIndex.length };
}
|
|
734
|
+
/**
|
|
735
|
+
* Chunk a single file using the tier strategy (helper for incremental indexing)
|
|
736
|
+
*/
|
|
737
|
+
async chunkFileWithTier(content, language, relativePath, codebasePath, tier) {
|
|
738
|
+
let splitter = this.splitterCache.get(tier.name);
|
|
739
|
+
if (!splitter) {
|
|
740
|
+
if (tier.useAst) {
|
|
741
|
+
splitter = new AstCodeSplitter(tier.maxSize);
|
|
742
|
+
}
|
|
743
|
+
else {
|
|
744
|
+
splitter = new LangChainCodeSplitter(tier.maxSize, tier.overlap);
|
|
745
|
+
}
|
|
746
|
+
this.splitterCache.set(tier.name, splitter);
|
|
747
|
+
}
|
|
748
|
+
const rawChunks = await splitter.split(content, language, relativePath);
|
|
749
|
+
return rawChunks.map(chunk => ({
|
|
750
|
+
content: chunk.content,
|
|
751
|
+
relativePath,
|
|
752
|
+
startLine: chunk.metadata.startLine,
|
|
753
|
+
endLine: chunk.metadata.endLine,
|
|
754
|
+
language,
|
|
755
|
+
}));
|
|
756
|
+
}
|
|
757
|
+
getLanguageFromExtension(ext) {
|
|
758
|
+
const langMap = {
|
|
759
|
+
// Tier 1: Code (AST-supported)
|
|
760
|
+
'.ts': 'typescript', '.tsx': 'typescript',
|
|
761
|
+
'.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
|
|
762
|
+
'.py': 'python', '.pyw': 'python',
|
|
763
|
+
'.rs': 'rust',
|
|
764
|
+
'.go': 'go',
|
|
765
|
+
'.java': 'java',
|
|
766
|
+
'.c': 'c', '.h': 'c',
|
|
767
|
+
'.cpp': 'cpp', '.hpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp',
|
|
768
|
+
'.cs': 'csharp',
|
|
769
|
+
'.scala': 'scala',
|
|
770
|
+
// Tier 2: Documentation
|
|
771
|
+
'.md': 'markdown', '.mdx': 'markdown',
|
|
772
|
+
'.rst': 'rst', '.txt': 'text',
|
|
773
|
+
// Tier 3: Config/Data
|
|
774
|
+
'.json': 'json',
|
|
775
|
+
'.yaml': 'yaml', '.yml': 'yaml',
|
|
776
|
+
'.toml': 'toml',
|
|
777
|
+
'.xml': 'xml',
|
|
778
|
+
'.ini': 'ini', '.cfg': 'ini',
|
|
779
|
+
// Tier 4: Other code
|
|
780
|
+
'.rb': 'ruby',
|
|
781
|
+
'.php': 'php',
|
|
782
|
+
'.swift': 'swift',
|
|
783
|
+
'.kt': 'kotlin', '.kts': 'kotlin',
|
|
784
|
+
'.vue': 'vue',
|
|
785
|
+
'.svelte': 'svelte',
|
|
786
|
+
'.html': 'html',
|
|
787
|
+
'.css': 'css', '.scss': 'scss', '.less': 'less',
|
|
788
|
+
'.sql': 'sql',
|
|
789
|
+
};
|
|
790
|
+
return langMap[ext] || 'unknown';
|
|
791
|
+
}
|
|
792
|
+
/**
 * MCP tool handler: index (or re-sync) a codebase.
 *
 * Flow:
 *  1. Validate the path (exists, is a directory, not already mid-index).
 *  2. force=true on an indexed codebase: drop the collection, full re-index.
 *  3. Already indexed, no force: Merkle-diff incremental sync (fast path);
 *     on any diff failure, fall through to a full background re-index.
 *  4. Otherwise: kick off background indexing and return immediately.
 *
 * @param {{path: string, force?: boolean, splitter?: string,
 *          customExtensions?: string[], ignorePatterns?: string[]}} args
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   MCP-style response; never throws (errors become isError responses).
 */
async handleIndexCodebase(args) {
    // `splitter` is accepted for interface compatibility but unused here.
    const { path: codebasePath, force, splitter: _splitter, customExtensions, ignorePatterns } = args;
    const forceReindex = force || false;
    const customFileExtensions = customExtensions || [];
    const customIgnorePatterns = ignorePatterns || [];
    try {
        // Sync with backend first so "already indexed" checks see fresh state.
        await this.syncIndexedCodebasesFromBackend();
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' is not a directory`
                }],
                isError: true
            };
        }
        // Check if already indexing — refuse to start a concurrent run.
        const indexingCodebases = await this.getIndexingCodebases();
        if (indexingCodebases.includes(absolutePath)) {
            return {
                content: [{
                    type: "text",
                    text: `Codebase '${absolutePath}' is already being indexed in the background. Please wait for completion.`
                }],
                isError: true
            };
        }
        const collectionName = this.generateCollectionName(absolutePath);
        const indexedCodebases = await this.getIndexedCodebases();
        const isAlreadyIndexed = indexedCodebases.includes(absolutePath);
        // If force reindex, delete existing collection and do full re-index.
        if (forceReindex && isAlreadyIndexed) {
            try {
                const hasCollection = await this.backendClient.hasCollection(collectionName);
                if (hasCollection) {
                    console.log(`[FORCE-REINDEX] 🔄 Deleting existing collection: ${collectionName}`);
                    await this.backendClient.deleteRepo(collectionName);
                }
            }
            catch (error) {
                // Best-effort: a stale collection will be overwritten anyway.
                console.warn(`[FORCE-REINDEX] Could not delete collection: ${error}`);
            }
            await this.removeCodebaseFromSnapshot(absolutePath);
            // Continue to full index below
        }
        // If already indexed (and not forcing), do Merkle diff for incremental sync.
        if (!forceReindex && isAlreadyIndexed) {
            debugLog(`[Index] Codebase already indexed, checking for changes via Merkle diff...`);
            const startTime = Date.now();
            try {
                // Load existing Merkle snapshot (may be null on first sync).
                const merkleSnapshot = await this.backendClient.loadMerkleSnapshot(absolutePath);
                debugLog(`[Index] Merkle snapshot loaded: ${merkleSnapshot ? `${merkleSnapshot.fileHashes?.length || 0} file hashes` : 'null'}`);
                // Generate current file hashes
                debugLog(`[Index] Scanning files for changes...`);
                const currentHashes = await this.generateFileHashes(absolutePath, customIgnorePatterns);
                debugLog(`[Index] Found ${currentHashes.size} files`);
                // Compare to detect changes
                const changes = this.compareFileHashes(merkleSnapshot?.fileHashes || null, currentHashes);
                const totalChanges = changes.added.length + changes.modified.length + changes.removed.length;
                if (totalChanges === 0) {
                    const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                    debugLog(`[Index] No changes detected (${duration}s)`);
                    // Ensure watcher is running even when nothing changed.
                    this.startWatching(absolutePath, collectionName);
                    return {
                        content: [{
                            type: "text",
                            text: `✅ Index is up to date for '${absolutePath}'.\n\nNo file changes detected since last index. Checked ${currentHashes.size} files in ${duration}s.`
                        }]
                    };
                }
                // Apply incremental changes
                debugLog(`[Index] Found ${totalChanges} changes: +${changes.added.length} added, ~${changes.modified.length} modified, -${changes.removed.length} removed`);
                const result = await this.applyIncrementalChanges(absolutePath, collectionName, changes, customIgnorePatterns);
                // Save updated Merkle snapshot (only after indexing succeeded,
                // so a failed sync retries the same diff next time).
                await this.backendClient.saveMerkleSnapshot(absolutePath, {
                    fileHashes: Array.from(currentHashes.entries()),
                    merkleDAG: { nodes: [], rootIds: [] },
                    fileCount: currentHashes.size,
                });
                // Start/resume file watcher
                this.startWatching(absolutePath, collectionName);
                const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                debugLog(`[Index] Incremental sync completed in ${duration}s`);
                return {
                    content: [{
                        type: "text",
                        text: `✅ Synced '${absolutePath}' in ${duration}s\n\n` +
                            `Changes: +${changes.added.length} added, ~${changes.modified.length} modified, -${changes.removed.length} removed\n` +
                            `Indexed ${result.chunksIndexed} chunks, deleted ${result.chunksDeleted} old chunks.`
                    }]
                };
            }
            catch (error) {
                debugLog(`[Index] Merkle diff failed, falling back to full re-index: ${error.message}`);
                debugLog(`[Index] Error stack: ${error.stack}`);
                // Fall through to full index
            }
        }
        // Full index (new codebase or force=true or Merkle diff failed).
        console.log(`[Index] Starting full index for: ${absolutePath}`);
        // Set to indexing status
        await this.setCodebaseIndexing(absolutePath, 0);
        // Track the codebase path
        trackCodebasePath(absolutePath);
        // Start background indexing — deliberately NOT awaited so this
        // handler returns immediately; errors are logged via the catch.
        this.startBackgroundIndexing(absolutePath, collectionName, customFileExtensions, customIgnorePatterns)
            .catch(err => debugLog(`[BACKGROUND-INDEX] Unhandled error: ${err.message || err}`));
        const pathInfo = codebasePath !== absolutePath
            ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'`
            : '';
        return {
            content: [{
                type: "text",
                text: `Started indexing '${absolutePath}' via SHARC backend.${pathInfo}\n\nIndexing ${isAlreadyIndexed ? '(full re-index)' : '(first time)'} is running in the background. You can search while indexing is in progress, but results may be incomplete until done.`
            }]
        };
    }
    catch (error) {
        console.error('Error in handleIndexCodebase:', error);
        return {
            content: [{
                type: "text",
                text: `Error starting indexing: ${error.message || error}`
            }],
            isError: true
        };
    }
}
|
|
937
|
+
/**
 * Full background indexing pipeline: scan & chunk every file, push chunks
 * to the backend in batches, then save a Merkle snapshot and start the
 * file watcher. Progress is split 0–50% (chunking) / 50–100% (indexing).
 * Never throws: any failure marks the codebase as index-failed instead.
 *
 * @param {string} codebasePath - Absolute codebase root.
 * @param {string} collectionName - Target backend collection.
 * @param {string[]} customExtensions - Extra file extensions to include.
 * @param {string[]} ignorePatterns - Glob-style patterns to skip.
 * @returns {Promise<void>}
 */
async startBackgroundIndexing(codebasePath, collectionName, customExtensions, ignorePatterns) {
    try {
        console.log(`[BACKGROUND-INDEX] Starting background indexing for: ${codebasePath}`);
        console.log(`[BACKGROUND-INDEX] Collection name: ${collectionName}`);
        // Scan and chunk files (first half of the progress bar).
        let lastUpdateTime = Date.now();
        const chunks = await this.scanAndChunkFiles(codebasePath, customExtensions, ignorePatterns, async (progress) => {
            const chunkProgress = progress.percentage * 0.5; // 0-50% for chunking
            // Update progress periodically (every 2 seconds) to throttle
            // backend status writes.
            const now = Date.now();
            if (now - lastUpdateTime >= 2000) {
                await this.setCodebaseIndexing(codebasePath, chunkProgress);
                lastUpdateTime = now;
            }
            console.log(`[BACKGROUND-INDEX] Chunking progress: ${progress.percentage.toFixed(1)}% (${progress.current}/${progress.total})`);
        });
        if (chunks.length === 0) {
            console.warn(`[BACKGROUND-INDEX] No chunks generated for ${codebasePath}`);
            await this.setCodebaseIndexFailed(codebasePath, 'No indexable files found', 0);
            return;
        }
        // Send ALL chunks in one request if possible (faster, less overhead).
        // Only batch if we exceed the 10,000 chunk limit per request.
        const MAX_CHUNKS_PER_REQUEST = 10000;
        const BATCH_SIZE = Math.min(chunks.length, MAX_CHUNKS_PER_REQUEST);
        let indexedCount = 0;
        const totalChunks = chunks.length;
        const totalBatches = Math.ceil(chunks.length / BATCH_SIZE);
        if (totalBatches === 1) {
            console.log(`[BACKGROUND-INDEX] Sending ALL ${totalChunks} chunks in single request`);
        }
        else {
            console.log(`[BACKGROUND-INDEX] Sending ${totalChunks} chunks in ${totalBatches} batches of ${BATCH_SIZE}`);
        }
        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
            const batch = chunks.slice(i, i + BATCH_SIZE);
            const isLastBatch = (i + BATCH_SIZE >= chunks.length);
            try {
                debugLog(`[BACKGROUND-INDEX] Calling indexChunks for batch (${batch.length} chunks, finalize=${isLastBatch})...`);
                await this.backendClient.indexChunks({
                    collection: collectionName,
                    chunks: batch,
                    createIfNotExists: true,
                    finalize: isLastBatch, // Only enable HNSW on final batch
                });
                debugLog(`[BACKGROUND-INDEX] indexChunks returned successfully`);
                indexedCount += batch.length;
                // Update progress (50-100% for embedding/indexing).
                const progress = 50 + (indexedCount / totalChunks) * 50;
                // Update status less frequently (every 5 seconds instead of 2),
                // but always on the final batch so 100% is recorded.
                const now = Date.now();
                if (now - lastUpdateTime >= 5000 || isLastBatch) {
                    await this.setCodebaseIndexing(codebasePath, progress);
                    lastUpdateTime = now;
                }
                console.log(`[BACKGROUND-INDEX] Indexed ${indexedCount}/${totalChunks} chunks (${progress.toFixed(1)}%)${isLastBatch ? ' [FINAL]' : ''}`);
            }
            catch (error) {
                console.error(`[BACKGROUND-INDEX] Error indexing batch: ${error.message}`);
                // Re-throw so the outer catch records the failure state.
                throw error;
            }
        }
        // Mark as indexed.
        debugLog(`[BACKGROUND-INDEX] Marking codebase as indexed...`);
        await this.setCodebaseIndexed(codebasePath, {
            indexedFiles: -1, // We don't track file count in backend mode
            totalChunks,
            status: 'completed',
        });
        debugLog(`[BACKGROUND-INDEX] Codebase marked as indexed`);
        debugLog(`[BACKGROUND-INDEX] ✅ Indexing completed! ${totalChunks} chunks indexed to collection: ${collectionName}`);
        // Save Merkle snapshot for future incremental syncs (best-effort:
        // without it the next index call re-hashes everything).
        try {
            debugLog(`[BACKGROUND-INDEX] Saving Merkle snapshot...`);
            const fileHashes = await this.generateFileHashes(codebasePath, ignorePatterns);
            await this.backendClient.saveMerkleSnapshot(codebasePath, {
                fileHashes: Array.from(fileHashes.entries()),
                merkleDAG: { nodes: [], rootIds: [] },
                fileCount: fileHashes.size,
            });
            debugLog(`[BACKGROUND-INDEX] Merkle snapshot saved (${fileHashes.size} files)`);
        }
        catch (e) {
            debugLog(`[BACKGROUND-INDEX] Could not save Merkle snapshot: ${e}`);
        }
        // Auto-start file watching for incremental updates.
        this.startWatching(codebasePath, collectionName);
        debugLog(`[BACKGROUND-INDEX] File watcher started`);
    }
    catch (error) {
        debugLog(`[BACKGROUND-INDEX] Error during indexing for ${codebasePath}: ${error.message || error}`);
        // Record failure along with how far we got, so status reporting can
        // show the last attempted percentage.
        const lastProgress = await this.getIndexingProgress(codebasePath);
        await this.setCodebaseIndexFailed(codebasePath, error.message || String(error), lastProgress);
    }
}
|
|
1032
|
+
/**
 * MCP tool handler: semantic search over an indexed codebase.
 * Validates the path, requires the codebase to be indexed (or at least
 * currently indexing), queries the backend, and formats results as text.
 *
 * @param {{path: string, query: string, limit?: number, extensionFilter?: string[]}} args
 *   limit defaults to 10 and is capped at 50 before hitting the backend.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
async handleSearchCode(args) {
    const { path: codebasePath, query, limit = 10, extensionFilter } = args;
    try {
        // Sync with backend first so indexed/indexing state is current.
        await this.syncIndexedCodebasesFromBackend();
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' is not a directory`
                }],
                isError: true
            };
        }
        trackCodebasePath(absolutePath);
        // Check if this codebase is indexed or being indexed; searches are
        // allowed mid-index with a warning about incomplete results.
        const indexedCodebases = await this.getIndexedCodebases();
        const indexingCodebases = await this.getIndexingCodebases();
        const isIndexed = indexedCodebases.includes(absolutePath);
        const isIndexing = indexingCodebases.includes(absolutePath);
        if (!isIndexed && !isIndexing) {
            return {
                content: [{
                    type: "text",
                    text: `Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`
                }],
                isError: true
            };
        }
        let indexingStatusMessage = '';
        if (isIndexing) {
            indexingStatusMessage = `\n⚠️ **Indexing in Progress**: This codebase is currently being indexed in the background. Search results may be incomplete until indexing completes.`;
        }
        const collectionName = this.generateCollectionName(absolutePath);
        console.log(`[SEARCH] Searching in collection: ${collectionName}`);
        console.log(`[SEARCH] Query: "${query}"`);
        // Search via backend (reranking enabled; limit capped at 50).
        const response = await this.backendClient.search({
            query,
            collection: collectionName,
            limit: Math.min(limit, 50),
            extensionFilter: Array.isArray(extensionFilter) ? extensionFilter : undefined,
            rerank: true,
        });
        if (response.results.length === 0) {
            let noResultsMessage = `No results found for query: "${query}" in codebase '${absolutePath}'`;
            if (isIndexing) {
                noResultsMessage += `\n\nNote: This codebase is still being indexed. Try searching again after indexing completes.`;
            }
            return {
                content: [{
                    type: "text",
                    text: noResultsMessage
                }]
            };
        }
        // Format results: numbered snippets with location, score, and a
        // fenced code block (content truncated to 5000 chars).
        const formattedResults = response.results.map((result, index) => {
            const location = `${result.relativePath}:${result.startLine}-${result.endLine}`;
            const context = truncateContent(result.content, 5000);
            const codebaseInfo = path.basename(absolutePath);
            return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` +
                ` Location: ${location}\n` +
                ` Score: ${result.score.toFixed(4)}\n` +
                ` Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`;
        }).join('\n');
        let resultMessage = `Found ${response.results.length} results for query: "${query}" in codebase '${absolutePath}'${indexingStatusMessage}\n\n${formattedResults}`;
        if (isIndexing) {
            resultMessage += `\n\n💡 **Tip**: This codebase is still being indexed. More results may become available as indexing progresses.`;
        }
        return {
            content: [{
                type: "text",
                text: resultMessage
            }]
        };
    }
    catch (error) {
        console.error('[SEARCH] Error:', error);
        return {
            content: [{
                type: "text",
                text: `Error searching code: ${error.message || error}`
            }],
            isError: true
        };
    }
}
|
|
1134
|
+
/**
 * MCP tool handler: delete a codebase's backend collection and remove it
 * from the snapshot. Tolerates the collection already being gone.
 *
 * @param {{path: string}} args - Codebase path (relative paths are resolved).
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
async handleClearIndex(args) {
    const { path: codebasePath } = args;
    const indexedCodebases = await this.getIndexedCodebases();
    const indexingCodebases = await this.getIndexingCodebases();
    // Short-circuit: nothing to clear anywhere.
    if (indexedCodebases.length === 0 && indexingCodebases.length === 0) {
        return {
            content: [{
                type: "text",
                text: "No codebases are currently indexed or being indexed."
            }]
        };
    }
    try {
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' is not a directory`
                }],
                isError: true
            };
        }
        // Check if this codebase is indexed or being indexed.
        const isIndexed = indexedCodebases.includes(absolutePath);
        const isIndexing = indexingCodebases.includes(absolutePath);
        if (!isIndexed && !isIndexing) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Codebase '${absolutePath}' is not indexed or being indexed.`
                }],
                isError: true
            };
        }
        const collectionName = this.generateCollectionName(absolutePath);
        console.log(`[CLEAR] Deleting collection: ${collectionName}`);
        try {
            await this.backendClient.deleteRepo(collectionName);
            console.log(`[CLEAR] Successfully deleted collection: ${collectionName}`);
        }
        catch (error) {
            // "not found" is fine (already cleared); anything else is a real
            // failure and propagates to the outer catch.
            if (!error.message?.includes('not found')) {
                throw error;
            }
            console.log(`[CLEAR] Collection already deleted or not found`);
        }
        // Remove from snapshot (backend).
        await this.removeCodebaseFromSnapshot(absolutePath);
        // Get updated counts for the summary message.
        const remainingIndexed = await this.getIndexedCodebases();
        const remainingIndexing = await this.getIndexingCodebases();
        let resultText = `Successfully cleared codebase '${absolutePath}'`;
        if (remainingIndexed.length > 0 || remainingIndexing.length > 0) {
            resultText += `\n${remainingIndexed.length} other indexed codebase(s) and ${remainingIndexing.length} indexing codebase(s) remain`;
        }
        return {
            content: [{
                type: "text",
                text: resultText
            }]
        };
    }
    catch (error) {
        console.error('[CLEAR] Error:', error);
        return {
            content: [{
                type: "text",
                text: `Error clearing index: ${error.message || error}`
            }],
            isError: true
        };
    }
}
|
|
1221
|
+
/**
 * MCP tool handler: report a codebase's indexing state
 * ('indexed' | 'indexing' | 'indexfailed' | anything else = not indexed),
 * enriched with stats/progress/error details when the snapshot has them.
 *
 * @param {{path: string}} args - Codebase path (relative paths are resolved).
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
async handleGetIndexingStatus(args) {
    const { path: codebasePath } = args;
    try {
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                    type: "text",
                    text: `Error: Path '${absolutePath}' is not a directory`
                }],
                isError: true
            };
        }
        // Check indexing status from backend snapshot.
        const status = await this.getCodebaseStatus(absolutePath);
        const info = await this.getCodebaseInfo(absolutePath);
        let statusMessage = '';
        switch (status) {
            case 'indexed':
                // Detail fields ('totalChunks', 'lastUpdated') may be absent,
                // hence the 'in' guard before reading them.
                if (info && 'totalChunks' in info) {
                    const indexedInfo = info;
                    statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`;
                    statusMessage += `\n📊 Statistics: ${indexedInfo.totalChunks} chunks`;
                    statusMessage += `\n🕐 Last updated: ${new Date(indexedInfo.lastUpdated).toLocaleString()}`;
                }
                else {
                    statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`;
                }
                break;
            case 'indexing':
                if (info && 'indexingPercentage' in info) {
                    const indexingInfo = info;
                    const progressPercentage = indexingInfo.indexingPercentage || 0;
                    statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed. Progress: ${progressPercentage.toFixed(1)}%`;
                    // Progress is split by the indexer: <50% = chunking phase,
                    // >=50% = embedding/indexing phase.
                    if (progressPercentage < 50) {
                        statusMessage += ' (Scanning and chunking files...)';
                    }
                    else {
                        statusMessage += ' (Generating embeddings via backend...)';
                    }
                    statusMessage += `\n🕐 Last updated: ${new Date(indexingInfo.lastUpdated).toLocaleString()}`;
                }
                else {
                    statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed.`;
                }
                break;
            case 'indexfailed':
                if (info && 'errorMessage' in info) {
                    const failedInfo = info;
                    statusMessage = `❌ Codebase '${absolutePath}' indexing failed.`;
                    statusMessage += `\n🚨 Error: ${failedInfo.errorMessage}`;
                    if (failedInfo.lastAttemptedPercentage !== undefined) {
                        statusMessage += `\n📊 Failed at: ${failedInfo.lastAttemptedPercentage.toFixed(1)}% progress`;
                    }
                    statusMessage += `\n💡 You can retry indexing by running the index_codebase command again.`;
                }
                else {
                    statusMessage = `❌ Codebase '${absolutePath}' indexing failed. You can retry indexing.`;
                }
                break;
            default:
                // Unknown/absent status is treated as "not indexed".
                statusMessage = `❌ Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`;
                break;
        }
        const pathInfo = codebasePath !== absolutePath
            ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'`
            : '';
        return {
            content: [{
                type: "text",
                text: statusMessage + pathInfo
            }]
        };
    }
    catch (error) {
        return {
            content: [{
                type: "text",
                text: `Error getting indexing status: ${error.message || error}`
            }],
            isError: true
        };
    }
}
|
|
1318
|
+
async handleStartWatch(args) {
|
|
1319
|
+
const { path: codebasePath } = args;
|
|
1320
|
+
try {
|
|
1321
|
+
// Force absolute path resolution
|
|
1322
|
+
const absolutePath = ensureAbsolutePath(codebasePath);
|
|
1323
|
+
// Validate path exists
|
|
1324
|
+
if (!(await this.pathExists(absolutePath))) {
|
|
1325
|
+
return {
|
|
1326
|
+
content: [{
|
|
1327
|
+
type: "text",
|
|
1328
|
+
text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
|
|
1329
|
+
}],
|
|
1330
|
+
isError: true
|
|
1331
|
+
};
|
|
1332
|
+
}
|
|
1333
|
+
// Check if it's a directory
|
|
1334
|
+
const stat = await fs.stat(absolutePath);
|
|
1335
|
+
if (!stat.isDirectory()) {
|
|
1336
|
+
return {
|
|
1337
|
+
content: [{
|
|
1338
|
+
type: "text",
|
|
1339
|
+
text: `Error: Path '${absolutePath}' is not a directory`
|
|
1340
|
+
}],
|
|
1341
|
+
isError: true
|
|
1342
|
+
};
|
|
1343
|
+
}
|
|
1344
|
+
// Check if this codebase is indexed
|
|
1345
|
+
const status = await this.getCodebaseStatus(absolutePath);
|
|
1346
|
+
if (status !== 'indexed') {
|
|
1347
|
+
return {
|
|
1348
|
+
content: [{
|
|
1349
|
+
type: "text",
|
|
1350
|
+
text: `Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`
|
|
1351
|
+
}],
|
|
1352
|
+
isError: true
|
|
1353
|
+
};
|
|
1354
|
+
}
|
|
1355
|
+
// Check if already watching
|
|
1356
|
+
if (this.isWatching(absolutePath)) {
|
|
1357
|
+
return {
|
|
1358
|
+
content: [{
|
|
1359
|
+
type: "text",
|
|
1360
|
+
text: `Codebase '${absolutePath}' is already being watched for changes.`
|
|
1361
|
+
}]
|
|
1362
|
+
};
|
|
1363
|
+
}
|
|
1364
|
+
// Start watching
|
|
1365
|
+
const collectionName = this.generateCollectionName(absolutePath);
|
|
1366
|
+
this.startWatching(absolutePath, collectionName);
|
|
1367
|
+
return {
|
|
1368
|
+
content: [{
|
|
1369
|
+
type: "text",
|
|
1370
|
+
text: `Started watching codebase '${absolutePath}' for file changes. Files will be automatically re-indexed when modified.`
|
|
1371
|
+
}]
|
|
1372
|
+
};
|
|
1373
|
+
}
|
|
1374
|
+
catch (error) {
|
|
1375
|
+
console.error('[START_WATCH] Error:', error);
|
|
1376
|
+
return {
|
|
1377
|
+
content: [{
|
|
1378
|
+
type: "text",
|
|
1379
|
+
text: `Error starting watch: ${error.message || error}`
|
|
1380
|
+
}],
|
|
1381
|
+
isError: true
|
|
1382
|
+
};
|
|
1383
|
+
}
|
|
1384
|
+
}
|
|
1385
|
+
async handleStopWatch(args) {
|
|
1386
|
+
const { path: codebasePath } = args;
|
|
1387
|
+
try {
|
|
1388
|
+
// Force absolute path resolution
|
|
1389
|
+
const absolutePath = ensureAbsolutePath(codebasePath);
|
|
1390
|
+
// Check if watching
|
|
1391
|
+
if (!this.isWatching(absolutePath)) {
|
|
1392
|
+
return {
|
|
1393
|
+
content: [{
|
|
1394
|
+
type: "text",
|
|
1395
|
+
text: `Codebase '${absolutePath}' is not being watched.`
|
|
1396
|
+
}]
|
|
1397
|
+
};
|
|
1398
|
+
}
|
|
1399
|
+
// Stop watching
|
|
1400
|
+
await this.stopWatching(absolutePath);
|
|
1401
|
+
return {
|
|
1402
|
+
content: [{
|
|
1403
|
+
type: "text",
|
|
1404
|
+
text: `Stopped watching codebase '${absolutePath}'. Files will no longer be automatically re-indexed.`
|
|
1405
|
+
}]
|
|
1406
|
+
};
|
|
1407
|
+
}
|
|
1408
|
+
catch (error) {
|
|
1409
|
+
console.error('[STOP_WATCH] Error:', error);
|
|
1410
|
+
return {
|
|
1411
|
+
content: [{
|
|
1412
|
+
type: "text",
|
|
1413
|
+
text: `Error stopping watch: ${error.message || error}`
|
|
1414
|
+
}],
|
|
1415
|
+
isError: true
|
|
1416
|
+
};
|
|
1417
|
+
}
|
|
1418
|
+
}
|
|
1419
|
+
async handleGetWatchStatus(_args) {
|
|
1420
|
+
try {
|
|
1421
|
+
const watchedCodebases = this.getWatchedCodebases();
|
|
1422
|
+
if (watchedCodebases.length === 0) {
|
|
1423
|
+
return {
|
|
1424
|
+
content: [{
|
|
1425
|
+
type: "text",
|
|
1426
|
+
text: "No codebases are currently being watched for file changes."
|
|
1427
|
+
}]
|
|
1428
|
+
};
|
|
1429
|
+
}
|
|
1430
|
+
const list = watchedCodebases.map((p, i) => `${i + 1}. ${p}`).join('\n');
|
|
1431
|
+
return {
|
|
1432
|
+
content: [{
|
|
1433
|
+
type: "text",
|
|
1434
|
+
text: `Currently watching ${watchedCodebases.length} codebase(s) for file changes:\n\n${list}\n\nFiles in these codebases will be automatically re-indexed when modified.`
|
|
1435
|
+
}]
|
|
1436
|
+
};
|
|
1437
|
+
}
|
|
1438
|
+
catch (error) {
|
|
1439
|
+
console.error('[GET_WATCH_STATUS] Error:', error);
|
|
1440
|
+
return {
|
|
1441
|
+
content: [{
|
|
1442
|
+
type: "text",
|
|
1443
|
+
text: `Error getting watch status: ${error.message || error}`
|
|
1444
|
+
}],
|
|
1445
|
+
isError: true
|
|
1446
|
+
};
|
|
1447
|
+
}
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
// ==================== Merkle Diff Helpers ====================
// Used for incremental indexing - only process changed files
// NOTE(review): this section header was hoisted above the class's static
// properties by the TypeScript compiler; the constant below is static config.
// Files larger than this byte count are skipped during indexing (commonly
// minified bundles or generated assets).
BackendToolHandlers.MAX_FILE_SIZE = 1000000; // 1MB - skip large/minified files
|
|
1453
|
+
//# sourceMappingURL=backend-handlers.js.map
|