@sharc-code/mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +134 -0
  2. package/dist/backend-client.d.ts +251 -0
  3. package/dist/backend-client.d.ts.map +1 -0
  4. package/dist/backend-client.js +269 -0
  5. package/dist/backend-client.js.map +1 -0
  6. package/dist/backend-handlers.d.ts +243 -0
  7. package/dist/backend-handlers.d.ts.map +1 -0
  8. package/dist/backend-handlers.js +1453 -0
  9. package/dist/backend-handlers.js.map +1 -0
  10. package/dist/config.d.ts +47 -0
  11. package/dist/config.d.ts.map +1 -0
  12. package/dist/config.js +94 -0
  13. package/dist/config.js.map +1 -0
  14. package/dist/index.d.ts +3 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +344 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/project-detector.d.ts +42 -0
  19. package/dist/project-detector.d.ts.map +1 -0
  20. package/dist/project-detector.js +135 -0
  21. package/dist/project-detector.js.map +1 -0
  22. package/dist/utils/env-manager.d.ts +19 -0
  23. package/dist/utils/env-manager.d.ts.map +1 -0
  24. package/dist/utils/env-manager.js +99 -0
  25. package/dist/utils/env-manager.js.map +1 -0
  26. package/dist/utils.d.ts +10 -0
  27. package/dist/utils.d.ts.map +1 -0
  28. package/dist/utils.js +27 -0
  29. package/dist/utils.js.map +1 -0
  30. package/dist/watcher/file-watcher.d.ts +64 -0
  31. package/dist/watcher/file-watcher.d.ts.map +1 -0
  32. package/dist/watcher/file-watcher.js +263 -0
  33. package/dist/watcher/file-watcher.js.map +1 -0
  34. package/dist/watcher/incremental-indexer.d.ts +68 -0
  35. package/dist/watcher/incremental-indexer.d.ts.map +1 -0
  36. package/dist/watcher/incremental-indexer.js +254 -0
  37. package/dist/watcher/incremental-indexer.js.map +1 -0
  38. package/dist/watcher/index.d.ts +10 -0
  39. package/dist/watcher/index.d.ts.map +1 -0
  40. package/dist/watcher/index.js +10 -0
  41. package/dist/watcher/index.js.map +1 -0
  42. package/dist/watcher/processing-queue.d.ts +79 -0
  43. package/dist/watcher/processing-queue.d.ts.map +1 -0
  44. package/dist/watcher/processing-queue.js +150 -0
  45. package/dist/watcher/processing-queue.js.map +1 -0
  46. package/dist/watcher/syntax-guard.d.ts +59 -0
  47. package/dist/watcher/syntax-guard.d.ts.map +1 -0
  48. package/dist/watcher/syntax-guard.js +136 -0
  49. package/dist/watcher/syntax-guard.js.map +1 -0
  50. package/package.json +52 -0
@@ -0,0 +1,1453 @@
1
+ /**
2
+ * Backend-aware Tool Handlers
3
+ * These handlers use the SHARC backend for embedding, search, and sync operations.
4
+ * File scanning and chunking still happen locally.
5
+ * Snapshot/status data is persisted in the backend database.
6
+ */
7
+ import * as fs from "fs/promises";
8
+ import * as fsSync from "fs";
9
+ import * as path from "path";
10
+ import * as crypto from "crypto";
11
+ import { ensureAbsolutePath, truncateContent, trackCodebasePath } from "./utils.js";
12
// Debug logging to file (since MCP stdout may not be visible in Claude Code)
const DEBUG_LOG_FILE = path.join(process.env.TEMP || process.env.TMP || '/tmp', 'sharc-mcp-debug.log');
/**
 * Append a timestamped line to the debug log file and echo it to stderr.
 *
 * Uses console.error rather than console.log for the echo: an MCP server
 * speaks JSON-RPC over stdout, so stray stdout writes can corrupt the
 * protocol stream; stderr is the safe diagnostics channel.
 *
 * @param {string} message - line to record; file write errors are ignored.
 */
function debugLog(message) {
    const timestamp = new Date().toISOString();
    const line = `[${timestamp}] ${message}\n`;
    try {
        fsSync.appendFileSync(DEBUG_LOG_FILE, line);
    }
    catch {
        // Ignore file write errors — logging must never break indexing.
    }
    console.error(message);
}
25
+ import { BackendClient, } from "./backend-client.js";
26
+ // Import splitters from @sharc-code/splitter package
27
+ import { AstCodeSplitter, LangChainCodeSplitter } from "@sharc-code/splitter";
28
+ // Import file watcher components
29
+ import { FileWatcherService, IncrementalIndexer, SyntaxGuard } from "./watcher/index.js";
30
// Tiered chunking configuration: each tier groups file extensions that
// share a max chunk size, overlap, and splitter strategy.
const CODE_TIER = {
    // Tier 1: AST-supported code — semantic units, no overlap needed.
    name: 'code',
    extensions: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.py', '.pyw', '.java', '.go', '.rs', '.cs', '.cpp', '.hpp', '.cc', '.cxx', '.c', '.h', '.scala'],
    maxSize: 3500,
    overlap: 0,
    useAst: true,
};
const DOCS_TIER = {
    // Tier 2: documentation — smaller chunks; overlap preserves prose flow.
    name: 'docs',
    extensions: ['.md', '.mdx', '.rst', '.txt'],
    maxSize: 1500,
    overlap: 150,
    useAst: false,
};
const CONFIG_TIER = {
    // Tier 3: config/data — structured data, moderate overlap.
    // NOTE(review): '.env.example' can never match path.extname output
    // (extname of "x.env.example" is ".example") — confirm intent.
    name: 'config',
    extensions: ['.json', '.yaml', '.yml', '.toml', '.xml', '.env.example', '.ini', '.cfg'],
    maxSize: 1500,
    overlap: 100,
    useAst: false,
};
const OTHER_TIER = {
    // Tier 4: LangChain fallback for languages without AST support.
    name: 'other',
    extensions: ['.rb', '.php', '.swift', '.kt', '.kts', '.vue', '.svelte', '.html', '.css', '.scss', '.less', '.sql'],
    maxSize: 1500,
    overlap: 100,
    useAst: false,
};
const CHUNK_TIERS = [CODE_TIER, DOCS_TIER, CONFIG_TIER, OTHER_TIER];
// Fast extension -> tier lookup built once at module load.
const EXTENSION_TO_TIER = new Map(
    CHUNK_TIERS.flatMap((tier) => tier.extensions.map((ext) => [ext, tier])),
);
// Fallback tier for extensions not listed in any tier.
const DEFAULT_TIER = {
    name: 'unknown',
    extensions: [],
    maxSize: 1500,
    overlap: 100,
    useAst: false,
};
/**
 * Resolve the chunking tier for a file extension (case-insensitive).
 */
function getTierForExtension(ext) {
    return EXTENSION_TO_TIER.get(ext.toLowerCase()) || DEFAULT_TIER;
}
// Every extension the scanner recognizes, across all tiers.
const DEFAULT_FILE_EXTENSIONS = CHUNK_TIERS.flatMap((tier) => tier.extensions);
// Export for use by startup-sync and project-detector
export { CHUNK_TIERS, DEFAULT_FILE_EXTENSIONS, getTierForExtension };
// Cache TTL for snapshot data (5 seconds to allow quick status updates during indexing)
const SNAPSHOT_CACHE_TTL = 5000;
export class BackendToolHandlers {
    /**
     * Tool handlers that delegate embedding/search/sync to the SHARC backend.
     *
     * @param {object} config - must provide `sharcBackendUrl` and `sharcApiKey`.
     * @throws {Error} when either backend setting is missing.
     */
    constructor(config) {
        // Local cache for snapshot data to reduce API calls (see getSnapshot);
        // timestamp 0 means "expired".
        this.cachedSnapshot = null;
        this.cacheTimestamp = 0;
        // File watcher for incremental indexing (created lazily) plus one
        // incremental indexer per watched codebase path.
        this.fileWatcherService = null;
        this.incrementalIndexers = new Map();
        // Splitter cache for reuse across chunking operations.
        this.splitterCache = new Map();
        if (!config.sharcBackendUrl || !config.sharcApiKey) {
            throw new Error('SHARC_BACKEND_URL and SHARC_API_KEY are required for backend mode');
        }
        this.backendClient = new BackendClient(config.sharcBackendUrl, config.sharcApiKey);
        this.currentWorkspace = process.cwd();
        this.config = config;
        this.syntaxGuard = new SyntaxGuard();
        console.log(`[BackendHandlers] Initialized with backend URL: ${config.sharcBackendUrl}`);
    }
+ // ==================== Snapshot Cache Methods ====================
108
+ /**
109
+ * Load snapshot from backend with caching
110
+ */
111
+ async getSnapshot(forceRefresh = false) {
112
+ const now = Date.now();
113
+ if (!forceRefresh && this.cachedSnapshot && (now - this.cacheTimestamp) < SNAPSHOT_CACHE_TTL) {
114
+ return this.cachedSnapshot;
115
+ }
116
+ try {
117
+ this.cachedSnapshot = await this.backendClient.loadSnapshot();
118
+ this.cacheTimestamp = now;
119
+ return this.cachedSnapshot;
120
+ }
121
+ catch (error) {
122
+ console.error('[BackendHandlers] Failed to load snapshot from backend:', error);
123
+ // Return empty snapshot on error
124
+ return {
125
+ formatVersion: 'v2',
126
+ codebases: {},
127
+ lastUpdated: new Date().toISOString(),
128
+ };
129
+ }
130
+ }
131
/**
 * Invalidate local cache to force refresh on next access.
 *
 * Zeroing the timestamp makes the TTL check in getSnapshot fail, so the
 * next call reloads from the backend (cachedSnapshot itself is kept).
 */
invalidateCache() {
    this.cacheTimestamp = 0;
}
+ /**
138
+ * Get list of indexed codebases
139
+ */
140
+ async getIndexedCodebases() {
141
+ const snapshot = await this.getSnapshot();
142
+ return Object.entries(snapshot.codebases)
143
+ .filter(([_, info]) => info.status === 'indexed')
144
+ .map(([path]) => path);
145
+ }
146
+ /**
147
+ * Get list of codebases currently being indexed
148
+ */
149
+ async getIndexingCodebases() {
150
+ const snapshot = await this.getSnapshot();
151
+ return Object.entries(snapshot.codebases)
152
+ .filter(([_, info]) => info.status === 'indexing')
153
+ .map(([path]) => path);
154
+ }
155
+ /**
156
+ * Get codebase status
157
+ */
158
+ async getCodebaseStatus(codebasePath) {
159
+ const snapshot = await this.getSnapshot();
160
+ const info = snapshot.codebases[codebasePath];
161
+ if (!info)
162
+ return 'not_found';
163
+ return info.status;
164
+ }
165
+ /**
166
+ * Get complete codebase info
167
+ */
168
+ async getCodebaseInfo(codebasePath) {
169
+ const snapshot = await this.getSnapshot();
170
+ return snapshot.codebases[codebasePath] || null;
171
+ }
172
+ /**
173
+ * Get indexing progress for a codebase
174
+ */
175
+ async getIndexingProgress(codebasePath) {
176
+ const snapshot = await this.getSnapshot();
177
+ const info = snapshot.codebases[codebasePath];
178
+ if (info && info.status === 'indexing') {
179
+ return info.indexingPercentage;
180
+ }
181
+ return undefined;
182
+ }
183
+ /**
184
+ * Set codebase to indexing status
185
+ */
186
+ async setCodebaseIndexing(codebasePath, progress) {
187
+ const info = {
188
+ status: 'indexing',
189
+ indexingPercentage: progress,
190
+ lastUpdated: new Date().toISOString(),
191
+ };
192
+ await this.backendClient.updateCodebaseInfo(codebasePath, info);
193
+ this.invalidateCache();
194
+ }
195
+ /**
196
+ * Set codebase to indexed status
197
+ */
198
+ async setCodebaseIndexed(codebasePath, stats) {
199
+ const info = {
200
+ status: 'indexed',
201
+ indexedFiles: stats.indexedFiles,
202
+ totalChunks: stats.totalChunks,
203
+ indexStatus: stats.status,
204
+ lastUpdated: new Date().toISOString(),
205
+ };
206
+ await this.backendClient.updateCodebaseInfo(codebasePath, info);
207
+ this.invalidateCache();
208
+ }
209
+ /**
210
+ * Set codebase to failed status
211
+ */
212
+ async setCodebaseIndexFailed(codebasePath, errorMessage, lastAttemptedPercentage) {
213
+ const info = {
214
+ status: 'indexfailed',
215
+ errorMessage,
216
+ lastAttemptedPercentage,
217
+ lastUpdated: new Date().toISOString(),
218
+ };
219
+ await this.backendClient.updateCodebaseInfo(codebasePath, info);
220
+ this.invalidateCache();
221
+ }
222
/**
 * Remove codebase from snapshot completely.
 *
 * Delegates the deletion to the backend, then invalidates the local
 * snapshot cache so subsequent reads reflect the removal.
 */
async removeCodebaseFromSnapshot(codebasePath) {
    await this.backendClient.removeCodebaseFromSnapshot(codebasePath);
    this.invalidateCache();
}
+ /**
230
+ * Generate a collection name from a codebase path
231
+ * Must match the backend's expectation (URL-safe, unique per path)
232
+ * Uses SHA-256 for better collision resistance
233
+ */
234
+ generateCollectionName(codebasePath) {
235
+ const hash = crypto.createHash('sha256').update(codebasePath).digest('hex').substring(0, 12);
236
+ const baseName = path.basename(codebasePath).replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
237
+ return `hybrid_code_chunks_${baseName}_${hash}`;
238
+ }
239
+ /**
240
+ * Sync indexed codebases from backend collections
241
+ * Compares snapshot data with actual vector collections and removes stale entries
242
+ */
243
+ async syncIndexedCodebasesFromBackend() {
244
+ try {
245
+ console.log(`[SYNC-BACKEND] 🔄 Syncing indexed codebases from backend...`);
246
+ const response = await this.backendClient.listRepos();
247
+ console.log(`[SYNC-BACKEND] 📋 Found ${response.count} collections in backend`);
248
+ // Get snapshot codebases and compare with backend collections
249
+ const snapshot = await this.getSnapshot(true); // Force refresh
250
+ const snapshotCodebases = Object.entries(snapshot.codebases)
251
+ .filter(([_, info]) => info.status === 'indexed')
252
+ .map(([path]) => path);
253
+ const backendCollections = new Set(response.repos.map(r => r.name));
254
+ // Remove snapshot entries whose collections don't exist in backend
255
+ for (const codebasePath of snapshotCodebases) {
256
+ const collectionName = this.generateCollectionName(codebasePath);
257
+ if (!backendCollections.has(collectionName)) {
258
+ await this.removeCodebaseFromSnapshot(codebasePath);
259
+ console.log(`[SYNC-BACKEND] ➖ Removed stale codebase (collection not in backend): ${codebasePath}`);
260
+ }
261
+ }
262
+ console.log(`[SYNC-BACKEND] ✅ Backend sync completed`);
263
+ }
264
+ catch (error) {
265
+ console.error(`[SYNC-BACKEND] ❌ Error syncing from backend:`, error.message || error);
266
+ }
267
+ }
268
+ // ==================== File Watcher Methods ====================
269
+ /**
270
+ * Initialize the file watcher service (lazy initialization)
271
+ */
272
+ initializeFileWatcher() {
273
+ if (!this.fileWatcherService) {
274
+ // Create the watcher with our change processor
275
+ this.fileWatcherService = new FileWatcherService((change) => this.processFileChange(change), {
276
+ debounceMs: parseInt(process.env.SHARC_WATCH_DEBOUNCE_MS || '2000', 10),
277
+ verbose: process.env.DEBUG === 'true' || process.env.LOG_LEVEL === 'verbose',
278
+ });
279
+ console.log('[Watcher] File watcher service initialized');
280
+ }
281
+ return this.fileWatcherService;
282
+ }
283
+ /**
284
+ * Process a single file change (used by FileWatcherService)
285
+ */
286
+ async processFileChange(change) {
287
+ // Find the incremental indexer for this file
288
+ for (const [codebasePath, indexer] of this.incrementalIndexers.entries()) {
289
+ // Check if this file belongs to this codebase
290
+ const absoluteCodebase = path.resolve(codebasePath);
291
+ const absoluteFile = path.resolve(change.absolutePath);
292
+ if (absoluteFile.startsWith(absoluteCodebase)) {
293
+ return indexer.processChange(change);
294
+ }
295
+ }
296
+ // No indexer found for this file
297
+ console.warn(`[Watcher] No indexer found for file: ${change.absolutePath}`);
298
+ return { success: false, chunksDeleted: 0, chunksIndexed: 0, error: 'No indexer found' };
299
+ }
300
+ /**
301
+ * Create an incremental indexer for a codebase
302
+ */
303
+ createIncrementalIndexer(codebasePath, collectionName) {
304
+ // Create splitter cache for chunking
305
+ const splitterCache = new Map();
306
+ const getSplitterForTier = (tier) => {
307
+ const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
308
+ if (!splitterCache.has(cacheKey)) {
309
+ if (tier.useAst) {
310
+ splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
311
+ }
312
+ else {
313
+ splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
314
+ }
315
+ }
316
+ return splitterCache.get(cacheKey);
317
+ };
318
+ const indexer = new IncrementalIndexer({
319
+ codebasePath,
320
+ collectionName,
321
+ backendClient: this.backendClient,
322
+ getLanguageFromExtension: (ext) => this.getLanguageFromExtension(ext),
323
+ checkSyntax: (content, language) => {
324
+ return this.syntaxGuard.checkSyntax(content, language);
325
+ },
326
+ chunkFile: async (content, language, relativePath) => {
327
+ const ext = path.extname(relativePath).toLowerCase();
328
+ const tier = getTierForExtension(ext);
329
+ const splitter = getSplitterForTier(tier);
330
+ const chunks = await splitter.split(content, language, relativePath);
331
+ const indexChunks = [];
332
+ for (const chunk of chunks) {
333
+ if (!chunk.content || chunk.content.trim().length === 0) {
334
+ continue;
335
+ }
336
+ // Add file path context for non-code tiers
337
+ let finalContent = chunk.content;
338
+ if (!tier.useAst && tier.name !== 'code') {
339
+ const pathParts = relativePath.replace(/\\/g, '/').split('/');
340
+ const shortPath = pathParts.slice(-2).join('/');
341
+ const commentPrefix = language === 'python' ? '#' : '//';
342
+ finalContent = `${commentPrefix} File: ${shortPath}\n${chunk.content}`;
343
+ }
344
+ indexChunks.push({
345
+ content: finalContent,
346
+ relativePath,
347
+ startLine: chunk.metadata.startLine,
348
+ endLine: chunk.metadata.endLine,
349
+ language,
350
+ metadata: { codebasePath },
351
+ });
352
+ }
353
+ return indexChunks;
354
+ },
355
+ });
356
+ return indexer;
357
+ }
358
+ /**
359
+ * Start watching a codebase for file changes
360
+ */
361
+ startWatching(codebasePath, collectionName) {
362
+ const normalizedPath = path.resolve(codebasePath);
363
+ // Initialize watcher service if needed
364
+ const watcher = this.initializeFileWatcher();
365
+ // Check if already watching
366
+ if (watcher.isWatching(normalizedPath)) {
367
+ console.log(`[Watcher] Already watching: ${normalizedPath}`);
368
+ return;
369
+ }
370
+ // Create incremental indexer for this codebase
371
+ const indexer = this.createIncrementalIndexer(normalizedPath, collectionName);
372
+ this.incrementalIndexers.set(normalizedPath, indexer);
373
+ // Start watching
374
+ watcher.watchCodebase(normalizedPath, collectionName);
375
+ }
376
+ /**
377
+ * Stop watching a codebase
378
+ */
379
+ async stopWatching(codebasePath) {
380
+ const normalizedPath = path.resolve(codebasePath);
381
+ if (this.fileWatcherService) {
382
+ await this.fileWatcherService.unwatchCodebase(normalizedPath);
383
+ }
384
+ // Clean up indexer
385
+ this.incrementalIndexers.delete(normalizedPath);
386
+ }
387
+ /**
388
+ * Get list of watched codebases
389
+ */
390
+ getWatchedCodebases() {
391
+ if (!this.fileWatcherService) {
392
+ return [];
393
+ }
394
+ return this.fileWatcherService.getWatchedCodebases();
395
+ }
396
+ /**
397
+ * Check if a codebase is being watched
398
+ */
399
+ isWatching(codebasePath) {
400
+ if (!this.fileWatcherService) {
401
+ return false;
402
+ }
403
+ return this.fileWatcherService.isWatching(codebasePath);
404
+ }
405
/**
 * Graceful shutdown of all watchers and cleanup resources.
 *
 * Stops the file watcher service (if one was created), then drops all
 * incremental indexers and cached splitters.
 */
async shutdown() {
    if (this.fileWatcherService) {
        await this.fileWatcherService.shutdown();
        this.fileWatcherService = null;
    }
    this.incrementalIndexers.clear();
    // Clear splitter cache to release any native resources (tree-sitter parsers)
    this.splitterCache.clear();
    console.log('[BackendHandlers] Shutdown complete');
}
+ // ==================== Splitter Helper ====================
419
+ /**
420
+ * Get a splitter for a given tier (cached for reuse)
421
+ */
422
+ getSplitterForTier(tier) {
423
+ const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
424
+ if (!this.splitterCache.has(cacheKey)) {
425
+ if (tier.useAst) {
426
+ this.splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
427
+ }
428
+ else {
429
+ this.splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
430
+ }
431
+ }
432
+ return this.splitterCache.get(cacheKey);
433
+ }
434
+ /**
435
+ * Check if a path exists (async replacement for fs.existsSync)
436
+ */
437
+ async pathExists(filePath) {
438
+ try {
439
+ await fs.access(filePath);
440
+ return true;
441
+ }
442
+ catch {
443
+ return false;
444
+ }
445
+ }
446
+ /**
447
+ * Scan and chunk files from a codebase directory
448
+ * Uses async file I/O to avoid blocking the event loop
449
+ */
450
+ async scanAndChunkFiles(codebasePath, customExtensions = [], ignorePatterns = [], onProgress) {
451
+ const allChunks = [];
452
+ // Get all supported extensions
453
+ const extensions = new Set([...DEFAULT_FILE_EXTENSIONS, ...customExtensions]);
454
+ console.log(`[CHUNK] Scanning with extensions: ${[...extensions].join(', ')}`);
455
+ // Find all files recursively using async operations
456
+ const files = [];
457
+ const scanDir = async (dir) => {
458
+ const entries = await fs.readdir(dir, { withFileTypes: true });
459
+ for (const entry of entries) {
460
+ const fullPath = path.join(dir, entry.name);
461
+ const relativePath = path.relative(codebasePath, fullPath);
462
+ // Skip ignored patterns
463
+ if (this.shouldIgnore(relativePath, ignorePatterns)) {
464
+ continue;
465
+ }
466
+ if (entry.isDirectory()) {
467
+ // Skip hidden directories and common ignore dirs
468
+ if (!entry.name.startsWith('.') &&
469
+ !['node_modules', 'dist', 'build', '__pycache__', 'venv', '.git'].includes(entry.name)) {
470
+ await scanDir(fullPath);
471
+ }
472
+ }
473
+ else if (entry.isFile()) {
474
+ const ext = path.extname(entry.name).toLowerCase();
475
+ if (extensions.has(ext)) {
476
+ files.push(fullPath);
477
+ }
478
+ }
479
+ }
480
+ };
481
+ await scanDir(codebasePath);
482
+ console.log(`[CHUNK] Found ${files.length} files to process`);
483
+ // Create splitters for each tier (cached for reuse)
484
+ const splitterCache = new Map();
485
+ const getSplitterForTier = (tier) => {
486
+ const cacheKey = `${tier.name}-${tier.maxSize}-${tier.overlap}`;
487
+ if (!splitterCache.has(cacheKey)) {
488
+ if (tier.useAst) {
489
+ splitterCache.set(cacheKey, new AstCodeSplitter(tier.maxSize, tier.overlap));
490
+ }
491
+ else {
492
+ splitterCache.set(cacheKey, new LangChainCodeSplitter(tier.maxSize, tier.overlap));
493
+ }
494
+ }
495
+ return splitterCache.get(cacheKey);
496
+ };
497
+ // Track tier statistics
498
+ const tierStats = {};
499
+ for (let i = 0; i < files.length; i++) {
500
+ const filePath = files[i];
501
+ const relativePath = path.relative(codebasePath, filePath);
502
+ try {
503
+ // Check file size before reading
504
+ const stat = await fs.stat(filePath);
505
+ if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
506
+ console.warn(`[CHUNK] Skipping large file (${(stat.size / 1024 / 1024).toFixed(1)}MB): ${relativePath}`);
507
+ continue;
508
+ }
509
+ const content = await fs.readFile(filePath, 'utf-8');
510
+ const ext = path.extname(filePath).toLowerCase();
511
+ const language = this.getLanguageFromExtension(ext);
512
+ // Get tier-specific settings for this file type
513
+ const tier = getTierForExtension(ext);
514
+ const splitter = getSplitterForTier(tier);
515
+ // Split the file into chunks
516
+ const chunks = await splitter.split(content, language, relativePath);
517
+ // Track tier stats
518
+ if (!tierStats[tier.name]) {
519
+ tierStats[tier.name] = { files: 0, chunks: 0 };
520
+ }
521
+ tierStats[tier.name].files++;
522
+ for (const chunk of chunks) {
523
+ // Skip empty or whitespace-only chunks
524
+ if (!chunk.content || chunk.content.trim().length === 0) {
525
+ console.warn(`[CHUNK] Skipping empty chunk from ${relativePath}:${chunk.metadata.startLine}`);
526
+ continue;
527
+ }
528
+ // Add file path context for non-code tiers (docs, config, other)
529
+ let finalContent = chunk.content;
530
+ if (!tier.useAst && tier.name !== 'code') {
531
+ // Shorten path to last 2 segments
532
+ const pathParts = relativePath.replace(/\\/g, '/').split('/');
533
+ const shortPath = pathParts.slice(-2).join('/');
534
+ const commentPrefix = language === 'python' ? '#' : '//';
535
+ finalContent = `${commentPrefix} File: ${shortPath}\n${chunk.content}`;
536
+ }
537
+ allChunks.push({
538
+ content: finalContent,
539
+ relativePath,
540
+ startLine: chunk.metadata.startLine,
541
+ endLine: chunk.metadata.endLine,
542
+ language,
543
+ metadata: {
544
+ codebasePath,
545
+ },
546
+ });
547
+ tierStats[tier.name].chunks++;
548
+ }
549
+ // Report progress
550
+ if (onProgress) {
551
+ const percentage = ((i + 1) / files.length) * 100;
552
+ onProgress({
553
+ phase: 'chunking',
554
+ percentage,
555
+ current: i + 1,
556
+ total: files.length,
557
+ });
558
+ }
559
+ }
560
+ catch (error) {
561
+ console.warn(`[CHUNK] Error processing ${relativePath}: ${error.message}`);
562
+ }
563
+ }
564
+ // Log tier statistics
565
+ console.log(`[CHUNK] Generated ${allChunks.length} chunks from ${files.length} files`);
566
+ for (const [tierName, stats] of Object.entries(tierStats)) {
567
+ console.log(`[CHUNK] ${tierName}: ${stats.files} files → ${stats.chunks} chunks`);
568
+ }
569
+ return allChunks;
570
+ }
571
+ shouldIgnore(relativePath, ignorePatterns) {
572
+ const defaultIgnore = [
573
+ 'node_modules', '.git', 'dist', 'build', '__pycache__',
574
+ 'venv', '.env', '.vscode', '.idea', 'coverage',
575
+ ];
576
+ const allPatterns = [...defaultIgnore, ...ignorePatterns];
577
+ for (const pattern of allPatterns) {
578
+ if (relativePath.includes(pattern)) {
579
+ return true;
580
+ }
581
+ }
582
+ return false;
583
+ }
584
+ /**
585
+ * Generate SHA-256 hashes for all supported files in the codebase
586
+ */
587
+ async generateFileHashes(rootDir, ignorePatterns = []) {
588
+ const hashes = new Map();
589
+ const supportedExtensions = new Set(DEFAULT_FILE_EXTENSIONS);
590
+ const scanDir = async (dir) => {
591
+ let entries;
592
+ try {
593
+ entries = await fs.readdir(dir, { withFileTypes: true });
594
+ }
595
+ catch {
596
+ return; // Can't read directory, skip
597
+ }
598
+ for (const entry of entries) {
599
+ const fullPath = path.join(dir, entry.name);
600
+ const relativePath = path.relative(rootDir, fullPath);
601
+ // Skip hidden files/dirs and ignored patterns
602
+ if (entry.name.startsWith('.') || this.shouldIgnore(relativePath, ignorePatterns)) {
603
+ continue;
604
+ }
605
+ if (entry.isDirectory()) {
606
+ await scanDir(fullPath);
607
+ }
608
+ else if (entry.isFile()) {
609
+ const ext = path.extname(entry.name).toLowerCase();
610
+ if (supportedExtensions.has(ext)) {
611
+ try {
612
+ const stat = await fs.stat(fullPath);
613
+ if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
614
+ continue; // Skip large files
615
+ }
616
+ const content = await fs.readFile(fullPath, 'utf-8');
617
+ const hash = crypto.createHash('sha256').update(content).digest('hex');
618
+ hashes.set(relativePath, hash);
619
+ }
620
+ catch {
621
+ // Can't read file, skip
622
+ }
623
+ }
624
+ }
625
+ }
626
+ };
627
+ await scanDir(rootDir);
628
+ return hashes;
629
+ }
630
+ /**
631
+ * Compare old and new file hashes to detect changes
632
+ */
633
+ compareFileHashes(oldHashes, newHashes) {
634
+ const result = { added: [], modified: [], removed: [] };
635
+ // If no old hashes, all files are "added" (first index)
636
+ if (!oldHashes || oldHashes.length === 0) {
637
+ result.added = Array.from(newHashes.keys());
638
+ return result;
639
+ }
640
+ const oldMap = new Map(oldHashes);
641
+ // Find added and modified files
642
+ for (const [filePath, hash] of newHashes) {
643
+ if (!oldMap.has(filePath)) {
644
+ result.added.push(filePath);
645
+ }
646
+ else if (oldMap.get(filePath) !== hash) {
647
+ result.modified.push(filePath);
648
+ }
649
+ oldMap.delete(filePath);
650
+ }
651
+ // Remaining in oldMap are removed files
652
+ result.removed = Array.from(oldMap.keys());
653
+ return result;
654
+ }
655
/**
 * Apply incremental changes to the index (Merkle diff).
 * Only indexes added/modified files, deletes removed file vectors.
 *
 * Order matters: removed-file vectors are deleted first (best-effort),
 * then added/modified files are chunked and indexed in a single batch,
 * with the backend deleting old chunks for modified files (deleteFirst).
 *
 * NOTE(review): BackendToolHandlers.MAX_FILE_SIZE is referenced here but
 * defined outside this excerpt — confirm it is set before use.
 *
 * @param {string} codebasePath - root of the codebase on disk.
 * @param {string} collectionName - backend collection to update.
 * @param {{added: string[], modified: string[], removed: string[]}} changes
 * @returns {Promise<{chunksIndexed: number, chunksDeleted: number, filesProcessed: number}>}
 */
async applyIncrementalChanges(codebasePath, collectionName, changes,
// TODO: Implement ignore pattern filtering for incremental indexing
// Currently ignorePatterns is accepted but not applied during incremental updates
_ignorePatterns = []) {
    let chunksIndexed = 0;
    let chunksDeleted = 0;
    // Delete vectors for removed files (batch delete); failure here is
    // logged but does not abort the rest of the update.
    if (changes.removed.length > 0) {
        try {
            await this.backendClient.deleteFileVectorsBatch(collectionName, changes.removed);
            chunksDeleted = changes.removed.length;
            debugLog(`[MerkleDiff] Deleted vectors for ${changes.removed.length} removed files`);
        }
        catch (e) {
            debugLog(`[MerkleDiff] Could not delete vectors: ${e}`);
        }
    }
    // Process added and modified files
    const filesToIndex = [...changes.added, ...changes.modified];
    if (filesToIndex.length === 0) {
        return { chunksIndexed, chunksDeleted, filesProcessed: 0 };
    }
    debugLog(`[MerkleDiff] Processing ${filesToIndex.length} files (${changes.added.length} added, ${changes.modified.length} modified)`);
    const allChunks = [];
    const deleteFirst = changes.modified; // Delete old chunks for modified files
    for (const relativePath of filesToIndex) {
        const absolutePath = path.join(codebasePath, relativePath);
        // Path traversal protection - ensure resolved path is within codebase
        const resolvedPath = path.resolve(absolutePath);
        const resolvedCodebase = path.resolve(codebasePath);
        if (!resolvedPath.startsWith(resolvedCodebase + path.sep) && resolvedPath !== resolvedCodebase) {
            console.warn(`[SECURITY] Path traversal blocked: ${relativePath}`);
            continue;
        }
        try {
            // Check file size before reading
            const stat = await fs.stat(absolutePath);
            if (stat.size > BackendToolHandlers.MAX_FILE_SIZE) {
                console.warn(`[CHUNK] Skipping large file (${(stat.size / 1024 / 1024).toFixed(1)}MB): ${relativePath}`);
                continue;
            }
            const content = await fs.readFile(absolutePath, 'utf-8');
            const ext = path.extname(relativePath).toLowerCase();
            const language = this.getLanguageFromExtension(ext);
            // Chunk the file using tiered strategy
            const tier = getTierForExtension(ext);
            const chunks = await this.chunkFileWithTier(content, language, relativePath, codebasePath, tier);
            allChunks.push(...chunks);
        }
        catch {
            // File might have been deleted, skip
        }
    }
    // Index all chunks in one batch
    if (allChunks.length > 0) {
        try {
            debugLog(`[MerkleDiff] Indexing ${allChunks.length} chunks...`);
            const response = await this.backendClient.indexChunks({
                collection: collectionName,
                chunks: allChunks,
                createIfNotExists: true,
                finalize: false, // Don't rebuild HNSW for incremental updates
                deleteFirst,
            });
            chunksIndexed = response.indexed;
            chunksDeleted += response.deleted || 0;
            debugLog(`[MerkleDiff] Indexed ${chunksIndexed} chunks, deleted ${response.deleted || 0} for modified files`);
        }
        catch (error) {
            // Indexing failure is fatal for this update: rethrow so the
            // caller can mark the codebase as failed.
            debugLog(`[MerkleDiff] Error indexing chunks: ${error.message || error}`);
            throw error;
        }
    }
    return { chunksIndexed, chunksDeleted, filesProcessed: filesToIndex.length };
}
+ /**
735
+ * Chunk a single file using the tier strategy (helper for incremental indexing)
736
+ */
737
+ async chunkFileWithTier(content, language, relativePath, codebasePath, tier) {
738
+ let splitter = this.splitterCache.get(tier.name);
739
+ if (!splitter) {
740
+ if (tier.useAst) {
741
+ splitter = new AstCodeSplitter(tier.maxSize);
742
+ }
743
+ else {
744
+ splitter = new LangChainCodeSplitter(tier.maxSize, tier.overlap);
745
+ }
746
+ this.splitterCache.set(tier.name, splitter);
747
+ }
748
+ const rawChunks = await splitter.split(content, language, relativePath);
749
+ return rawChunks.map(chunk => ({
750
+ content: chunk.content,
751
+ relativePath,
752
+ startLine: chunk.metadata.startLine,
753
+ endLine: chunk.metadata.endLine,
754
+ language,
755
+ }));
756
+ }
757
/**
 * Map a lowercase file extension to the language tag used by the
 * splitters and syntax guard; unknown extensions map to 'unknown'.
 *
 * @param {string} ext - extension including the leading dot, lowercase.
 * @returns {string} language identifier.
 */
getLanguageFromExtension(ext) {
    const langMap = {
        // Tier 1: Code (AST-supported)
        '.ts': 'typescript', '.tsx': 'typescript',
        '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
        '.py': 'python', '.pyw': 'python',
        '.rs': 'rust',
        '.go': 'go',
        '.java': 'java',
        '.c': 'c', '.h': 'c',
        '.cpp': 'cpp', '.hpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp',
        '.cs': 'csharp',
        '.scala': 'scala',
        // Tier 2: Documentation
        '.md': 'markdown', '.mdx': 'markdown',
        '.rst': 'rst', '.txt': 'text',
        // Tier 3: Config/Data
        '.json': 'json',
        '.yaml': 'yaml', '.yml': 'yaml',
        '.toml': 'toml',
        '.xml': 'xml',
        '.ini': 'ini', '.cfg': 'ini',
        // Tier 4: Other code
        '.rb': 'ruby',
        '.php': 'php',
        '.swift': 'swift',
        '.kt': 'kotlin', '.kts': 'kotlin',
        '.vue': 'vue',
        '.svelte': 'svelte',
        '.html': 'html',
        '.css': 'css', '.scss': 'scss', '.less': 'less',
        '.sql': 'sql',
    };
    return langMap[ext] || 'unknown';
}
/**
 * MCP tool: index a codebase via the SHARC backend.
 *
 * Flow: sync state from backend -> validate path -> then one of:
 *  - force=true on an indexed codebase: delete the backend collection and fall
 *    through to a full re-index;
 *  - already indexed (no force): Merkle-diff incremental sync (falls back to a
 *    full re-index if the diff fails);
 *  - otherwise: kick off a fire-and-forget full background index.
 *
 * @param {{path: string, force?: boolean, splitter?: string, customExtensions?: string[], ignorePatterns?: string[]}} args
 * @returns {Promise<object>} MCP response envelope; `isError: true` on failure.
 */
async handleIndexCodebase(args) {
    // `splitter` is accepted by the tool schema but intentionally unused here.
    const { path: codebasePath, force, splitter: _splitter, customExtensions, ignorePatterns } = args;
    const forceReindex = force || false;
    const customFileExtensions = customExtensions || [];
    const customIgnorePatterns = ignorePatterns || [];
    try {
        // Sync with backend first so indexed/indexing lists below are current.
        await this.syncIndexedCodebasesFromBackend();
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                    }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' is not a directory`
                    }],
                isError: true
            };
        }
        // Check if already indexing — refuse concurrent index runs for the same path.
        const indexingCodebases = await this.getIndexingCodebases();
        if (indexingCodebases.includes(absolutePath)) {
            return {
                content: [{
                        type: "text",
                        text: `Codebase '${absolutePath}' is already being indexed in the background. Please wait for completion.`
                    }],
                isError: true
            };
        }
        const collectionName = this.generateCollectionName(absolutePath);
        const indexedCodebases = await this.getIndexedCodebases();
        const isAlreadyIndexed = indexedCodebases.includes(absolutePath);
        // If force reindex, delete existing collection and do full re-index
        if (forceReindex && isAlreadyIndexed) {
            try {
                const hasCollection = await this.backendClient.hasCollection(collectionName);
                if (hasCollection) {
                    console.log(`[FORCE-REINDEX] 🔄 Deleting existing collection: ${collectionName}`);
                    await this.backendClient.deleteRepo(collectionName);
                }
            }
            catch (error) {
                // Deliberate best-effort: a failed delete still proceeds to re-index.
                console.warn(`[FORCE-REINDEX] Could not delete collection: ${error}`);
            }
            await this.removeCodebaseFromSnapshot(absolutePath);
            // Continue to full index below
        }
        // If already indexed (and not forcing), do Merkle diff for incremental sync
        if (!forceReindex && isAlreadyIndexed) {
            debugLog(`[Index] Codebase already indexed, checking for changes via Merkle diff...`);
            const startTime = Date.now();
            try {
                // Load existing Merkle snapshot
                const merkleSnapshot = await this.backendClient.loadMerkleSnapshot(absolutePath);
                debugLog(`[Index] Merkle snapshot loaded: ${merkleSnapshot ? `${merkleSnapshot.fileHashes?.length || 0} file hashes` : 'null'}`);
                // Generate current file hashes
                debugLog(`[Index] Scanning files for changes...`);
                const currentHashes = await this.generateFileHashes(absolutePath, customIgnorePatterns);
                debugLog(`[Index] Found ${currentHashes.size} files`);
                // Compare to detect changes (a null snapshot treats every file as added).
                const changes = this.compareFileHashes(merkleSnapshot?.fileHashes || null, currentHashes);
                const totalChanges = changes.added.length + changes.modified.length + changes.removed.length;
                if (totalChanges === 0) {
                    const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                    debugLog(`[Index] No changes detected (${duration}s)`);
                    // Ensure watcher is running
                    this.startWatching(absolutePath, collectionName);
                    return {
                        content: [{
                                type: "text",
                                text: `✅ Index is up to date for '${absolutePath}'.\n\nNo file changes detected since last index. Checked ${currentHashes.size} files in ${duration}s.`
                            }]
                    };
                }
                // Apply incremental changes
                debugLog(`[Index] Found ${totalChanges} changes: +${changes.added.length} added, ~${changes.modified.length} modified, -${changes.removed.length} removed`);
                const result = await this.applyIncrementalChanges(absolutePath, collectionName, changes, customIgnorePatterns);
                // Save updated Merkle snapshot (merkleDAG is kept empty; only file
                // hashes are used for diffing in this implementation).
                await this.backendClient.saveMerkleSnapshot(absolutePath, {
                    fileHashes: Array.from(currentHashes.entries()),
                    merkleDAG: { nodes: [], rootIds: [] },
                    fileCount: currentHashes.size,
                });
                // Start/resume file watcher
                this.startWatching(absolutePath, collectionName);
                const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                debugLog(`[Index] Incremental sync completed in ${duration}s`);
                return {
                    content: [{
                            type: "text",
                            text: `✅ Synced '${absolutePath}' in ${duration}s\n\n` +
                                `Changes: +${changes.added.length} added, ~${changes.modified.length} modified, -${changes.removed.length} removed\n` +
                                `Indexed ${result.chunksIndexed} chunks, deleted ${result.chunksDeleted} old chunks.`
                        }]
                };
            }
            catch (error) {
                debugLog(`[Index] Merkle diff failed, falling back to full re-index: ${error.message}`);
                debugLog(`[Index] Error stack: ${error.stack}`);
                // Fall through to full index
            }
        }
        // Full index (new codebase or force=true or Merkle diff failed)
        console.log(`[Index] Starting full index for: ${absolutePath}`);
        // Set to indexing status
        await this.setCodebaseIndexing(absolutePath, 0);
        // Track the codebase path
        trackCodebasePath(absolutePath);
        // Start background indexing fire-and-forget; failures are recorded by
        // startBackgroundIndexing itself, this .catch only prevents an
        // unhandled-rejection crash.
        this.startBackgroundIndexing(absolutePath, collectionName, customFileExtensions, customIgnorePatterns)
            .catch(err => debugLog(`[BACKGROUND-INDEX] Unhandled error: ${err.message || err}`));
        const pathInfo = codebasePath !== absolutePath
            ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'`
            : '';
        return {
            content: [{
                    type: "text",
                    text: `Started indexing '${absolutePath}' via SHARC backend.${pathInfo}\n\nIndexing ${isAlreadyIndexed ? '(full re-index)' : '(first time)'} is running in the background. You can search while indexing is in progress, but results may be incomplete until done.`
                }]
        };
    }
    catch (error) {
        console.error('Error in handleIndexCodebase:', error);
        return {
            content: [{
                    type: "text",
                    text: `Error starting indexing: ${error.message || error}`
                }],
            isError: true
        };
    }
}
937
/**
 * Run a full index of a codebase in the background: scan + chunk all files,
 * push chunks to the backend in batches, then record success/failure in the
 * snapshot and start the file watcher.
 *
 * Progress model: 0-50% is chunking, 50-100% is backend embedding/indexing.
 * Never throws — all errors are caught and recorded via setCodebaseIndexFailed.
 *
 * @param {string} codebasePath - Absolute codebase root.
 * @param {string} collectionName - Backend collection to index into.
 * @param {string[]} customExtensions - Extra file extensions to include.
 * @param {string[]} ignorePatterns - Extra ignore patterns.
 * @returns {Promise<void>}
 */
async startBackgroundIndexing(codebasePath, collectionName, customExtensions, ignorePatterns) {
    try {
        console.log(`[BACKGROUND-INDEX] Starting background indexing for: ${codebasePath}`);
        console.log(`[BACKGROUND-INDEX] Collection name: ${collectionName}`);
        // Scan and chunk files. lastUpdateTime throttles status writes and is
        // shared between this callback and the batch loop below.
        let lastUpdateTime = Date.now();
        const chunks = await this.scanAndChunkFiles(codebasePath, customExtensions, ignorePatterns, async (progress) => {
            const chunkProgress = progress.percentage * 0.5; // 0-50% for chunking
            // Update progress periodically (every 2 seconds)
            const now = Date.now();
            if (now - lastUpdateTime >= 2000) {
                await this.setCodebaseIndexing(codebasePath, chunkProgress);
                lastUpdateTime = now;
            }
            console.log(`[BACKGROUND-INDEX] Chunking progress: ${progress.percentage.toFixed(1)}% (${progress.current}/${progress.total})`);
        });
        if (chunks.length === 0) {
            console.warn(`[BACKGROUND-INDEX] No chunks generated for ${codebasePath}`);
            await this.setCodebaseIndexFailed(codebasePath, 'No indexable files found', 0);
            return;
        }
        // Send ALL chunks in one request if possible (faster, less overhead)
        // Only batch if we exceed the 10,000 chunk limit per request
        const MAX_CHUNKS_PER_REQUEST = 10000;
        const BATCH_SIZE = Math.min(chunks.length, MAX_CHUNKS_PER_REQUEST);
        let indexedCount = 0;
        const totalChunks = chunks.length;
        const totalBatches = Math.ceil(chunks.length / BATCH_SIZE);
        if (totalBatches === 1) {
            console.log(`[BACKGROUND-INDEX] Sending ALL ${totalChunks} chunks in single request`);
        }
        else {
            console.log(`[BACKGROUND-INDEX] Sending ${totalChunks} chunks in ${totalBatches} batches of ${BATCH_SIZE}`);
        }
        for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
            const batch = chunks.slice(i, i + BATCH_SIZE);
            const isLastBatch = (i + BATCH_SIZE >= chunks.length);
            try {
                debugLog(`[BACKGROUND-INDEX] Calling indexChunks for batch (${batch.length} chunks, finalize=${isLastBatch})...`);
                await this.backendClient.indexChunks({
                    collection: collectionName,
                    chunks: batch,
                    createIfNotExists: true,
                    finalize: isLastBatch, // Only enable HNSW on final batch
                });
                debugLog(`[BACKGROUND-INDEX] indexChunks returned successfully`);
                indexedCount += batch.length;
                // Update progress (50-100% for embedding/indexing)
                const progress = 50 + (indexedCount / totalChunks) * 50;
                // Update status less frequently (every 5 seconds instead of 2)
                const now = Date.now();
                if (now - lastUpdateTime >= 5000 || isLastBatch) {
                    await this.setCodebaseIndexing(codebasePath, progress);
                    lastUpdateTime = now;
                }
                console.log(`[BACKGROUND-INDEX] Indexed ${indexedCount}/${totalChunks} chunks (${progress.toFixed(1)}%)${isLastBatch ? ' [FINAL]' : ''}`);
            }
            catch (error) {
                // Rethrow so the outer catch records the failure in the snapshot.
                console.error(`[BACKGROUND-INDEX] Error indexing batch: ${error.message}`);
                throw error;
            }
        }
        // Mark as indexed
        debugLog(`[BACKGROUND-INDEX] Marking codebase as indexed...`);
        await this.setCodebaseIndexed(codebasePath, {
            indexedFiles: -1, // We don't track file count in backend mode
            totalChunks,
            status: 'completed',
        });
        debugLog(`[BACKGROUND-INDEX] Codebase marked as indexed`);
        debugLog(`[BACKGROUND-INDEX] ✅ Indexing completed! ${totalChunks} chunks indexed to collection: ${collectionName}`);
        // Save Merkle snapshot for future incremental syncs (best-effort: a
        // failure here only means the next index run does a full rescan).
        try {
            debugLog(`[BACKGROUND-INDEX] Saving Merkle snapshot...`);
            const fileHashes = await this.generateFileHashes(codebasePath, ignorePatterns);
            await this.backendClient.saveMerkleSnapshot(codebasePath, {
                fileHashes: Array.from(fileHashes.entries()),
                merkleDAG: { nodes: [], rootIds: [] },
                fileCount: fileHashes.size,
            });
            debugLog(`[BACKGROUND-INDEX] Merkle snapshot saved (${fileHashes.size} files)`);
        }
        catch (e) {
            debugLog(`[BACKGROUND-INDEX] Could not save Merkle snapshot: ${e}`);
        }
        // Auto-start file watching for incremental updates
        this.startWatching(codebasePath, collectionName);
        debugLog(`[BACKGROUND-INDEX] File watcher started`);
    }
    catch (error) {
        debugLog(`[BACKGROUND-INDEX] Error during indexing for ${codebasePath}: ${error.message || error}`);
        const lastProgress = await this.getIndexingProgress(codebasePath);
        await this.setCodebaseIndexFailed(codebasePath, error.message || String(error), lastProgress);
    }
}
1032
/**
 * MCP tool: semantic search over an indexed codebase via the SHARC backend.
 *
 * Validates the path, requires the codebase to be indexed (or at least
 * currently indexing, in which case results are flagged as possibly
 * incomplete), queries the backend with reranking, and formats results as a
 * numbered, fenced-code-block listing.
 *
 * @param {{path: string, query: string, limit?: number, extensionFilter?: string[]}} args
 * @returns {Promise<object>} MCP response envelope; `isError: true` on failure.
 */
async handleSearchCode(args) {
    const { path: codebasePath, query, limit = 10, extensionFilter } = args;
    try {
        // Sync with backend first
        await this.syncIndexedCodebasesFromBackend();
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                    }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' is not a directory`
                    }],
                isError: true
            };
        }
        trackCodebasePath(absolutePath);
        // Check if this codebase is indexed or being indexed
        const indexedCodebases = await this.getIndexedCodebases();
        const indexingCodebases = await this.getIndexingCodebases();
        const isIndexed = indexedCodebases.includes(absolutePath);
        const isIndexing = indexingCodebases.includes(absolutePath);
        if (!isIndexed && !isIndexing) {
            return {
                content: [{
                        type: "text",
                        text: `Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`
                    }],
                isError: true
            };
        }
        let indexingStatusMessage = '';
        if (isIndexing) {
            indexingStatusMessage = `\n⚠️ **Indexing in Progress**: This codebase is currently being indexed in the background. Search results may be incomplete until indexing completes.`;
        }
        const collectionName = this.generateCollectionName(absolutePath);
        // NOTE(review): other methods here log via debugLog; console.log writes
        // to stdout, which an MCP stdio transport reserves for protocol
        // messages — confirm this server runs over a transport where that is safe.
        console.log(`[SEARCH] Searching in collection: ${collectionName}`);
        console.log(`[SEARCH] Query: "${query}"`);
        // Search via backend; limit is capped at 50 regardless of caller input.
        const response = await this.backendClient.search({
            query,
            collection: collectionName,
            limit: Math.min(limit, 50),
            extensionFilter: Array.isArray(extensionFilter) ? extensionFilter : undefined,
            rerank: true,
        });
        if (response.results.length === 0) {
            let noResultsMessage = `No results found for query: "${query}" in codebase '${absolutePath}'`;
            if (isIndexing) {
                noResultsMessage += `\n\nNote: This codebase is still being indexed. Try searching again after indexing completes.`;
            }
            return {
                content: [{
                        type: "text",
                        text: noResultsMessage
                    }]
            };
        }
        // Format results: one numbered entry per hit, content truncated to 5000 chars.
        const formattedResults = response.results.map((result, index) => {
            const location = `${result.relativePath}:${result.startLine}-${result.endLine}`;
            const context = truncateContent(result.content, 5000);
            const codebaseInfo = path.basename(absolutePath);
            return `${index + 1}. Code snippet (${result.language}) [${codebaseInfo}]\n` +
                ` Location: ${location}\n` +
                ` Score: ${result.score.toFixed(4)}\n` +
                ` Context: \n\`\`\`${result.language}\n${context}\n\`\`\`\n`;
        }).join('\n');
        let resultMessage = `Found ${response.results.length} results for query: "${query}" in codebase '${absolutePath}'${indexingStatusMessage}\n\n${formattedResults}`;
        if (isIndexing) {
            resultMessage += `\n\n💡 **Tip**: This codebase is still being indexed. More results may become available as indexing progresses.`;
        }
        return {
            content: [{
                    type: "text",
                    text: resultMessage
                }]
        };
    }
    catch (error) {
        console.error('[SEARCH] Error:', error);
        return {
            content: [{
                    type: "text",
                    text: `Error searching code: ${error.message || error}`
                }],
            isError: true
        };
    }
}
1134
/**
 * MCP tool: remove a codebase's index — delete its backend collection and
 * drop it from the snapshot.
 *
 * NOTE(review): this handler requires the path to still exist on disk, so an
 * index for a deleted directory cannot be cleared — confirm that is intended.
 * NOTE(review): unlike index/search, it does not call
 * syncIndexedCodebasesFromBackend before reading the indexed/indexing lists.
 *
 * @param {{path: string}} args - `path` is the codebase root to clear.
 * @returns {Promise<object>} MCP response envelope; `isError: true` on failure.
 */
async handleClearIndex(args) {
    const { path: codebasePath } = args;
    const indexedCodebases = await this.getIndexedCodebases();
    const indexingCodebases = await this.getIndexingCodebases();
    // Fast exit when there is nothing indexed at all.
    if (indexedCodebases.length === 0 && indexingCodebases.length === 0) {
        return {
            content: [{
                    type: "text",
                    text: "No codebases are currently indexed or being indexed."
                }]
        };
    }
    try {
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                    }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' is not a directory`
                    }],
                isError: true
            };
        }
        // Check if this codebase is indexed or being indexed
        const isIndexed = indexedCodebases.includes(absolutePath);
        const isIndexing = indexingCodebases.includes(absolutePath);
        if (!isIndexed && !isIndexing) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Codebase '${absolutePath}' is not indexed or being indexed.`
                    }],
                isError: true
            };
        }
        const collectionName = this.generateCollectionName(absolutePath);
        console.log(`[CLEAR] Deleting collection: ${collectionName}`);
        try {
            await this.backendClient.deleteRepo(collectionName);
            console.log(`[CLEAR] Successfully deleted collection: ${collectionName}`);
        }
        catch (error) {
            // 'not found' is treated as already-deleted; anything else is fatal.
            if (!error.message?.includes('not found')) {
                throw error;
            }
            console.log(`[CLEAR] Collection already deleted or not found`);
        }
        // Remove from snapshot (backend)
        await this.removeCodebaseFromSnapshot(absolutePath);
        // Get updated counts
        const remainingIndexed = await this.getIndexedCodebases();
        const remainingIndexing = await this.getIndexingCodebases();
        let resultText = `Successfully cleared codebase '${absolutePath}'`;
        if (remainingIndexed.length > 0 || remainingIndexing.length > 0) {
            resultText += `\n${remainingIndexed.length} other indexed codebase(s) and ${remainingIndexing.length} indexing codebase(s) remain`;
        }
        return {
            content: [{
                    type: "text",
                    text: resultText
                }]
        };
    }
    catch (error) {
        console.error('[CLEAR] Error:', error);
        return {
            content: [{
                    type: "text",
                    text: `Error clearing index: ${error.message || error}`
                }],
            isError: true
        };
    }
}
1221
/**
 * MCP tool: report the indexing status of a codebase from the backend snapshot.
 *
 * Status values handled: 'indexed', 'indexing', 'indexfailed'; anything else
 * is reported as not indexed. The per-status detail fields are probed with
 * `'key' in info` because the snapshot's info shape differs per status.
 *
 * @param {{path: string}} args - `path` is the codebase root to query.
 * @returns {Promise<object>} MCP response envelope; `isError: true` on failure.
 */
async handleGetIndexingStatus(args) {
    const { path: codebasePath } = args;
    try {
        // Force absolute path resolution
        const absolutePath = ensureAbsolutePath(codebasePath);
        // Validate path exists
        if (!(await this.pathExists(absolutePath))) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
                    }],
                isError: true
            };
        }
        // Check if it's a directory
        const stat = await fs.stat(absolutePath);
        if (!stat.isDirectory()) {
            return {
                content: [{
                        type: "text",
                        text: `Error: Path '${absolutePath}' is not a directory`
                    }],
                isError: true
            };
        }
        // Check indexing status from backend snapshot
        const status = await this.getCodebaseStatus(absolutePath);
        const info = await this.getCodebaseInfo(absolutePath);
        let statusMessage = '';
        switch (status) {
            case 'indexed':
                if (info && 'totalChunks' in info) {
                    const indexedInfo = info;
                    statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`;
                    statusMessage += `\n📊 Statistics: ${indexedInfo.totalChunks} chunks`;
                    statusMessage += `\n🕐 Last updated: ${new Date(indexedInfo.lastUpdated).toLocaleString()}`;
                }
                else {
                    statusMessage = `✅ Codebase '${absolutePath}' is fully indexed and ready for search.`;
                }
                break;
            case 'indexing':
                if (info && 'indexingPercentage' in info) {
                    const indexingInfo = info;
                    const progressPercentage = indexingInfo.indexingPercentage || 0;
                    statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed. Progress: ${progressPercentage.toFixed(1)}%`;
                    // <50% is the chunking phase, >=50% the embedding phase
                    // (matches the split in startBackgroundIndexing).
                    if (progressPercentage < 50) {
                        statusMessage += ' (Scanning and chunking files...)';
                    }
                    else {
                        statusMessage += ' (Generating embeddings via backend...)';
                    }
                    statusMessage += `\n🕐 Last updated: ${new Date(indexingInfo.lastUpdated).toLocaleString()}`;
                }
                else {
                    statusMessage = `🔄 Codebase '${absolutePath}' is currently being indexed.`;
                }
                break;
            case 'indexfailed':
                if (info && 'errorMessage' in info) {
                    const failedInfo = info;
                    statusMessage = `❌ Codebase '${absolutePath}' indexing failed.`;
                    statusMessage += `\n🚨 Error: ${failedInfo.errorMessage}`;
                    if (failedInfo.lastAttemptedPercentage !== undefined) {
                        statusMessage += `\n📊 Failed at: ${failedInfo.lastAttemptedPercentage.toFixed(1)}% progress`;
                    }
                    statusMessage += `\n💡 You can retry indexing by running the index_codebase command again.`;
                }
                else {
                    statusMessage = `❌ Codebase '${absolutePath}' indexing failed. You can retry indexing.`;
                }
                break;
            default:
                statusMessage = `❌ Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`;
                break;
        }
        const pathInfo = codebasePath !== absolutePath
            ? `\nNote: Input path '${codebasePath}' was resolved to absolute path '${absolutePath}'`
            : '';
        return {
            content: [{
                    type: "text",
                    text: statusMessage + pathInfo
                }]
        };
    }
    catch (error) {
        return {
            content: [{
                    type: "text",
                    text: `Error getting indexing status: ${error.message || error}`
                }],
            isError: true
        };
    }
}
1318
+ async handleStartWatch(args) {
1319
+ const { path: codebasePath } = args;
1320
+ try {
1321
+ // Force absolute path resolution
1322
+ const absolutePath = ensureAbsolutePath(codebasePath);
1323
+ // Validate path exists
1324
+ if (!(await this.pathExists(absolutePath))) {
1325
+ return {
1326
+ content: [{
1327
+ type: "text",
1328
+ text: `Error: Path '${absolutePath}' does not exist. Original input: '${codebasePath}'`
1329
+ }],
1330
+ isError: true
1331
+ };
1332
+ }
1333
+ // Check if it's a directory
1334
+ const stat = await fs.stat(absolutePath);
1335
+ if (!stat.isDirectory()) {
1336
+ return {
1337
+ content: [{
1338
+ type: "text",
1339
+ text: `Error: Path '${absolutePath}' is not a directory`
1340
+ }],
1341
+ isError: true
1342
+ };
1343
+ }
1344
+ // Check if this codebase is indexed
1345
+ const status = await this.getCodebaseStatus(absolutePath);
1346
+ if (status !== 'indexed') {
1347
+ return {
1348
+ content: [{
1349
+ type: "text",
1350
+ text: `Codebase '${absolutePath}' is not indexed.\n\nPlease run index_codebase first:\n index_codebase(path="${absolutePath}")\n\nThis only needs to be done once per project.`
1351
+ }],
1352
+ isError: true
1353
+ };
1354
+ }
1355
+ // Check if already watching
1356
+ if (this.isWatching(absolutePath)) {
1357
+ return {
1358
+ content: [{
1359
+ type: "text",
1360
+ text: `Codebase '${absolutePath}' is already being watched for changes.`
1361
+ }]
1362
+ };
1363
+ }
1364
+ // Start watching
1365
+ const collectionName = this.generateCollectionName(absolutePath);
1366
+ this.startWatching(absolutePath, collectionName);
1367
+ return {
1368
+ content: [{
1369
+ type: "text",
1370
+ text: `Started watching codebase '${absolutePath}' for file changes. Files will be automatically re-indexed when modified.`
1371
+ }]
1372
+ };
1373
+ }
1374
+ catch (error) {
1375
+ console.error('[START_WATCH] Error:', error);
1376
+ return {
1377
+ content: [{
1378
+ type: "text",
1379
+ text: `Error starting watch: ${error.message || error}`
1380
+ }],
1381
+ isError: true
1382
+ };
1383
+ }
1384
+ }
1385
+ async handleStopWatch(args) {
1386
+ const { path: codebasePath } = args;
1387
+ try {
1388
+ // Force absolute path resolution
1389
+ const absolutePath = ensureAbsolutePath(codebasePath);
1390
+ // Check if watching
1391
+ if (!this.isWatching(absolutePath)) {
1392
+ return {
1393
+ content: [{
1394
+ type: "text",
1395
+ text: `Codebase '${absolutePath}' is not being watched.`
1396
+ }]
1397
+ };
1398
+ }
1399
+ // Stop watching
1400
+ await this.stopWatching(absolutePath);
1401
+ return {
1402
+ content: [{
1403
+ type: "text",
1404
+ text: `Stopped watching codebase '${absolutePath}'. Files will no longer be automatically re-indexed.`
1405
+ }]
1406
+ };
1407
+ }
1408
+ catch (error) {
1409
+ console.error('[STOP_WATCH] Error:', error);
1410
+ return {
1411
+ content: [{
1412
+ type: "text",
1413
+ text: `Error stopping watch: ${error.message || error}`
1414
+ }],
1415
+ isError: true
1416
+ };
1417
+ }
1418
+ }
1419
+ async handleGetWatchStatus(_args) {
1420
+ try {
1421
+ const watchedCodebases = this.getWatchedCodebases();
1422
+ if (watchedCodebases.length === 0) {
1423
+ return {
1424
+ content: [{
1425
+ type: "text",
1426
+ text: "No codebases are currently being watched for file changes."
1427
+ }]
1428
+ };
1429
+ }
1430
+ const list = watchedCodebases.map((p, i) => `${i + 1}. ${p}`).join('\n');
1431
+ return {
1432
+ content: [{
1433
+ type: "text",
1434
+ text: `Currently watching ${watchedCodebases.length} codebase(s) for file changes:\n\n${list}\n\nFiles in these codebases will be automatically re-indexed when modified.`
1435
+ }]
1436
+ };
1437
+ }
1438
+ catch (error) {
1439
+ console.error('[GET_WATCH_STATUS] Error:', error);
1440
+ return {
1441
+ content: [{
1442
+ type: "text",
1443
+ text: `Error getting watch status: ${error.message || error}`
1444
+ }],
1445
+ isError: true
1446
+ };
1447
+ }
1448
+ }
1449
+ }
1450
// ==================== Merkle Diff Helpers ====================
// Used for incremental indexing - only process changed files
// Static class constant (tsc-emitted assignment): upper bound, in bytes, on
// files considered for hashing/indexing by the Merkle-diff scan.
BackendToolHandlers.MAX_FILE_SIZE = 1000000; // 1MB - skip large/minified files
//# sourceMappingURL=backend-handlers.js.map