code-graph-context 1.1.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +71 -44
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +416 -17
  8. package/dist/core/parsers/parser-factory.js +5 -3
  9. package/dist/core/parsers/typescript-parser.js +618 -50
  10. package/dist/core/parsers/workspace-parser.js +554 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +153 -5
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +45 -6
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +2 -2
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +84 -18
  35. package/dist/mcp/tools/index.js +13 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +318 -58
  40. package/dist/mcp/tools/search-codebase.tool.js +56 -16
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -13
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +147 -48
  47. package/package.json +4 -2
@@ -3,19 +3,66 @@
3
3
  * Parses TypeScript/NestJS projects and builds Neo4j graph
4
4
  */
5
5
  import { writeFileSync } from 'fs';
6
- import { stat } from 'fs/promises';
7
- import { join, resolve } from 'path';
8
- import { glob } from 'glob';
6
+ import { constants as fsConstants } from 'fs';
7
+ import { stat, access, realpath } from 'fs/promises';
8
+ import { dirname, join, resolve, sep } from 'path';
9
+ import { fileURLToPath } from 'url';
10
+ import { Worker } from 'worker_threads';
9
11
  import { z } from 'zod';
10
- import { EXCLUDE_PATTERNS_GLOB } from '../../constants.js';
11
12
  import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
12
13
  import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
13
14
  import { ParserFactory } from '../../core/parsers/parser-factory.js';
15
+ import { resolveProjectId, getProjectName, UPSERT_PROJECT_QUERY, UPDATE_PROJECT_STATUS_QUERY, } from '../../core/utils/project-id.js';
14
16
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
15
- import { hashFile } from '../../utils/file-utils.js';
16
17
  import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, FILE_PATHS, LOG_CONFIG } from '../constants.js';
18
+ import { deleteSourceFileSubgraphs, loadExistingNodesForEdgeDetection, getCrossFileEdges, } from '../handlers/cross-file-edge.helpers.js';
19
+ import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
17
20
  import { GraphGeneratorHandler } from '../handlers/graph-generator.handler.js';
21
+ import { StreamingImportHandler } from '../handlers/streaming-import.handler.js';
22
+ import { jobManager } from '../services/job-manager.js';
23
+ import { watchManager } from '../services/watch-manager.js';
18
24
  import { createErrorResponse, createSuccessResponse, formatParseSuccess, formatParsePartialSuccess, debugLog, } from '../utils.js';
25
+ // Threshold for using streaming import (files)
26
+ const STREAMING_THRESHOLD = 100;
27
+ // Worker thread timeout (30 minutes)
28
+ const WORKER_TIMEOUT_MS = 30 * 60 * 1000;
29
+ /**
30
+ * Validates that a path exists and is accessible
31
+ * @throws Error if path doesn't exist or isn't accessible
32
+ */
33
+ const validatePathExists = async (path, pathType) => {
34
+ try {
35
+ await access(path, fsConstants.R_OK);
36
+ const stats = await stat(path);
37
+ if (pathType === 'directory' && !stats.isDirectory()) {
38
+ throw new Error(`Path exists but is not a directory: ${path}`);
39
+ }
40
+ if (pathType === 'file' && !stats.isFile()) {
41
+ throw new Error(`Path exists but is not a file: ${path}`);
42
+ }
43
+ }
44
+ catch (error) {
45
+ if (error.code === 'ENOENT') {
46
+ throw new Error(`Path does not exist: ${path}`);
47
+ }
48
+ throw error;
49
+ }
50
+ };
51
+ /**
52
+ * Validates that a resolved file path stays within the project directory
53
+ * to prevent path traversal attacks via symlinks
54
+ * @throws Error if path escapes project directory
55
+ */
56
+ const _validatePathWithinProject = async (filePath, projectPath) => {
57
+ const realProjectPath = await realpath(projectPath);
58
+ const realFilePath = await realpath(filePath);
59
+ // Ensure file path is within project directory
60
+ if (!realFilePath.startsWith(realProjectPath + sep) && realFilePath !== realProjectPath) {
61
+ throw new Error(`SECURITY: Path traversal detected - file "${filePath}" resolves outside project directory`);
62
+ }
63
+ };
64
+ // Export for potential use by other modules
65
+ export { _validatePathWithinProject as validatePathWithinProject };
19
66
  export const createParseTypescriptProjectTool = (server) => {
20
67
  server.registerTool(TOOL_NAMES.parseTypescriptProject, {
21
68
  title: TOOL_METADATA[TOOL_NAMES.parseTypescriptProject].title,
@@ -23,7 +70,11 @@ export const createParseTypescriptProjectTool = (server) => {
23
70
  inputSchema: {
24
71
  projectPath: z.string().describe('Path to the TypeScript project root directory'),
25
72
  tsconfigPath: z.string().describe('Path to TypeScript project tsconfig.json file'),
26
- clearExisting: z.boolean().optional().describe('Clear existing graph data first'),
73
+ projectId: z
74
+ .string()
75
+ .optional()
76
+ .describe('Optional project ID override. If not provided, auto-generated from projectPath'),
77
+ clearExisting: z.boolean().optional().describe('Clear existing graph data for this project first'),
27
78
  excludeNodeTypes: z
28
79
  .array(z.string())
29
80
  .optional()
@@ -33,49 +84,293 @@ export const createParseTypescriptProjectTool = (server) => {
33
84
  .optional()
34
85
  .default('auto')
35
86
  .describe('Project framework type (auto-detect by default)'),
87
+ chunkSize: z
88
+ .number()
89
+ .optional()
90
+ .default(50)
91
+ .describe('Files per chunk for streaming import (default: 50). Set to 0 to disable streaming.'),
92
+ useStreaming: z
93
+ .enum(['auto', 'always', 'never'])
94
+ .optional()
95
+ .default('auto')
96
+ .describe('When to use streaming import: auto (>100 files), always, or never'),
97
+ async: z
98
+ .boolean()
99
+ .optional()
100
+ .default(false)
101
+ .describe('Run parsing in background and return job ID immediately. Use check_parse_status to monitor.'),
102
+ watch: z
103
+ .boolean()
104
+ .optional()
105
+ .default(false)
106
+ .describe('Start file watching after parse completes. Only works with async: false.'),
107
+ watchDebounceMs: z
108
+ .number()
109
+ .optional()
110
+ .default(1000)
111
+ .describe('Debounce delay for watch mode in milliseconds (default: 1000)'),
36
112
  },
37
- }, async ({ tsconfigPath, projectPath, clearExisting, projectType = 'auto' }) => {
113
+ }, async ({ tsconfigPath, projectPath, projectId, clearExisting, projectType = 'auto', chunkSize = 100, useStreaming = 'auto', async: asyncMode = false, watch = false, watchDebounceMs = 1000, }) => {
38
114
  try {
115
+ // SECURITY: Validate input paths before processing
116
+ await validatePathExists(projectPath, 'directory');
117
+ await validatePathExists(tsconfigPath, 'file');
118
+ // Note: tsconfig can be outside project in monorepo setups, so we just validate it exists
39
119
  await debugLog('TypeScript project parsing started', {
40
120
  projectPath,
41
121
  tsconfigPath,
42
122
  clearExisting,
43
123
  projectType,
124
+ chunkSize,
125
+ useStreaming,
126
+ asyncMode,
44
127
  });
128
+ // Reject conflicting parameters: watch only works with sync mode
129
+ if (asyncMode && watch) {
130
+ return createErrorResponse(new Error('Invalid parameter combination: watch=true cannot be used with async=true. ' +
131
+ 'File watching requires synchronous parsing. Either set async=false or watch=false.'));
132
+ }
133
+ // Resolve projectId early
134
+ const resolvedProjectId = resolveProjectId(projectPath, projectId);
135
+ // Handle async mode - return job ID immediately and process in Worker thread
136
+ if (asyncMode) {
137
+ const jobId = jobManager.createJob(projectPath, resolvedProjectId);
138
+ jobManager.startJob(jobId);
139
+ // Get path to worker script
140
+ const __filename = fileURLToPath(import.meta.url);
141
+ const __dirname = dirname(__filename);
142
+ const workerPath = join(__dirname, '..', 'workers', 'parse-worker.js');
143
+ // Create Worker thread to run parsing without blocking MCP server
144
+ const worker = new Worker(workerPath, {
145
+ workerData: {
146
+ projectPath,
147
+ tsconfigPath,
148
+ projectId: resolvedProjectId,
149
+ projectType,
150
+ chunkSize: chunkSize > 0 ? chunkSize : 50,
151
+ },
152
+ resourceLimits: {
153
+ maxOldGenerationSizeMb: 8192, // 8GB heap for large monorepos
154
+ maxYoungGenerationSizeMb: 1024,
155
+ },
156
+ });
157
+ // Worker cleanup function
158
+ const terminateWorker = async (reason) => {
159
+ try {
160
+ await worker.terminate();
161
+ await debugLog('Worker terminated', { jobId, reason });
162
+ }
163
+ catch (terminateError) {
164
+ console.warn('Error terminating worker:', terminateError);
165
+ }
166
+ };
167
+ // Set timeout for worker execution (30 minutes)
168
+ const timeoutId = setTimeout(async () => {
169
+ const job = jobManager.getJob(jobId);
170
+ if (job && job.status === 'running') {
171
+ jobManager.failJob(jobId, `Worker timed out after ${WORKER_TIMEOUT_MS / 60000} minutes`);
172
+ await terminateWorker('timeout');
173
+ }
174
+ }, WORKER_TIMEOUT_MS);
175
+ // Handle progress messages from worker
176
+ worker.on('message', (msg) => {
177
+ if (msg.type === 'progress') {
178
+ jobManager.updateProgress(jobId, msg.data);
179
+ }
180
+ else if (msg.type === 'complete') {
181
+ clearTimeout(timeoutId);
182
+ jobManager.completeJob(jobId, msg.data);
183
+ debugLog('Async parsing completed', { jobId, result: msg.data });
184
+ terminateWorker('complete');
185
+ }
186
+ else if (msg.type === 'error') {
187
+ clearTimeout(timeoutId);
188
+ jobManager.failJob(jobId, msg.error);
189
+ debugLog('Async parsing failed', { jobId, error: msg.error });
190
+ terminateWorker('error');
191
+ }
192
+ });
193
+ // Handle worker errors
194
+ worker.on('error', (err) => {
195
+ clearTimeout(timeoutId);
196
+ jobManager.failJob(jobId, err.message ?? String(err));
197
+ console.error('Worker thread error:', err);
198
+ terminateWorker('worker-error');
199
+ });
200
+ // Handle worker exit
201
+ worker.on('exit', (code) => {
202
+ clearTimeout(timeoutId);
203
+ if (code !== 0) {
204
+ const job = jobManager.getJob(jobId);
205
+ if (job && job.status === 'running') {
206
+ jobManager.failJob(jobId, `Worker stopped with exit code ${code}`);
207
+ }
208
+ }
209
+ });
210
+ return createSuccessResponse(`Background parsing started in Worker thread.\n` +
211
+ `Job ID: ${jobId}\n` +
212
+ `Project ID: ${resolvedProjectId}\n\n` +
213
+ `Use check_parse_status({ jobId: "${jobId}" }) to monitor progress.`);
214
+ }
45
215
  const neo4jService = new Neo4jService();
46
216
  const embeddingsService = new EmbeddingsService();
47
217
  const graphGeneratorHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
218
+ // Determine if we should use streaming import
219
+ const parser = projectType === 'auto'
220
+ ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedProjectId)
221
+ : ParserFactory.createParser({
222
+ workspacePath: projectPath,
223
+ tsConfigPath: tsconfigPath,
224
+ projectType: projectType,
225
+ projectId: resolvedProjectId,
226
+ });
227
+ const totalFiles = parser.getSourceFilePaths().length;
228
+ const shouldUseStreaming = useStreaming === 'always' || (useStreaming === 'auto' && totalFiles > STREAMING_THRESHOLD && chunkSize > 0);
229
+ console.log(`📊 Project has ${totalFiles} files. Streaming: ${shouldUseStreaming ? 'enabled' : 'disabled'}`);
230
+ if (shouldUseStreaming && clearExisting !== false) {
231
+ // Use streaming import for large projects
232
+ console.log(`🚀 Using streaming import with chunk size ${chunkSize}`);
233
+ await debugLog('Using streaming import', { totalFiles, chunkSize });
234
+ // Create Project node BEFORE starting import (status: parsing)
235
+ const projectName = await getProjectName(projectPath);
236
+ await neo4jService.run(UPSERT_PROJECT_QUERY, {
237
+ projectId: resolvedProjectId,
238
+ name: projectName,
239
+ path: resolve(projectPath),
240
+ status: 'parsing',
241
+ });
242
+ await debugLog('Project node created with parsing status', { projectId: resolvedProjectId });
243
+ try {
244
+ // Clear existing project data first
245
+ graphGeneratorHandler.setProjectId(resolvedProjectId);
246
+ await neo4jService.run(QUERIES.CLEAR_PROJECT, { projectId: resolvedProjectId });
247
+ const streamingHandler = new StreamingImportHandler(graphGeneratorHandler);
248
+ const result = await streamingHandler.importProjectStreaming(parser, {
249
+ chunkSize,
250
+ projectId: resolvedProjectId,
251
+ });
252
+ await debugLog('Streaming import completed', result);
253
+ // Update Project node status to complete
254
+ await neo4jService.run(UPDATE_PROJECT_STATUS_QUERY, {
255
+ projectId: resolvedProjectId,
256
+ status: 'complete',
257
+ nodeCount: result.nodesImported,
258
+ edgeCount: result.edgesImported,
259
+ });
260
+ await debugLog('Project status updated to complete', { projectId: resolvedProjectId });
261
+ return createSuccessResponse(`Successfully imported project using streaming mode:\n` +
262
+ `- Project: ${projectName}\n` +
263
+ `- Files processed: ${result.filesProcessed}\n` +
264
+ `- Nodes imported: ${result.nodesImported}\n` +
265
+ `- Edges imported: ${result.edgesImported}\n` +
266
+ `- Chunks: ${result.chunksProcessed}\n` +
267
+ `- Time: ${(result.elapsedMs / 1000).toFixed(2)}s\n` +
268
+ `- Project ID: ${resolvedProjectId}\n\n` +
269
+ `Tip: Use "${projectName}" instead of "${resolvedProjectId}" in other tools.`);
270
+ }
271
+ catch (streamingError) {
272
+ // Update Project node status to failed
273
+ await neo4jService.run(UPDATE_PROJECT_STATUS_QUERY, {
274
+ projectId: resolvedProjectId,
275
+ status: 'failed',
276
+ nodeCount: 0,
277
+ edgeCount: 0,
278
+ });
279
+ await debugLog('Project status updated to failed', { projectId: resolvedProjectId, error: streamingError });
280
+ throw streamingError;
281
+ }
282
+ }
283
+ // Standard non-streaming import
284
+ // Create Project node BEFORE starting import (status: parsing)
285
+ const projectName = await getProjectName(projectPath);
286
+ await neo4jService.run(UPSERT_PROJECT_QUERY, {
287
+ projectId: resolvedProjectId,
288
+ name: projectName,
289
+ path: resolve(projectPath),
290
+ status: 'parsing',
291
+ });
292
+ await debugLog('Project node created with parsing status', { projectId: resolvedProjectId });
48
293
  const graphData = await parseProject({
49
294
  neo4jService,
50
295
  tsconfigPath,
51
296
  projectPath,
297
+ projectId,
52
298
  clearExisting,
53
299
  projectType,
54
300
  });
55
- const { nodes, edges, savedCrossFileEdges } = graphData;
56
- console.log(`Parsed ${nodes.length} nodes / ${edges.length} edges`);
57
- await debugLog('Parsing completed', { nodeCount: nodes.length, edgeCount: edges.length });
301
+ const { nodes, edges, savedCrossFileEdges, resolvedProjectId: finalProjectId } = graphData;
302
+ console.log(`Parsed ${nodes.length} nodes / ${edges.length} edges for project ${finalProjectId}`);
303
+ await debugLog('Parsing completed', {
304
+ nodeCount: nodes.length,
305
+ edgeCount: edges.length,
306
+ projectId: finalProjectId,
307
+ });
58
308
  const outputPath = join(projectPath, FILE_PATHS.graphOutput);
59
309
  writeFileSync(outputPath, JSON.stringify(graphData, null, LOG_CONFIG.jsonIndentation));
60
310
  console.log(`Graph data written to ${outputPath}`);
61
311
  try {
312
+ // Set projectId for project-scoped operations (clear, indexes)
313
+ graphGeneratorHandler.setProjectId(finalProjectId);
62
314
  const result = await graphGeneratorHandler.generateGraph(outputPath, DEFAULTS.batchSize, clearExisting);
63
315
  // Recreate cross-file edges after incremental parse
64
316
  if (!clearExisting && savedCrossFileEdges.length > 0) {
65
317
  await debugLog('Recreating cross-file edges', { edgesToRecreate: savedCrossFileEdges.length });
66
318
  const recreateResult = await neo4jService.run(QUERIES.RECREATE_CROSS_FILE_EDGES, {
67
319
  edges: savedCrossFileEdges,
320
+ projectId: finalProjectId,
68
321
  });
69
322
  const recreatedCount = recreateResult[0]?.recreatedCount ?? 0;
70
323
  await debugLog('Cross-file edges recreated', { recreatedCount, expected: savedCrossFileEdges.length });
71
324
  }
72
325
  console.log('Graph generation completed:', result);
73
326
  await debugLog('Neo4j import completed', result);
74
- return createSuccessResponse(formatParseSuccess(nodes.length, edges.length, result));
327
+ // Update Project node status to complete
328
+ await neo4jService.run(UPDATE_PROJECT_STATUS_QUERY, {
329
+ projectId: finalProjectId,
330
+ status: 'complete',
331
+ nodeCount: result.nodesImported,
332
+ edgeCount: result.edgesImported,
333
+ });
334
+ await debugLog('Project status updated to complete', { projectId: finalProjectId });
335
+ // Start file watcher if requested (only in synchronous mode)
336
+ let watchMessage = '';
337
+ if (watch && !asyncMode) {
338
+ try {
339
+ const watcherInfo = await watchManager.startWatching({
340
+ projectPath,
341
+ projectId: finalProjectId,
342
+ tsconfigPath,
343
+ debounceMs: watchDebounceMs,
344
+ });
345
+ await debugLog('File watcher started', { projectId: finalProjectId, status: watcherInfo.status });
346
+ watchMessage = `\n\nFile watcher started (debounce: ${watchDebounceMs}ms). Graph will auto-update on file changes.`;
347
+ }
348
+ catch (watchError) {
349
+ console.error('Failed to start file watcher:', watchError);
350
+ await debugLog('File watcher failed to start', { error: watchError });
351
+ watchMessage = `\n\nWarning: Failed to start file watcher: ${watchError instanceof Error ? watchError.message : String(watchError)}`;
352
+ }
353
+ }
354
+ // Add watcher tip if not already watching
355
+ const watcherTip = watch && !asyncMode
356
+ ? ''
357
+ : `\n\nTip: Use start_watch_project to automatically update the graph when files change.`;
358
+ return createSuccessResponse(formatParseSuccess(nodes.length, edges.length, result) +
359
+ `\n\nTip: Use "${projectName}" instead of "${finalProjectId}" in other tools.` +
360
+ watcherTip +
361
+ watchMessage);
75
362
  }
76
363
  catch (neo4jError) {
77
364
  console.error('Neo4j import failed:', neo4jError);
78
365
  await debugLog('Neo4j import failed', neo4jError);
366
+ // Update Project node status to failed
367
+ await neo4jService.run(UPDATE_PROJECT_STATUS_QUERY, {
368
+ projectId: finalProjectId,
369
+ status: 'failed',
370
+ nodeCount: 0,
371
+ edgeCount: 0,
372
+ });
373
+ await debugLog('Project status updated to failed', { projectId: finalProjectId });
79
374
  return createSuccessResponse(formatParsePartialSuccess(nodes.length, edges.length, outputPath, neo4jError.message));
80
375
  }
81
376
  }
@@ -87,13 +382,16 @@ export const createParseTypescriptProjectTool = (server) => {
87
382
  });
88
383
  };
89
384
  const parseProject = async (options) => {
90
- const { neo4jService, tsconfigPath, projectPath, clearExisting = true, projectType = 'auto' } = options;
385
+ const { neo4jService, tsconfigPath, projectPath, projectId, clearExisting = true, projectType = 'auto' } = options;
386
+ // Resolve projectId early - needed for incremental queries before parser is created
387
+ const resolvedId = resolveProjectId(projectPath, projectId);
91
388
  const parser = projectType === 'auto'
92
- ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath)
389
+ ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId)
93
390
  : ParserFactory.createParser({
94
391
  workspacePath: projectPath,
95
392
  tsConfigPath: tsconfigPath,
96
393
  projectType: projectType,
394
+ projectId: resolvedId,
97
395
  });
98
396
  let incrementalStats;
99
397
  let savedCrossFileEdges = [];
@@ -103,15 +401,15 @@ const parseProject = async (options) => {
103
401
  }
104
402
  else {
105
403
  // Incremental: detect changes and parse only affected files
106
- const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService);
404
+ const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
107
405
  incrementalStats = { filesReparsed: filesToReparse.length, filesDeleted: filesToDelete.length };
108
406
  await debugLog('Incremental change detection', { filesToReparse, filesToDelete });
109
407
  const filesToRemoveFromGraph = [...filesToDelete, ...filesToReparse];
110
408
  if (filesToRemoveFromGraph.length > 0) {
111
409
  // Save cross-file edges before deletion (they'll be recreated after import)
112
- savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph);
410
+ savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
113
411
  await debugLog('Saved cross-file edges', { count: savedCrossFileEdges.length, edges: savedCrossFileEdges });
114
- await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph);
412
+ await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
115
413
  }
116
414
  if (filesToReparse.length > 0) {
117
415
  await debugLog('Incremental parse starting', {
@@ -119,7 +417,7 @@ const parseProject = async (options) => {
119
417
  filesDeleted: filesToDelete.length,
120
418
  });
121
419
  // Load existing nodes from Neo4j for edge target matching
122
- const existingNodes = await loadExistingNodesForEdgeDetection(neo4jService, filesToRemoveFromGraph);
420
+ const existingNodes = await loadExistingNodesForEdgeDetection(neo4jService, filesToRemoveFromGraph, resolvedId);
123
421
  await debugLog('Loaded existing nodes for edge detection', { count: existingNodes.length });
124
422
  parser.setExistingNodes(existingNodes);
125
423
  await parser.parseWorkspace(filesToReparse);
@@ -130,57 +428,19 @@ const parseProject = async (options) => {
130
428
  }
131
429
  const { nodes, edges } = parser.exportToJson();
132
430
  const frameworkSchemas = parser['frameworkSchemas']?.map((s) => s.name) ?? ['Auto-detected'];
431
+ const resolvedProjectId = parser.getProjectId();
133
432
  return {
134
433
  nodes,
135
434
  edges,
136
435
  savedCrossFileEdges,
436
+ resolvedProjectId,
137
437
  metadata: {
138
438
  coreSchema: CORE_TYPESCRIPT_SCHEMA.name,
139
439
  frameworkSchemas,
140
440
  projectType,
441
+ projectId: resolvedProjectId,
141
442
  generated: new Date().toISOString(),
142
443
  ...(incrementalStats && { incremental: incrementalStats }),
143
444
  },
144
445
  };
145
446
  };
146
- const deleteSourceFileSubgraphs = async (neo4jService, filePaths) => {
147
- await neo4jService.run(QUERIES.DELETE_SOURCE_FILE_SUBGRAPHS, { filePaths });
148
- };
149
- const loadExistingNodesForEdgeDetection = async (neo4jService, excludeFilePaths) => {
150
- const queryResult = await neo4jService.run(QUERIES.GET_EXISTING_NODES_FOR_EDGE_DETECTION, { excludeFilePaths });
151
- return queryResult;
152
- };
153
- const getCrossFileEdges = async (neo4jService, filePaths) => {
154
- const queryResult = await neo4jService.run(QUERIES.GET_CROSS_FILE_EDGES, { filePaths });
155
- return queryResult;
156
- };
157
- const detectChangedFiles = async (projectPath, neo4jService) => {
158
- const relativeFiles = await glob('**/*.ts', { cwd: projectPath, ignore: EXCLUDE_PATTERNS_GLOB });
159
- const currentFiles = new Set(relativeFiles.map((f) => resolve(projectPath, f)));
160
- const queryResult = await neo4jService.run(QUERIES.GET_SOURCE_FILE_TRACKING_INFO);
161
- const indexedFiles = queryResult;
162
- const indexedMap = new Map(indexedFiles.map((f) => [f.filePath, f]));
163
- const filesToReparse = [];
164
- const filesToDelete = [];
165
- for (const absolutePath of currentFiles) {
166
- const indexed = indexedMap.get(absolutePath);
167
- if (!indexed) {
168
- filesToReparse.push(absolutePath);
169
- continue;
170
- }
171
- const fileStats = await stat(absolutePath);
172
- if (fileStats.mtimeMs === indexed.mtime && fileStats.size === indexed.size) {
173
- continue;
174
- }
175
- const currentHash = await hashFile(absolutePath);
176
- if (currentHash !== indexed.contentHash) {
177
- filesToReparse.push(absolutePath);
178
- }
179
- }
180
- for (const indexedPath of indexedMap.keys()) {
181
- if (!currentFiles.has(indexedPath)) {
182
- filesToDelete.push(indexedPath);
183
- }
184
- }
185
- return { filesToReparse, filesToDelete };
186
- };
@@ -5,14 +5,15 @@
5
5
  import { z } from 'zod';
6
6
  import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
7
7
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
8
- import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, MESSAGES } from '../constants.js';
8
+ import { TOOL_NAMES, TOOL_METADATA, DEFAULTS } from '../constants.js';
9
9
  import { TraversalHandler } from '../handlers/traversal.handler.js';
10
- import { createErrorResponse, createSuccessResponse, debugLog } from '../utils.js';
10
+ import { createErrorResponse, createSuccessResponse, debugLog, sanitizeNumericInput, resolveProjectIdOrError, } from '../utils.js';
11
11
  export const createSearchCodebaseTool = (server) => {
12
12
  server.registerTool(TOOL_NAMES.searchCodebase, {
13
13
  title: TOOL_METADATA[TOOL_NAMES.searchCodebase].title,
14
14
  description: TOOL_METADATA[TOOL_NAMES.searchCodebase].description,
15
15
  inputSchema: {
16
+ projectId: z.string().describe('Project ID, name, or path (e.g., "backend" or "proj_a1b2c3d4e5f6")'),
16
17
  query: z.string().describe('Natural language query to search the codebase'),
17
18
  maxDepth: z
18
19
  .number()
@@ -38,47 +39,83 @@ export const createSearchCodebaseTool = (server) => {
38
39
  .optional()
39
40
  .describe(`Length of code snippets to include (default: ${DEFAULTS.codeSnippetLength})`)
40
41
  .default(DEFAULTS.codeSnippetLength),
42
+ minSimilarity: z
43
+ .number()
44
+ .optional()
45
+ .describe('Minimum similarity score threshold (0.0-1.0). Results below this are filtered out. Default: 0.65')
46
+ .default(0.65),
41
47
  useWeightedTraversal: z
42
48
  .boolean()
43
49
  .optional()
44
50
  .describe('Use weighted traversal strategy that scores each node for relevance (default: false)')
45
51
  .default(true),
46
52
  },
47
- }, async ({ query, maxDepth = DEFAULTS.traversalDepth, maxNodesPerChain = 5, skip = 0, includeCode = true, snippetLength = DEFAULTS.codeSnippetLength, useWeightedTraversal = true, }) => {
53
+ }, async ({ projectId, query, maxDepth = DEFAULTS.traversalDepth, maxNodesPerChain = 5, skip = 0, includeCode = true, snippetLength = DEFAULTS.codeSnippetLength, minSimilarity = 0.65, useWeightedTraversal = true, }) => {
54
+ const neo4jService = new Neo4jService();
48
55
  try {
49
- await debugLog('Search codebase started', { query });
50
- const neo4jService = new Neo4jService();
56
+ // Resolve project ID from name, path, or ID
57
+ const projectResult = await resolveProjectIdOrError(projectId, neo4jService);
58
+ if (!projectResult.success)
59
+ return projectResult.error;
60
+ const resolvedProjectId = projectResult.projectId;
61
+ // Sanitize numeric inputs to ensure integers (Neo4j LIMIT requires integers)
62
+ const sanitizedMaxDepth = sanitizeNumericInput(maxDepth, DEFAULTS.traversalDepth, 10);
63
+ const sanitizedMaxNodesPerChain = sanitizeNumericInput(maxNodesPerChain, 5);
64
+ const sanitizedSkip = sanitizeNumericInput(skip, 0);
65
+ const sanitizedSnippetLength = sanitizeNumericInput(snippetLength, DEFAULTS.codeSnippetLength);
66
+ await debugLog('Search codebase started', { projectId: resolvedProjectId, query });
51
67
  const embeddingsService = new EmbeddingsService();
52
68
  const traversalHandler = new TraversalHandler(neo4jService);
53
69
  const embedding = await embeddingsService.embedText(query);
54
70
  const vectorResults = await neo4jService.run(QUERIES.VECTOR_SEARCH, {
55
71
  limit: 1,
56
72
  embedding,
73
+ projectId: resolvedProjectId,
74
+ fetchMultiplier: 10,
75
+ minSimilarity,
57
76
  });
58
77
  if (vectorResults.length === 0) {
59
- await debugLog('No relevant code found', { query });
60
- return createSuccessResponse(MESSAGES.errors.noRelevantCode);
78
+ await debugLog('No relevant code found', { projectId: resolvedProjectId, query, minSimilarity });
79
+ return createSuccessResponse(`No code found with similarity >= ${minSimilarity}. ` +
80
+ `Try rephrasing your query or lowering the minSimilarity threshold. Query: "${query}"`);
61
81
  }
62
82
  const startNode = vectorResults[0].node;
63
83
  const nodeId = startNode.properties.id;
84
+ const similarityScore = vectorResults[0].score;
85
+ // Check if best match meets threshold - prevents traversing low-relevance results
86
+ if (similarityScore < minSimilarity) {
87
+ await debugLog('Best match below similarity threshold', {
88
+ projectId: resolvedProjectId,
89
+ query,
90
+ score: similarityScore,
91
+ threshold: minSimilarity,
92
+ });
93
+ return createSuccessResponse(`No sufficiently relevant code found. Best match score: ${similarityScore.toFixed(3)} ` +
94
+ `(threshold: ${minSimilarity}). Try rephrasing your query.`);
95
+ }
64
96
  await debugLog('Vector search completed, starting traversal', {
97
+ projectId: resolvedProjectId,
65
98
  nodeId,
99
+ similarityScore,
66
100
  resultsCount: vectorResults.length,
67
- maxDepth,
68
- maxNodesPerChain,
69
- skip,
101
+ maxDepth: sanitizedMaxDepth,
102
+ maxNodesPerChain: sanitizedMaxNodesPerChain,
103
+ skip: sanitizedSkip,
70
104
  includeCode,
71
- snippetLength,
105
+ snippetLength: sanitizedSnippetLength,
72
106
  });
107
+ // Include similarity score in the title so users can see relevance
108
+ const scoreDisplay = typeof similarityScore === 'number' ? similarityScore.toFixed(3) : 'N/A';
73
109
  return await traversalHandler.traverseFromNode(nodeId, embedding, {
74
- maxDepth,
110
+ projectId: resolvedProjectId,
111
+ maxDepth: sanitizedMaxDepth,
75
112
  direction: 'BOTH', // Show both incoming (who calls this) and outgoing (what this calls)
76
113
  includeCode,
77
- maxNodesPerChain,
78
- skip,
114
+ maxNodesPerChain: sanitizedMaxNodesPerChain,
115
+ skip: sanitizedSkip,
79
116
  summaryOnly: false,
80
- snippetLength,
81
- title: `Exploration from Node: ${nodeId}`,
117
+ snippetLength: sanitizedSnippetLength,
118
+ title: `Search Results (similarity: ${scoreDisplay}) - Starting from: ${nodeId}`,
82
119
  useWeightedTraversal,
83
120
  });
84
121
  }
@@ -87,5 +124,8 @@ export const createSearchCodebaseTool = (server) => {
87
124
  await debugLog('Search codebase error', error);
88
125
  return createErrorResponse(error);
89
126
  }
127
+ finally {
128
+ await neo4jService.close();
129
+ }
90
130
  });
91
131
  };