code-graph-context 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +11 -1
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +416 -17
  8. package/dist/core/parsers/parser-factory.js +5 -3
  9. package/dist/core/parsers/typescript-parser.js +614 -45
  10. package/dist/core/parsers/workspace-parser.js +553 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +153 -5
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +45 -6
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +2 -2
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +84 -18
  35. package/dist/mcp/tools/index.js +13 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +318 -58
  40. package/dist/mcp/tools/search-codebase.tool.js +56 -16
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -13
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +147 -48
  47. package/package.json +4 -2
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Workspace Detector
3
+ * Auto-discovers monorepo structure (Turborepo, pnpm, yarn, npm workspaces)
4
+ */
5
+ import fs from 'fs/promises';
6
+ import path from 'path';
7
+ import { glob } from 'glob';
8
+ import YAML from 'yaml';
9
+ import { debugLog } from '../utils/file-utils.js';
10
/**
 * Discovers the layout of a repository: either a single project or a
 * monorepo (Turborepo, pnpm, yarn or npm workspaces) and the packages in it.
 *
 * Every package is described as
 * `{ name, path, tsConfigPath, relativePath }`, where `tsConfigPath` is
 * `null` when the package has no `tsconfig.json`.
 */
export class WorkspaceDetector {
    /**
     * Detect workspace configuration from a root path.
     *
     * @param {string} rootPath - Repository root (resolved to an absolute path).
     * @returns {Promise<{type: string, rootPath: string, packages: object[]}>}
     */
    async detect(rootPath) {
        const absoluteRoot = path.resolve(rootPath);
        const type = await this.detectWorkspaceType(absoluteRoot);
        if (type === 'single') {
            // Single project, not a monorepo
            return {
                type: 'single',
                rootPath: absoluteRoot,
                packages: await this.getSingleProjectPackage(absoluteRoot),
            };
        }
        const patterns = await this.getWorkspacePatterns(absoluteRoot, type);
        const packages = await this.enumeratePackages(absoluteRoot, patterns);
        await debugLog('Workspace detected', { type, packageCount: packages.length });
        return { type, rootPath: absoluteRoot, packages };
    }

    /**
     * Detect the type of workspace/monorepo by probing marker files in order:
     * `turbo.json`, `pnpm-workspace.yaml`, then the `workspaces` field of
     * `package.json`.
     *
     * NOTE: an array-form (or `{ packages }`-form) `workspaces` field is
     * reported as 'yarn' because npm and yarn share that field; 'npm' is only
     * returned for any other truthy `workspaces` value.
     *
     * @param {string} rootPath - Absolute repository root.
     * @returns {Promise<'turborepo'|'pnpm'|'yarn'|'npm'|'single'>}
     */
    async detectWorkspaceType(rootPath) {
        const turboJsonPath = path.join(rootPath, 'turbo.json');
        const hasTurboJson = await this.fileExists(turboJsonPath);
        await debugLog('Checking for turbo.json', { path: turboJsonPath, exists: hasTurboJson });
        if (hasTurboJson) {
            return 'turborepo';
        }

        const pnpmWorkspacePath = path.join(rootPath, 'pnpm-workspace.yaml');
        const hasPnpmWorkspace = await this.fileExists(pnpmWorkspacePath);
        await debugLog('Checking for pnpm-workspace.yaml', { path: pnpmWorkspacePath, exists: hasPnpmWorkspace });
        if (hasPnpmWorkspace) {
            return 'pnpm';
        }

        // A missing or unparseable package.json simply means "no workspaces".
        const packageJson = await this.readJsonFile(path.join(rootPath, 'package.json'));
        const workspaces = packageJson?.workspaces;
        if (workspaces) {
            if (Array.isArray(workspaces) || workspaces.packages) {
                return 'yarn';
            }
            return 'npm';
        }

        await debugLog('No workspace markers found', { rootPath, result: 'single' });
        return 'single';
    }

    /**
     * Get workspace glob patterns based on workspace type.
     *
     * - 'pnpm': `packages` from `pnpm-workspace.yaml`, else `apps/*`, `packages/*`.
     * - 'turborepo': same as pnpm, but when no pnpm workspace file provides
     *   patterns the `workspaces` field of `package.json` is honoured before
     *   falling back to the defaults (a turbo.json can sit on top of npm or
     *   yarn workspaces just as well as pnpm).
     * - 'yarn' / 'npm': `workspaces` from `package.json`, else `packages/*`.
     * - anything else: no patterns.
     *
     * @param {string} rootPath - Absolute repository root.
     * @param {string} type - Result of {@link detectWorkspaceType}.
     * @returns {Promise<string[]>} Patterns, possibly including `!negations`.
     */
    async getWorkspacePatterns(rootPath, type) {
        switch (type) {
            case 'turborepo':
            case 'pnpm': {
                const pnpmPatterns = await this.readPnpmWorkspacePatterns(rootPath);
                if (pnpmPatterns) {
                    return pnpmPatterns;
                }
                if (type === 'turborepo') {
                    const declared = await this.readPackageJsonWorkspaces(rootPath);
                    if (declared) {
                        return declared;
                    }
                }
                // Turborepo default patterns
                return ['apps/*', 'packages/*'];
            }
            case 'yarn':
            case 'npm': {
                return (await this.readPackageJsonWorkspaces(rootPath)) ?? ['packages/*'];
            }
            default:
                return [];
        }
    }

    /**
     * Enumerate all packages matching workspace patterns.
     *
     * Negated patterns (`!foo/*`) are applied as glob ignores so excluded
     * packages are not reported, and anything below `node_modules` is always
     * ignored so recursive patterns do not pick up installed dependencies.
     * A match counts as a package only if it contains a `package.json`.
     *
     * @param {string} rootPath - Absolute repository root (glob `cwd`).
     * @param {string[]} patterns - Workspace patterns, possibly with negations.
     * @returns {Promise<object[]>} Packages sorted by absolute path.
     */
    async enumeratePackages(rootPath, patterns) {
        const packages = [];
        const seenPaths = new Set();
        await debugLog('Enumerating packages with patterns', { patterns, rootPath });

        const { include, ignore } = this.splitWorkspacePatterns(patterns);
        const ignorePatterns = ['**/node_modules/**', ...ignore];

        for (const pattern of include) {
            const matches = await glob(pattern, {
                cwd: rootPath,
                absolute: true,
                nodir: false,
                mark: true, // Adds a trailing separator to directories
                ignore: ignorePatterns,
            });
            await debugLog('Glob pattern matched', { pattern, matchCount: matches.length, sample: matches.slice(0, 5) });

            for (const match of matches) {
                const packagePath = this.stripTrailingSeparator(match);
                if (seenPaths.has(packagePath)) {
                    continue;
                }
                seenPaths.add(packagePath);

                // The package.json probe doubles as the directory check: for a
                // plain file the joined path cannot exist, so no name-based
                // "looks like a directory" heuristic is needed.
                const packageJsonPath = path.join(packagePath, 'package.json');
                if (!(await this.fileExists(packageJsonPath))) {
                    continue;
                }

                const packageJson = await this.readJsonFile(packageJsonPath);
                const tsConfigPath = path.join(packagePath, 'tsconfig.json');
                const hasTsConfig = await this.fileExists(tsConfigPath);
                packages.push({
                    name: packageJson?.name ?? path.basename(packagePath),
                    path: packagePath,
                    tsConfigPath: hasTsConfig ? tsConfigPath : null,
                    relativePath: path.relative(rootPath, packagePath),
                });
            }
        }

        // Sort by path for consistent ordering
        packages.sort((a, b) => a.path.localeCompare(b.path));
        return packages;
    }

    /**
     * Get package info for a single (non-monorepo) project.
     *
     * @param {string} rootPath - Project root.
     * @returns {Promise<object[]>} One-element array describing the project;
     *   the name falls back to the directory name when `package.json` is
     *   missing, unreadable or has no `name`.
     */
    async getSingleProjectPackage(rootPath) {
        const packageJson = await this.readJsonFile(path.join(rootPath, 'package.json'));
        const tsConfigPath = path.join(rootPath, 'tsconfig.json');
        const hasTsConfig = await this.fileExists(tsConfigPath);
        return [
            {
                name: packageJson?.name ?? path.basename(rootPath),
                path: rootPath,
                tsConfigPath: hasTsConfig ? tsConfigPath : null,
                relativePath: '.',
            },
        ];
    }

    /**
     * Check if a file exists (more precisely: is accessible via `fs.access`).
     *
     * @param {string} filePath
     * @returns {Promise<boolean>}
     */
    async fileExists(filePath) {
        try {
            await fs.access(filePath);
            return true;
        }
        catch {
            return false;
        }
    }

    /**
     * Internal: split workspace patterns into positive globs and ignore globs.
     * Non-string and empty entries are dropped; `!pattern` becomes an ignore
     * entry with the leading `!` removed.
     *
     * @param {unknown[]} patterns
     * @returns {{include: string[], ignore: string[]}}
     */
    splitWorkspacePatterns(patterns) {
        const include = [];
        const ignore = [];
        for (const pattern of patterns ?? []) {
            if (typeof pattern !== 'string' || pattern === '') {
                continue;
            }
            if (pattern.startsWith('!')) {
                const negated = pattern.slice(1);
                if (negated !== '') {
                    ignore.push(negated);
                }
            }
            else {
                include.push(pattern);
            }
        }
        return { include, ignore };
    }

    /**
     * Internal: drop one trailing directory mark from a glob match. Both '/'
     * and the platform separator are accepted so marked directories are
     * normalised the same way on every platform.
     *
     * @param {string} match
     * @returns {string}
     */
    stripTrailingSeparator(match) {
        const marked = match.endsWith('/') || match.endsWith(path.sep);
        return marked && match.length > 1 ? match.slice(0, -1) : match;
    }

    /**
     * Internal: read and parse a JSON file, best effort.
     *
     * @param {string} filePath
     * @returns {Promise<any|null>} Parsed content, or `null` when the file is
     *   missing, unreadable or not valid JSON.
     */
    async readJsonFile(filePath) {
        try {
            return JSON.parse(await fs.readFile(filePath, 'utf-8'));
        }
        catch {
            return null;
        }
    }

    /**
     * Internal: workspace patterns declared in `<root>/package.json`, either
     * as `workspaces: [...]` or `workspaces: { packages: [...] }`.
     *
     * @param {string} rootPath
     * @returns {Promise<string[]|null>} `null` when no array of patterns is declared.
     */
    async readPackageJsonWorkspaces(rootPath) {
        const packageJson = await this.readJsonFile(path.join(rootPath, 'package.json'));
        const workspaces = packageJson?.workspaces;
        if (Array.isArray(workspaces)) {
            return workspaces;
        }
        if (Array.isArray(workspaces?.packages)) {
            return workspaces.packages;
        }
        return null;
    }

    /**
     * Internal: `packages` patterns from `<root>/pnpm-workspace.yaml`.
     *
     * @param {string} rootPath
     * @returns {Promise<string[]|null>} `null` when the file is missing,
     *   cannot be parsed, or has no `packages` array.
     */
    async readPnpmWorkspacePatterns(rootPath) {
        const pnpmWorkspacePath = path.join(rootPath, 'pnpm-workspace.yaml');
        if (!(await this.fileExists(pnpmWorkspacePath))) {
            return null;
        }
        try {
            const config = YAML.parse(await fs.readFile(pnpmWorkspacePath, 'utf-8'));
            return Array.isArray(config?.packages) ? config.packages : null;
        }
        catch {
            // Unparseable workspace file: let the caller fall back to defaults
            return null;
        }
    }
}
@@ -22,6 +22,11 @@ export const TOOL_NAMES = {
22
22
  parseTypescriptProject: 'parse_typescript_project',
23
23
  testNeo4jConnection: 'test_neo4j_connection',
24
24
  impactAnalysis: 'impact_analysis',
25
+ checkParseStatus: 'check_parse_status',
26
+ listProjects: 'list_projects',
27
+ startWatchProject: 'start_watch_project',
28
+ stopWatchProject: 'stop_watch_project',
29
+ listWatchers: 'list_watchers',
25
30
  };
26
31
  // Tool Metadata
27
32
  export const TOOL_METADATA = {
@@ -33,6 +38,9 @@ export const TOOL_METADATA = {
33
38
  title: 'Search Codebase',
34
39
  description: `Search the codebase using semantic similarity to find relevant code, functions, classes, and implementations.
35
40
 
41
+ **Before searching:**
42
+ Use list_projects to see available projects and get the project name/ID to search.
43
+
36
44
  Returns normalized JSON with source code snippets. Uses JSON:API pattern to deduplicate nodes.
37
45
 
38
46
  **Default Usage (Recommended)**:
@@ -53,11 +61,48 @@ Use these parameters ONLY if you encounter token limit errors (>25,000 tokens):
53
61
  **Progressive Strategy**:
54
62
  1. Try with defaults first
55
63
  2. If token error: Use maxDepth=1, includeCode=false for structure
56
- 3. Then traverse deeper or Read specific files for full code`,
64
+ 3. Then traverse deeper or Read specific files for full code
65
+
66
+ **Compact Mode** (for exploration without full source code):
67
+ - includeCode: false → Returns just names, types, and file paths
68
+ - snippetLength: 200 → Smaller code previews
69
+ - maxNodesPerChain: 2 → Fewer relationship chains per depth`,
57
70
  },
58
71
  [TOOL_NAMES.naturalLanguageToCypher]: {
59
72
  title: 'Natural Language to Cypher',
60
- description: 'Convert natural language queries into Cypher queries for Neo4j. This tool is useful for generating specific queries based on user requests about the codebase.',
73
+ description: `Convert natural language queries into Cypher queries for Neo4j.
74
+
75
+ **Before using:**
76
+ Use list_projects to see available projects and get the project name.
77
+
78
+ **When to use:**
79
+ - For complex queries that search_codebase can't handle
80
+ - When you need custom filtering or aggregation
81
+ - To explore specific relationship patterns
82
+
83
+ **Parameters:**
84
+ - projectId: Project name, path, or ID (use list_projects to find)
85
+ - query: Natural language description of what you want to find
86
+
87
+ **Examples:**
88
+ - "Find all classes with more than 5 methods"
89
+ - "List functions that have more than 3 parameters"
90
+ - "Find files that import from a path containing 'utils'"
91
+ - "Show interfaces with 'Response' in their name"
92
+ - "Find all exported functions"
93
+
94
+ **Tips:**
95
+ - Import nodes store file paths, not module names (use 'path containing X')
96
+ - Node types: SourceFile, ClassDeclaration, FunctionDeclaration, MethodDeclaration, InterfaceDeclaration
97
+ - Relationships: CONTAINS, IMPORTS, HAS_PARAMETER, IMPLEMENTS, EXTENDS, HAS_MEMBER
98
+ - For NestJS, use semanticType property instead of decorators (e.g., semanticType = 'NestController')
99
+
100
+ **Query Phrasing:**
101
+ Phrase queries using properties known to exist (filePath, name) rather than abstract concepts:
102
+ - Use "in account folder" or "filePath contains /account/" instead of "in account module"
103
+ - Use "classes extending DbService" not "services that extend DbService" (Service is a decorator, not a type)
104
+ - Use "with name containing 'Controller'" instead of "controllers"
105
+ The tool performs better with concrete, schema-aligned language.`,
61
106
  },
62
107
  [TOOL_NAMES.traverseFromNode]: {
63
108
  title: 'Traverse from Node',
@@ -74,14 +119,41 @@ Advanced options (use when needed):
74
119
  - summaryOnly: Set to true for just file paths and statistics without detailed traversal
75
120
 
76
121
  Best practices:
122
+ - Use list_projects first to see available projects
77
123
  - Start with search_codebase to find initial nodes
78
124
  - Default includes source code snippets for immediate context
79
125
  - Set includeCode: false for high-level architecture view only
80
- - Use summaryOnly: true for a quick overview of many connections`,
126
+ - Use summaryOnly: true for a quick overview of many connections
127
+
128
+ **Compact Mode** (for exploration without full source code):
129
+ - summaryOnly: true → Returns only file paths and statistics
130
+ - includeCode: false → Structure without source code
131
+ - snippetLength: 200 → Smaller code previews
132
+ - maxTotalNodes: 20 → Limit total unique nodes returned`,
81
133
  },
82
134
  [TOOL_NAMES.parseTypescriptProject]: {
83
135
  title: 'Parse TypeScript Project',
84
- description: 'Parse a TypeScript/NestJS project and store in Neo4j graph',
136
+ description: `Parse a TypeScript/NestJS project and build a code graph in Neo4j.
137
+
138
+ **IMPORTANT: Always use async mode for parsing:**
139
+ - Set async: true to avoid timeouts on large codebases
140
+ - Use check_parse_status to monitor progress
141
+
142
+ **Workflow:**
143
+ 1. Call with async: true and projectPath
144
+ 2. Note the returned jobId
145
+ 3. Poll check_parse_status({ jobId }) until completed
146
+ 4. Use list_projects to confirm the project was added
147
+
148
+ **Parameters:**
149
+ - projectPath (required): Absolute path to the project root
150
+ - async (recommended: true): Run parsing in background
151
+ - clearExisting: Set true to replace existing graph for this project
152
+ - projectId: Optional custom ID (auto-generated from path if omitted)
153
+
154
+ **Example:**
155
+ parse_typescript_project({ projectPath: "/path/to/project", async: true })
156
+ → Returns jobId for polling`,
85
157
  },
86
158
  [TOOL_NAMES.testNeo4jConnection]: {
87
159
  title: 'Test Neo4j Connection & APOC',
@@ -91,6 +163,9 @@ Best practices:
91
163
  title: 'Impact Analysis',
92
164
  description: `Analyze the impact of modifying a code node. Shows what depends on this node and helps assess risk before making changes.
93
165
 
166
+ **Before analyzing:**
167
+ Use list_projects to see available projects and get the project name.
168
+
94
169
  Returns:
95
170
  - Risk level (LOW/MEDIUM/HIGH/CRITICAL) based on dependency count and relationship types
96
171
  - Direct dependents: nodes that directly reference the target
@@ -105,6 +180,78 @@ Parameters:
105
180
 
106
181
  Use this before refactoring to understand blast radius of changes.`,
107
182
  },
183
+ [TOOL_NAMES.checkParseStatus]: {
184
+ title: 'Check Parse Status',
185
+ description: `Check the status of an async parsing job started with parse_typescript_project({ async: true }).
186
+
187
+ Returns:
188
+ - Job status (pending/running/completed/failed)
189
+ - Progress: phase, files processed, chunks completed
190
+ - Nodes and edges imported so far
191
+ - Final result on completion or error message on failure
192
+
193
+ Use this to poll for progress when parsing large codebases asynchronously.`,
194
+ },
195
+ [TOOL_NAMES.listProjects]: {
196
+ title: 'List Projects',
197
+ description: `List all parsed projects in the database with their IDs, names, and paths.
198
+
199
+ Returns:
200
+ - projectId: The full project ID (e.g., "proj_a1b2c3d4e5f6")
201
+ - name: Friendly project name from package.json (e.g., "backend")
202
+ - path: Full filesystem path to the project
203
+ - updatedAt: When the project was last parsed
204
+
205
+ Use the name or path in other tools instead of the cryptic projectId.`,
206
+ },
207
+ [TOOL_NAMES.startWatchProject]: {
208
+ title: 'Start Watch Project',
209
+ description: `Start watching a project for file changes and automatically update the graph.
210
+
211
+ **Parameters:**
212
+ - projectPath (required): Absolute path to the project root
213
+ - tsconfigPath (required): Path to tsconfig.json
214
+ - projectId (optional): Custom project ID (auto-generated if omitted)
215
+ - debounceMs (optional): Delay before processing changes (default: 1000ms)
216
+
217
+ **Behavior:**
218
+ - Watches for .ts file changes (add/change/delete)
219
+ - Automatically triggers incremental graph updates
220
+ - Sends MCP notifications for progress updates
221
+ - Excludes node_modules, dist, build, .git, *.d.ts, *.js
222
+
223
+ **Usage:**
224
+ start_watch_project({ projectPath: "/path/to/project", tsconfigPath: "/path/to/project/tsconfig.json" })
225
+
226
+ Use list_watchers to see active watchers, stop_watch_project to stop.`,
227
+ },
228
+ [TOOL_NAMES.stopWatchProject]: {
229
+ title: 'Stop Watch Project',
230
+ description: `Stop watching a project for file changes.
231
+
232
+ **Parameters:**
233
+ - projectId (required): Project ID to stop watching
234
+
235
+ **Usage:**
236
+ stop_watch_project({ projectId: "proj_abc123..." })
237
+
238
+ Use list_watchers to see active watchers.`,
239
+ },
240
+ [TOOL_NAMES.listWatchers]: {
241
+ title: 'List Watchers',
242
+ description: `List all active file watchers.
243
+
244
+ Returns information about each watcher:
245
+ - projectId: The project being watched
246
+ - projectPath: File system path
247
+ - status: active, paused, or error
248
+ - debounceMs: Configured debounce delay
249
+ - pendingChanges: Number of queued file changes
250
+ - lastUpdateTime: When the graph was last updated
251
+ - errorMessage: Error details if status is "error"
252
+
253
+ Use stop_watch_project to stop a watcher.`,
254
+ },
108
255
  };
109
256
  // Default Values
110
257
  export const DEFAULTS = {
@@ -112,8 +259,9 @@ export const DEFAULTS = {
112
259
  skipOffset: 0,
113
260
  batchSize: 500,
114
261
  maxResultsDisplayed: 30,
115
- codeSnippetLength: 1000,
262
+ codeSnippetLength: 500, // Reduced from 1000 to control output size
116
263
  chainSnippetLength: 700,
264
+ maxEmbeddingChars: 30000, // ~7500 tokens, under 8192 limit for text-embedding-3-large
117
265
  };
118
266
  // Messages
119
267
  export const MESSAGES = {
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Cross-File Edge Helpers
3
+ * Shared utilities for managing cross-file edges during incremental parsing
4
+ */
5
+ import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
6
/**
 * Run the DELETE_SOURCE_FILE_SUBGRAPHS query for the given files of a project.
 *
 * @param neo4jService - Service exposing `run(query, params)`.
 * @param filePaths - File paths passed to the query as `filePaths`.
 * @param projectId - Project ID passed to the query as `projectId`.
 * @returns Resolves once the query has run; the query result is discarded.
 */
export const deleteSourceFileSubgraphs = async (neo4jService, filePaths, projectId) => {
    const params = { filePaths, projectId };
    await neo4jService.run(QUERIES.DELETE_SOURCE_FILE_SUBGRAPHS, params);
};
9
/**
 * Run the GET_EXISTING_NODES_FOR_EDGE_DETECTION query for a project.
 *
 * @param neo4jService - Service exposing `run(query, params)`.
 * @param excludeFilePaths - File paths passed to the query as `excludeFilePaths`.
 * @param projectId - Project ID passed to the query as `projectId`.
 * @returns Whatever `neo4jService.run` resolves to, unmodified.
 */
export const loadExistingNodesForEdgeDetection = async (neo4jService, excludeFilePaths, projectId) => {
    const params = { excludeFilePaths, projectId };
    return await neo4jService.run(QUERIES.GET_EXISTING_NODES_FOR_EDGE_DETECTION, params);
};
16
/**
 * Run the GET_CROSS_FILE_EDGES query for the given files of a project.
 *
 * @param neo4jService - Service exposing `run(query, params)`.
 * @param filePaths - File paths passed to the query as `filePaths`.
 * @param projectId - Project ID passed to the query as `projectId`.
 * @returns Whatever `neo4jService.run` resolves to, unmodified.
 */
export const getCrossFileEdges = async (neo4jService, filePaths, projectId) => {
    const params = { filePaths, projectId };
    return await neo4jService.run(QUERIES.GET_CROSS_FILE_EDGES, params);
};
@@ -0,0 +1,105 @@
1
+ /**
2
+ * File Change Detection
3
+ * Shared utilities for detecting changed files for incremental parsing
4
+ */
5
+ import { stat, realpath } from 'fs/promises';
6
+ import { resolve, sep } from 'path';
7
+ import { glob } from 'glob';
8
+ import { EXCLUDE_PATTERNS_GLOB } from '../../constants.js';
9
+ import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
10
+ import { hashFile } from '../../utils/file-utils.js';
11
/**
 * Detect which files have changed and need reparsing.
 * Compares the `.ts`/`.tsx` files currently on disk with the files indexed
 * in Neo4j (as returned by QUERIES.GET_SOURCE_FILE_TRACKING_INFO).
 *
 * SECURITY: Validates that all file paths stay within the project directory
 * after symlink resolution to prevent path traversal attacks.
 *
 * A file is considered unchanged only if mtime, size AND content hash all
 * match the indexed record. The hash is computed only when mtime and size
 * already match, because any metadata mismatch forces a reparse anyway.
 *
 * @param projectPath - Root path of the project
 * @param neo4jService - Neo4j service instance (must expose `run(query, params)`)
 * @param projectId - Project ID for scoping queries
 * @param options - Optional configuration; `logWarnings` (default true)
 *   controls whether skipped/vanished files are reported via console.warn
 * @returns `{ filesToReparse, filesToDelete }` - real (symlink-resolved) paths
 *   of new/changed files, and indexed paths that are no longer on disk
 * @throws Re-throws stat/hash errors other than ENOENT and EACCES, and any
 *   error from resolving the project path, globbing or the Neo4j query
 */
export const detectChangedFiles = async (projectPath, neo4jService, projectId, options = {}) => {
    const { logWarnings = true } = options;
    const warn = (message) => {
        if (logWarnings) {
            console.warn(message);
        }
    };

    // SECURITY: Resolve project path to real path to handle symlinks consistently
    const realProjectPath = await realpath(projectPath);
    // Avoid a doubled separator when the project is the filesystem root.
    const projectPrefix = realProjectPath.endsWith(sep) ? realProjectPath : realProjectPath + sep;
    const relativeFiles = await glob('**/*.{ts,tsx}', { cwd: projectPath, ignore: EXCLUDE_PATTERNS_GLOB });

    // SECURITY: Validate each file stays within project directory after symlink resolution
    const currentFiles = new Set();
    for (const relFile of relativeFiles) {
        let realFilePath;
        try {
            realFilePath = await realpath(resolve(projectPath, relFile));
        }
        catch {
            // File may have been deleted between glob and realpath - skip it
            warn(`File no longer accessible: ${relFile}`);
            continue;
        }
        if (realFilePath === realProjectPath || realFilePath.startsWith(projectPrefix)) {
            // Use realFilePath for consistent path matching with Neo4j
            currentFiles.add(realFilePath);
        }
        else {
            warn(`SECURITY: Skipping file outside project directory: ${relFile}`);
        }
    }

    // Get indexed files from Neo4j
    const indexedFiles = await neo4jService.run(QUERIES.GET_SOURCE_FILE_TRACKING_INFO, { projectId });
    const indexedMap = new Map(indexedFiles.map((f) => [f.filePath, f]));
    const filesToReparse = [];
    const filesToDelete = [];

    // Check each current file against indexed state
    for (const filePath of currentFiles) {
        const indexed = indexedMap.get(filePath);
        if (!indexed) {
            // New file - needs parsing
            filesToReparse.push(filePath);
            continue;
        }
        try {
            const fileStats = await stat(filePath);
            const metadataUnchanged = fileStats.mtimeMs === indexed.mtime && fileStats.size === indexed.size;
            // Hash only when metadata matches; otherwise the file is reparsed regardless.
            if (metadataUnchanged && (await hashFile(filePath)) === indexed.contentHash) {
                continue;
            }
            // Any mismatch means file changed
            filesToReparse.push(filePath);
        }
        catch (error) {
            const code = error?.code;
            if (code === 'ENOENT') {
                // The file vanished after it was globbed. It is still in
                // currentFiles, so the deletion pass below would NOT pick it
                // up - record the stale indexed entry for deletion here.
                warn(`File deleted between glob and stat: ${filePath}`);
                filesToDelete.push(filePath);
            }
            else if (code === 'EACCES') {
                // Permission denied - assume changed to be safe
                warn(`Permission denied reading file: ${filePath}`);
                filesToReparse.push(filePath);
            }
            else {
                throw error;
            }
        }
    }

    // Find deleted files (indexed but no longer on disk)
    for (const indexedPath of indexedMap.keys()) {
        if (!currentFiles.has(indexedPath)) {
            filesToDelete.push(indexedPath);
        }
    }
    return { filesToReparse, filesToDelete };
};