code-graph-context 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +11 -1
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +416 -17
  8. package/dist/core/parsers/parser-factory.js +5 -3
  9. package/dist/core/parsers/typescript-parser.js +614 -45
  10. package/dist/core/parsers/workspace-parser.js +553 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +153 -5
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +45 -6
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +2 -2
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +84 -18
  35. package/dist/mcp/tools/index.js +13 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +318 -58
  40. package/dist/mcp/tools/search-codebase.tool.js +56 -16
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -13
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +147 -48
  47. package/package.json +4 -2
@@ -1,10 +1,29 @@
1
1
  import fs from 'fs';
2
2
  import OpenAI from 'openai';
3
+ import { getTimeoutConfig } from '../config/timeouts.js';
3
4
  export class NaturalLanguageToCypherService {
4
5
  assistantId;
5
6
  openai;
6
7
  MODEL = 'gpt-4o-mini'; // Using GPT-4 Turbo
8
+ schemaPath = null;
9
+ cachedSemanticTypes = null;
7
10
  messageInstructions = `
11
+ === CRITICAL - CLASS/SERVICE NAME HANDLING ===
12
+ WRONG: (n:DbService), (n:UserService), (n:AuthController) - DO NOT USE CLASS NAMES AS LABELS
13
+ CORRECT: (n:Class {name: 'DbService'}) - Match on the "name" property instead
14
+
15
+ Class/service names are NOT Neo4j labels. They are values of the "name" property on Class nodes.
16
+
17
+ The ONLY valid node labels are: SourceFile, Class, Method, Function, Property, Interface,
18
+ Constructor, Parameter, Enum, Variable, Import, Export, Decorator
19
+
20
+ Examples:
21
+ - "Find DbService" -> MATCH (n:Class {name: 'DbService'}) WHERE n.projectId = $projectId RETURN n
22
+ - "Classes extending BaseService" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'BaseService'}) WHERE c.projectId = $projectId RETURN c
23
+ - "Methods in UserController" -> MATCH (c:Class {name: 'UserController'})-[:HAS_MEMBER]->(m:Method) WHERE c.projectId = $projectId RETURN m
24
+ - "Classes with @Controller decorator" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType = 'NestController' RETURN c
25
+ ===============================================
26
+
8
27
  The schema file (neo4j-apoc-schema.json) contains two sections:
9
28
  1. rawSchema: Complete Neo4j APOC schema with all node labels, properties, and relationships in the graph
10
29
  2. discoveredSchema: Dynamically discovered graph structure including:
@@ -13,13 +32,20 @@ The schema file (neo4j-apoc-schema.json) contains two sections:
13
32
  - semanticTypes: Array of {type, count} showing semantic node classifications (e.g., Service, Controller)
14
33
  - commonPatterns: Array of {from, relationship, to, count} showing frequent relationship patterns
15
34
 
35
+ IMPORTANT - Multi-Project Isolation:
36
+ All nodes have a "projectId" property that isolates data between different projects.
37
+ You MUST include a projectId filter in EVERY query to ensure project isolation.
38
+ The projectId will be provided as a parameter ($projectId).
39
+
16
40
  Your response must be a valid JSON object with this exact structure:
17
41
  {
18
- "cypher": "MATCH (n:NodeType) WHERE n.property = $param RETURN n",
42
+ "cypher": "MATCH (n:NodeType) WHERE n.projectId = $projectId AND n.property = $param RETURN n",
19
43
  "parameters": { "param": "value" } | null,
20
44
  "explanation": "Concise explanation of what the query does and why it matches the user's request"
21
45
  }
22
46
 
47
+ Note: Do NOT include projectId in the parameters object - it will be injected automatically by the system.
48
+
23
49
  Query Generation Process:
24
50
  1. CHECK NODE TYPES: Look at discoveredSchema.nodeTypes to see available node labels and their properties
25
51
  2. CHECK RELATIONSHIPS: Look at discoveredSchema.relationshipTypes to understand how nodes connect
@@ -27,14 +53,124 @@ Query Generation Process:
27
53
  4. REVIEW PATTERNS: Check discoveredSchema.commonPatterns for frequent relationship patterns in the graph
28
54
  5. EXAMINE PROPERTIES: Use rawSchema for exact property names and types
29
55
  6. GENERATE QUERY: Write the Cypher query using only node labels, relationships, and properties that exist in the schema
56
+ 7. ADD PROJECT FILTER: Always include WHERE n.projectId = $projectId for every node pattern in the query
30
57
 
31
58
  Critical Rules:
59
+ - ALWAYS filter by projectId on every node in the query (e.g., WHERE n.projectId = $projectId)
32
60
  - Use the schema information from the file_search tool - do not guess node labels or relationships
33
61
  - Use ONLY node labels and properties found in the schema
34
62
  - For nested JSON data in properties, use: apoc.convert.fromJsonMap(node.propertyName)
35
63
  - Use parameterized queries with $ syntax for any dynamic values
36
64
  - Return only the data relevant to the user's request
37
65
 
66
+ RELATIONSHIP TYPE DEFINITIONS (use these exact types):
67
+ - EXTENDS: Inheritance - one class/interface IS_A parent (use for "extends", "inherits from", "parent class", "subclass")
68
+ - IMPLEMENTS: Contract - a class implements an interface (use for "implements", "conforms to")
69
+ - HAS_MEMBER: Composition - a class/interface contains methods/properties (use for "has method", "contains property", "members")
70
+ - CONTAINS: Structure - file contains declarations (use for "in file", "declared in", "defined in")
71
+ - IMPORTS: Dependencies - file imports another (use for "imports", "depends on", "requires")
72
+ - TYPED_AS: Type annotation - parameter/property has a type (use for "typed as", "has type", "returns")
73
+ - HAS_PARAMETER: Function signature - method/function has parameters (use for "takes parameter", "accepts")
74
+
75
+ WARNING - NOT IMPLEMENTED (will return 0 results):
76
+ - CALLS: Function call tracking is NOT YET IMPLEMENTED. Do not use this relationship type.
77
+ Instead, for "calls" or "uses" queries, suggest using IMPORTS to find file dependencies.
78
+ - DECORATED_WITH: Decorator relationships are NOT IMPLEMENTED. Do not use this relationship type.
79
+ Instead, use the semanticType property (e.g., WHERE c.semanticType = 'NestController').
80
+
81
+ CRITICAL: Do NOT confuse EXTENDS (inheritance) with HAS_MEMBER (composition). "extends" always means EXTENDS relationship.
82
+
83
+ EXTENDS DIRECTION - CRITICAL:
84
+ The arrow points FROM child TO parent. The child "extends" toward the parent.
85
+ - CORRECT: (child:Class)-[:EXTENDS]->(parent:Class {name: 'BaseService'})
86
+ - WRONG: (parent:Class {name: 'BaseService'})-[:EXTENDS]->(child:Class)
87
+
88
+ Examples:
89
+ - "Classes extending DbService" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'DbService'}) WHERE c.projectId = $projectId RETURN c
90
+ - "What extends BaseController" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'BaseController'}) WHERE c.projectId = $projectId RETURN c
91
+ - "Services that extend DbService with >5 methods" ->
92
+ MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'DbService'})
93
+ WHERE c.projectId = $projectId
94
+ WITH c
95
+ MATCH (c)-[:HAS_MEMBER]->(m:Method)
96
+ WITH c, count(m) AS methodCount
97
+ WHERE methodCount > 5
98
+ RETURN c, methodCount
99
+
100
+ SEMANTIC TYPES (Framework-Specific Classifications):
101
+ The parser assigns semanticType based on decorators or naming patterns. The actual semantic types vary by framework.
102
+
103
+ IMPORTANT: Do NOT assume NestJS semantic type names like 'NestController' or 'NestService'.
104
+ Instead, refer to the SEMANTIC TYPES IN THIS PROJECT section below for the actual types discovered in this codebase.
105
+
106
+ Common semantic type patterns:
107
+ - Controllers: Look for types containing 'Controller' (e.g., 'Controller', 'NestController')
108
+ - Services: Look for types containing 'Service', 'Provider', or 'Injectable'
109
+ - Repositories: Look for types containing 'Repository', 'DAL', or 'DAO'
110
+ - Modules: Look for types containing 'Module'
111
+
112
+ If no semantic types are discovered, use name patterns as fallback:
113
+ - "Find all controllers" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Controller' RETURN c
114
+ - "Find all services" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Service' RETURN c
115
+
116
+ NOTE: Do NOT use DECORATED_WITH relationships - they don't exist in the graph. Use semanticType property instead.
117
+
118
+ FRAMEWORK-SPECIFIC PATTERNS:
119
+
120
+ React/Frontend Projects:
121
+ - React functional components are stored as Function nodes, NOT Class nodes
122
+ - Example: "Find component UserProfile" -> MATCH (f:Function {name: 'UserProfile'}) WHERE f.projectId = $projectId RETURN f
123
+ - React hooks are also Function nodes (useAuth, useState, etc.)
124
+ - JSX files (.tsx) contain functions that return JSX elements
125
+
126
+ Decorator-Based Backend Projects (NestJS, custom frameworks, etc.):
127
+ - Uses Class nodes with semanticType property set based on decorators
128
+ - The actual semanticType values depend on the framework - check the discovered schema
129
+ - Controllers: MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [discovered controller types] RETURN c
130
+ - Services: MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [discovered service types] RETURN c
131
+
132
+ IMPORTANT: When user asks about "components" or "hooks":
133
+ - If asking about React -> query Function nodes
134
+ - If asking about decorator-based frameworks -> query Class nodes with semanticType property (using discovered types)
135
+
136
+ MODULE/DIRECTORY QUERIES:
137
+ To find things "in a module" or "in a directory", use filePath pattern matching:
138
+ - "in account module" -> WHERE n.filePath CONTAINS '/account/'
139
+ - "in auth folder" -> WHERE n.filePath CONTAINS '/auth/'
140
+ - "in src/services" -> WHERE n.filePath CONTAINS '/services/'
141
+
142
+ Examples (use discovered semantic types from this project):
143
+ - "Controllers in account module" ->
144
+ MATCH (c:Class)
145
+ WHERE c.projectId = $projectId AND c.semanticType IN [discovered controller types] AND c.filePath CONTAINS '/account/'
146
+ RETURN c
147
+
148
+ - "All services in the auth folder" ->
149
+ MATCH (c:Class)
150
+ WHERE c.projectId = $projectId AND c.semanticType IN [discovered service types] AND c.filePath CONTAINS '/auth/'
151
+ RETURN c
152
+
153
+ FALLBACK (when semantic types not available):
154
+ - "Controllers in account module" ->
155
+ MATCH (c:Class)
156
+ WHERE c.projectId = $projectId AND c.name CONTAINS 'Controller' AND c.filePath CONTAINS '/account/'
157
+ RETURN c
158
+
159
+ NOTE: Do NOT assume packageName exists - use filePath for directory-based queries.
160
+ NOTE: Do NOT use DECORATED_WITH - use semanticType property instead.
161
+
162
+ IMPORTANT - Cypher Syntax (NOT SQL):
163
+ - Cypher does NOT use GROUP BY. Aggregation happens automatically in RETURN.
164
+ - WRONG (SQL): RETURN label, count(n) GROUP BY label
165
+ - CORRECT (Cypher): RETURN labels(n) AS label, count(n) AS count
166
+ - For grouping, non-aggregated values in RETURN automatically become grouping keys
167
+ - Use labels(n) to get node labels as an array
168
+ - Use collect() for aggregating into lists
169
+ - Use count(), sum(), avg(), min(), max() for aggregations
170
+ - Common patterns:
171
+ - Count by type: MATCH (n) RETURN labels(n)[0] AS type, count(n) AS count
172
+ - Group with collect: MATCH (n)-[:REL]->(m) RETURN n.name, collect(m.name) AS related
173
+
38
174
  Provide ONLY the JSON response with no additional text, markdown formatting, or explanations outside the JSON structure.
39
175
  `;
40
176
  constructor() {
@@ -42,9 +178,16 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
42
178
  if (!apiKey) {
43
179
  throw new Error('OPENAI_API_KEY environment variable is required');
44
180
  }
45
- this.openai = new OpenAI({ apiKey });
181
+ const timeoutConfig = getTimeoutConfig();
182
+ this.openai = new OpenAI({
183
+ apiKey,
184
+ timeout: timeoutConfig.openai.assistantTimeoutMs,
185
+ maxRetries: 2,
186
+ });
46
187
  }
47
188
  async getOrCreateAssistant(schemaPath) {
189
+ // Store schema path for later use in prompt injection
190
+ this.schemaPath = schemaPath;
48
191
  if (process.env.OPENAI_ASSISTANT_ID) {
49
192
  this.assistantId = process.env.OPENAI_ASSISTANT_ID;
50
193
  console.log(`Using existing assistant with ID: ${this.assistantId} `);
@@ -93,11 +236,173 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
93
236
  this.assistantId = assistant.id;
94
237
  return this.assistantId;
95
238
  }
96
- async promptToQuery(userPrompt) {
239
+ /**
240
+ * Load and format the schema context for direct injection into prompts.
241
+ * This supplements the file_search tool by providing explicit schema information.
242
+ */
243
+ loadSchemaContext() {
244
+ if (!this.schemaPath) {
245
+ return 'No schema available. Use node types from file_search.';
246
+ }
247
+ try {
248
+ const content = fs.readFileSync(this.schemaPath, 'utf-8');
249
+ const schema = JSON.parse(content);
250
+ if (!schema.discoveredSchema) {
251
+ return 'No discovered schema available.';
252
+ }
253
+ const ds = schema.discoveredSchema;
254
+ // Format node types
255
+ const nodeTypes = ds.nodeTypes?.map((n) => n.label).join(', ') ?? 'none';
256
+ // Get function count vs class count to hint at framework
257
+ const functionCount = ds.nodeTypes?.find((n) => n.label === 'Function')?.count ?? 0;
258
+ const classCount = ds.nodeTypes?.find((n) => n.label === 'Class')?.count ?? 0;
259
+ const decoratorCount = ds.nodeTypes?.find((n) => n.label === 'Decorator')?.count ?? 0;
260
+ // Format relationship types
261
+ const relTypes = ds.relationshipTypes?.map((r) => r.type).join(', ') ?? 'none';
262
+ // Format semantic types and categorize them
263
+ const semanticTypeList = ds.semanticTypes?.map((s) => s.type) ?? [];
264
+ const semTypes = semanticTypeList.length > 0 ? semanticTypeList.join(', ') : 'none';
265
+ // Cache categorized semantic types for dynamic example generation
266
+ this.cachedSemanticTypes = this.categorizeSemanticTypes(semanticTypeList);
267
+ // Framework hint based on graph composition
268
+ let frameworkHint = '';
269
+ if (decoratorCount > 10 && classCount > functionCount) {
270
+ // Use discovered semantic types instead of assuming NestJS
271
+ const sampleType = this.cachedSemanticTypes?.controller[0] ?? this.cachedSemanticTypes?.service[0] ?? 'YourSemanticType';
272
+ frameworkHint = `\nFRAMEWORK DETECTED: Decorator-based codebase. Use Class nodes with semanticType property (e.g., semanticType = "${sampleType}").`;
273
+ }
274
+ else if (functionCount > classCount) {
275
+ frameworkHint = '\nFRAMEWORK DETECTED: React/functional codebase. Use Function nodes for components.';
276
+ }
277
+ return `
278
+ ACTUAL GRAPH SCHEMA (use these exact labels):
279
+
280
+ Node Types: ${nodeTypes}
281
+ Relationship Types: ${relTypes}
282
+ Semantic Types: ${semTypes}
283
+ ${frameworkHint}
284
+ CRITICAL: Use ONLY these node labels. Do NOT invent labels like :DbService, :UserService, etc.
285
+ For queries about specific classes/services, use: (n:Class {name: 'ClassName'})
286
+ For inheritance: (child:Class)-[:EXTENDS]->(parent:Class {name: 'ParentName'})
287
+ For decorator-based queries: Use semanticType property with values from the discovered semantic types above.
288
+ `.trim();
289
+ }
290
+ catch (error) {
291
+ console.warn('Failed to load schema for prompt injection:', error);
292
+ return 'Schema load failed. Use file_search for schema information.';
293
+ }
294
+ }
295
+ /**
296
+ * Categorizes semantic types by their likely intent (controller, service, etc.)
297
+ * This allows the LLM to generate queries that work with any framework,
298
+ * not just NestJS-specific semantic type names.
299
+ */
300
+ categorizeSemanticTypes(semanticTypes) {
301
+ const categories = {
302
+ controller: [],
303
+ service: [],
304
+ repository: [],
305
+ module: [],
306
+ guard: [],
307
+ pipe: [],
308
+ interceptor: [],
309
+ other: [],
310
+ all: [...semanticTypes],
311
+ };
312
+ for (const type of semanticTypes) {
313
+ const lower = type.toLowerCase();
314
+ if (lower.includes('controller')) {
315
+ categories.controller.push(type);
316
+ }
317
+ else if (lower.includes('service') || lower.includes('provider') || lower.includes('injectable')) {
318
+ categories.service.push(type);
319
+ }
320
+ else if (lower.includes('repository') || lower.includes('dal') || lower.includes('dao')) {
321
+ categories.repository.push(type);
322
+ }
323
+ else if (lower.includes('module')) {
324
+ categories.module.push(type);
325
+ }
326
+ else if (lower.includes('guard') || lower.includes('auth')) {
327
+ categories.guard.push(type);
328
+ }
329
+ else if (lower.includes('pipe') || lower.includes('validator')) {
330
+ categories.pipe.push(type);
331
+ }
332
+ else if (lower.includes('interceptor') || lower.includes('middleware')) {
333
+ categories.interceptor.push(type);
334
+ }
335
+ else {
336
+ categories.other.push(type);
337
+ }
338
+ }
339
+ return categories;
340
+ }
341
+ /**
342
+ * Generates dynamic query examples based on discovered semantic types.
343
+ * Provides both semantic type matching and name pattern fallbacks.
344
+ */
345
+ generateDynamicSemanticExamples(categories) {
346
+ const formatTypes = (types) => types.map((t) => `'${t}'`).join(', ');
347
+ let examples = '\nSEMANTIC TYPES IN THIS PROJECT:\n';
348
+ if (categories.all.length === 0) {
349
+ examples += 'No semantic types discovered. Use name patterns for queries.\n';
350
+ }
351
+ else {
352
+ examples += `Available: ${categories.all.join(', ')}\n`;
353
+ }
354
+ examples += '\nFRAMEWORK-AGNOSTIC QUERY PATTERNS:\n';
355
+ // Controller queries
356
+ if (categories.controller.length > 0) {
357
+ examples += `- "Find all controllers" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [${formatTypes(categories.controller)}] RETURN c\n`;
358
+ }
359
+ else {
360
+ examples += `- "Find all controllers" -> MATCH (c:Class) WHERE c.projectId = $projectId AND (c.name CONTAINS 'Controller' OR c.name ENDS WITH 'Controller') RETURN c\n`;
361
+ }
362
+ // Service queries
363
+ if (categories.service.length > 0) {
364
+ examples += `- "Find all services" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [${formatTypes(categories.service)}] RETURN c\n`;
365
+ }
366
+ else {
367
+ examples += `- "Find all services" -> MATCH (c:Class) WHERE c.projectId = $projectId AND (c.name CONTAINS 'Service' OR c.name ENDS WITH 'Service') RETURN c\n`;
368
+ }
369
+ // Repository queries
370
+ if (categories.repository.length > 0) {
371
+ examples += `- "Find all repositories" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [${formatTypes(categories.repository)}] RETURN c\n`;
372
+ }
373
+ else {
374
+ examples += `- "Find all repositories" -> MATCH (c:Class) WHERE c.projectId = $projectId AND (c.name CONTAINS 'Repository' OR c.name ENDS WITH 'DAL') RETURN c\n`;
375
+ }
376
+ // Module queries
377
+ if (categories.module.length > 0) {
378
+ examples += `- "Find all modules" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [${formatTypes(categories.module)}] RETURN c\n`;
379
+ }
380
+ // Guard queries
381
+ if (categories.guard.length > 0) {
382
+ examples += `- "Find all guards" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.semanticType IN [${formatTypes(categories.guard)}] RETURN c\n`;
383
+ }
384
+ examples += `
385
+ FALLBACK PATTERNS (use when semantic types don't exist):
386
+ - For any component type, use name patterns: c.name CONTAINS 'TypeName' OR c.name ENDS WITH 'TypeName'
387
+ - Example: "Find UserController" -> MATCH (c:Class {name: 'UserController'}) WHERE c.projectId = $projectId RETURN c
388
+ `;
389
+ return examples;
390
+ }
391
+ async promptToQuery(userPrompt, projectId) {
392
+ const schemaContext = this.loadSchemaContext();
393
+ // Generate dynamic examples based on discovered semantic types
394
+ const dynamicSemanticExamples = this.cachedSemanticTypes
395
+ ? this.generateDynamicSemanticExamples(this.cachedSemanticTypes)
396
+ : '\nNo semantic types discovered. Use name patterns for all queries (e.g., c.name CONTAINS "Controller").\n';
97
397
  const prompt = `Please convert this request to a valid Neo4j Cypher query: ${userPrompt}.
98
- Use the Neo4j schema provided and follow the format specified in the instructions.
398
+
399
+ ${schemaContext}
400
+ ${dynamicSemanticExamples}
401
+ The query will be scoped to project: ${projectId}
402
+ Remember to include WHERE n.projectId = $projectId for all node patterns.
99
403
  `;
100
- console.log('Prompt:', prompt);
404
+ // SECURITY: Only log prompt length, not full content which may contain sensitive data
405
+ console.log(`NL-to-Cypher: Processing prompt (${prompt.length} chars) for project ${projectId}`);
101
406
  const run = await this.openai.beta.threads.createAndRunPoll({
102
407
  assistant_id: this.assistantId,
103
408
  thread: {
@@ -110,20 +415,28 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
110
415
  },
111
416
  });
112
417
  const threadId = run.thread_id;
113
- console.log(`Thread ID: ${threadId}`);
114
- console.log('Run status:', run.status);
115
- console.log('Required actions:', run.required_action);
116
- console.log('Last error:', run.last_error);
418
+ // SECURITY: Log minimal info, avoid exposing full objects that may contain sensitive data
419
+ console.log(`NL-to-Cypher: Thread ${threadId}, status: ${run.status}`);
117
420
  // Validate run completed successfully
118
421
  if (run.status !== 'completed') {
119
- console.error('Full run object:', JSON.stringify(run, null, 2));
422
+ // SECURITY: Only log status and error, not full run object which may contain sensitive data
423
+ console.error(`NL-to-Cypher run failed: status=${run.status}, error=${run.last_error?.message ?? 'none'}`);
120
424
  throw new Error(`Assistant run did not complete. Status: ${run.status}. ` +
121
425
  `Last error: ${run.last_error ? JSON.stringify(run.last_error) : 'none'}`);
122
426
  }
123
427
  const messages = await this.openai.beta.threads.messages.list(threadId);
124
428
  // Find the first text content in the latest message
125
429
  const latestMessage = messages.data[0];
126
- console.log('Latest message:', JSON.stringify(latestMessage, null, 2));
430
+ if (!latestMessage) {
431
+ throw new Error(`No messages returned from assistant. Run status: ${run.status}. Thread: ${threadId}. ` +
432
+ `This may occur if the assistant is still initializing. Try setting OPENAI_ASSISTANT_ID in .env.`);
433
+ }
434
+ // SECURITY: Don't log full message content which may contain user data
435
+ console.log(`NL-to-Cypher: Received message with ${latestMessage.content?.length ?? 0} content blocks`);
436
+ if (!latestMessage.content || latestMessage.content.length === 0) {
437
+ throw new Error(`Message has no content. Run status: ${run.status}. Thread: ${threadId}. ` +
438
+ `Message role: ${latestMessage.role}`);
439
+ }
127
440
  const textContent = latestMessage.content.find((content) => content.type === 'text');
128
441
  if (!textContent) {
129
442
  throw new Error(`No text content found in assistant response. Run status: ${run.status}`);
@@ -134,14 +447,100 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
134
447
  throw new Error(`Invalid text content structure in assistant response. Run status: ${run.status}. ` +
135
448
  `Text content: ${JSON.stringify(textContent)}`);
136
449
  }
137
- console.log('text value:', textValue);
138
- return JSON.parse(textValue);
450
+ // SECURITY: Don't log the full text value which may contain sensitive queries
451
+ console.log(`NL-to-Cypher: Parsing response (${textValue.length} chars)`);
452
+ // Parse the response with proper error handling
453
+ let result;
454
+ try {
455
+ result = JSON.parse(textValue);
456
+ }
457
+ catch (parseError) {
458
+ const message = parseError instanceof Error ? parseError.message : String(parseError);
459
+ throw new Error(`Failed to parse assistant response as JSON: ${message}. ` +
460
+ `Response preview: ${textValue.substring(0, 200)}...`);
461
+ }
462
+ // Validate that the generated Cypher contains projectId filters
463
+ this.validateProjectIdFilters(result.cypher);
464
+ // Validate that the query uses only valid node labels (not class names as labels)
465
+ this.validateLabelUsage(result.cypher);
466
+ return result;
467
+ }
468
+ /**
469
+ * Validates that the generated Cypher query contains projectId filters.
470
+ * This is a security measure to ensure project isolation is maintained
471
+ * even if the LLM forgets to include the filter.
472
+ *
473
+ * SECURITY: This validation ensures ALL node patterns in the query have projectId filters,
474
+ * preventing data leakage between projects.
475
+ */
476
+ validateProjectIdFilters(cypher) {
477
+ if (!cypher || typeof cypher !== 'string') {
478
+ throw new Error('Invalid Cypher query: query is empty or not a string');
479
+ }
480
+ // Check if the query contains any MATCH clauses
481
+ const matchPattern = /\bMATCH\s*\(/gi;
482
+ const matches = cypher.match(matchPattern);
483
+ if (matches && matches.length > 0) {
484
+ // SECURITY: Check that projectId filter exists and uses parameter binding
485
+ // We require $projectId to ensure parameterized queries (prevents injection)
486
+ const hasProjectIdParam = cypher.includes('$projectId');
487
+ const hasProjectIdFilter = cypher.includes('projectId') && hasProjectIdParam;
488
+ if (!hasProjectIdFilter) {
489
+ throw new Error('Generated Cypher query is missing projectId filter. ' +
490
+ 'All queries must include WHERE n.projectId = $projectId for project isolation. ' +
491
+ `Query: ${cypher}`);
492
+ }
493
+ // SECURITY: Additional validation - count MATCH patterns and ensure projectId appears enough times
494
+ // This catches queries like: MATCH (a:Class) MATCH (b:Method) WHERE a.projectId = $projectId
495
+ // where the second MATCH doesn't have a projectId filter
496
+ const matchCount = matches.length;
497
+ const projectIdOccurrences = (cypher.match(/\.projectId\s*=/gi) ?? []).length;
498
+ // Each MATCH pattern should ideally have a projectId filter
499
+ // We warn but don't fail if there's at least one filter (some queries use WITH to pass context)
500
+ if (projectIdOccurrences < matchCount) {
501
+ console.warn(`SECURITY WARNING: Query has ${matchCount} MATCH patterns but only ${projectIdOccurrences} projectId filters. ` +
502
+ 'Some patterns may not be properly isolated.');
503
+ }
504
+ }
139
505
  }
140
506
  /**
141
- * Create a new thread for a user
507
+ * Validates that the generated Cypher query uses only valid node labels.
508
+ * Class/service names should be matched via {name: 'ClassName'}, not as labels.
142
509
  */
143
- async createThread() {
144
- const thread = await this.openai.beta.threads.create();
145
- return thread.id;
510
+ validateLabelUsage(cypher) {
511
+ // Valid labels from the schema (actual Neo4j labels, not AST type names)
512
+ const validLabels = new Set([
513
+ 'SourceFile',
514
+ 'Class',
515
+ 'Method',
516
+ 'Function',
517
+ 'Property',
518
+ 'Interface',
519
+ 'Constructor',
520
+ 'Parameter',
521
+ 'Enum',
522
+ 'Variable',
523
+ 'Import',
524
+ 'Export',
525
+ 'Decorator',
526
+ ]);
527
+ // Extract all labels from query (matches :LabelName patterns in node definitions)
528
+ // This regex matches labels after : in patterns like (n:Label) or (:Label)
529
+ const labelPattern = /\(\s*\w*\s*:\s*([A-Z][a-zA-Z0-9]*)/g;
530
+ let match;
531
+ const invalidLabels = [];
532
+ while ((match = labelPattern.exec(cypher)) !== null) {
533
+ const label = match[1];
534
+ if (!validLabels.has(label)) {
535
+ invalidLabels.push(label);
536
+ }
537
+ }
538
+ if (invalidLabels.length > 0) {
539
+ const label = invalidLabels[0];
540
+ throw new Error(`Invalid label ":${label}" in query. ` +
541
+ `Class/service names should be matched via {name: '${label}'}, not as labels.\n` +
542
+ `Example: (n:Class {name: '${label}'}) instead of (n:${label})\n` +
543
+ `Query: ${cypher}`);
544
+ }
146
545
  }
147
546
  }
@@ -20,7 +20,7 @@ export class ParserFactory {
20
20
  */
21
21
  static createParser(options) {
22
22
  const { workspacePath, tsConfigPath = 'tsconfig.json', projectType = ProjectType.NESTJS, // Default to NestJS (use auto-detect for best results)
23
- customFrameworkSchemas = [], excludePatterns = EXCLUDE_PATTERNS_REGEX, excludedNodeTypes = [CoreNodeType.PARAMETER_DECLARATION], } = options;
23
+ customFrameworkSchemas = [], excludePatterns = EXCLUDE_PATTERNS_REGEX, excludedNodeTypes = [CoreNodeType.PARAMETER_DECLARATION], projectId, lazyLoad = false, } = options;
24
24
  // Select framework schemas based on project type
25
25
  const frameworkSchemas = this.selectFrameworkSchemas(projectType, customFrameworkSchemas);
26
26
  console.log(`📦 Creating parser for ${projectType} project`);
@@ -28,7 +28,7 @@ export class ParserFactory {
28
28
  return new TypeScriptParser(workspacePath, tsConfigPath, CORE_TYPESCRIPT_SCHEMA, frameworkSchemas, {
29
29
  excludePatterns,
30
30
  excludedNodeTypes,
31
- });
31
+ }, projectId, lazyLoad);
32
32
  }
33
33
  /**
34
34
  * Select framework schemas based on project type
@@ -91,13 +91,15 @@ export class ParserFactory {
91
91
  /**
92
92
  * Create parser with auto-detection
93
93
  */
94
- static async createParserWithAutoDetection(workspacePath, tsConfigPath) {
94
+ static async createParserWithAutoDetection(workspacePath, tsConfigPath, projectId, lazyLoad = false) {
95
95
  const projectType = await this.detectProjectType(workspacePath);
96
96
  console.log(`🔍 Auto-detected project type: ${projectType}`);
97
97
  return this.createParser({
98
98
  workspacePath,
99
99
  tsConfigPath,
100
100
  projectType,
101
+ projectId,
102
+ lazyLoad,
101
103
  });
102
104
  }
103
105
  }