code-graph-context 2.14.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/embeddings/natural-language-to-cypher.service.js +101 -229
- package/dist/mcp/constants.js +56 -228
- package/dist/mcp/handlers/swarm/abandon.handler.js +61 -0
- package/dist/mcp/handlers/swarm/advance.handler.js +78 -0
- package/dist/mcp/handlers/swarm/claim.handler.js +61 -0
- package/dist/mcp/handlers/swarm/index.js +5 -0
- package/dist/mcp/handlers/swarm/queries.js +140 -0
- package/dist/mcp/handlers/swarm/release.handler.js +41 -0
- package/dist/mcp/handlers/swarm-worker.handler.js +2 -13
- package/dist/mcp/tools/detect-dead-code.tool.js +33 -65
- package/dist/mcp/tools/detect-duplicate-code.tool.js +44 -53
- package/dist/mcp/tools/impact-analysis.tool.js +1 -1
- package/dist/mcp/tools/index.js +9 -9
- package/dist/mcp/tools/list-projects.tool.js +2 -2
- package/dist/mcp/tools/list-watchers.tool.js +2 -5
- package/dist/mcp/tools/natural-language-to-cypher.tool.js +2 -2
- package/dist/mcp/tools/parse-typescript-project.tool.js +7 -17
- package/dist/mcp/tools/search-codebase.tool.js +11 -26
- package/dist/mcp/tools/session-bookmark.tool.js +7 -11
- package/dist/mcp/tools/session-cleanup.tool.js +2 -6
- package/dist/mcp/tools/session-note.tool.js +6 -21
- package/dist/mcp/tools/session-recall.tool.js +293 -0
- package/dist/mcp/tools/session-save.tool.js +280 -0
- package/dist/mcp/tools/start-watch-project.tool.js +1 -1
- package/dist/mcp/tools/swarm-advance-task.tool.js +56 -0
- package/dist/mcp/tools/swarm-claim-task.tool.js +24 -388
- package/dist/mcp/tools/swarm-cleanup.tool.js +3 -7
- package/dist/mcp/tools/swarm-complete-task.tool.js +14 -17
- package/dist/mcp/tools/swarm-get-tasks.tool.js +8 -26
- package/dist/mcp/tools/swarm-message.tool.js +10 -25
- package/dist/mcp/tools/swarm-pheromone.tool.js +7 -25
- package/dist/mcp/tools/swarm-post-task.tool.js +7 -19
- package/dist/mcp/tools/swarm-release-task.tool.js +53 -0
- package/dist/mcp/tools/swarm-sense.tool.js +10 -30
- package/dist/mcp/tools/traverse-from-node.tool.js +19 -41
- package/dist/mcp/utils.js +41 -1
- package/package.json +1 -1
|
@@ -7,148 +7,31 @@ export class NaturalLanguageToCypherService {
|
|
|
7
7
|
MODEL = 'gpt-4o'; // GPT-4o for better Cypher generation accuracy
|
|
8
8
|
schemaPath = null;
|
|
9
9
|
cachedSemanticTypes = null;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
- semanticTypes: Framework-specific classifications with which label they appear on
|
|
16
|
-
- commonPatterns: Relationship patterns between node types with counts
|
|
17
|
-
|
|
18
|
-
=== VALID NODE LABELS ===
|
|
19
|
-
Use ONLY labels found in nodeTypes[].label. Labels fall into two categories:
|
|
20
|
-
|
|
21
|
-
1. CORE LABELS (base TypeScript AST):
|
|
22
|
-
SourceFile, Class, Function, Method, Interface, Property, Parameter, Constructor, Import, Export, Decorator, Enum, Variable, TypeAlias
|
|
23
|
-
|
|
24
|
-
2. FRAMEWORK LABELS (from framework enhancements - check nodeTypes):
|
|
25
|
-
These REPLACE the core label for enhanced nodes. A node with a framework label was originally a Class but got enhanced.
|
|
26
|
-
|
|
27
|
-
=== AST TYPE NAME MAPPING ===
|
|
28
|
-
AST type names are NOT valid labels. Always map them:
|
|
29
|
-
- ClassDeclaration → Class (or a framework label if enhanced)
|
|
30
|
-
- FunctionDeclaration → Function
|
|
31
|
-
- MethodDeclaration → Method
|
|
32
|
-
- InterfaceDeclaration → Interface
|
|
33
|
-
- PropertyDeclaration → Property
|
|
34
|
-
- ParameterDeclaration → Parameter
|
|
35
|
-
|
|
36
|
-
=== FINDING SPECIFIC NODES ===
|
|
37
|
-
Class/entity names are property values, NOT labels:
|
|
38
|
-
WRONG: (n:MyClassName) - using class names as labels
|
|
39
|
-
CORRECT: (n:Class {name: 'MyClassName'}) - use label from nodeTypes, name as property
|
|
40
|
-
|
|
41
|
-
Examples:
|
|
42
|
-
- "Count all classes" -> MATCH (n:Class) WHERE n.projectId = $projectId RETURN count(n)
|
|
43
|
-
- "Find class by name" -> MATCH (n:Class {name: 'ClassName'}) WHERE n.projectId = $projectId RETURN n
|
|
44
|
-
- "Methods in a class" -> MATCH (c:Class {name: 'ClassName'})-[:HAS_MEMBER]->(m:Method) WHERE c.projectId = $projectId RETURN m
|
|
45
|
-
|
|
46
|
-
=== PROJECT ISOLATION (REQUIRED) ===
|
|
47
|
-
ALL queries MUST filter by projectId on every node pattern:
|
|
48
|
-
WHERE n.projectId = $projectId
|
|
49
|
-
|
|
50
|
-
=== RESPONSE FORMAT ===
|
|
51
|
-
Return ONLY valid JSON:
|
|
52
|
-
{
|
|
53
|
-
"cypher": "MATCH (n:Label) WHERE n.projectId = $projectId RETURN n",
|
|
54
|
-
"parameters": { "param": "value" } | null,
|
|
55
|
-
"explanation": "What this query does"
|
|
56
|
-
}
|
|
57
|
-
Do NOT include projectId in parameters - it's injected automatically.
|
|
58
|
-
|
|
59
|
-
Query Generation Process - FOLLOW THIS EXACTLY:
|
|
60
|
-
1. SEARCH THE SCHEMA FILE FIRST: Use file_search to read neo4j-apoc-schema.json BEFORE generating any query
|
|
61
|
-
2. EXTRACT VALID LABELS: nodeTypes[].label contains all valid labels. nodeTypes[].properties lists available property keys per label.
|
|
62
|
-
3. CHECK RELATIONSHIPS: relationshipTypes[].type lists all relationship types. Each entry includes connections[] showing which node types they connect (from → to).
|
|
63
|
-
4. CHECK SEMANTIC TYPES: semanticTypes[].type lists framework classifications. Each entry includes label showing which node type it appears on.
|
|
64
|
-
- semanticTypes are PROPERTY values stored in n.semanticType, NOT labels
|
|
65
|
-
5. REVIEW PATTERNS: commonPatterns[] shows from→relationship→to triples with counts
|
|
66
|
-
6. GENERATE QUERY: Write the Cypher query using ONLY labels, relationships, and properties from the schema
|
|
67
|
-
7. VALIDATE LABELS: Double-check that every label in your query exists in nodeTypes
|
|
68
|
-
8. ADD PROJECT FILTER: Always include WHERE n.projectId = $projectId for every node pattern in the query
|
|
69
|
-
|
|
70
|
-
Critical Rules:
|
|
71
|
-
- ALWAYS filter by projectId on every node in the query (e.g., WHERE n.projectId = $projectId)
|
|
72
|
-
- Use the schema information from the file_search tool - do not guess node labels or relationships
|
|
73
|
-
- Use ONLY node labels and relationships found in the schema
|
|
74
|
-
- For nested JSON data in properties, use: apoc.convert.fromJsonMap(node.propertyName)
|
|
75
|
-
- Use parameterized queries with $ syntax for any dynamic values
|
|
76
|
-
- Return only the data relevant to the user's request
|
|
77
|
-
|
|
78
|
-
=== CORE RELATIONSHIPS ===
|
|
79
|
-
- CONTAINS: SourceFile contains declarations (use for "in file", "declared in", "defined in")
|
|
80
|
-
- HAS_MEMBER: Class/Interface has methods/properties (use for "has method", "contains property", "members")
|
|
81
|
-
- HAS_PARAMETER: Method/Function has parameters (use for "takes parameter", "accepts")
|
|
82
|
-
- EXTENDS: Class/Interface extends parent (use for "extends", "inherits from", "parent class", "subclass")
|
|
83
|
-
- IMPLEMENTS: Class implements Interface (use for "implements", "conforms to")
|
|
84
|
-
- IMPORTS: SourceFile imports another (use for "imports", "depends on", "requires")
|
|
85
|
-
- TYPED_AS: Parameter/Property has type annotation (use for "typed as", "has type", "returns")
|
|
86
|
-
- CALLS: Method/Function calls another (use for "calls", "invokes", "uses")
|
|
87
|
-
- DECORATED_WITH: Node has a Decorator (use for "decorated with", "has decorator", "@SomeDecorator")
|
|
88
|
-
|
|
89
|
-
=== FRAMEWORK RELATIONSHIPS ===
|
|
90
|
-
Check relationshipTypes and commonPatterns in the schema file for framework-specific relationships:
|
|
91
|
-
- What relationship types exist (e.g., INJECTS, EXPOSES, MODULE_IMPORTS, INTERNAL_API_CALL, etc.)
|
|
92
|
-
- commonPatterns shows which node types they connect and how frequently
|
|
93
|
-
- These vary by project - ALWAYS check the schema file for available relationships
|
|
94
|
-
|
|
95
|
-
CRITICAL: Do NOT confuse EXTENDS (inheritance) with HAS_MEMBER (composition). "extends" always means EXTENDS relationship.
|
|
96
|
-
|
|
97
|
-
EXTENDS DIRECTION - CRITICAL:
|
|
98
|
-
The arrow points FROM child TO parent. The child "extends" toward the parent.
|
|
99
|
-
- CORRECT: (child:Class)-[:EXTENDS]->(parent:Class {name: 'ParentClassName'})
|
|
100
|
-
- WRONG: (parent:Class {name: 'ParentClassName'})-[:EXTENDS]->(child:Class)
|
|
101
|
-
|
|
102
|
-
Examples:
|
|
103
|
-
- "Classes extending X" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'X'}) WHERE c.projectId = $projectId RETURN c
|
|
104
|
-
- "What extends Y" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'Y'}) WHERE c.projectId = $projectId RETURN c
|
|
105
|
-
|
|
106
|
-
=== SEMANTIC TYPES (Framework Classifications) - PRIMARY QUERY METHOD ===
|
|
107
|
-
*** MOST QUERIES SHOULD USE SEMANTIC TYPES - CHECK semanticTypes FIRST ***
|
|
108
|
-
|
|
109
|
-
Semantic types are the PRIMARY way to find framework-specific nodes:
|
|
110
|
-
semanticTypes[].type -> semantic type value
|
|
111
|
-
semanticTypes[].label -> which node label this type appears on
|
|
112
|
-
|
|
113
|
-
The semanticType is a PROPERTY on nodes, not a label. Query patterns:
|
|
114
|
-
- EXACT MATCH: MATCH (c) WHERE c.projectId = $projectId AND c.semanticType = 'ExactTypeFromSchema' RETURN c
|
|
115
|
-
- PARTIAL MATCH: MATCH (c) WHERE c.projectId = $projectId AND c.semanticType CONTAINS 'Pattern' RETURN c
|
|
116
|
-
|
|
117
|
-
FALLBACK - If semantic type doesn't exist, use name patterns:
|
|
118
|
-
- "Find all controllers" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Controller' RETURN c
|
|
119
|
-
- "Find all services" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Service' RETURN c
|
|
120
|
-
|
|
121
|
-
=== DECORATOR QUERIES ===
|
|
122
|
-
Use DECORATED_WITH relationship to find nodes with specific decorators:
|
|
123
|
-
- "Classes with @X" -> MATCH (c:Class)-[:DECORATED_WITH]->(d:Decorator {name: 'X'}) WHERE c.projectId = $projectId RETURN c
|
|
124
|
-
- "Methods with @Y" -> MATCH (m:Method)-[:DECORATED_WITH]->(d:Decorator {name: 'Y'}) WHERE m.projectId = $projectId RETURN m
|
|
125
|
-
|
|
126
|
-
=== MODULE/DIRECTORY QUERIES ===
|
|
127
|
-
Use filePath property for location-based queries:
|
|
128
|
-
- "in account module" -> WHERE n.filePath CONTAINS '/account/'
|
|
129
|
-
- "in auth folder" -> WHERE n.filePath CONTAINS '/auth/'
|
|
130
|
-
|
|
131
|
-
=== FRAMEWORK-SPECIFIC PATTERNS ===
|
|
10
|
+
/**
|
|
11
|
+
* System instructions for the assistant (set once at creation time).
|
|
12
|
+
* Kept focused on Cypher rules and output format — schema data is injected per-query.
|
|
13
|
+
*/
|
|
14
|
+
assistantInstructions = `You are a Neo4j Cypher query generator. You receive a schema and a natural language request, and you return a single JSON object. No prose, no markdown, no explanation outside the JSON.
|
|
132
15
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
- Use framework relationships from relationshipTypes and commonPatterns
|
|
136
|
-
- Check semanticTypes for framework classifications
|
|
16
|
+
OUTPUT FORMAT (strict):
|
|
17
|
+
{"cypher": "...", "parameters": null, "explanation": "..."}
|
|
137
18
|
|
|
138
|
-
|
|
139
|
-
-
|
|
140
|
-
-
|
|
141
|
-
- Example: "Find UserProfile component" -> MATCH (f:Function {name: 'UserProfile'}) WHERE f.projectId = $projectId RETURN f
|
|
19
|
+
- "cypher": valid Neo4j Cypher query
|
|
20
|
+
- "parameters": object of extra parameters or null (NEVER include projectId — it is injected automatically)
|
|
21
|
+
- "explanation": one sentence describing what the query does
|
|
142
22
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
23
|
+
RULES:
|
|
24
|
+
1. ALL node patterns MUST include: WHERE n.projectId = $projectId
|
|
25
|
+
2. Use ONLY node labels listed in the schema's nodeTypes[].label
|
|
26
|
+
3. Entity names are PROPERTY values, NOT labels: (n:Class {name: 'MyService'}) not (n:MyService)
|
|
27
|
+
4. AST type names are NOT labels: ClassDeclaration → Class, MethodDeclaration → Method, InterfaceDeclaration → Interface, FunctionDeclaration → Function, PropertyDeclaration → Property, ParameterDeclaration → Parameter
|
|
28
|
+
5. semanticType is a PROPERTY, not a label: WHERE n.semanticType = 'NestController'
|
|
29
|
+
6. EXTENDS direction: child → parent. (child:Class)-[:EXTENDS]->(parent:Class)
|
|
30
|
+
7. Cypher has no GROUP BY — aggregation is automatic in RETURN
|
|
31
|
+
8. Use $-prefixed parameters for dynamic values
|
|
150
32
|
|
|
151
|
-
|
|
33
|
+
CORE RELATIONSHIPS:
|
|
34
|
+
CONTAINS (file→declaration), HAS_MEMBER (class→method/property), HAS_PARAMETER (method→param), EXTENDS (child→parent), IMPLEMENTS (class→interface), IMPORTS (file→file), TYPED_AS (node→type), CALLS (caller→callee), DECORATED_WITH (node→decorator)
|
|
152
35
|
`;
|
|
153
36
|
constructor() {
|
|
154
37
|
const apiKey = process.env.OPENAI_API_KEY;
|
|
@@ -163,120 +46,77 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
|
|
|
163
46
|
});
|
|
164
47
|
}
|
|
165
48
|
async getOrCreateAssistant(schemaPath) {
|
|
166
|
-
// Store schema path for later use
|
|
49
|
+
// Store schema path for later use — schema is injected directly into each prompt
|
|
167
50
|
this.schemaPath = schemaPath;
|
|
168
51
|
if (process.env.OPENAI_ASSISTANT_ID) {
|
|
169
52
|
this.assistantId = process.env.OPENAI_ASSISTANT_ID;
|
|
170
53
|
console.error(`Using existing assistant with ID: ${this.assistantId}`);
|
|
171
54
|
return this.assistantId;
|
|
172
55
|
}
|
|
173
|
-
const
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
});
|
|
177
|
-
// Create a vector store for the schema file
|
|
178
|
-
const vectorStore = await this.openai.vectorStores.create({
|
|
179
|
-
name: 'Neo4j APOC Schema Vector Store',
|
|
180
|
-
file_ids: [schemaFile.id],
|
|
181
|
-
metadata: { type: 'neo4j_apoc_schema' },
|
|
182
|
-
});
|
|
183
|
-
const vectorStoreId = vectorStore.id;
|
|
184
|
-
// Create a new assistant
|
|
185
|
-
const assistantConfig = {
|
|
186
|
-
name: 'Neo4j Cypher Query Agent',
|
|
187
|
-
description: 'An agent that helps convert natural language to Neo4j Cypher queries',
|
|
56
|
+
const assistant = await this.openai.beta.assistants.create({
|
|
57
|
+
name: 'Neo4j Cypher Query Generator',
|
|
58
|
+
description: 'Converts natural language to Neo4j Cypher queries. Returns JSON only.',
|
|
188
59
|
model: this.MODEL,
|
|
189
|
-
instructions:
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
`,
|
|
195
|
-
tools: [
|
|
196
|
-
{
|
|
197
|
-
type: 'code_interpreter',
|
|
198
|
-
},
|
|
199
|
-
{
|
|
200
|
-
type: 'file_search',
|
|
201
|
-
},
|
|
202
|
-
],
|
|
203
|
-
tool_resources: {
|
|
204
|
-
code_interpreter: {
|
|
205
|
-
file_ids: [schemaFile.id],
|
|
206
|
-
},
|
|
207
|
-
file_search: {
|
|
208
|
-
vector_store_ids: [vectorStoreId],
|
|
209
|
-
},
|
|
210
|
-
},
|
|
211
|
-
};
|
|
212
|
-
const assistant = await this.openai.beta.assistants.create(assistantConfig);
|
|
60
|
+
instructions: this.assistantInstructions,
|
|
61
|
+
response_format: { type: 'json_object' },
|
|
62
|
+
// No tools — schema is injected directly into each message
|
|
63
|
+
tools: [],
|
|
64
|
+
});
|
|
213
65
|
this.assistantId = assistant.id;
|
|
66
|
+
console.error(`Created assistant with ID: ${this.assistantId}`);
|
|
214
67
|
return this.assistantId;
|
|
215
68
|
}
|
|
216
69
|
/**
|
|
217
|
-
* Load and format
|
|
218
|
-
* This
|
|
70
|
+
* Load the schema and format it for direct injection into the user message.
|
|
71
|
+
* This is the ONLY way the LLM sees the schema — no file_search.
|
|
219
72
|
*/
|
|
220
73
|
loadSchemaContext() {
|
|
221
74
|
if (!this.schemaPath) {
|
|
222
|
-
return 'No schema available.
|
|
75
|
+
return 'No schema available.';
|
|
223
76
|
}
|
|
224
77
|
try {
|
|
225
78
|
const content = fs.readFileSync(this.schemaPath, 'utf-8');
|
|
226
79
|
const schema = JSON.parse(content);
|
|
227
|
-
if (!schema
|
|
80
|
+
if (!schema?.nodeTypes) {
|
|
228
81
|
return 'No schema available.';
|
|
229
82
|
}
|
|
230
|
-
// Format node types
|
|
231
|
-
const
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
83
|
+
// Format node types with properties
|
|
84
|
+
const nodeTypeLines = schema.nodeTypes
|
|
85
|
+
?.map((n) => ` ${n.label} (${n.count} nodes) — properties: ${(n.properties ?? []).join(', ')}`)
|
|
86
|
+
.join('\n') ?? 'none';
|
|
87
|
+
// Format relationship types with connection patterns
|
|
88
|
+
const relTypeLines = schema.relationshipTypes
|
|
89
|
+
?.map((r) => {
|
|
90
|
+
const conns = (r.connections ?? []).map((c) => `${c.from}→${c.to}`).join(', ');
|
|
91
|
+
return ` ${r.type} (${r.count}) — ${conns}`;
|
|
92
|
+
})
|
|
93
|
+
.join('\n') ?? 'none';
|
|
94
|
+
// Format semantic types
|
|
239
95
|
const semanticTypeList = schema.semanticTypes?.map((s) => s.type) ?? [];
|
|
240
|
-
const
|
|
96
|
+
const semTypeLines = schema.semanticTypes?.map((s) => ` ${s.type} (on ${s.label}, ${s.count} nodes)`).join('\n') ?? 'none';
|
|
97
|
+
// Format common patterns
|
|
98
|
+
const patternLines = schema.commonPatterns
|
|
99
|
+
?.map((p) => ` (${p.from})-[:${p.relationship}]->(${p.to}) × ${p.count}`)
|
|
100
|
+
.join('\n') ?? 'none';
|
|
241
101
|
// Cache categorized semantic types for dynamic example generation
|
|
242
102
|
this.cachedSemanticTypes = this.categorizeSemanticTypes(semanticTypeList);
|
|
243
|
-
|
|
244
|
-
let frameworkHint = '';
|
|
245
|
-
if (decoratorCount > 10 && classCount > functionCount) {
|
|
246
|
-
// Use discovered semantic types instead of assuming NestJS
|
|
247
|
-
const sampleType = this.cachedSemanticTypes?.controller[0] ?? this.cachedSemanticTypes?.service[0] ?? 'YourSemanticType';
|
|
248
|
-
frameworkHint = `\nFRAMEWORK DETECTED: Decorator-based codebase. Use Class nodes with semanticType property (e.g., semanticType = "${sampleType}").`;
|
|
249
|
-
}
|
|
250
|
-
else if (functionCount > classCount) {
|
|
251
|
-
frameworkHint = '\nFRAMEWORK DETECTED: React/functional codebase. Use Function nodes for components.';
|
|
252
|
-
}
|
|
253
|
-
return `
|
|
254
|
-
=== VALID NODE LABELS (use ONLY these after the colon) ===
|
|
255
|
-
${nodeTypes}
|
|
103
|
+
return `SCHEMA:
|
|
256
104
|
|
|
257
|
-
|
|
258
|
-
${
|
|
105
|
+
NODE LABELS (use ONLY these):
|
|
106
|
+
${nodeTypeLines}
|
|
259
107
|
|
|
260
|
-
|
|
261
|
-
|
|
108
|
+
RELATIONSHIP TYPES:
|
|
109
|
+
${relTypeLines}
|
|
262
110
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
Example: MATCH (n:Class) WHERE n.projectId = $projectId AND n.semanticType = '${semanticTypeList[0] ?? 'SemanticType'}' RETURN n
|
|
266
|
-
${frameworkHint}
|
|
111
|
+
SEMANTIC TYPES (query via WHERE n.semanticType = 'value'):
|
|
112
|
+
${semTypeLines}
|
|
267
113
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
2. Semantic types are PROPERTY values, NOT labels - use WHERE n.semanticType = 'Type'
|
|
271
|
-
3. Class/entity names are PROPERTY values, NOT labels - use WHERE n.name = 'Name'
|
|
272
|
-
4. WRONG: (n:ClassName) - using names as labels
|
|
273
|
-
5. CORRECT: (n:Class {name: 'ClassName'}) or (n:LabelFromSchema {name: 'Name'})
|
|
274
|
-
6. CORRECT: (n:Class) WHERE n.semanticType = 'TypeFromSemanticTypesList'
|
|
275
|
-
`.trim();
|
|
114
|
+
COMMON PATTERNS:
|
|
115
|
+
${patternLines}`;
|
|
276
116
|
}
|
|
277
117
|
catch (error) {
|
|
278
118
|
console.warn('Failed to load schema for prompt injection:', error);
|
|
279
|
-
return 'Schema load failed.
|
|
119
|
+
return 'Schema load failed.';
|
|
280
120
|
}
|
|
281
121
|
}
|
|
282
122
|
/**
|
|
@@ -380,14 +220,14 @@ FALLBACK PATTERNS (use when semantic types don't exist):
|
|
|
380
220
|
// Generate dynamic examples based on discovered semantic types
|
|
381
221
|
const dynamicSemanticExamples = this.cachedSemanticTypes
|
|
382
222
|
? this.generateDynamicSemanticExamples(this.cachedSemanticTypes)
|
|
383
|
-
: '
|
|
384
|
-
const prompt = `
|
|
223
|
+
: '';
|
|
224
|
+
const prompt = `Convert to Cypher: ${userPrompt}
|
|
385
225
|
|
|
386
226
|
${schemaContext}
|
|
387
227
|
${dynamicSemanticExamples}
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
`;
|
|
228
|
+
Project: ${projectId} — add WHERE n.projectId = $projectId on every node pattern.
|
|
229
|
+
|
|
230
|
+
Respond with ONLY a JSON object: {"cypher": "...", "parameters": null, "explanation": "..."}`;
|
|
391
231
|
// SECURITY: Only log prompt length, not full content which may contain sensitive data
|
|
392
232
|
console.error(`NL-to-Cypher: Processing prompt (${prompt.length} chars) for project ${projectId}`);
|
|
393
233
|
const run = await this.openai.beta.threads.createAndRunPoll({
|
|
@@ -436,10 +276,10 @@ Remember to include WHERE n.projectId = $projectId for all node patterns.
|
|
|
436
276
|
}
|
|
437
277
|
// SECURITY: Don't log the full text value which may contain sensitive queries
|
|
438
278
|
console.error(`NL-to-Cypher: Parsing response (${textValue.length} chars)`);
|
|
439
|
-
//
|
|
279
|
+
// Extract JSON from the LLM response, handling markdown fences and prose preamble
|
|
440
280
|
let result;
|
|
441
281
|
try {
|
|
442
|
-
result = JSON.parse(textValue);
|
|
282
|
+
result = JSON.parse(this.extractJson(textValue));
|
|
443
283
|
}
|
|
444
284
|
catch (parseError) {
|
|
445
285
|
const message = parseError instanceof Error ? parseError.message : String(parseError);
|
|
@@ -452,6 +292,38 @@ Remember to include WHERE n.projectId = $projectId for all node patterns.
|
|
|
452
292
|
this.validateLabelUsage(result.cypher);
|
|
453
293
|
return result;
|
|
454
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Extracts JSON from an LLM response that may contain markdown fences or prose preamble.
|
|
297
|
+
* Tries in order: raw parse, markdown fence extraction, first `{...}` block extraction.
|
|
298
|
+
*/
|
|
299
|
+
extractJson(text) {
|
|
300
|
+
const trimmed = text.trim();
|
|
301
|
+
// 1. Already valid JSON — return as-is
|
|
302
|
+
if (trimmed.startsWith('{')) {
|
|
303
|
+
return trimmed;
|
|
304
|
+
}
|
|
305
|
+
// 2. Wrapped in markdown code fences: ```json ... ``` or ``` ... ```
|
|
306
|
+
const fenceMatch = trimmed.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
|
|
307
|
+
if (fenceMatch) {
|
|
308
|
+
return fenceMatch[1].trim();
|
|
309
|
+
}
|
|
310
|
+
// 3. JSON object embedded in prose — find the first top-level { ... }
|
|
311
|
+
const startIdx = trimmed.indexOf('{');
|
|
312
|
+
if (startIdx !== -1) {
|
|
313
|
+
let depth = 0;
|
|
314
|
+
for (let i = startIdx; i < trimmed.length; i++) {
|
|
315
|
+
if (trimmed[i] === '{')
|
|
316
|
+
depth++;
|
|
317
|
+
else if (trimmed[i] === '}')
|
|
318
|
+
depth--;
|
|
319
|
+
if (depth === 0) {
|
|
320
|
+
return trimmed.substring(startIdx, i + 1);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
// 4. Give up — return original text so JSON.parse produces a useful error
|
|
325
|
+
return trimmed;
|
|
326
|
+
}
|
|
455
327
|
/**
|
|
456
328
|
* Validates that the generated Cypher query contains projectId filters.
|
|
457
329
|
* This is a security measure to ensure project isolation is maintained
|