code-graph-context 2.13.3 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/cli/cli.js +3 -3
- package/dist/core/embeddings/embedding-sidecar.js +1 -1
- package/dist/core/embeddings/embeddings.service.js +18 -5
- package/dist/core/embeddings/natural-language-to-cypher.service.js +39 -74
- package/dist/core/embeddings/openai-embeddings.service.js +1 -1
- package/dist/core/utils/file-utils.js +15 -1
- package/dist/mcp/constants.js +1 -1
- package/dist/mcp/service-init.js +31 -23
- package/dist/mcp/tools/natural-language-to-cypher.tool.js +2 -2
- package/dist/storage/neo4j/neo4j.service.js +20 -45
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -162,7 +162,7 @@ If you prefer to edit the config files directly:
|
|
|
162
162
|
"code-graph-context": {
|
|
163
163
|
"command": "code-graph-context",
|
|
164
164
|
"env": {
|
|
165
|
-
"
|
|
165
|
+
"OPENAI_EMBEDDINGS_ENABLED": "true",
|
|
166
166
|
"OPENAI_API_KEY": "sk-your-key-here"
|
|
167
167
|
}
|
|
168
168
|
}
|
|
@@ -194,8 +194,8 @@ If you prefer to edit the config files directly:
|
|
|
194
194
|
| `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
|
|
195
195
|
| `EMBEDDING_DEVICE` | No | auto (`mps`/`cpu`) | Device for embeddings. Auto-detects MPS on Apple Silicon |
|
|
196
196
|
| `EMBEDDING_HALF_PRECISION` | No | `false` | Set `true` for float16 (uses ~0.5x memory) |
|
|
197
|
-
| `
|
|
198
|
-
| `OPENAI_API_KEY` | No* | - | Required when `
|
|
197
|
+
| `OPENAI_EMBEDDINGS_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local embeddings |
|
|
198
|
+
| `OPENAI_API_KEY` | No* | - | Required when `OPENAI_EMBEDDINGS_ENABLED=true`; also enables `natural_language_to_cypher` |
|
|
199
199
|
|
|
200
200
|
---
|
|
201
201
|
|
|
@@ -582,7 +582,7 @@ If you prefer OpenAI embeddings (higher quality, requires API key):
|
|
|
582
582
|
|
|
583
583
|
```bash
|
|
584
584
|
claude mcp add --scope user code-graph-context \
|
|
585
|
-
-e
|
|
585
|
+
-e OPENAI_EMBEDDINGS_ENABLED=true \
|
|
586
586
|
-e OPENAI_API_KEY=sk-your-key-here \
|
|
587
587
|
-- code-graph-context
|
|
588
588
|
```
|
|
@@ -626,7 +626,7 @@ claude mcp add --scope user code-graph-context \
|
|
|
626
626
|
```bash
|
|
627
627
|
claude mcp remove code-graph-context
|
|
628
628
|
claude mcp add --scope user code-graph-context \
|
|
629
|
-
-e
|
|
629
|
+
-e OPENAI_EMBEDDINGS_ENABLED=true \
|
|
630
630
|
-e OPENAI_API_KEY=sk-your-key-here \
|
|
631
631
|
-- code-graph-context
|
|
632
632
|
```
|
package/dist/cli/cli.js
CHANGED
|
@@ -82,7 +82,7 @@ ${c.bold}Next steps:${c.reset}
|
|
|
82
82
|
|
|
83
83
|
${c.dim}Local embeddings are used by default (no API key needed).
|
|
84
84
|
To use OpenAI instead, add:
|
|
85
|
-
"
|
|
85
|
+
"OPENAI_EMBEDDINGS_ENABLED": "true",
|
|
86
86
|
"OPENAI_API_KEY": "sk-..."${c.reset}
|
|
87
87
|
|
|
88
88
|
3. Restart Claude Code
|
|
@@ -199,7 +199,7 @@ const setupSidecar = async () => {
|
|
|
199
199
|
if (!pythonVersion) {
|
|
200
200
|
log(sym.err, 'Python 3 is not installed');
|
|
201
201
|
console.log(`\n Install Python 3.10+: ${c.cyan}https://www.python.org/downloads/${c.reset}`);
|
|
202
|
-
console.log(` ${c.dim}Or use OpenAI embeddings instead: set
|
|
202
|
+
console.log(` ${c.dim}Or use OpenAI embeddings instead: set OPENAI_EMBEDDINGS_ENABLED=true${c.reset}\n`);
|
|
203
203
|
return;
|
|
204
204
|
}
|
|
205
205
|
log(sym.ok, `${pythonVersion}`);
|
|
@@ -250,7 +250,7 @@ const setupSidecar = async () => {
|
|
|
250
250
|
verifySpinner.stop(verified, verified ? 'sentence-transformers OK' : 'sentence-transformers import failed');
|
|
251
251
|
if (!verified) {
|
|
252
252
|
console.log(`\n ${c.dim}Try: ${python} -c "from sentence_transformers import SentenceTransformer"${c.reset}`);
|
|
253
|
-
console.log(` ${c.dim}Or use OpenAI embeddings instead: set
|
|
253
|
+
console.log(` ${c.dim}Or use OpenAI embeddings instead: set OPENAI_EMBEDDINGS_ENABLED=true${c.reset}\n`);
|
|
254
254
|
return;
|
|
255
255
|
}
|
|
256
256
|
// Pre-download the embedding model so first real use is fast
|
|
@@ -160,7 +160,7 @@ export class EmbeddingSidecar {
|
|
|
160
160
|
reject(new Error('python3 not found. Local embeddings require Python 3.10+.\n\n' +
|
|
161
161
|
'Install Python and the sidecar dependencies:\n' +
|
|
162
162
|
' pip install -r sidecar/requirements.txt\n\n' +
|
|
163
|
-
'Or set
|
|
163
|
+
'Or set OPENAI_EMBEDDINGS_ENABLED=true to use OpenAI instead.'));
|
|
164
164
|
});
|
|
165
165
|
check.on('close', (code) => {
|
|
166
166
|
if (code !== 0) {
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
* Embeddings Service — barrel module
|
|
3
3
|
*
|
|
4
4
|
* Exports a common interface and a factory. Consumers do `new EmbeddingsService()`
|
|
5
|
-
* and get the right implementation based on
|
|
5
|
+
* and get the right implementation based on OPENAI_EMBEDDINGS_ENABLED.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
* default
|
|
7
|
+
* OPENAI_EMBEDDINGS_ENABLED=true → OpenAI text-embedding-3-large (requires OPENAI_API_KEY)
|
|
8
|
+
* default → Local Python sidecar with Qwen3-Embedding-0.6B
|
|
9
9
|
*/
|
|
10
10
|
import { LocalEmbeddingsService } from './local-embeddings.service.js';
|
|
11
11
|
import { OpenAIEmbeddingsService } from './openai-embeddings.service.js';
|
|
@@ -33,8 +33,21 @@ export const EMBEDDING_DIMENSIONS = {
|
|
|
33
33
|
'nomic-ai/nomic-embed-text-v1.5': 768,
|
|
34
34
|
};
|
|
35
35
|
export const isOpenAIEnabled = () => {
|
|
36
|
-
|
|
36
|
+
if (process.env.OPENAI_EMBEDDINGS_ENABLED?.toLowerCase() === 'true') {
|
|
37
|
+
return true;
|
|
38
|
+
}
|
|
39
|
+
// Backward-compat: OPENAI_ENABLED is deprecated in favour of OPENAI_EMBEDDINGS_ENABLED
|
|
40
|
+
if (process.env.OPENAI_ENABLED?.toLowerCase() === 'true') {
|
|
41
|
+
console.error(JSON.stringify({
|
|
42
|
+
level: 'warn',
|
|
43
|
+
message: '[code-graph-context] OPENAI_ENABLED is deprecated. Use OPENAI_EMBEDDINGS_ENABLED=true instead.',
|
|
44
|
+
}));
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
47
|
+
return false;
|
|
37
48
|
};
|
|
49
|
+
/** Returns true when OPENAI_API_KEY is present, regardless of embedding provider. */
|
|
50
|
+
export const isOpenAIAvailable = () => !!process.env.OPENAI_API_KEY;
|
|
38
51
|
/**
|
|
39
52
|
* Get the vector dimensions for the active embedding provider.
|
|
40
53
|
* For known models, returns a static value. For unknown local models,
|
|
@@ -50,7 +63,7 @@ export const getEmbeddingDimensions = () => {
|
|
|
50
63
|
return EMBEDDING_DIMENSIONS[model] ?? 1536;
|
|
51
64
|
};
|
|
52
65
|
/**
|
|
53
|
-
* Factory that returns the correct service based on
|
|
66
|
+
* Factory that returns the correct service based on OPENAI_EMBEDDINGS_ENABLED.
|
|
54
67
|
* Drop-in replacement everywhere `new EmbeddingsService()` was used.
|
|
55
68
|
*/
|
|
56
69
|
export class EmbeddingsService {
|
|
@@ -10,22 +10,23 @@ export class NaturalLanguageToCypherService {
|
|
|
10
10
|
messageInstructions = `
|
|
11
11
|
=== THE SCHEMA FILE IS THE SOURCE OF TRUTH ===
|
|
12
12
|
ALWAYS read neo4j-apoc-schema.json FIRST before generating any query. It contains:
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
- nodeTypes: All node labels with counts and property keys
|
|
14
|
+
- relationshipTypes: All relationship types with counts and connection patterns (from → to)
|
|
15
|
+
- semanticTypes: Framework-specific classifications with which label they appear on
|
|
16
|
+
- commonPatterns: Relationship patterns between node types with counts
|
|
15
17
|
|
|
16
|
-
===
|
|
17
|
-
|
|
18
|
+
=== VALID NODE LABELS ===
|
|
19
|
+
Use ONLY labels found in nodeTypes[].label. Labels fall into two categories:
|
|
18
20
|
|
|
19
21
|
1. CORE LABELS (base TypeScript AST):
|
|
20
22
|
SourceFile, Class, Function, Method, Interface, Property, Parameter, Constructor, Import, Export, Decorator, Enum, Variable, TypeAlias
|
|
21
23
|
|
|
22
|
-
2. FRAMEWORK LABELS (from framework enhancements - check
|
|
23
|
-
These REPLACE the core label for enhanced nodes.
|
|
24
|
-
A node with a framework label was originally a Class but got enhanced - always use the actual label from rawSchema.
|
|
24
|
+
2. FRAMEWORK LABELS (from framework enhancements - check nodeTypes):
|
|
25
|
+
These REPLACE the core label for enhanced nodes. A node with a framework label was originally a Class but got enhanced.
|
|
25
26
|
|
|
26
27
|
=== AST TYPE NAME MAPPING ===
|
|
27
28
|
AST type names are NOT valid labels. Always map them:
|
|
28
|
-
- ClassDeclaration → Class (or a framework label
|
|
29
|
+
- ClassDeclaration → Class (or a framework label if enhanced)
|
|
29
30
|
- FunctionDeclaration → Function
|
|
30
31
|
- MethodDeclaration → Method
|
|
31
32
|
- InterfaceDeclaration → Interface
|
|
@@ -35,8 +36,7 @@ AST type names are NOT valid labels. Always map them:
|
|
|
35
36
|
=== FINDING SPECIFIC NODES ===
|
|
36
37
|
Class/entity names are property values, NOT labels:
|
|
37
38
|
WRONG: (n:MyClassName) - using class names as labels
|
|
38
|
-
CORRECT: (n:Class {name: 'MyClassName'}) - use label from
|
|
39
|
-
CORRECT: (n:LabelFromSchema {name: 'EntityName'}) - always check rawSchema for valid labels
|
|
39
|
+
CORRECT: (n:Class {name: 'MyClassName'}) - use label from nodeTypes, name as property
|
|
40
40
|
|
|
41
41
|
Examples:
|
|
42
42
|
- "Count all classes" -> MATCH (n:Class) WHERE n.projectId = $projectId RETURN count(n)
|
|
@@ -58,22 +58,19 @@ Do NOT include projectId in parameters - it's injected automatically.
|
|
|
58
58
|
|
|
59
59
|
Query Generation Process - FOLLOW THIS EXACTLY:
|
|
60
60
|
1. SEARCH THE SCHEMA FILE FIRST: Use file_search to read neo4j-apoc-schema.json BEFORE generating any query
|
|
61
|
-
2. EXTRACT VALID LABELS:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
7. GENERATE QUERY: Write the Cypher query using ONLY labels, relationships, and properties from the schema
|
|
70
|
-
8. VALIDATE LABELS: Double-check that every label in your query exists as a key in rawSchema
|
|
71
|
-
9. ADD PROJECT FILTER: Always include WHERE n.projectId = $projectId for every node pattern in the query
|
|
61
|
+
2. EXTRACT VALID LABELS: nodeTypes[].label contains all valid labels. nodeTypes[].properties lists available property keys per label.
|
|
62
|
+
3. CHECK RELATIONSHIPS: relationshipTypes[].type lists all relationship types. Each entry includes connections[] showing which node types they connect (from → to).
|
|
63
|
+
4. CHECK SEMANTIC TYPES: semanticTypes[].type lists framework classifications. Each entry includes label showing which node type it appears on.
|
|
64
|
+
- semanticTypes are PROPERTY values stored in n.semanticType, NOT labels
|
|
65
|
+
5. REVIEW PATTERNS: commonPatterns[] shows from→relationship→to triples with counts
|
|
66
|
+
6. GENERATE QUERY: Write the Cypher query using ONLY labels, relationships, and properties from the schema
|
|
67
|
+
7. VALIDATE LABELS: Double-check that every label in your query exists in nodeTypes
|
|
68
|
+
8. ADD PROJECT FILTER: Always include WHERE n.projectId = $projectId for every node pattern in the query
|
|
72
69
|
|
|
73
70
|
Critical Rules:
|
|
74
71
|
- ALWAYS filter by projectId on every node in the query (e.g., WHERE n.projectId = $projectId)
|
|
75
72
|
- Use the schema information from the file_search tool - do not guess node labels or relationships
|
|
76
|
-
- Use ONLY node labels and
|
|
73
|
+
- Use ONLY node labels and relationships found in the schema
|
|
77
74
|
- For nested JSON data in properties, use: apoc.convert.fromJsonMap(node.propertyName)
|
|
78
75
|
- Use parameterized queries with $ syntax for any dynamic values
|
|
79
76
|
- Return only the data relevant to the user's request
|
|
@@ -90,9 +87,9 @@ Critical Rules:
|
|
|
90
87
|
- DECORATED_WITH: Node has a Decorator (use for "decorated with", "has decorator", "@SomeDecorator")
|
|
91
88
|
|
|
92
89
|
=== FRAMEWORK RELATIONSHIPS ===
|
|
93
|
-
|
|
90
|
+
Check relationshipTypes and commonPatterns in the schema file for framework-specific relationships:
|
|
94
91
|
- What relationship types exist (e.g., INJECTS, EXPOSES, MODULE_IMPORTS, INTERNAL_API_CALL, etc.)
|
|
95
|
-
-
|
|
92
|
+
- commonPatterns shows which node types they connect and how frequently
|
|
96
93
|
- These vary by project - ALWAYS check the schema file for available relationships
|
|
97
94
|
|
|
98
95
|
CRITICAL: Do NOT confuse EXTENDS (inheritance) with HAS_MEMBER (composition). "extends" always means EXTENDS relationship.
|
|
@@ -105,31 +102,18 @@ The arrow points FROM child TO parent. The child "extends" toward the parent.
|
|
|
105
102
|
Examples:
|
|
106
103
|
- "Classes extending X" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'X'}) WHERE c.projectId = $projectId RETURN c
|
|
107
104
|
- "What extends Y" -> MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'Y'}) WHERE c.projectId = $projectId RETURN c
|
|
108
|
-
- "Classes that extend X with >5 methods" ->
|
|
109
|
-
MATCH (c:Class)-[:EXTENDS]->(p:Class {name: 'X'})
|
|
110
|
-
WHERE c.projectId = $projectId
|
|
111
|
-
WITH c
|
|
112
|
-
MATCH (c)-[:HAS_MEMBER]->(m:Method)
|
|
113
|
-
WITH c, count(m) AS methodCount
|
|
114
|
-
WHERE methodCount > 5
|
|
115
|
-
RETURN c, methodCount
|
|
116
105
|
|
|
117
106
|
=== SEMANTIC TYPES (Framework Classifications) - PRIMARY QUERY METHOD ===
|
|
118
|
-
*** MOST QUERIES SHOULD USE SEMANTIC TYPES - CHECK
|
|
107
|
+
*** MOST QUERIES SHOULD USE SEMANTIC TYPES - CHECK semanticTypes FIRST ***
|
|
119
108
|
|
|
120
|
-
Semantic types are the PRIMARY way to find framework-specific nodes
|
|
121
|
-
|
|
109
|
+
Semantic types are the PRIMARY way to find framework-specific nodes:
|
|
110
|
+
semanticTypes[].type -> semantic type value
|
|
111
|
+
semanticTypes[].label -> which node label this type appears on
|
|
122
112
|
|
|
123
113
|
The semanticType is a PROPERTY on nodes, not a label. Query patterns:
|
|
124
114
|
- EXACT MATCH: MATCH (c) WHERE c.projectId = $projectId AND c.semanticType = 'ExactTypeFromSchema' RETURN c
|
|
125
115
|
- PARTIAL MATCH: MATCH (c) WHERE c.projectId = $projectId AND c.semanticType CONTAINS 'Pattern' RETURN c
|
|
126
116
|
|
|
127
|
-
Common semantic type patterns (verify against discoveredSchema.semanticTypes):
|
|
128
|
-
- Controllers: types containing 'Controller'
|
|
129
|
-
- Services: types containing 'Service', 'Provider', or 'Injectable'
|
|
130
|
-
- Repositories: types containing 'Repository', 'DAL', or 'DAO'
|
|
131
|
-
- Modules: types containing 'Module'
|
|
132
|
-
|
|
133
117
|
FALLBACK - If semantic type doesn't exist, use name patterns:
|
|
134
118
|
- "Find all controllers" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Controller' RETURN c
|
|
135
119
|
- "Find all services" -> MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Service' RETURN c
|
|
@@ -144,37 +128,25 @@ Use filePath property for location-based queries:
|
|
|
144
128
|
- "in account module" -> WHERE n.filePath CONTAINS '/account/'
|
|
145
129
|
- "in auth folder" -> WHERE n.filePath CONTAINS '/auth/'
|
|
146
130
|
|
|
147
|
-
Examples:
|
|
148
|
-
- "Items in account folder" ->
|
|
149
|
-
MATCH (c:Class) WHERE c.projectId = $projectId AND c.filePath CONTAINS '/account/' RETURN c
|
|
150
|
-
- FALLBACK (if no framework labels):
|
|
151
|
-
MATCH (c:Class) WHERE c.projectId = $projectId AND c.name CONTAINS 'Service' AND c.filePath CONTAINS '/account/' RETURN c
|
|
152
|
-
|
|
153
131
|
=== FRAMEWORK-SPECIFIC PATTERNS ===
|
|
154
132
|
|
|
155
133
|
Backend Projects (decorator-based frameworks):
|
|
156
|
-
- Check
|
|
157
|
-
- Use framework relationships
|
|
158
|
-
- Check
|
|
134
|
+
- Check nodeTypes for framework labels that REPLACE the Class label
|
|
135
|
+
- Use framework relationships from relationshipTypes and commonPatterns
|
|
136
|
+
- Check semanticTypes for framework classifications
|
|
159
137
|
|
|
160
138
|
Frontend Projects (React, functional):
|
|
161
139
|
- React components are typically Function nodes, NOT Class nodes
|
|
162
140
|
- Hooks are Function nodes (useAuth, useState, etc.)
|
|
163
141
|
- Example: "Find UserProfile component" -> MATCH (f:Function {name: 'UserProfile'}) WHERE f.projectId = $projectId RETURN f
|
|
164
142
|
|
|
165
|
-
Tip: Check rawSchema keys to understand if project uses framework labels or just core TypeScript labels.
|
|
166
|
-
|
|
167
143
|
IMPORTANT - Cypher Syntax (NOT SQL):
|
|
168
144
|
- Cypher does NOT use GROUP BY. Aggregation happens automatically in RETURN.
|
|
169
145
|
- WRONG (SQL): RETURN label, count(n) GROUP BY label
|
|
170
146
|
- CORRECT (Cypher): RETURN labels(n) AS label, count(n) AS count
|
|
171
|
-
- For grouping, non-aggregated values in RETURN automatically become grouping keys
|
|
172
147
|
- Use labels(n) to get node labels as an array
|
|
173
148
|
- Use collect() for aggregating into lists
|
|
174
149
|
- Use count(), sum(), avg(), min(), max() for aggregations
|
|
175
|
-
- Common patterns:
|
|
176
|
-
- Count by type: MATCH (n) RETURN labels(n)[0] AS type, count(n) AS count
|
|
177
|
-
- Group with collect: MATCH (n)-[:REL]->(m) RETURN n.name, collect(m.name) AS related
|
|
178
150
|
|
|
179
151
|
Provide ONLY the JSON response with no additional text, markdown formatting, or explanations outside the JSON structure.
|
|
180
152
|
`;
|
|
@@ -252,20 +224,19 @@ Provide ONLY the JSON response with no additional text, markdown formatting, or
|
|
|
252
224
|
try {
|
|
253
225
|
const content = fs.readFileSync(this.schemaPath, 'utf-8');
|
|
254
226
|
const schema = JSON.parse(content);
|
|
255
|
-
if (!schema.
|
|
256
|
-
return 'No
|
|
227
|
+
if (!schema || !schema.nodeTypes) {
|
|
228
|
+
return 'No schema available.';
|
|
257
229
|
}
|
|
258
|
-
const ds = schema.discoveredSchema;
|
|
259
230
|
// Format node types
|
|
260
|
-
const nodeTypes =
|
|
231
|
+
const nodeTypes = schema.nodeTypes?.map((n) => n.label).join(', ') ?? 'none';
|
|
261
232
|
// Get function count vs class count to hint at framework
|
|
262
|
-
const functionCount =
|
|
263
|
-
const classCount =
|
|
264
|
-
const decoratorCount =
|
|
233
|
+
const functionCount = schema.nodeTypes?.find((n) => n.label === 'Function')?.count ?? 0;
|
|
234
|
+
const classCount = schema.nodeTypes?.find((n) => n.label === 'Class')?.count ?? 0;
|
|
235
|
+
const decoratorCount = schema.nodeTypes?.find((n) => n.label === 'Decorator')?.count ?? 0;
|
|
265
236
|
// Format relationship types
|
|
266
|
-
const relTypes =
|
|
237
|
+
const relTypes = schema.relationshipTypes?.map((r) => r.type).join(', ') ?? 'none';
|
|
267
238
|
// Format semantic types and categorize them
|
|
268
|
-
const semanticTypeList =
|
|
239
|
+
const semanticTypeList = schema.semanticTypes?.map((s) => s.type) ?? [];
|
|
269
240
|
const semTypes = semanticTypeList.length > 0 ? semanticTypeList.join(', ') : 'none';
|
|
270
241
|
// Cache categorized semantic types for dynamic example generation
|
|
271
242
|
this.cachedSemanticTypes = this.categorizeSemanticTypes(semanticTypeList);
|
|
@@ -521,7 +492,7 @@ Remember to include WHERE n.projectId = $projectId for all node patterns.
|
|
|
521
492
|
}
|
|
522
493
|
/**
|
|
523
494
|
* Load valid labels dynamically from the schema file.
|
|
524
|
-
* Returns all
|
|
495
|
+
* Returns all labels from nodeTypes in the discovered schema.
|
|
525
496
|
*/
|
|
526
497
|
loadValidLabelsFromSchema() {
|
|
527
498
|
// Fallback to core TypeScript labels if schema not available
|
|
@@ -550,14 +521,8 @@ Remember to include WHERE n.projectId = $projectId for all node patterns.
|
|
|
550
521
|
const content = fs.readFileSync(this.schemaPath, 'utf-8');
|
|
551
522
|
const schema = JSON.parse(content);
|
|
552
523
|
const allLabels = new Set(coreLabels);
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
const schemaLabels = Object.keys(schema.rawSchema.records[0]._fields[0]);
|
|
556
|
-
schemaLabels.forEach((label) => allLabels.add(label));
|
|
557
|
-
}
|
|
558
|
-
// Also extract labels from discoveredSchema.nodeTypes (includes framework labels)
|
|
559
|
-
if (schema.discoveredSchema?.nodeTypes) {
|
|
560
|
-
for (const nodeType of schema.discoveredSchema.nodeTypes) {
|
|
524
|
+
if (schema?.nodeTypes) {
|
|
525
|
+
for (const nodeType of schema.nodeTypes) {
|
|
561
526
|
if (nodeType.label) {
|
|
562
527
|
allLabels.add(nodeType.label);
|
|
563
528
|
}
|
|
@@ -6,9 +6,23 @@ export const hashFile = async (filePath) => {
|
|
|
6
6
|
const content = await fs.readFile(filePath);
|
|
7
7
|
return crypto.createHash('sha256').update(content).digest('hex');
|
|
8
8
|
};
|
|
9
|
+
const serializeForLog = (data) => {
|
|
10
|
+
if (data instanceof Error) {
|
|
11
|
+
return { name: data.name, message: data.message, stack: data.stack };
|
|
12
|
+
}
|
|
13
|
+
if (data !== null && typeof data === 'object') {
|
|
14
|
+
const result = {};
|
|
15
|
+
for (const key of Object.keys(data)) {
|
|
16
|
+
result[key] = serializeForLog(data[key]);
|
|
17
|
+
}
|
|
18
|
+
return result;
|
|
19
|
+
}
|
|
20
|
+
return data;
|
|
21
|
+
};
|
|
9
22
|
export const debugLog = async (message, data) => {
|
|
10
23
|
const timestamp = new Date().toISOString();
|
|
11
|
-
const
|
|
24
|
+
const serialized = data !== undefined ? serializeForLog(data) : undefined;
|
|
25
|
+
const logEntry = `[${timestamp}] ${message}\n${serialized !== undefined ? JSON.stringify(serialized, null, LOG_CONFIG.jsonIndent) : ''}\n${LOG_CONFIG.separator}\n`;
|
|
12
26
|
try {
|
|
13
27
|
await fs.appendFile(path.join(process.cwd(), LOG_CONFIG.debugLogFile), logEntry);
|
|
14
28
|
}
|
package/dist/mcp/constants.js
CHANGED
|
@@ -418,7 +418,7 @@ export const WATCH = {
|
|
|
418
418
|
export const MESSAGES = {
|
|
419
419
|
errors: {
|
|
420
420
|
noRelevantCode: 'No relevant code found.',
|
|
421
|
-
serviceNotInitialized: '
|
|
421
|
+
serviceNotInitialized: 'natural_language_to_cypher requires OPENAI_API_KEY. Set it and restart the MCP server to enable this tool.',
|
|
422
422
|
connectionTestFailed: 'Connection test failed',
|
|
423
423
|
neo4jRequirement: 'Note: This server requires Neo4j with APOC plugin installed',
|
|
424
424
|
genericError: 'ERROR:',
|
package/dist/mcp/service-init.js
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import fs from 'fs/promises';
|
|
6
6
|
import { join } from 'path';
|
|
7
7
|
import { ensureNeo4jRunning, isDockerInstalled, isDockerRunning } from '../cli/neo4j-docker.js';
|
|
8
|
-
import { isOpenAIEnabled, getEmbeddingDimensions } from '../core/embeddings/embeddings.service.js';
|
|
8
|
+
import { isOpenAIEnabled, isOpenAIAvailable, getEmbeddingDimensions } from '../core/embeddings/embeddings.service.js';
|
|
9
|
+
import { LIST_PROJECTS_QUERY } from '../core/utils/project-id.js';
|
|
9
10
|
import { Neo4jService, QUERIES } from '../storage/neo4j/neo4j.service.js';
|
|
10
11
|
import { FILE_PATHS, LOG_CONFIG } from './constants.js';
|
|
11
12
|
import { initializeNaturalLanguageService } from './tools/natural-language-to-cypher.tool.js';
|
|
@@ -22,24 +23,24 @@ const checkConfiguration = async () => {
|
|
|
22
23
|
message: `[code-graph-context] Embedding provider: ${provider} (${dims} dimensions)`,
|
|
23
24
|
}));
|
|
24
25
|
await debugLog('Embedding configuration', { provider, dimensions: dims });
|
|
25
|
-
if (openai && !
|
|
26
|
+
if (openai && !isOpenAIAvailable()) {
|
|
26
27
|
console.error(JSON.stringify({
|
|
27
28
|
level: 'warn',
|
|
28
|
-
message: '[code-graph-context]
|
|
29
|
+
message: '[code-graph-context] OPENAI_EMBEDDINGS_ENABLED=true but OPENAI_API_KEY not set. Embedding calls will fail.',
|
|
29
30
|
}));
|
|
30
|
-
await debugLog('Configuration warning', { warning: '
|
|
31
|
+
await debugLog('Configuration warning', { warning: 'OPENAI_EMBEDDINGS_ENABLED=true but OPENAI_API_KEY not set' });
|
|
31
32
|
}
|
|
32
33
|
if (!openai) {
|
|
33
34
|
console.error(JSON.stringify({
|
|
34
35
|
level: 'info',
|
|
35
36
|
message: '[code-graph-context] Using local embeddings (Python sidecar). Starts on first embedding request.',
|
|
36
37
|
}));
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
38
|
+
}
|
|
39
|
+
if (!isOpenAIAvailable()) {
|
|
40
|
+
console.error(JSON.stringify({
|
|
41
|
+
level: 'info',
|
|
42
|
+
message: '[code-graph-context] natural_language_to_cypher unavailable: OPENAI_API_KEY not set.',
|
|
43
|
+
}));
|
|
43
44
|
}
|
|
44
45
|
};
|
|
45
46
|
/**
|
|
@@ -87,26 +88,34 @@ export const initializeServices = async () => {
|
|
|
87
88
|
await ensureNeo4j();
|
|
88
89
|
// Initialize services sequentially - schema must be written before NL service reads it
|
|
89
90
|
await initializeNeo4jSchema();
|
|
90
|
-
|
|
91
|
+
if (isOpenAIAvailable()) {
|
|
92
|
+
await initializeNaturalLanguageService();
|
|
93
|
+
}
|
|
94
|
+
else {
|
|
95
|
+
console.error(JSON.stringify({
|
|
96
|
+
level: 'info',
|
|
97
|
+
message: '[code-graph-context] natural_language_to_cypher unavailable: OPENAI_API_KEY not set',
|
|
98
|
+
}));
|
|
99
|
+
}
|
|
91
100
|
};
|
|
92
101
|
/**
|
|
93
102
|
* Dynamically discover schema from the actual graph contents.
|
|
94
103
|
* This is framework-agnostic - it discovers what's actually in the graph.
|
|
95
104
|
*/
|
|
96
|
-
const discoverSchemaFromGraph = async (neo4jService) => {
|
|
105
|
+
const discoverSchemaFromGraph = async (neo4jService, projectId) => {
|
|
97
106
|
try {
|
|
98
107
|
// Discover actual node types, relationships, and patterns from the graph
|
|
99
108
|
const [nodeTypes, relationshipTypes, semanticTypes, commonPatterns] = await Promise.all([
|
|
100
|
-
neo4jService.run(QUERIES.DISCOVER_NODE_TYPES),
|
|
101
|
-
neo4jService.run(QUERIES.DISCOVER_RELATIONSHIP_TYPES),
|
|
102
|
-
neo4jService.run(QUERIES.DISCOVER_SEMANTIC_TYPES),
|
|
103
|
-
neo4jService.run(QUERIES.DISCOVER_COMMON_PATTERNS),
|
|
109
|
+
neo4jService.run(QUERIES.DISCOVER_NODE_TYPES, { projectId }),
|
|
110
|
+
neo4jService.run(QUERIES.DISCOVER_RELATIONSHIP_TYPES, { projectId }),
|
|
111
|
+
neo4jService.run(QUERIES.DISCOVER_SEMANTIC_TYPES, { projectId }),
|
|
112
|
+
neo4jService.run(QUERIES.DISCOVER_COMMON_PATTERNS, { projectId }),
|
|
104
113
|
]);
|
|
105
114
|
return {
|
|
106
115
|
nodeTypes: nodeTypes.map((r) => ({
|
|
107
116
|
label: r.label,
|
|
108
117
|
count: typeof r.nodeCount === 'object' ? r.nodeCount.toNumber() : r.nodeCount,
|
|
109
|
-
properties: r.
|
|
118
|
+
properties: r.properties ?? [],
|
|
110
119
|
})),
|
|
111
120
|
relationshipTypes: relationshipTypes.map((r) => ({
|
|
112
121
|
type: r.relationshipType,
|
|
@@ -115,6 +124,7 @@ const discoverSchemaFromGraph = async (neo4jService) => {
|
|
|
115
124
|
})),
|
|
116
125
|
semanticTypes: semanticTypes.map((r) => ({
|
|
117
126
|
type: r.semanticType,
|
|
127
|
+
label: r.nodeLabel,
|
|
118
128
|
count: typeof r.count === 'object' ? r.count.toNumber() : r.count,
|
|
119
129
|
})),
|
|
120
130
|
commonPatterns: commonPatterns.map((r) => ({
|
|
@@ -136,13 +146,11 @@ const discoverSchemaFromGraph = async (neo4jService) => {
|
|
|
136
146
|
const initializeNeo4jSchema = async () => {
|
|
137
147
|
try {
|
|
138
148
|
const neo4jService = new Neo4jService();
|
|
139
|
-
|
|
149
|
+
// Find the most recently updated project to scope discovery queries
|
|
150
|
+
const projects = await neo4jService.run(LIST_PROJECTS_QUERY, {});
|
|
151
|
+
const projectId = projects.length > 0 ? projects[0].projectId : null;
|
|
140
152
|
// Dynamically discover what's actually in the graph
|
|
141
|
-
const
|
|
142
|
-
const schema = {
|
|
143
|
-
rawSchema,
|
|
144
|
-
discoveredSchema,
|
|
145
|
-
};
|
|
153
|
+
const schema = projectId ? await discoverSchemaFromGraph(neo4jService, projectId) : null;
|
|
146
154
|
const schemaPath = join(process.cwd(), FILE_PATHS.schemaOutput);
|
|
147
155
|
await fs.writeFile(schemaPath, JSON.stringify(schema, null, LOG_CONFIG.jsonIndentation));
|
|
148
156
|
await debugLog('Neo4j schema cached successfully', { schemaPath });
|
|
@@ -6,7 +6,7 @@ import { join } from 'path';
|
|
|
6
6
|
import { z } from 'zod';
|
|
7
7
|
import { NaturalLanguageToCypherService } from '../../core/embeddings/natural-language-to-cypher.service.js';
|
|
8
8
|
import { Neo4jService } from '../../storage/neo4j/neo4j.service.js';
|
|
9
|
-
import { TOOL_NAMES, TOOL_METADATA,
|
|
9
|
+
import { TOOL_NAMES, TOOL_METADATA, FILE_PATHS } from '../constants.js';
|
|
10
10
|
import { createErrorResponse, createSuccessResponse, formatQueryResults, debugLog, resolveProjectIdOrError, } from '../utils.js';
|
|
11
11
|
// Service instance - initialized asynchronously
|
|
12
12
|
let naturalLanguageToCypherService = null;
|
|
@@ -45,7 +45,7 @@ export const createNaturalLanguageToCypherTool = (server) => {
|
|
|
45
45
|
const resolvedProjectId = projectResult.projectId;
|
|
46
46
|
if (!naturalLanguageToCypherService) {
|
|
47
47
|
await debugLog('Natural language service not available', { projectId: resolvedProjectId, query });
|
|
48
|
-
return createSuccessResponse(
|
|
48
|
+
return createSuccessResponse('natural_language_to_cypher requires OPENAI_API_KEY. Set it and restart the MCP server to enable this tool.');
|
|
49
49
|
}
|
|
50
50
|
const cypherResult = await naturalLanguageToCypherService.promptToQuery(query, resolvedProjectId);
|
|
51
51
|
// Validate Cypher syntax using EXPLAIN (no execution, just parse)
|
|
@@ -48,29 +48,6 @@ export class Neo4jService {
|
|
|
48
48
|
getDriver() {
|
|
49
49
|
return this.driver;
|
|
50
50
|
}
|
|
51
|
-
async getSchema() {
|
|
52
|
-
const session = this.driver.session();
|
|
53
|
-
const timeoutConfig = getTimeoutConfig();
|
|
54
|
-
try {
|
|
55
|
-
return await session.run(QUERIES.APOC_SCHEMA, {}, {
|
|
56
|
-
timeout: timeoutConfig.neo4j.queryTimeoutMs,
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
catch (error) {
|
|
60
|
-
console.error('Error fetching schema:', error);
|
|
61
|
-
throw error;
|
|
62
|
-
}
|
|
63
|
-
finally {
|
|
64
|
-
// Wrap session close in try-catch to avoid masking the original error
|
|
65
|
-
try {
|
|
66
|
-
await session.close();
|
|
67
|
-
}
|
|
68
|
-
catch (closeError) {
|
|
69
|
-
// Log but don't re-throw to preserve original error
|
|
70
|
-
console.warn('Error closing Neo4j session:', closeError);
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
51
|
/**
|
|
75
52
|
* Close the Neo4j driver connection.
|
|
76
53
|
* Should be called when the service is no longer needed to release resources.
|
|
@@ -82,10 +59,6 @@ export class Neo4jService {
|
|
|
82
59
|
}
|
|
83
60
|
}
|
|
84
61
|
export const QUERIES = {
|
|
85
|
-
APOC_SCHEMA: `
|
|
86
|
-
CALL apoc.meta.schema() YIELD value
|
|
87
|
-
RETURN value as schema
|
|
88
|
-
`,
|
|
89
62
|
// Project-scoped deletion - only deletes nodes for the specified project
|
|
90
63
|
// Uses APOC batched deletion to avoid transaction memory limits on large projects
|
|
91
64
|
CLEAR_PROJECT: `
|
|
@@ -444,65 +417,67 @@ export const QUERIES = {
|
|
|
444
417
|
// DYNAMIC SCHEMA DISCOVERY QUERIES
|
|
445
418
|
// ============================================
|
|
446
419
|
/**
|
|
447
|
-
* Get all distinct node labels with counts and
|
|
420
|
+
* Get all distinct node labels with counts, property keys, and property types.
|
|
421
|
+
* Samples up to 10 nodes per label to collect comprehensive property info.
|
|
448
422
|
*/
|
|
449
423
|
DISCOVER_NODE_TYPES: `
|
|
450
424
|
CALL db.labels() YIELD label
|
|
451
425
|
CALL {
|
|
452
426
|
WITH label
|
|
453
427
|
MATCH (n) WHERE label IN labels(n) AND n.projectId = $projectId
|
|
454
|
-
|
|
455
|
-
RETURN keys(n) AS sampleProperties
|
|
428
|
+
RETURN count(n) AS nodeCount
|
|
456
429
|
}
|
|
457
430
|
CALL {
|
|
458
431
|
WITH label
|
|
459
432
|
MATCH (n) WHERE label IN labels(n) AND n.projectId = $projectId
|
|
460
|
-
|
|
433
|
+
WITH n LIMIT 10
|
|
434
|
+
UNWIND keys(n) AS key
|
|
435
|
+
WITH DISTINCT key, n[key] AS val
|
|
436
|
+
RETURN collect(DISTINCT key) AS properties
|
|
461
437
|
}
|
|
462
|
-
RETURN label, nodeCount,
|
|
438
|
+
RETURN label, nodeCount, properties
|
|
463
439
|
ORDER BY nodeCount DESC
|
|
464
440
|
`,
|
|
465
441
|
/**
|
|
466
|
-
* Get all distinct relationship types with counts and
|
|
442
|
+
* Get all distinct relationship types with counts and all connection patterns
|
|
467
443
|
*/
|
|
468
444
|
DISCOVER_RELATIONSHIP_TYPES: `
|
|
469
445
|
CALL db.relationshipTypes() YIELD relationshipType
|
|
470
446
|
CALL {
|
|
471
447
|
WITH relationshipType
|
|
472
448
|
MATCH (a)-[r]->(b) WHERE type(r) = relationshipType AND a.projectId = $projectId AND b.projectId = $projectId
|
|
473
|
-
|
|
474
|
-
RETURN fromLabel, toLabel
|
|
475
|
-
LIMIT 10
|
|
449
|
+
RETURN count(r) AS relCount
|
|
476
450
|
}
|
|
477
451
|
CALL {
|
|
478
452
|
WITH relationshipType
|
|
479
|
-
MATCH (a)-[r]->(b) WHERE type(r) = relationshipType AND a.projectId = $projectId
|
|
480
|
-
|
|
453
|
+
MATCH (a)-[r]->(b) WHERE type(r) = relationshipType AND a.projectId = $projectId AND b.projectId = $projectId
|
|
454
|
+
WITH DISTINCT labels(a)[0] AS fromLabel, labels(b)[0] AS toLabel
|
|
455
|
+
RETURN collect({from: fromLabel, to: toLabel}) AS connections
|
|
481
456
|
}
|
|
482
|
-
RETURN relationshipType, relCount,
|
|
457
|
+
RETURN relationshipType, relCount, connections
|
|
483
458
|
ORDER BY relCount DESC
|
|
484
459
|
`,
|
|
485
460
|
/**
|
|
486
|
-
* Get
|
|
461
|
+
* Get semantic types with counts and which label they appear on
|
|
487
462
|
*/
|
|
488
463
|
DISCOVER_SEMANTIC_TYPES: `
|
|
489
464
|
MATCH (n)
|
|
490
465
|
WHERE n.semanticType IS NOT NULL AND n.projectId = $projectId
|
|
491
|
-
WITH n.semanticType AS semanticType, count(*) AS count
|
|
466
|
+
WITH n.semanticType AS semanticType, labels(n)[0] AS nodeLabel, count(*) AS count
|
|
467
|
+
RETURN semanticType, nodeLabel, count
|
|
492
468
|
ORDER BY count DESC
|
|
493
|
-
RETURN semanticType, count
|
|
494
469
|
`,
|
|
495
470
|
/**
|
|
496
|
-
* Get
|
|
471
|
+
* Get all relationship patterns between node types
|
|
497
472
|
*/
|
|
498
473
|
DISCOVER_COMMON_PATTERNS: `
|
|
499
474
|
MATCH (a)-[r]->(b)
|
|
500
475
|
WHERE a.projectId = $projectId AND b.projectId = $projectId
|
|
501
476
|
WITH labels(a)[0] AS fromType, type(r) AS relType, labels(b)[0] AS toType, count(*) AS count
|
|
502
|
-
WHERE count >
|
|
477
|
+
WHERE count > 2
|
|
503
478
|
RETURN fromType, relType, toType, count
|
|
504
479
|
ORDER BY count DESC
|
|
505
|
-
LIMIT
|
|
480
|
+
LIMIT 50
|
|
506
481
|
`,
|
|
507
482
|
// ============================================
|
|
508
483
|
// IMPACT ANALYSIS QUERIES
|
package/package.json
CHANGED