code-graph-context 2.9.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -26
- package/dist/cli/cli.js +250 -10
- package/dist/core/embeddings/embedding-sidecar.js +244 -0
- package/dist/core/embeddings/embeddings.service.js +60 -132
- package/dist/core/embeddings/local-embeddings.service.js +41 -0
- package/dist/core/embeddings/openai-embeddings.service.js +114 -0
- package/dist/mcp/handlers/graph-generator.handler.js +6 -5
- package/dist/mcp/mcp.server.js +2 -0
- package/dist/mcp/service-init.js +24 -3
- package/dist/mcp/tools/search-codebase.tool.js +37 -13
- package/dist/mcp/tools/session-note.tool.js +5 -6
- package/dist/storage/neo4j/neo4j.service.js +4 -4
- package/package.json +3 -1
- package/sidecar/embedding_server.py +147 -0
- package/sidecar/requirements.txt +5 -0
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](https://typescriptlang.org/)
|
|
6
6
|
[](https://neo4j.com/)
|
|
7
7
|
[](https://nestjs.com/)
|
|
8
|
-
[](https://openai.com/)
|
|
9
9
|
[](https://modelcontextprotocol.io/)
|
|
10
10
|
|
|
11
11
|
**Give your AI coding assistant a photographic memory of your codebase.**
|
|
@@ -27,7 +27,7 @@ Code Graph Context is an MCP server that builds a semantic graph of your TypeScr
|
|
|
27
27
|
│ CODE GRAPH CONTEXT │
|
|
28
28
|
│ │
|
|
29
29
|
│ AST Parser ──► Neo4j Graph ──► Vector Embeddings │
|
|
30
|
-
│ (ts-morph) (Relationships) (OpenAI)
|
|
30
|
+
│ (ts-morph) (Relationships) (Local or OpenAI) │
|
|
31
31
|
│ │
|
|
32
32
|
└─────────────────────────────────────────────────────────────┘
|
|
33
33
|
│
|
|
@@ -56,7 +56,7 @@ Code Graph Context is an MCP server that builds a semantic graph of your TypeScr
|
|
|
56
56
|
## Features
|
|
57
57
|
|
|
58
58
|
- **Multi-Project Support**: Parse and query multiple projects in a single database with complete isolation
|
|
59
|
-
- **Semantic Search**: Vector-based search using OpenAI embeddings to find relevant code
|
|
59
|
+
- **Semantic Search**: Vector-based search using local or OpenAI embeddings to find relevant code
|
|
60
60
|
- **Natural Language Querying**: Convert questions into Cypher queries
|
|
61
61
|
- **Framework-Aware**: Built-in NestJS schema with ability to define custom framework patterns
|
|
62
62
|
- **Weighted Graph Traversal**: Intelligent traversal scoring paths by importance and relevance
|
|
@@ -79,7 +79,7 @@ TypeScript Source → AST Parser (ts-morph) → Neo4j Graph + Vector Embeddings
|
|
|
79
79
|
**Core Components:**
|
|
80
80
|
- `src/core/parsers/typescript-parser.ts` - AST parsing with ts-morph
|
|
81
81
|
- `src/storage/neo4j/neo4j.service.ts` - Graph storage and queries
|
|
82
|
-
- `src/core/embeddings/embeddings.service.ts` - OpenAI
|
|
82
|
+
- `src/core/embeddings/embeddings.service.ts` - Embedding service (local sidecar or OpenAI)
|
|
83
83
|
- `src/mcp/mcp.server.ts` - MCP server and tool registration
|
|
84
84
|
|
|
85
85
|
**Dual-Schema System:**
|
|
@@ -93,29 +93,33 @@ Nodes have both `coreType` (AST) and `semanticType` (framework meaning), enablin
|
|
|
93
93
|
### Prerequisites
|
|
94
94
|
|
|
95
95
|
- **Node.js** >= 18
|
|
96
|
+
- **Python** >= 3.10 (for local embeddings)
|
|
96
97
|
- **Docker** (for Neo4j)
|
|
97
|
-
- **OpenAI API Key**
|
|
98
|
-
|
|
99
98
|
|
|
99
|
+
> **No API keys required.** Local embeddings work out of the box using a Python sidecar.
|
|
100
100
|
|
|
101
101
|
### 1. Install
|
|
102
102
|
|
|
103
103
|
```bash
|
|
104
104
|
npm install -g code-graph-context
|
|
105
|
-
code-graph-context init # Sets up Neo4j
|
|
105
|
+
code-graph-context init # Sets up Neo4j + Python sidecar + downloads embedding model
|
|
106
106
|
```
|
|
107
107
|
|
|
108
|
-
|
|
108
|
+
The `init` command handles everything:
|
|
109
|
+
- Starts a Neo4j container via Docker
|
|
110
|
+
- Creates a Python virtual environment
|
|
111
|
+
- Installs embedding dependencies (PyTorch, sentence-transformers)
|
|
112
|
+
- Downloads the default embedding model (~3GB)
|
|
109
113
|
|
|
110
|
-
|
|
114
|
+
### 2. Configure Claude Code
|
|
111
115
|
|
|
112
116
|
```bash
|
|
113
|
-
claude mcp add --scope user code-graph-context
|
|
114
|
-
-e OPENAI_API_KEY=sk-your-key-here \
|
|
115
|
-
-- code-graph-context
|
|
117
|
+
claude mcp add --scope user code-graph-context -- code-graph-context
|
|
116
118
|
```
|
|
117
119
|
|
|
118
|
-
**That's it.** Restart Claude Code and you're ready to go.
|
|
120
|
+
**That's it.** No API keys needed. Restart Claude Code and you're ready to go.
|
|
121
|
+
|
|
122
|
+
> **Want to use OpenAI instead?** See [Embedding Configuration](#embedding-configuration) below.
|
|
119
123
|
|
|
120
124
|
### 3. Parse Your Project
|
|
121
125
|
|
|
@@ -142,11 +146,23 @@ If you prefer to edit the config files directly:
|
|
|
142
146
|
|
|
143
147
|
**~/.claude.json** (user scope - recommended):
|
|
144
148
|
```json
|
|
149
|
+
{
|
|
150
|
+
"mcpServers": {
|
|
151
|
+
"code-graph-context": {
|
|
152
|
+
"command": "code-graph-context"
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**With OpenAI (optional):**
|
|
159
|
+
```json
|
|
145
160
|
{
|
|
146
161
|
"mcpServers": {
|
|
147
162
|
"code-graph-context": {
|
|
148
163
|
"command": "code-graph-context",
|
|
149
164
|
"env": {
|
|
165
|
+
"OPENAI_ENABLED": "true",
|
|
150
166
|
"OPENAI_API_KEY": "sk-your-key-here"
|
|
151
167
|
}
|
|
152
168
|
}
|
|
@@ -160,10 +176,7 @@ If you prefer to edit the config files directly:
|
|
|
160
176
|
"mcpServers": {
|
|
161
177
|
"code-graph-context": {
|
|
162
178
|
"command": "node",
|
|
163
|
-
"args": ["/absolute/path/to/code-graph-context/dist/cli/cli.js"]
|
|
164
|
-
"env": {
|
|
165
|
-
"OPENAI_API_KEY": "sk-your-key-here"
|
|
166
|
-
}
|
|
179
|
+
"args": ["/absolute/path/to/code-graph-context/dist/cli/cli.js"]
|
|
167
180
|
}
|
|
168
181
|
}
|
|
169
182
|
}
|
|
@@ -173,10 +186,13 @@ If you prefer to edit the config files directly:
|
|
|
173
186
|
|
|
174
187
|
| Variable | Required | Default | Description |
|
|
175
188
|
|----------|----------|---------|-------------|
|
|
176
|
-
| `OPENAI_API_KEY` | **Yes** | - | For embeddings and NL queries |
|
|
177
189
|
| `NEO4J_URI` | No | `bolt://localhost:7687` | Neo4j connection URI |
|
|
178
190
|
| `NEO4J_USER` | No | `neo4j` | Neo4j username |
|
|
179
191
|
| `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
|
|
192
|
+
| `EMBEDDING_MODEL` | No | `Qodo/Qodo-Embed-1-1.5B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
|
|
193
|
+
| `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
|
|
194
|
+
| `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
|
|
195
|
+
| `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
|
|
180
196
|
|
|
181
197
|
---
|
|
182
198
|
|
|
@@ -515,6 +531,54 @@ This enables queries like "find all hooks that use context" while maintaining AS
|
|
|
515
531
|
|
|
516
532
|
---
|
|
517
533
|
|
|
534
|
+
## Embedding Configuration
|
|
535
|
+
|
|
536
|
+
Local embeddings are the default — **no API key needed**. The Python sidecar starts automatically on first use and runs a local model for high-quality code embeddings.
|
|
537
|
+
|
|
538
|
+
### Available Models
|
|
539
|
+
|
|
540
|
+
Set via the `EMBEDDING_MODEL` environment variable:
|
|
541
|
+
|
|
542
|
+
| Model | Dimensions | RAM | Quality | Best For |
|
|
543
|
+
|-------|-----------|-----|---------|----------|
|
|
544
|
+
| `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~9 GB | Best | Machines with 32+ GB RAM |
|
|
545
|
+
| `BAAI/bge-base-en-v1.5` | 768 | ~500 MB | Good | General purpose, low RAM |
|
|
546
|
+
| `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~200 MB | OK | Minimal RAM, fast |
|
|
547
|
+
| `nomic-ai/nomic-embed-text-v1.5` | 768 | ~600 MB | Good | Code + prose mixed |
|
|
548
|
+
| `sentence-transformers/all-mpnet-base-v2` | 768 | ~500 MB | Good | Balanced quality/speed |
|
|
549
|
+
| `BAAI/bge-small-en-v1.5` | 384 | ~130 MB | OK | Smallest footprint |
|
|
550
|
+
|
|
551
|
+
**Example:** Use a lightweight model on a 16GB machine:
|
|
552
|
+
```bash
|
|
553
|
+
claude mcp add --scope user code-graph-context \
|
|
554
|
+
-e EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 \
|
|
555
|
+
-- code-graph-context
|
|
556
|
+
```
|
|
557
|
+
|
|
558
|
+
### Switching Models
|
|
559
|
+
|
|
560
|
+
**Switching models requires re-parsing** — vector index dimensions are locked per model. Drop existing indexes first:
|
|
561
|
+
|
|
562
|
+
```cypher
|
|
563
|
+
DROP INDEX embedded_nodes_idx IF EXISTS;
|
|
564
|
+
DROP INDEX session_notes_idx IF EXISTS;
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
Then re-parse your project with the new model configured.
|
|
568
|
+
|
|
569
|
+
### Using OpenAI Instead
|
|
570
|
+
|
|
571
|
+
If you prefer OpenAI embeddings (higher quality, requires API key):
|
|
572
|
+
|
|
573
|
+
```bash
|
|
574
|
+
claude mcp add --scope user code-graph-context \
|
|
575
|
+
-e OPENAI_ENABLED=true \
|
|
576
|
+
-e OPENAI_API_KEY=sk-your-key-here \
|
|
577
|
+
-- code-graph-context
|
|
578
|
+
```
|
|
579
|
+
|
|
580
|
+
---
|
|
581
|
+
|
|
518
582
|
## Troubleshooting
|
|
519
583
|
|
|
520
584
|
### MCP Server Not Connecting
|
|
@@ -530,18 +594,29 @@ docker ps | grep neo4j
|
|
|
530
594
|
code-graph-context status
|
|
531
595
|
```
|
|
532
596
|
|
|
533
|
-
###
|
|
597
|
+
### Embedding Errors
|
|
534
598
|
|
|
535
|
-
|
|
599
|
+
**"Failed to generate embedding"** — The local sidecar may not have started. Check:
|
|
600
|
+
```bash
|
|
601
|
+
# Verify Python deps are installed
|
|
602
|
+
code-graph-context status
|
|
536
603
|
|
|
537
|
-
|
|
604
|
+
# Re-run init to fix sidecar setup
|
|
605
|
+
code-graph-context init
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
**Out of memory (large model on 16GB machine)** — Switch to a lighter model:
|
|
538
609
|
```bash
|
|
539
|
-
|
|
540
|
-
|
|
610
|
+
claude mcp add --scope user code-graph-context \
|
|
611
|
+
-e EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 \
|
|
612
|
+
-- code-graph-context
|
|
613
|
+
```
|
|
541
614
|
|
|
542
|
-
|
|
615
|
+
**Using OpenAI and getting auth errors** — Ensure your key is configured:
|
|
616
|
+
```bash
|
|
543
617
|
claude mcp remove code-graph-context
|
|
544
618
|
claude mcp add --scope user code-graph-context \
|
|
619
|
+
-e OPENAI_ENABLED=true \
|
|
545
620
|
-e OPENAI_API_KEY=sk-your-key-here \
|
|
546
621
|
-- code-graph-context
|
|
547
622
|
```
|
|
@@ -572,8 +647,8 @@ parse_typescript_project({
|
|
|
572
647
|
## CLI Commands
|
|
573
648
|
|
|
574
649
|
```bash
|
|
575
|
-
code-graph-context init [options] # Set up Neo4j
|
|
576
|
-
code-graph-context status # Check Docker/Neo4j status
|
|
650
|
+
code-graph-context init [options] # Set up Neo4j + Python sidecar + embedding model
|
|
651
|
+
code-graph-context status # Check Docker/Neo4j/sidecar status
|
|
577
652
|
code-graph-context stop # Stop Neo4j container
|
|
578
653
|
```
|
|
579
654
|
|
package/dist/cli/cli.js
CHANGED
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Handles CLI commands (init, status, stop) and delegates to MCP server
|
|
6
6
|
*/
|
|
7
|
-
import {
|
|
7
|
+
import { execSync, spawn as spawnProcess } from 'child_process';
|
|
8
|
+
import { existsSync, readFileSync } from 'fs';
|
|
8
9
|
import { dirname, join } from 'path';
|
|
9
10
|
import { fileURLToPath } from 'url';
|
|
10
11
|
import { Command } from 'commander';
|
|
@@ -47,7 +48,7 @@ const spinner = (msg) => {
|
|
|
47
48
|
return {
|
|
48
49
|
stop: (ok, finalMsg) => {
|
|
49
50
|
clearInterval(interval);
|
|
50
|
-
process.stdout.write(`\r ${ok ? sym.ok : sym.err} ${finalMsg
|
|
51
|
+
process.stdout.write(`\r ${ok ? sym.ok : sym.err} ${finalMsg ?? msg}\n`);
|
|
51
52
|
},
|
|
52
53
|
};
|
|
53
54
|
};
|
|
@@ -67,12 +68,11 @@ ${c.bold}Next steps:${c.reset}
|
|
|
67
68
|
"mcpServers": {
|
|
68
69
|
"code-graph-context": {
|
|
69
70
|
"command": "code-graph-context",
|
|
70
|
-
"env": {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
"NEO4J_PASSWORD": "${password}"`
|
|
71
|
+
"env": {${password !== NEO4J_CONFIG.defaultPassword
|
|
72
|
+
? `
|
|
73
|
+
"NEO4J_PASSWORD": "${password}"${boltPort !== NEO4J_CONFIG.boltPort ? ',' : ''}`
|
|
74
74
|
: ''}${boltPort !== NEO4J_CONFIG.boltPort
|
|
75
|
-
?
|
|
75
|
+
? `
|
|
76
76
|
"NEO4J_URI": "bolt://localhost:${boltPort}"`
|
|
77
77
|
: ''}
|
|
78
78
|
}
|
|
@@ -80,19 +80,239 @@ ${c.bold}Next steps:${c.reset}
|
|
|
80
80
|
}
|
|
81
81
|
}${c.reset}
|
|
82
82
|
|
|
83
|
-
${c.
|
|
83
|
+
${c.dim}Local embeddings are used by default (no API key needed).
|
|
84
|
+
To use OpenAI instead, add:
|
|
85
|
+
"OPENAI_ENABLED": "true",
|
|
86
|
+
"OPENAI_API_KEY": "sk-..."${c.reset}
|
|
84
87
|
|
|
85
88
|
3. Restart Claude Code
|
|
86
89
|
`);
|
|
87
90
|
};
|
|
91
|
+
/**
|
|
92
|
+
* Resolve the sidecar directory (works from both src/ and dist/)
|
|
93
|
+
*/
|
|
94
|
+
const getSidecarDir = () => join(__dirname, '..', '..', 'sidecar');
|
|
95
|
+
/**
|
|
96
|
+
* Get the path to the venv python binary inside the sidecar dir.
|
|
97
|
+
* Returns null if the venv doesn't exist yet.
|
|
98
|
+
*/
|
|
99
|
+
const getVenvPython = (sidecarDir) => {
|
|
100
|
+
const venvPython = join(sidecarDir, '.venv', 'bin', 'python3');
|
|
101
|
+
return existsSync(venvPython) ? venvPython : null;
|
|
102
|
+
};
|
|
103
|
+
/**
|
|
104
|
+
* Get the best python binary to use for the sidecar.
|
|
105
|
+
* Prefers venv python, falls back to system python3.
|
|
106
|
+
*/
|
|
107
|
+
const getSidecarPython = (sidecarDir) => {
|
|
108
|
+
return getVenvPython(sidecarDir) ?? 'python3';
|
|
109
|
+
};
|
|
110
|
+
/**
|
|
111
|
+
* Check if python3 is available and return its version string (null if it cannot be run)
|
|
112
|
+
*/
|
|
113
|
+
const getPythonVersion = () => {
|
|
114
|
+
try {
|
|
115
|
+
return execSync('python3 --version', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
116
|
+
}
|
|
117
|
+
catch {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
120
|
+
};
|
|
121
|
+
/**
|
|
122
|
+
* Check if a Python module is importable with the given python binary (defaults to system python3)
|
|
123
|
+
*/
|
|
124
|
+
const checkPipPackage = (pkg, python = 'python3') => {
|
|
125
|
+
try {
|
|
126
|
+
execSync(`${python} -c "import ${pkg}"`, { stdio: ['pipe', 'pipe', 'pipe'] });
|
|
127
|
+
return true;
|
|
128
|
+
}
|
|
129
|
+
catch {
|
|
130
|
+
return false;
|
|
131
|
+
}
|
|
132
|
+
};
|
|
133
|
+
/**
|
|
134
|
+
* Create a venv in the sidecar directory
|
|
135
|
+
*/
|
|
136
|
+
const createVenv = (sidecarDir) => {
|
|
137
|
+
try {
|
|
138
|
+
const venvPath = join(sidecarDir, '.venv');
|
|
139
|
+
execSync(`python3 -m venv ${venvPath}`, { stdio: ['pipe', 'pipe', 'pipe'] });
|
|
140
|
+
return true;
|
|
141
|
+
}
|
|
142
|
+
catch {
|
|
143
|
+
return false;
|
|
144
|
+
}
|
|
145
|
+
};
|
|
146
|
+
/**
|
|
147
|
+
* Install sidecar Python dependencies via pip inside the venv
|
|
148
|
+
*/
|
|
149
|
+
const installSidecarDeps = (sidecarDir) => {
|
|
150
|
+
return new Promise((resolve) => {
|
|
151
|
+
const venvPip = join(sidecarDir, '.venv', 'bin', 'pip');
|
|
152
|
+
const requirementsPath = join(sidecarDir, 'requirements.txt');
|
|
153
|
+
const pip = spawnProcess(venvPip, ['install', '-r', requirementsPath], {
|
|
154
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
155
|
+
});
|
|
156
|
+
pip.stderr?.on('data', (data) => {
|
|
157
|
+
const line = data.toString().trim();
|
|
158
|
+
if (line && (line.includes('Downloading') || line.includes('Installing') || line.includes('ERROR'))) {
|
|
159
|
+
process.stdout.write(`\r ${c.blue}⠸${c.reset} ${line.slice(0, 70).padEnd(70)}`);
|
|
160
|
+
}
|
|
161
|
+
});
|
|
162
|
+
pip.on('close', (code) => {
|
|
163
|
+
process.stdout.write('\r' + ' '.repeat(80) + '\r');
|
|
164
|
+
resolve(code === 0);
|
|
165
|
+
});
|
|
166
|
+
pip.on('error', () => {
|
|
167
|
+
resolve(false);
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
};
|
|
171
|
+
/**
|
|
172
|
+
* Verify sentence-transformers can be imported using the sidecar python (venv if present, otherwise system python3)
|
|
173
|
+
*/
|
|
174
|
+
const verifySidecar = (sidecarDir) => {
|
|
175
|
+
return new Promise((resolve) => {
|
|
176
|
+
const python = getSidecarPython(sidecarDir);
|
|
177
|
+
const test = spawnProcess(python, ['-c', 'from sentence_transformers import SentenceTransformer; print("ok")'], {
|
|
178
|
+
cwd: sidecarDir,
|
|
179
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
180
|
+
});
|
|
181
|
+
let stdout = '';
|
|
182
|
+
test.stdout?.on('data', (d) => (stdout += d.toString()));
|
|
183
|
+
test.on('close', (code) => resolve(code === 0 && stdout.includes('ok')));
|
|
184
|
+
test.on('error', () => resolve(false));
|
|
185
|
+
});
|
|
186
|
+
};
|
|
187
|
+
/**
|
|
188
|
+
* Set up the Python embedding sidecar
|
|
189
|
+
*/
|
|
190
|
+
const setupSidecar = async () => {
|
|
191
|
+
console.log('');
|
|
192
|
+
header('Embedding Sidecar Setup');
|
|
193
|
+
const sidecarDir = getSidecarDir();
|
|
194
|
+
// Check Python
|
|
195
|
+
const pythonVersion = getPythonVersion();
|
|
196
|
+
if (!pythonVersion) {
|
|
197
|
+
log(sym.err, 'Python 3 is not installed');
|
|
198
|
+
console.log(`\n Install Python 3.10+: ${c.cyan}https://www.python.org/downloads/${c.reset}`);
|
|
199
|
+
console.log(` ${c.dim}Or use OpenAI embeddings instead: set OPENAI_ENABLED=true${c.reset}\n`);
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
log(sym.ok, `${pythonVersion}`);
|
|
203
|
+
// Create or reuse venv
|
|
204
|
+
const venvPath = join(sidecarDir, '.venv');
|
|
205
|
+
if (existsSync(venvPath)) {
|
|
206
|
+
log(sym.ok, `Virtual environment exists (${c.dim}sidecar/.venv${c.reset})`);
|
|
207
|
+
}
|
|
208
|
+
else {
|
|
209
|
+
const venvSpinner = spinner('Creating virtual environment...');
|
|
210
|
+
const created = createVenv(sidecarDir);
|
|
211
|
+
if (!created) {
|
|
212
|
+
venvSpinner.stop(false, 'Failed to create virtual environment');
|
|
213
|
+
console.log(`\n Try manually: ${c.dim}python3 -m venv ${venvPath}${c.reset}\n`);
|
|
214
|
+
return;
|
|
215
|
+
}
|
|
216
|
+
venvSpinner.stop(true, `Virtual environment created (${c.dim}sidecar/.venv${c.reset})`);
|
|
217
|
+
}
|
|
218
|
+
const python = getSidecarPython(sidecarDir);
|
|
219
|
+
// Check if deps already installed in venv
|
|
220
|
+
const hasSentenceTransformers = checkPipPackage('sentence_transformers', python);
|
|
221
|
+
const hasFastApi = checkPipPackage('fastapi', python);
|
|
222
|
+
const hasTorch = checkPipPackage('torch', python);
|
|
223
|
+
if (hasSentenceTransformers && hasFastApi && hasTorch) {
|
|
224
|
+
log(sym.ok, 'Python dependencies already installed');
|
|
225
|
+
}
|
|
226
|
+
else {
|
|
227
|
+
const missing = [];
|
|
228
|
+
if (!hasTorch)
|
|
229
|
+
missing.push('torch');
|
|
230
|
+
if (!hasSentenceTransformers)
|
|
231
|
+
missing.push('sentence-transformers');
|
|
232
|
+
if (!hasFastApi)
|
|
233
|
+
missing.push('fastapi');
|
|
234
|
+
log(sym.info, `Missing packages: ${missing.join(', ')}`);
|
|
235
|
+
const s = spinner('Installing Python dependencies (this may take a few minutes)...');
|
|
236
|
+
const installed = await installSidecarDeps(sidecarDir);
|
|
237
|
+
if (!installed) {
|
|
238
|
+
s.stop(false, 'Failed to install Python dependencies');
|
|
239
|
+
console.log(`\n Try manually: ${c.dim}${join(venvPath, 'bin', 'pip')} install -r ${join(sidecarDir, 'requirements.txt')}${c.reset}\n`);
|
|
240
|
+
return;
|
|
241
|
+
}
|
|
242
|
+
s.stop(true, 'Python dependencies installed');
|
|
243
|
+
}
|
|
244
|
+
// Verify sentence-transformers works
|
|
245
|
+
const verifySpinner = spinner('Verifying sentence-transformers...');
|
|
246
|
+
const verified = await verifySidecar(sidecarDir);
|
|
247
|
+
verifySpinner.stop(verified, verified ? 'sentence-transformers OK' : 'sentence-transformers import failed');
|
|
248
|
+
if (!verified) {
|
|
249
|
+
console.log(`\n ${c.dim}Try: ${python} -c "from sentence_transformers import SentenceTransformer"${c.reset}`);
|
|
250
|
+
console.log(` ${c.dim}Or use OpenAI embeddings instead: set OPENAI_ENABLED=true${c.reset}\n`);
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
// Pre-download the embedding model so first real use is fast
|
|
254
|
+
const modelName = process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B';
|
|
255
|
+
await preDownloadModel(sidecarDir, python, modelName);
|
|
256
|
+
};
|
|
257
|
+
/**
|
|
258
|
+
* Pre-download the embedding model during init so the first parse doesn't hang.
|
|
259
|
+
* SentenceTransformer downloads to ~/.cache/huggingface/ on first load.
|
|
260
|
+
*/
|
|
261
|
+
const preDownloadModel = async (sidecarDir, python, modelName) => {
|
|
262
|
+
// Check if model is already cached by trying a quick load
|
|
263
|
+
const checkCached = () => {
|
|
264
|
+
return new Promise((resolve) => {
|
|
265
|
+
const proc = spawnProcess(python, [
|
|
266
|
+
'-c',
|
|
267
|
+
`from sentence_transformers import SentenceTransformer; m = SentenceTransformer("${modelName}"); print(f"dims:{len(m.encode(['test'])[0])}")`,
|
|
268
|
+
], { cwd: sidecarDir, stdio: ['pipe', 'pipe', 'pipe'], timeout: 30_000 });
|
|
269
|
+
let stdout = '';
|
|
270
|
+
proc.stdout?.on('data', (d) => (stdout += d.toString()));
|
|
271
|
+
proc.on('close', (code) => resolve(code === 0 && stdout.includes('dims:')));
|
|
272
|
+
proc.on('error', () => resolve(false));
|
|
273
|
+
});
|
|
274
|
+
};
|
|
275
|
+
// Quick check — if model is cached, this returns in ~5s
|
|
276
|
+
const quickSpinner = spinner(`Checking for cached model (${modelName})...`);
|
|
277
|
+
const isCached = await checkCached();
|
|
278
|
+
if (isCached) {
|
|
279
|
+
quickSpinner.stop(true, `Model ready (${modelName})`);
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
282
|
+
quickSpinner.stop(false, 'Model not cached yet');
|
|
283
|
+
log(sym.info, 'Downloading embedding model (~600MB, only needed once)');
|
|
284
|
+
// Download the model — this can take a few minutes.
|
|
285
|
+
// Pipe stderr directly to the terminal so HuggingFace progress bars render natively.
|
|
286
|
+
const downloaded = await new Promise((resolve) => {
|
|
287
|
+
const proc = spawnProcess(python, [
|
|
288
|
+
'-c',
|
|
289
|
+
`from sentence_transformers import SentenceTransformer; print("downloading..."); m = SentenceTransformer("${modelName}"); print(f"done dims:{len(m.encode(['test'])[0])}")`,
|
|
290
|
+
], { cwd: sidecarDir, stdio: ['pipe', 'pipe', 'inherit'] });
|
|
291
|
+
let stdout = '';
|
|
292
|
+
proc.stdout?.on('data', (d) => {
|
|
293
|
+
stdout += d.toString();
|
|
294
|
+
});
|
|
295
|
+
proc.on('close', (code) => {
|
|
296
|
+
resolve(code === 0 && stdout.includes('done'));
|
|
297
|
+
});
|
|
298
|
+
proc.on('error', () => resolve(false));
|
|
299
|
+
});
|
|
300
|
+
if (downloaded) {
|
|
301
|
+
log(sym.ok, 'Embedding model downloaded and cached');
|
|
302
|
+
}
|
|
303
|
+
else {
|
|
304
|
+
log(sym.warn, 'Model download failed — it will retry on first use');
|
|
305
|
+
console.log(` ${c.dim}You can download manually: ${python} -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('${modelName}')"${c.reset}`);
|
|
306
|
+
}
|
|
307
|
+
};
|
|
88
308
|
/**
|
|
89
309
|
* Init command - set up Neo4j and the Python embedding sidecar
|
|
90
310
|
*/
|
|
91
311
|
const runInit = async (options) => {
|
|
92
312
|
const boltPort = options.port ? parseInt(options.port, 10) : NEO4J_CONFIG.boltPort;
|
|
93
313
|
const httpPort = options.httpPort ? parseInt(options.httpPort, 10) : NEO4J_CONFIG.httpPort;
|
|
94
|
-
const password = options.password
|
|
95
|
-
const memory = options.memory
|
|
314
|
+
const password = options.password ?? NEO4J_CONFIG.defaultPassword;
|
|
315
|
+
const memory = options.memory ?? '4G';
|
|
96
316
|
header('Code Graph Context Setup');
|
|
97
317
|
// Check Docker
|
|
98
318
|
if (!isDockerInstalled()) {
|
|
@@ -114,6 +334,8 @@ const runInit = async (options) => {
|
|
|
114
334
|
const apocOk = isApocAvailable(NEO4J_CONFIG.containerName, password);
|
|
115
335
|
log(apocOk ? sym.ok : sym.warn, apocOk ? 'APOC plugin available' : 'APOC plugin not detected');
|
|
116
336
|
console.log(`\n ${c.dim}Use --force to recreate the container${c.reset}`);
|
|
337
|
+
// Still set up sidecar even if Neo4j is already running
|
|
338
|
+
await setupSidecar();
|
|
117
339
|
printConfigInstructions(password, boltPort);
|
|
118
340
|
return;
|
|
119
341
|
}
|
|
@@ -165,6 +387,8 @@ ${c.bold}Neo4j is ready${c.reset}
|
|
|
165
387
|
Browser: ${c.cyan}http://localhost:${httpPort}${c.reset}
|
|
166
388
|
Bolt URI: ${c.cyan}bolt://localhost:${boltPort}${c.reset}
|
|
167
389
|
Credentials: ${c.dim}neo4j / ${password}${c.reset}`);
|
|
390
|
+
// Set up Python embedding sidecar
|
|
391
|
+
await setupSidecar();
|
|
168
392
|
printConfigInstructions(password, boltPort);
|
|
169
393
|
};
|
|
170
394
|
/**
|
|
@@ -189,6 +413,22 @@ const runStatus = () => {
|
|
|
189
413
|
log(status.neo4jReady ? sym.ok : sym.warn, `Neo4j responding: ${status.neo4jReady ? 'yes' : 'no'}`);
|
|
190
414
|
log(status.apocAvailable ? sym.ok : sym.warn, `APOC plugin: ${status.apocAvailable ? 'available' : 'not available'}`);
|
|
191
415
|
}
|
|
416
|
+
// Sidecar status
|
|
417
|
+
console.log('');
|
|
418
|
+
const pythonVersion = getPythonVersion();
|
|
419
|
+
log(pythonVersion ? sym.ok : sym.warn, `Python: ${pythonVersion ?? 'not found'}`);
|
|
420
|
+
if (pythonVersion) {
|
|
421
|
+
const sidecarDir = getSidecarDir();
|
|
422
|
+
const venvExists = existsSync(join(sidecarDir, '.venv'));
|
|
423
|
+
log(venvExists ? sym.ok : sym.warn, `Sidecar venv: ${venvExists ? 'exists' : 'not created'}`);
|
|
424
|
+
if (venvExists) {
|
|
425
|
+
const python = getSidecarPython(sidecarDir);
|
|
426
|
+
const hasDeps = checkPipPackage('sentence_transformers', python) &&
|
|
427
|
+
checkPipPackage('fastapi', python) &&
|
|
428
|
+
checkPipPackage('torch', python);
|
|
429
|
+
log(hasDeps ? sym.ok : sym.warn, `Sidecar deps: ${hasDeps ? 'installed' : 'not installed'}`);
|
|
430
|
+
}
|
|
431
|
+
}
|
|
192
432
|
console.log('');
|
|
193
433
|
if (status.containerStatus !== 'running') {
|
|
194
434
|
console.log(` Run ${c.dim}code-graph-context init${c.reset} to start Neo4j\n`);
|