code-graph-context 2.10.5 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -189,9 +189,9 @@ If you prefer to edit the config files directly:
|
|
|
189
189
|
| `NEO4J_URI` | No | `bolt://localhost:7687` | Neo4j connection URI |
|
|
190
190
|
| `NEO4J_USER` | No | `neo4j` | Neo4j username |
|
|
191
191
|
| `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
|
|
192
|
-
| `EMBEDDING_MODEL` | No | `
|
|
192
|
+
| `EMBEDDING_MODEL` | No | `Qwen/Qwen3-Embedding-0.6B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
|
|
193
193
|
| `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
|
|
194
|
-
| `
|
|
194
|
+
| `EMBEDDING_HALF_PRECISION` | No | `false` | Set `true` for float16 (uses ~0.5x memory) |
|
|
195
195
|
| `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
|
|
196
196
|
| `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
|
|
197
197
|
|
|
@@ -538,7 +538,7 @@ Local embeddings are the default — **no API key needed**. The Python sidecar s
|
|
|
538
538
|
|
|
539
539
|
The sidecar loads the model in full precision by default; **float16 (half precision)** is opt-in and roughly halves memory usage. The sidecar also auto-shuts down after 3 minutes of inactivity to free memory, and restarts lazily when needed (~15-20s).
|
|
540
540
|
|
|
541
|
-
> **
|
|
541
|
+
> **Half precision mode:** To reduce memory usage at the cost of some accuracy, set `EMBEDDING_HALF_PRECISION=true`.
|
|
542
542
|
|
|
543
543
|
### Available Models
|
|
544
544
|
|
|
@@ -546,7 +546,8 @@ Set via the `EMBEDDING_MODEL` environment variable:
|
|
|
546
546
|
|
|
547
547
|
| Model | Dimensions | RAM (fp16) | Quality | Best For |
|
|
548
548
|
|-------|-----------|-----|---------|----------|
|
|
549
|
-
| `
|
|
549
|
+
| `Qwen/Qwen3-Embedding-0.6B` (default) | 1024 | ~1.2 GB | Best | Default, code-aware, MTEB-Code #1 |
|
|
550
|
+
| `Qodo/Qodo-Embed-1-1.5B` | 1536 | ~4.5 GB | Great | Machines with 32+ GB RAM |
|
|
550
551
|
| `BAAI/bge-base-en-v1.5` | 768 | ~250 MB | Good | General purpose, low RAM |
|
|
551
552
|
| `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~100 MB | OK | Minimal RAM, fast |
|
|
552
553
|
| `nomic-ai/nomic-embed-text-v1.5` | 768 | ~300 MB | Good | Code + prose mixed |
|
package/dist/cli/cli.js
CHANGED
|
@@ -251,7 +251,7 @@ const setupSidecar = async () => {
|
|
|
251
251
|
return;
|
|
252
252
|
}
|
|
253
253
|
// Pre-download the embedding model so first real use is fast
|
|
254
|
-
const modelName = process.env.EMBEDDING_MODEL ?? '
|
|
254
|
+
const modelName = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
|
|
255
255
|
await preDownloadModel(sidecarDir, python, modelName);
|
|
256
256
|
};
|
|
257
257
|
/**
|
|
@@ -12,7 +12,7 @@ const __dirname = dirname(__filename);
|
|
|
12
12
|
const DEFAULT_CONFIG = {
|
|
13
13
|
port: parseInt(process.env.EMBEDDING_SIDECAR_PORT ?? '', 10) || 8787,
|
|
14
14
|
host: '127.0.0.1',
|
|
15
|
-
model: process.env.EMBEDDING_MODEL ?? '
|
|
15
|
+
model: process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B',
|
|
16
16
|
startupTimeoutMs: 120_000, // 2 min — first run downloads the model
|
|
17
17
|
requestTimeoutMs: 60_000,
|
|
18
18
|
idleTimeoutMs: 180_000, // 3 min — auto-shutdown after no requests
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* and get the right implementation based on OPENAI_ENABLED.
|
|
6
6
|
*
|
|
7
7
|
* OPENAI_ENABLED=true → OpenAI text-embedding-3-large (requires OPENAI_API_KEY)
|
|
8
|
-
* default → Local Python sidecar with
|
|
8
|
+
* default → Local Python sidecar with Qwen3-Embedding-0.6B
|
|
9
9
|
*/
|
|
10
10
|
import { LocalEmbeddingsService } from './local-embeddings.service.js';
|
|
11
11
|
import { OpenAIEmbeddingsService } from './openai-embeddings.service.js';
|
|
@@ -24,6 +24,7 @@ export const EMBEDDING_DIMENSIONS = {
|
|
|
24
24
|
'text-embedding-3-large': 3072,
|
|
25
25
|
'text-embedding-3-small': 1536,
|
|
26
26
|
// Local models (via sidecar)
|
|
27
|
+
'Qwen/Qwen3-Embedding-0.6B': 1024,
|
|
27
28
|
'Qodo/Qodo-Embed-1-1.5B': 1536,
|
|
28
29
|
'sentence-transformers/all-MiniLM-L6-v2': 384,
|
|
29
30
|
'sentence-transformers/all-mpnet-base-v2': 768,
|
|
@@ -45,7 +46,7 @@ export const getEmbeddingDimensions = () => {
|
|
|
45
46
|
const model = process.env.OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-large';
|
|
46
47
|
return EMBEDDING_DIMENSIONS[model] ?? 3072;
|
|
47
48
|
}
|
|
48
|
-
const model = process.env.EMBEDDING_MODEL ?? '
|
|
49
|
+
const model = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
|
|
49
50
|
return EMBEDDING_DIMENSIONS[model] ?? 1536;
|
|
50
51
|
};
|
|
51
52
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Local embedding server for code-graph-context.
|
|
3
|
-
Uses
|
|
3
|
+
Uses Qwen3-Embedding-0.6B by default (override via the EMBEDDING_MODEL environment variable) for high-quality code embeddings without an OpenAI dependency.
|
|
4
4
|
Runs as a sidecar process managed by the Node.js MCP server.
|
|
5
5
|
"""
|
|
6
6
|
|
|
@@ -27,7 +27,7 @@ logger.info(f"Sidecar process starting (pid={os.getpid()})")
|
|
|
27
27
|
app = FastAPI(title="code-graph-context embedding sidecar")
|
|
28
28
|
|
|
29
29
|
model = None
|
|
30
|
-
model_name = os.environ.get("EMBEDDING_MODEL", "
|
|
30
|
+
model_name = os.environ.get("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B")
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class EmbedRequest(BaseModel):
|
|
@@ -52,7 +52,7 @@ def load_model():
|
|
|
52
52
|
logger.info(f"Loading {model_name} on {device}...")
|
|
53
53
|
logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
|
|
54
54
|
|
|
55
|
-
use_half = os.environ.get("
|
|
55
|
+
use_half = os.environ.get("EMBEDDING_HALF_PRECISION", "").lower() == "true"
|
|
56
56
|
model = SentenceTransformer(model_name, device=device)
|
|
57
57
|
if use_half:
|
|
58
58
|
model.half()
|
package/sidecar/requirements.txt
CHANGED