npm - code-graph-context - Versions diffs - 2.10.5 → 2.11.0 - Mend

code-graph-context 2.10.5 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +5 -4
package/dist/cli/cli.js +1 -1
package/dist/core/embeddings/embedding-sidecar.js +1 -1
package/dist/core/embeddings/embeddings.service.js +3 -2
package/package.json +1 -1
package/sidecar/embedding_server.py +3 -3
package/sidecar/requirements.txt +1 -0

package/README.md CHANGED Viewed

@@ -189,9 +189,9 @@ If you prefer to edit the config files directly:
 | `NEO4J_URI` | No | `bolt://localhost:7687` | Neo4j connection URI |
 | `NEO4J_USER` | No | `neo4j` | Neo4j username |
 | `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
-| `EMBEDDING_MODEL` | No | `Qodo/Qodo-Embed-1-1.5B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
+| `EMBEDDING_MODEL` | No | `Qwen/Qwen3-Embedding-0.6B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
 | `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
-| `EMBEDDING_FULL_PRECISION` | No | `false` | Set `true` for float32 (uses ~2x memory) |
+| `EMBEDDING_HALF_PRECISION` | No | `false` | Set `true` for float16 (uses ~0.5x memory) |
 | `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
 | `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
@@ -538,7 +538,7 @@ Local embeddings are the default — **no API key needed**. The Python sidecar s
 The sidecar uses **float32 (full precision)** by default; float16 (half precision) is opt-in and roughly halves memory usage. It also auto-shuts down after 3 minutes of inactivity to free memory, and restarts lazily when needed (~15-20s).
-> **Full precision mode:** If you have 32+ GB RAM and want float32, set `EMBEDDING_FULL_PRECISION=true`.
+> **Half precision mode:** To reduce memory usage at the cost of some accuracy, set `EMBEDDING_HALF_PRECISION=true`.
 ### Available Models
@@ -546,7 +546,8 @@ Set via the `EMBEDDING_MODEL` environment variable:
 | Model | Dimensions | RAM (fp16) | Quality | Best For |
 |-------|-----------|-----|---------|----------|
-| `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~4.5 GB | Best | Default, works on 16GB machines |
+| `Qwen/Qwen3-Embedding-0.6B` (default) | 1024 | ~1.2 GB | Best | Default, code-aware, MTEB-Code #1 |
+| `Qodo/Qodo-Embed-1-1.5B` | 1536 | ~4.5 GB | Great | Machines with 32+ GB RAM |
 | `BAAI/bge-base-en-v1.5` | 768 | ~250 MB | Good | General purpose, low RAM |
 | `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~100 MB | OK | Minimal RAM, fast |
 | `nomic-ai/nomic-embed-text-v1.5` | 768 | ~300 MB | Good | Code + prose mixed |

package/dist/cli/cli.js CHANGED Viewed

@@ -251,7 +251,7 @@ const setupSidecar = async () => {
         return;
     }
     // Pre-download the embedding model so first real use is fast
-    const modelName = process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B';
+    const modelName = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
     await preDownloadModel(sidecarDir, python, modelName);
 };
 /**

package/dist/core/embeddings/embedding-sidecar.js CHANGED Viewed

@@ -12,7 +12,7 @@ const __dirname = dirname(__filename);
 const DEFAULT_CONFIG = {
     port: parseInt(process.env.EMBEDDING_SIDECAR_PORT ?? '', 10) || 8787,
     host: '127.0.0.1',
-    model: process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B',
+    model: process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B',
     startupTimeoutMs: 120_000, // 2 min — first run downloads the model
     requestTimeoutMs: 60_000,
     idleTimeoutMs: 180_000, // 3 min — auto-shutdown after no requests

package/dist/core/embeddings/embeddings.service.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * and get the right implementation based on OPENAI_ENABLED.
  *
  *   OPENAI_ENABLED=true  → OpenAI text-embedding-3-large (requires OPENAI_API_KEY)
- *   default              → Local Python sidecar with Qodo-Embed-1-1.5B
+ *   default              → Local Python sidecar with Qwen3-Embedding-0.6B
  */
 import { LocalEmbeddingsService } from './local-embeddings.service.js';
 import { OpenAIEmbeddingsService } from './openai-embeddings.service.js';
@@ -24,6 +24,7 @@ export const EMBEDDING_DIMENSIONS = {
     'text-embedding-3-large': 3072,
     'text-embedding-3-small': 1536,
     // Local models (via sidecar)
+    'Qwen/Qwen3-Embedding-0.6B': 1024,
     'Qodo/Qodo-Embed-1-1.5B': 1536,
     'sentence-transformers/all-MiniLM-L6-v2': 384,
     'sentence-transformers/all-mpnet-base-v2': 768,
@@ -45,7 +46,7 @@ export const getEmbeddingDimensions = () => {
         const model = process.env.OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-large';
         return EMBEDDING_DIMENSIONS[model] ?? 3072;
     }
-    const model = process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B';
+    const model = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
     return EMBEDDING_DIMENSIONS[model] ?? 1536;
 };
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "code-graph-context",
-  "version": "2.10.5",
+  "version": "2.11.0",
   "description": "MCP server that builds code graphs to provide rich context to LLMs",
   "type": "module",
   "homepage": "https://github.com/drewdrewH/code-graph-context#readme",

package/sidecar/embedding_server.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
 Local embedding server for code-graph-context.
-Uses Qodo-Embed-1-1.5B for high-quality code embeddings without OpenAI dependency.
+Uses Qwen3-Embedding-0.6B for high-quality code embeddings without OpenAI dependency.
 Runs as a sidecar process managed by the Node.js MCP server.
 """
@@ -27,7 +27,7 @@ logger.info(f"Sidecar process starting (pid={os.getpid()})")
 app = FastAPI(title="code-graph-context embedding sidecar")
 model = None
-model_name = os.environ.get("EMBEDDING_MODEL", "Qodo/Qodo-Embed-1-1.5B")
+model_name = os.environ.get("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B")
 class EmbedRequest(BaseModel):
@@ -52,7 +52,7 @@ def load_model():
         logger.info(f"Loading {model_name} on {device}...")
         logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
-        use_half = os.environ.get("EMBEDDING_FULL_PRECISION", "").lower() != "true"
+        use_half = os.environ.get("EMBEDDING_HALF_PRECISION", "").lower() == "true"
         model = SentenceTransformer(model_name, device=device)
         if use_half:
             model.half()

package/sidecar/requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ uvicorn>=0.24.0
 sentence-transformers>=3.0.0
 torch>=2.0.0
 pydantic>=2.0.0
+transformers>=4.51.0