code-context-engine 0.4.19__tar.gz → 0.4.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {code_context_engine-0.4.19/src/code_context_engine.egg-info → code_context_engine-0.4.20}/PKG-INFO +13 -5
  2. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/README.md +10 -3
  3. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/pyproject.toml +6 -2
  4. {code_context_engine-0.4.19 → code_context_engine-0.4.20/src/code_context_engine.egg-info}/PKG-INFO +13 -5
  5. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/requires.txt +3 -1
  6. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli.py +91 -15
  7. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/config.py +7 -0
  8. code_context_engine-0.4.20/src/context_engine/indexer/embedder.py +539 -0
  9. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_hooks.py +16 -2
  10. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/manifest.py +27 -0
  11. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/pipeline.py +45 -3
  12. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/watcher.py +35 -4
  13. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/mcp_server.py +62 -15
  14. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_installer.py +35 -0
  15. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_server.py +25 -9
  16. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/project_commands.py +6 -0
  17. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_project_commands.py +15 -0
  18. code_context_engine-0.4.19/src/context_engine/indexer/embedder.py +0 -158
  19. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/LICENSE +0 -0
  20. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/setup.cfg +0 -0
  21. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/SOURCES.txt +0 -0
  22. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/dependency_links.txt +0 -0
  23. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/entry_points.txt +0 -0
  24. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/top_level.txt +0 -0
  25. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/__init__.py +0 -0
  26. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli_style.py +0 -0
  27. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/__init__.py +0 -0
  28. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/compressor.py +0 -0
  29. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/ollama_client.py +0 -0
  30. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/output_rules.py +0 -0
  31. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/prompts.py +0 -0
  32. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/quality.py +0 -0
  33. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/__init__.py +0 -0
  34. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/_page.py +0 -0
  35. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/server.py +0 -0
  36. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/editors.py +0 -0
  37. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/event_bus.py +0 -0
  38. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/__init__.py +0 -0
  39. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/chunker.py +0 -0
  40. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/embedding_cache.py +0 -0
  41. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_indexer.py +0 -0
  42. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/ignorefile.py +0 -0
  43. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/secrets.py +0 -0
  44. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/__init__.py +0 -0
  45. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/bootstrap.py +0 -0
  46. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/git_context.py +0 -0
  47. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/session_capture.py +0 -0
  48. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/__init__.py +0 -0
  49. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/compressor.py +0 -0
  50. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/db.py +0 -0
  51. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/decision_extractor.py +0 -0
  52. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/extractive.py +0 -0
  53. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/grammar.py +0 -0
  54. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hooks.py +0 -0
  55. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/migrate.py +0 -0
  56. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/models.py +0 -0
  57. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/pricing.py +0 -0
  58. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/__init__.py +0 -0
  59. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/confidence.py +0 -0
  60. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/query_parser.py +0 -0
  61. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/retriever.py +0 -0
  62. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/serve_http.py +0 -0
  63. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/services.py +0 -0
  64. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/__init__.py +0 -0
  65. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/backend.py +0 -0
  66. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/fts_store.py +0 -0
  67. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/graph_store.py +0 -0
  68. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/local_backend.py +0 -0
  69. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/remote_backend.py +0 -0
  70. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/vector_store.py +0 -0
  71. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/utils.py +0 -0
  72. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_init_probe.py +0 -0
  73. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_mcp_config.py +0 -0
  74. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_safe_cwd.py +0 -0
  75. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings.py +0 -0
  76. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_buckets.py +0 -0
  77. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_e2e.py +0 -0
  78. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_serve.py +0 -0
  79. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_export.py +0 -0
  80. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_status.py +0 -0
  81. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_smoke.py +0 -0
  82. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_uninstall.py +0 -0
  83. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_config.py +0 -0
  84. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_e2e.py +0 -0
  85. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_codex.py +0 -0
  86. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_opencode.py +0 -0
  87. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_event_bus.py +0 -0
  88. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_models.py +0 -0
  89. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_real_life.py +0 -0
  90. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_services.py +0 -0
  91. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_efficiency.py +0 -0
  92. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_packing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.20
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -148,6 +149,12 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
160
  `cce init` auto-detects your editor and writes the right config:
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
432
 
426
433
  | Component | Size |
427
434
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
435
+ | Core install (Ollama backend) | ~17 MB |
436
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
437
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
438
  | Index per project (small/medium/large) | 5-60 MB |
431
439
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
440
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
441
 
434
442
  ---
435
443
 
@@ -98,6 +98,12 @@ cd /path/to/your/project
98
98
  cce init # index, install hooks, register MCP server
99
99
  ```
100
100
 
101
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
102
+
103
+ ```bash
104
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
105
+ ```
106
+
101
107
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
102
108
 
103
109
  `cce init` auto-detects your editor and writes the right config:
@@ -375,11 +381,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
375
381
 
376
382
  | Component | Size |
377
383
  |-----------|------|
378
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
379
- | Embedding model (one-time download) | ~60 MB |
384
+ | Core install (Ollama backend) | ~17 MB |
385
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
386
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
380
387
  | Index per project (small/medium/large) | 5-60 MB |
381
388
 
382
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
389
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
383
390
 
384
391
  ---
385
392
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "code-context-engine"
3
- version = "0.4.19"
3
+ version = "0.4.20"
4
4
  description = "Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server."
5
5
  readme = {file = "README.md", content-type = "text/markdown"}
6
6
  license = "MIT"
@@ -23,7 +23,6 @@ dependencies = [
23
23
  "click>=8.1",
24
24
  "pyyaml>=6.0",
25
25
  "sqlite-vec>=0.1.6",
26
- "fastembed>=0.4",
27
26
  "numpy>=1.24",
28
27
  "tree-sitter>=0.22",
29
28
  "tree-sitter-python>=0.21",
@@ -60,6 +59,11 @@ dev = [
60
59
  "ruff>=0.13",
61
60
  ]
62
61
  http = [] # back-compat: aiohttp is now a core dependency
62
+ # Local on-device embedding via fastembed (ONNX). ~172 MB install
63
+ # footprint; needed only if you don't have Ollama running. Without
64
+ # this extra, CCE auto-detects Ollama at localhost:11434 and uses
65
+ # nomic-embed-text via /api/embed.
66
+ local = ["fastembed>=0.4"]
63
67
 
64
68
  [project.scripts]
65
69
  cce = "context_engine.cli:main"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.20
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -148,6 +149,12 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
160
  `cce init` auto-detects your editor and writes the right config:
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
432
 
426
433
  | Component | Size |
427
434
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
435
+ | Core install (Ollama backend) | ~17 MB |
436
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
437
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
438
  | Index per project (small/medium/large) | 5-60 MB |
431
439
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
440
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
441
 
434
442
  ---
435
443
 
@@ -1,7 +1,6 @@
1
1
  click>=8.1
2
2
  pyyaml>=6.0
3
3
  sqlite-vec>=0.1.6
4
- fastembed>=0.4
5
4
  numpy>=1.24
6
5
  tree-sitter>=0.22
7
6
  tree-sitter-python>=0.21
@@ -28,3 +27,6 @@ pytest-xdist>=3.5
28
27
  ruff>=0.13
29
28
 
30
29
  [http]
30
+
31
+ [local]
32
+ fastembed>=0.4
@@ -2,6 +2,7 @@
2
2
  """CLI entry point for code-context-engine."""
3
3
  import asyncio
4
4
  import json
5
+ import os
5
6
  import socket
6
7
  import sys
7
8
  from pathlib import Path
@@ -536,25 +537,43 @@ def _show_welcome_banner(config) -> None:
536
537
  def _preflight_check(config) -> None:
537
538
  """Verify all required components are ready before indexing starts.
538
539
 
539
- Downloads the embedding model on first use with a clear progress message,
540
- and reports Ollama status so users know what compression level they will get.
540
+ Auto-detects an embedding backend (fastembed → Ollama), reports which
541
+ one was picked, and surfaces Ollama status for the separate compression
542
+ path so users know what compression level they will get.
541
543
  """
542
- # --- Embedding model ---
543
- click.echo(_dim(" Checking embedding model") + "...", nl=False)
544
+ # --- Embedding backend ---
545
+ click.echo(_dim(" Detecting embedding backend") + "...", nl=False)
546
+ from context_engine.config import resolve_ollama_url
547
+ ollama_model = getattr(config, "ollama_embed_model", "nomic-embed-text")
548
+ ollama_url = resolve_ollama_url(config)
544
549
  try:
545
- from fastembed import TextEmbedding
546
- model_name = getattr(config, "embedding_model", "BAAI/bge-small-en-v1.5")
547
- if "/" not in model_name:
548
- model_name = f"sentence-transformers/{model_name}"
549
- click.echo(_dim(" downloading if needed (60 MB, first time only)") + "...", nl=False)
550
- TextEmbedding(model_name)
551
- click.echo(" " + click.style("ready", fg="green"))
550
+ from context_engine.indexer.embedder import select_backend
551
+ # Don't echo a tentative "loading fastembed…" or "using Ollama…"
552
+ # banner before select_backend() picks. CCE_EMBED_BACKEND can
553
+ # force a different choice than the probe order suggests, and
554
+ # printing both messages produced contradictory output. Wait for
555
+ # the actual selection, then echo once with the truth.
556
+ backend = select_backend(
557
+ model_name=getattr(config, "embedding_model", "BAAI/bge-small-en-v1.5"),
558
+ ollama_model=ollama_model,
559
+ ollama_url=ollama_url,
560
+ )
561
+ click.echo(
562
+ " " + click.style(
563
+ f"ready ({backend.name}, {backend.dimension}-d, {backend.model_name})",
564
+ fg="green",
565
+ )
566
+ )
552
567
  except Exception as exc:
553
568
  click.echo("")
554
- _warn(f"Could not load embedding model: {exc}")
555
- _warn("Indexing will attempt to continue but may fail.")
569
+ _warn(f"No embedding backend available: {exc}")
570
+ _warn(
571
+ "Install fastembed (`pip install code-context-engine[local]`) "
572
+ f"or start an Ollama server at {ollama_url} and pull "
573
+ f"{ollama_model}."
574
+ )
556
575
 
557
- # --- Ollama (optional) ---
576
+ # --- Ollama for LLM compression (independent of the embedding path) ---
558
577
  try:
559
578
  import httpx
560
579
  resp = httpx.get("http://localhost:11434/api/tags", timeout=2.0)
@@ -2786,6 +2805,16 @@ async def _run_index(
2786
2805
  async def _run_serve(config) -> None:
2787
2806
  """Start MCP server with live file watcher."""
2788
2807
  import logging
2808
+ import signal
2809
+ # Force single-process embedding inside `cce serve` unless the user
2810
+ # explicitly overrode it. The reindex worker triggered by file changes
2811
+ # otherwise spawns a fastembed forkserver pool (~4 workers × ~1.6 GB on
2812
+ # Linux) that orphans on abnormal exit and leaks RSS across `cce index`
2813
+ # invocations (issue #66). Single-process embed is plenty for one-file
2814
+ # watcher reindexes; bulk `cce index` run from a separate shell still
2815
+ # gets the multiprocess path.
2816
+ os.environ.setdefault("CCE_EMBED_PARALLEL", "0")
2817
+
2789
2818
  from context_engine.storage.local_backend import LocalBackend
2790
2819
  from context_engine.indexer.embedder import Embedder
2791
2820
  from context_engine.retrieval.retriever import HybridRetriever
@@ -2903,9 +2932,56 @@ async def _run_serve(config) -> None:
2903
2932
  file=sys.stderr,
2904
2933
  )
2905
2934
 
2935
+ # Install signal handlers so SIGINT (Ctrl-C), SIGTERM, and SIGHUP all
2936
+ # route through the same orderly shutdown path. Previously only SIGTERM
2937
+ # cancelled the MCP task — SIGINT was swallowed by stdio reads, leaving
2938
+ # `cce serve` unkillable except via SIGKILL, which orphans the embed
2939
+ # workers (#66).
2940
+ serve_loop = asyncio.get_running_loop()
2941
+ mcp_task = asyncio.create_task(mcp.run_stdio())
2942
+
2943
+ def _request_shutdown(signame: str) -> None:
2944
+ if not mcp_task.done():
2945
+ _log.info("Received %s, shutting down...", signame)
2946
+ mcp_task.cancel()
2947
+
2948
+ # Build the candidate list with getattr so we don't reference
2949
+ # `signal.SIGHUP` at the tuple-construction site — SIGHUP is
2950
+ # undefined on Windows and that AttributeError would fire *before*
2951
+ # the try/except below could swallow it, crashing `cce serve` on
2952
+ # Windows entirely (Copilot review on #69).
2953
+ installed_signals: list[int] = []
2954
+ candidate_sigs = [
2955
+ s for s in (
2956
+ getattr(signal, "SIGINT", None),
2957
+ getattr(signal, "SIGTERM", None),
2958
+ getattr(signal, "SIGHUP", None),
2959
+ ) if s is not None
2960
+ ]
2961
+ for _sig in candidate_sigs:
2962
+ try:
2963
+ serve_loop.add_signal_handler(
2964
+ _sig, _request_shutdown, _sig.name,
2965
+ )
2966
+ installed_signals.append(_sig)
2967
+ except (NotImplementedError, RuntimeError):
2968
+ # Windows's ProactorEventLoop refuses add_signal_handler;
2969
+ # asyncio also raises NotImplementedError outside the main
2970
+ # thread. SIGTERM still arrives via the default Python
2971
+ # handler in those environments.
2972
+ pass
2973
+
2906
2974
  try:
2907
- await mcp.run_stdio()
2975
+ try:
2976
+ await mcp_task
2977
+ except asyncio.CancelledError:
2978
+ pass
2908
2979
  finally:
2980
+ for _sig in installed_signals:
2981
+ try:
2982
+ serve_loop.remove_signal_handler(_sig)
2983
+ except (NotImplementedError, RuntimeError):
2984
+ pass
2909
2985
  if watcher:
2910
2986
  watcher.stop()
2911
2987
  if worker_task:
@@ -59,6 +59,11 @@ class Config:
59
59
 
60
60
  # Embedding
61
61
  embedding_model: str = "BAAI/bge-small-en-v1.5"
62
+ # Model used when the Ollama embedding backend is selected. Only
63
+ # consulted if fastembed isn't installed or `CCE_EMBED_BACKEND=ollama`
64
+ # forces the Ollama path. 768-dim default; switching this triggers a
65
+ # full reindex because the vector store rejects dimension mismatches.
66
+ ollama_embed_model: str = "nomic-embed-text"
62
67
 
63
68
  # Retrieval
64
69
  retrieval_confidence_threshold: float = 0.2
@@ -120,6 +125,7 @@ _EXPECTED_TYPES: dict[str, type | tuple[type, ...]] = {
120
125
  "ollama_url": str,
121
126
  "output_compression": str,
122
127
  "embedding_model": str,
128
+ "ollama_embed_model": str,
123
129
  "retrieval_confidence_threshold": (int, float),
124
130
  "retrieval_top_k": int,
125
131
  "bootstrap_max_tokens": int,
@@ -141,6 +147,7 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
141
147
  ("compression", "ollama_url"): "ollama_url",
142
148
  ("compression", "output"): "output_compression",
143
149
  ("embedding", "model"): "embedding_model",
150
+ ("embedding", "ollama_model"): "ollama_embed_model",
144
151
  ("retrieval", "confidence_threshold"): "retrieval_confidence_threshold",
145
152
  ("retrieval", "top_k"): "retrieval_top_k",
146
153
  ("retrieval", "bootstrap_max_tokens"): "bootstrap_max_tokens",