PyPI - code-context-engine - Versions diffs - 0.4.0__py3-none-any.whl - Mend

code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

code_context_engine-0.4.0.dist-info/METADATA +389 -0
code_context_engine-0.4.0.dist-info/RECORD +63 -0
code_context_engine-0.4.0.dist-info/WHEEL +5 -0
code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
context_engine/__init__.py +3 -0
context_engine/cli.py +2848 -0
context_engine/cli_style.py +66 -0
context_engine/compression/__init__.py +0 -0
context_engine/compression/compressor.py +144 -0
context_engine/compression/ollama_client.py +33 -0
context_engine/compression/output_rules.py +77 -0
context_engine/compression/prompts.py +9 -0
context_engine/compression/quality.py +37 -0
context_engine/config.py +198 -0
context_engine/dashboard/__init__.py +0 -0
context_engine/dashboard/_page.py +1548 -0
context_engine/dashboard/server.py +429 -0
context_engine/editors.py +265 -0
context_engine/event_bus.py +24 -0
context_engine/indexer/__init__.py +0 -0
context_engine/indexer/chunker.py +147 -0
context_engine/indexer/embedder.py +154 -0
context_engine/indexer/embedding_cache.py +168 -0
context_engine/indexer/git_hooks.py +73 -0
context_engine/indexer/git_indexer.py +136 -0
context_engine/indexer/ignorefile.py +96 -0
context_engine/indexer/manifest.py +78 -0
context_engine/indexer/pipeline.py +624 -0
context_engine/indexer/secrets.py +332 -0
context_engine/indexer/watcher.py +109 -0
context_engine/integration/__init__.py +0 -0
context_engine/integration/bootstrap.py +76 -0
context_engine/integration/git_context.py +132 -0
context_engine/integration/mcp_server.py +1825 -0
context_engine/integration/session_capture.py +306 -0
context_engine/memory/__init__.py +6 -0
context_engine/memory/compressor.py +344 -0
context_engine/memory/db.py +922 -0
context_engine/memory/extractive.py +106 -0
context_engine/memory/grammar.py +419 -0
context_engine/memory/hook_installer.py +258 -0
context_engine/memory/hook_server.py +83 -0
context_engine/memory/hooks.py +327 -0
context_engine/memory/migrate.py +268 -0
context_engine/models.py +96 -0
context_engine/pricing.py +104 -0
context_engine/project_commands.py +296 -0
context_engine/retrieval/__init__.py +0 -0
context_engine/retrieval/confidence.py +47 -0
context_engine/retrieval/query_parser.py +105 -0
context_engine/retrieval/retriever.py +199 -0
context_engine/serve_http.py +208 -0
context_engine/services.py +252 -0
context_engine/storage/__init__.py +0 -0
context_engine/storage/backend.py +39 -0
context_engine/storage/fts_store.py +112 -0
context_engine/storage/graph_store.py +219 -0
context_engine/storage/local_backend.py +109 -0
context_engine/storage/remote_backend.py +117 -0
context_engine/storage/vector_store.py +357 -0
context_engine/utils.py +72 -0

context_engine/storage/graph_store.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""Graph store — SQLite-backed implementation."""
+import asyncio
+import json
+import sqlite3
+from threading import RLock
+from context_engine.models import GraphNode, GraphEdge, NodeType, EdgeType
+# Schema for the graph database, applied with executescript() when a
+# GraphStore is constructed. Every statement uses IF NOT EXISTS, so running
+# it against an already-initialised database is a no-op.
+#   nodes: one row per graph node, keyed by `id`.
+#   edges: one row per (source_id, target_id, edge_type) triple.
+# Both `properties` columns hold JSON text (written with json.dumps and read
+# back with json.loads). The indexes cover the lookups this module performs:
+# edges by source, edges by target, and nodes by file_path.
+_DDL = """
+CREATE TABLE IF NOT EXISTS nodes (
+    id          TEXT PRIMARY KEY,
+    node_type   TEXT NOT NULL,
+    name        TEXT NOT NULL,
+    file_path   TEXT NOT NULL,
+    properties  TEXT NOT NULL DEFAULT '{}'
+);
+CREATE TABLE IF NOT EXISTS edges (
+    source_id   TEXT NOT NULL,
+    target_id   TEXT NOT NULL,
+    edge_type   TEXT NOT NULL,
+    properties  TEXT NOT NULL DEFAULT '{}',
+    PRIMARY KEY (source_id, target_id, edge_type)
+);
+CREATE INDEX IF NOT EXISTS idx_edges_source ON edges (source_id);
+CREATE INDEX IF NOT EXISTS idx_edges_target ON edges (target_id);
+CREATE INDEX IF NOT EXISTS idx_nodes_file   ON nodes  (file_path);
+"""
+def _row_to_node(row: tuple) -> GraphNode:
+    node_id, node_type, name, file_path, properties = row
+    return GraphNode(
+        id=node_id,
+        node_type=NodeType(node_type),
+        name=name,
+        file_path=file_path,
+        properties=json.loads(properties),
+    )
+class GraphStore:
+    """Single-connection SQLite graph store, serialised with an RLock.
+    `check_same_thread=False` only disables thread ownership checks; concurrent
+    operations on one connection are still unsafe. Mirrors VectorStore's
+    locking pattern.
+    """
+    def __init__(self, db_path: str) -> None:
+        self._db_path = db_path + ".db"
+        self._lock = RLock()
+        self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
+        with self._lock:
+            self._conn.executescript(_DDL)
+            self._conn.commit()
+    # ------------------------------------------------------------------
+    # Sync internals (run inside asyncio.to_thread)
+    # ------------------------------------------------------------------
+    def _sync_ingest(self, nodes: list[GraphNode], edges: list[GraphEdge]) -> None:
+        with self._lock:
+            cur = self._conn.cursor()
+            for node in nodes:
+                cur.execute(
+                    "INSERT OR REPLACE INTO nodes (id, node_type, name, file_path, properties) "
+                    "VALUES (?, ?, ?, ?, ?)",
+                    (node.id, node.node_type.value, node.name, node.file_path,
+                     json.dumps(node.properties)),
+                )
+            for edge in edges:
+                cur.execute(
+                    "INSERT OR REPLACE INTO edges (source_id, target_id, edge_type, properties) "
+                    "VALUES (?, ?, ?, ?)",
+                    (edge.source_id, edge.target_id, edge.edge_type.value,
+                     json.dumps(edge.properties)),
+                )
+            self._conn.commit()
+    def _sync_get_neighbors(self, node_id: str, edge_type: EdgeType | None) -> list[GraphNode]:
+        with self._lock:
+            cur = self._conn.cursor()
+            if edge_type is None:
+                cur.execute(
+                    "SELECT n.id, n.node_type, n.name, n.file_path, n.properties "
+                    "FROM edges e JOIN nodes n ON e.target_id = n.id "
+                    "WHERE e.source_id = ?",
+                    (node_id,),
+                )
+            else:
+                cur.execute(
+                    "SELECT n.id, n.node_type, n.name, n.file_path, n.properties "
+                    "FROM edges e JOIN nodes n ON e.target_id = n.id "
+                    "WHERE e.source_id = ? AND e.edge_type = ?",
+                    (node_id, edge_type.value),
+                )
+            return [_row_to_node(row) for row in cur.fetchall()]
+    def _sync_get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
+        with self._lock:
+            cur = self._conn.cursor()
+            cur.execute(
+                "SELECT id, node_type, name, file_path, properties FROM nodes WHERE file_path = ?",
+                (file_path,),
+            )
+            return [_row_to_node(row) for row in cur.fetchall()]
+    def _sync_neighbors_for_files(
+        self,
+        file_paths: list[str],
+        edge_types: list[EdgeType],
+        node_types: list[NodeType] | None = None,
+    ) -> list[GraphNode]:
+        """Single query: target-nodes of edges originating from any node belonging
+        to any of `file_paths`, filtered by edge_type (and optionally source-node
+        type). Replaces N+1 calls to get_nodes_by_file + get_neighbors per result.
+        """
+        if not file_paths or not edge_types:
+            return []
+        with self._lock:
+            cur = self._conn.cursor()
+            file_placeholders = ",".join("?" * len(file_paths))
+            edge_placeholders = ",".join("?" * len(edge_types))
+            params: list = list(file_paths) + [et.value for et in edge_types]
+            node_filter = ""
+            if node_types:
+                node_placeholders = ",".join("?" * len(node_types))
+                node_filter = f" AND src.node_type IN ({node_placeholders})"
+                params.extend(nt.value for nt in node_types)
+            cur.execute(
+                f"SELECT DISTINCT tgt.id, tgt.node_type, tgt.name, tgt.file_path, tgt.properties "
+                f"FROM nodes src "
+                f"JOIN edges e ON e.source_id = src.id "
+                f"JOIN nodes tgt ON tgt.id = e.target_id "
+                f"WHERE src.file_path IN ({file_placeholders}) "
+                f"  AND e.edge_type IN ({edge_placeholders})"
+                f"{node_filter}",
+                params,
+            )
+            return [_row_to_node(row) for row in cur.fetchall()]
+    def _sync_get_nodes_by_type(self, node_type: NodeType) -> list[GraphNode]:
+        with self._lock:
+            cur = self._conn.cursor()
+            cur.execute(
+                "SELECT id, node_type, name, file_path, properties FROM nodes WHERE node_type = ?",
+                (node_type.value,),
+            )
+            return [_row_to_node(row) for row in cur.fetchall()]
+    def _sync_delete_by_file(self, file_path: str) -> None:
+        self._sync_delete_by_files([file_path])
+    def _sync_delete_by_files(self, file_paths: list[str]) -> None:
+        if not file_paths:
+            return
+        from context_engine.utils import batched_params
+        with self._lock:
+            cur = self._conn.cursor()
+            # Collect node IDs in batches to respect SQLite param limits.
+            node_ids: list[str] = []
+            for batch in batched_params(file_paths):
+                ph = ",".join("?" * len(batch))
+                cur.execute(
+                    f"SELECT id FROM nodes WHERE file_path IN ({ph})", batch
+                )
+                node_ids.extend(row[0] for row in cur.fetchall())
+            # Delete edges and nodes in batches.
+            for batch in batched_params(node_ids):
+                ph = ",".join("?" * len(batch))
+                cur.execute(
+                    f"DELETE FROM edges WHERE source_id IN ({ph}) "
+                    f"OR target_id IN ({ph})",
+                    batch + batch,
+                )
+                cur.execute(f"DELETE FROM nodes WHERE id IN ({ph})", batch)
+            self._conn.commit()
+    # ------------------------------------------------------------------
+    # Public async API
+    # ------------------------------------------------------------------
+    async def ingest(self, nodes: list[GraphNode], edges: list[GraphEdge]) -> None:
+        await asyncio.to_thread(self._sync_ingest, nodes, edges)
+    async def get_neighbors(self, node_id: str, edge_type: EdgeType | None = None) -> list[GraphNode]:
+        return await asyncio.to_thread(self._sync_get_neighbors, node_id, edge_type)
+    async def get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
+        return await asyncio.to_thread(self._sync_get_nodes_by_file, file_path)
+    async def neighbors_for_files(
+        self,
+        file_paths: list[str],
+        edge_types: list[EdgeType],
+        node_types: list[NodeType] | None = None,
+    ) -> list[GraphNode]:
+        return await asyncio.to_thread(
+            self._sync_neighbors_for_files, file_paths, edge_types, node_types
+        )
+    async def get_nodes_by_type(self, node_type: NodeType) -> list[GraphNode]:
+        return await asyncio.to_thread(self._sync_get_nodes_by_type, node_type)
+    async def delete_by_file(self, file_path: str) -> None:
+        await asyncio.to_thread(self._sync_delete_by_file, file_path)
+    async def delete_by_files(self, file_paths: list[str]) -> None:
+        await asyncio.to_thread(self._sync_delete_by_files, file_paths)
+    def clear(self) -> None:
+        with self._lock:
+            self._conn.execute("DELETE FROM edges")
+            self._conn.execute("DELETE FROM nodes")
+            self._conn.commit()

context_engine/storage/local_backend.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Local storage backend — LanceDB vectors + SQLite FTS + SQLite graph."""
+import asyncio
+from pathlib import Path
+from context_engine.models import Chunk, GraphNode, GraphEdge, EdgeType
+from context_engine.storage.vector_store import VectorStore
+from context_engine.storage.fts_store import FTSStore
+from context_engine.storage.graph_store import GraphStore
+class LocalBackend:
+    def __init__(self, base_path: str) -> None:
+        self._vector_store = VectorStore(db_path=str(Path(base_path) / "vectors"))
+        self._fts_store = FTSStore(db_path=str(Path(base_path) / "fts"))
+        self._graph_store = GraphStore(db_path=str(Path(base_path) / "graph"))
+    async def ingest(
+        self,
+        chunks: list[Chunk],
+        nodes: list[GraphNode],
+        edges: list[GraphEdge],
+    ) -> None:
+        await asyncio.gather(
+            self._vector_store.ingest(chunks),
+            self._fts_store.ingest(chunks),
+            self._graph_store.ingest(nodes, edges),
+        )
+    async def vector_search(
+        self,
+        query_embedding: list[float],
+        top_k: int = 10,
+        filters: dict | None = None,
+    ) -> list[Chunk]:
+        return await self._vector_store.search(query_embedding, top_k, filters)
+    async def fts_search(
+        self,
+        query: str,
+        top_k: int = 30,
+    ) -> list[tuple[str, float]]:
+        return await self._fts_store.search(query, top_k)
+    async def graph_neighbors(
+        self,
+        node_id: str,
+        edge_type: EdgeType | None = None,
+    ) -> list[GraphNode]:
+        return await self._graph_store.get_neighbors(node_id, edge_type)
+    async def get_related_file_paths(self, file_paths: list[str]) -> list[str]:
+        """Return file paths reachable via CALLS or IMPORTS edges from the given files.
+        Used by the retriever for 1-hop graph expansion: if a result is in
+        auth.py, also surface chunks from files that auth.py calls or imports.
+        """
+        from context_engine.models import EdgeType, NodeType
+        if not file_paths:
+            return []
+        input_set = set(file_paths)
+        neighbors = await self._graph_store.neighbors_for_files(
+            file_paths,
+            edge_types=[EdgeType.CALLS, EdgeType.IMPORTS],
+            node_types=[NodeType.FUNCTION, NodeType.CLASS, NodeType.FILE, NodeType.MODULE],
+        )
+        return list({n.file_path for n in neighbors if n.file_path and n.file_path not in input_set})
+    async def get_chunk_by_id(self, chunk_id: str) -> Chunk | None:
+        return await self._vector_store.get_by_id(chunk_id)
+    async def get_chunks_by_ids(self, chunk_ids: list[str]) -> list[Chunk]:
+        return await self._vector_store.get_chunks_by_ids(chunk_ids)
+    async def delete_by_file(self, file_path: str) -> None:
+        await asyncio.gather(
+            self._vector_store.delete_by_file(file_path),
+            self._fts_store.delete_by_file(file_path),
+            self._graph_store.delete_by_file(file_path),
+        )
+    async def delete_by_files(self, file_paths: list[str]) -> None:
+        """Batched cousin of delete_by_file. Pipeline calls this once per
+        re-index batch instead of awaiting per-file deletes serially. The
+        three stores still run in parallel via asyncio.gather."""
+        if not file_paths:
+            return
+        await asyncio.gather(
+            self._vector_store.delete_by_files(file_paths),
+            self._fts_store.delete_by_files(file_paths),
+            self._graph_store.delete_by_files(file_paths),
+        )
+    def count_chunks(self) -> int:
+        return self._vector_store.count()
+    def file_chunk_counts(self) -> dict[str, int]:
+        return self._vector_store.file_chunk_counts()
+    def get_cached_compression(self, chunk_id: str, level: str) -> str | None:
+        return self._vector_store.get_cached_compression(chunk_id, level)
+    def put_cached_compression(self, chunk_id: str, level: str, compressed: str) -> None:
+        self._vector_store.put_cached_compression(chunk_id, level, compressed)
+    async def clear(self) -> None:
+        self._vector_store.clear()
+        self._fts_store.clear()
+        self._graph_store.clear()

context_engine/storage/remote_backend.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Remote storage backend — proxies DB + LLM operations to a remote server via SSH/HTTP."""
+import asyncio
+import httpx
+from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
+class RemoteBackend:
+    def __init__(self, host: str, port: int = 8765, fallback_to_local: bool = True):
+        self.host = host
+        self.port = port
+        self.fallback_to_local = fallback_to_local
+        if "@" in host:
+            self._user, self._hostname = host.split("@", 1)
+        else:
+            self._user = None
+            self._hostname = host
+        self._api_base = f"http://{self._hostname}:{port}"
+    async def is_reachable(self) -> bool:
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes",
+                self.host, "echo", "ok",
+                stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
+            )
+            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
+            return b"ok" in stdout
+        except (asyncio.TimeoutError, OSError):
+            return False
+    async def vector_search(self, query_embedding, top_k=10, filters=None):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.post(f"{self._api_base}/vector_search",
+                    json={"embedding": query_embedding, "top_k": top_k, "filters": filters})
+                resp.raise_for_status()
+                return [self._dict_to_chunk(d) for d in resp.json()["results"]]
+        except (httpx.ConnectError, httpx.TimeoutException):
+            return []
+    async def graph_neighbors(self, node_id, edge_type=None):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.post(f"{self._api_base}/graph_neighbors",
+                    json={"node_id": node_id, "edge_type": edge_type.value if edge_type else None})
+                resp.raise_for_status()
+                return [self._dict_to_node(d) for d in resp.json()["results"]]
+        except (httpx.ConnectError, httpx.TimeoutException):
+            return []
+    async def ingest(self, chunks, nodes, edges):
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                await client.post(f"{self._api_base}/ingest", json={
+                    "chunks": [self._chunk_to_dict(c) for c in chunks],
+                    "nodes": [self._node_to_dict(n) for n in nodes],
+                    "edges": [self._edge_to_dict(e) for e in edges],
+                })
+        except (httpx.ConnectError, httpx.TimeoutException):
+            pass
+    async def get_chunk_by_id(self, chunk_id):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.get(f"{self._api_base}/chunk/{chunk_id}")
+                if resp.status_code == 404:
+                    return None
+                resp.raise_for_status()
+                return self._dict_to_chunk(resp.json())
+        except (httpx.ConnectError, httpx.TimeoutException):
+            return None
+    async def delete_by_file(self, file_path):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                await client.delete(f"{self._api_base}/file/{file_path}")
+        except (httpx.ConnectError, httpx.TimeoutException):
+            pass
+    async def fts_search(self, query, top_k=30):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.post(f"{self._api_base}/fts/search",
+                    json={"query": query, "top_k": top_k})
+                resp.raise_for_status()
+                return [(item["chunk_id"], item["score"]) for item in resp.json()["results"]]
+        except (httpx.ConnectError, httpx.TimeoutException):
+            return []
+    async def get_chunks_by_ids(self, chunk_ids):
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                resp = await client.post(f"{self._api_base}/chunks/batch",
+                    json={"chunk_ids": chunk_ids})
+                resp.raise_for_status()
+                return [self._dict_to_chunk(d) for d in resp.json()["results"]]
+        except (httpx.ConnectError, httpx.TimeoutException):
+            return []
+    def _chunk_to_dict(self, chunk):
+        return {"id": chunk.id, "content": chunk.content, "chunk_type": chunk.chunk_type.value,
+                "file_path": chunk.file_path, "start_line": chunk.start_line, "end_line": chunk.end_line,
+                "language": chunk.language, "embedding": chunk.embedding, "metadata": chunk.metadata}
+    def _dict_to_chunk(self, d):
+        return Chunk(id=d["id"], content=d["content"], chunk_type=ChunkType(d["chunk_type"]),
+                     file_path=d["file_path"], start_line=d["start_line"], end_line=d["end_line"],
+                     language=d["language"], embedding=d.get("embedding"), metadata=d.get("metadata", {}))
+    def _node_to_dict(self, node):
+        return {"id": node.id, "node_type": node.node_type.value, "name": node.name, "file_path": node.file_path}
+    def _dict_to_node(self, d):
+        return GraphNode(id=d["id"], node_type=NodeType(d["node_type"]), name=d["name"], file_path=d["file_path"])
+    def _edge_to_dict(self, edge):
+        return {"source_id": edge.source_id, "target_id": edge.target_id, "edge_type": edge.edge_type.value}