code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. code_context_engine-0.4.0.dist-info/METADATA +389 -0
  2. code_context_engine-0.4.0.dist-info/RECORD +63 -0
  3. code_context_engine-0.4.0.dist-info/WHEEL +5 -0
  4. code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
  5. code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
  7. context_engine/__init__.py +3 -0
  8. context_engine/cli.py +2848 -0
  9. context_engine/cli_style.py +66 -0
  10. context_engine/compression/__init__.py +0 -0
  11. context_engine/compression/compressor.py +144 -0
  12. context_engine/compression/ollama_client.py +33 -0
  13. context_engine/compression/output_rules.py +77 -0
  14. context_engine/compression/prompts.py +9 -0
  15. context_engine/compression/quality.py +37 -0
  16. context_engine/config.py +198 -0
  17. context_engine/dashboard/__init__.py +0 -0
  18. context_engine/dashboard/_page.py +1548 -0
  19. context_engine/dashboard/server.py +429 -0
  20. context_engine/editors.py +265 -0
  21. context_engine/event_bus.py +24 -0
  22. context_engine/indexer/__init__.py +0 -0
  23. context_engine/indexer/chunker.py +147 -0
  24. context_engine/indexer/embedder.py +154 -0
  25. context_engine/indexer/embedding_cache.py +168 -0
  26. context_engine/indexer/git_hooks.py +73 -0
  27. context_engine/indexer/git_indexer.py +136 -0
  28. context_engine/indexer/ignorefile.py +96 -0
  29. context_engine/indexer/manifest.py +78 -0
  30. context_engine/indexer/pipeline.py +624 -0
  31. context_engine/indexer/secrets.py +332 -0
  32. context_engine/indexer/watcher.py +109 -0
  33. context_engine/integration/__init__.py +0 -0
  34. context_engine/integration/bootstrap.py +76 -0
  35. context_engine/integration/git_context.py +132 -0
  36. context_engine/integration/mcp_server.py +1825 -0
  37. context_engine/integration/session_capture.py +306 -0
  38. context_engine/memory/__init__.py +6 -0
  39. context_engine/memory/compressor.py +344 -0
  40. context_engine/memory/db.py +922 -0
  41. context_engine/memory/extractive.py +106 -0
  42. context_engine/memory/grammar.py +419 -0
  43. context_engine/memory/hook_installer.py +258 -0
  44. context_engine/memory/hook_server.py +83 -0
  45. context_engine/memory/hooks.py +327 -0
  46. context_engine/memory/migrate.py +268 -0
  47. context_engine/models.py +96 -0
  48. context_engine/pricing.py +104 -0
  49. context_engine/project_commands.py +296 -0
  50. context_engine/retrieval/__init__.py +0 -0
  51. context_engine/retrieval/confidence.py +47 -0
  52. context_engine/retrieval/query_parser.py +105 -0
  53. context_engine/retrieval/retriever.py +199 -0
  54. context_engine/serve_http.py +208 -0
  55. context_engine/services.py +252 -0
  56. context_engine/storage/__init__.py +0 -0
  57. context_engine/storage/backend.py +39 -0
  58. context_engine/storage/fts_store.py +112 -0
  59. context_engine/storage/graph_store.py +219 -0
  60. context_engine/storage/local_backend.py +109 -0
  61. context_engine/storage/remote_backend.py +117 -0
  62. context_engine/storage/vector_store.py +357 -0
  63. context_engine/utils.py +72 -0
@@ -0,0 +1,219 @@
1
+ """Graph store — SQLite-backed implementation."""
2
+
3
+ import asyncio
4
+ import json
5
+ import sqlite3
6
+ from threading import RLock
7
+
8
+ from context_engine.models import GraphNode, GraphEdge, NodeType, EdgeType
9
+
10
# Schema for the graph database; GraphStore.__init__ runs it with
# executescript(). Every statement uses IF NOT EXISTS, so applying it to an
# already-initialised database changes nothing.
#   nodes: one row per graph node; `properties` holds a JSON object as text.
#   edges: directed (source_id -> target_id) and typed; the composite primary
#          key allows at most one edge of a given type between two nodes.
# The indexes cover the lookups GraphStore performs: edges by source or
# target id, and nodes by file path.
_DDL = """
CREATE TABLE IF NOT EXISTS nodes (
    id TEXT PRIMARY KEY,
    node_type TEXT NOT NULL,
    name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    properties TEXT NOT NULL DEFAULT '{}'
);

CREATE TABLE IF NOT EXISTS edges (
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    edge_type TEXT NOT NULL,
    properties TEXT NOT NULL DEFAULT '{}',
    PRIMARY KEY (source_id, target_id, edge_type)
);

CREATE INDEX IF NOT EXISTS idx_edges_source ON edges (source_id);
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges (target_id);
CREATE INDEX IF NOT EXISTS idx_nodes_file ON nodes (file_path);
"""
31
+
32
+
33
def _row_to_node(row: tuple) -> GraphNode:
    """Convert one `nodes` table row into a GraphNode.

    `row` must hold exactly five values in the order
    (id, node_type, name, file_path, properties), where `properties` is the
    JSON text stored in the database.
    """
    ident, kind_value, label, path, properties_json = row
    kind = NodeType(kind_value)
    decoded = json.loads(properties_json)
    return GraphNode(
        id=ident,
        node_type=kind,
        name=label,
        file_path=path,
        properties=decoded,
    )
42
+
43
+
44
class GraphStore:
    """Single-connection SQLite graph store, serialised with an RLock.

    `check_same_thread=False` only disables thread ownership checks; concurrent
    operations on one connection are still unsafe, so every access to the
    connection happens while holding `self._lock`. Mirrors VectorStore's
    locking pattern.

    Write operations run inside the connection's context manager, which
    commits on success and rolls back if the block raises. Without the
    rollback a half-applied write would stay pending on the shared connection
    and be committed by the next successful call.
    """

    _NODE_COLUMNS = "id, node_type, name, file_path, properties"

    def __init__(self, db_path: str) -> None:
        """Open (creating if needed) the database at `db_path + ".db"`."""
        self._db_path = db_path + ".db"
        self._lock = RLock()
        self._conn = sqlite3.connect(self._db_path, check_same_thread=False)
        with self._lock:
            self._conn.executescript(_DDL)
            self._conn.commit()

    # ------------------------------------------------------------------
    # Sync internals (run inside asyncio.to_thread)
    # ------------------------------------------------------------------

    def _sync_ingest(self, nodes: list[GraphNode], edges: list[GraphEdge]) -> None:
        """Upsert `nodes` and `edges` atomically.

        Rows with an existing primary key are replaced. If any row fails,
        the whole batch is rolled back and the exception propagates.
        """
        # Serialise before taking the lock so JSON errors never touch the DB.
        node_rows = [
            (n.id, n.node_type.value, n.name, n.file_path, json.dumps(n.properties))
            for n in nodes
        ]
        edge_rows = [
            (e.source_id, e.target_id, e.edge_type.value, json.dumps(e.properties))
            for e in edges
        ]
        with self._lock, self._conn:
            self._conn.executemany(
                "INSERT OR REPLACE INTO nodes (id, node_type, name, file_path, properties) "
                "VALUES (?, ?, ?, ?, ?)",
                node_rows,
            )
            self._conn.executemany(
                "INSERT OR REPLACE INTO edges (source_id, target_id, edge_type, properties) "
                "VALUES (?, ?, ?, ?)",
                edge_rows,
            )

    def _sync_get_neighbors(self, node_id: str, edge_type: EdgeType | None) -> list[GraphNode]:
        """Return target nodes of edges leaving `node_id`.

        When `edge_type` is given, only edges of that type are followed.
        """
        sql = (
            "SELECT n.id, n.node_type, n.name, n.file_path, n.properties "
            "FROM edges e JOIN nodes n ON e.target_id = n.id "
            "WHERE e.source_id = ?"
        )
        params: list = [node_id]
        if edge_type is not None:
            sql += " AND e.edge_type = ?"
            params.append(edge_type.value)
        with self._lock:
            rows = self._conn.execute(sql, params).fetchall()
        return [_row_to_node(row) for row in rows]

    def _sync_get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
        """Return every node whose `file_path` equals `file_path`."""
        with self._lock:
            rows = self._conn.execute(
                f"SELECT {self._NODE_COLUMNS} FROM nodes WHERE file_path = ?",
                (file_path,),
            ).fetchall()
        return [_row_to_node(row) for row in rows]

    def _sync_neighbors_for_files(
        self,
        file_paths: list[str],
        edge_types: list[EdgeType],
        node_types: list[NodeType] | None = None,
    ) -> list[GraphNode]:
        """Single query: target-nodes of edges originating from any node belonging
        to any of `file_paths`, filtered by edge_type (and optionally source-node
        type). Replaces N+1 calls to get_nodes_by_file + get_neighbors per result.

        Returns an empty list when `file_paths` or `edge_types` is empty.

        NOTE(review): unlike `_sync_delete_by_files`, the bound parameters are
        not batched here, so a very large `file_paths` list could exceed
        SQLite's per-statement variable limit.
        """
        if not file_paths or not edge_types:
            return []
        file_marks = ",".join("?" * len(file_paths))
        edge_marks = ",".join("?" * len(edge_types))
        params: list = [*file_paths, *(et.value for et in edge_types)]
        type_clause = ""
        if node_types:
            type_marks = ",".join("?" * len(node_types))
            type_clause = f" AND src.node_type IN ({type_marks})"
            params.extend(nt.value for nt in node_types)
        # Only "?" placeholders are interpolated; all values are bound.
        sql = (
            "SELECT DISTINCT tgt.id, tgt.node_type, tgt.name, tgt.file_path, tgt.properties "
            "FROM nodes src "
            "JOIN edges e ON e.source_id = src.id "
            "JOIN nodes tgt ON tgt.id = e.target_id "
            f"WHERE src.file_path IN ({file_marks}) "
            f" AND e.edge_type IN ({edge_marks})"
            f"{type_clause}"
        )
        with self._lock:
            rows = self._conn.execute(sql, params).fetchall()
        return [_row_to_node(row) for row in rows]

    def _sync_get_nodes_by_type(self, node_type: NodeType) -> list[GraphNode]:
        """Return every node of the given `node_type`."""
        with self._lock:
            rows = self._conn.execute(
                f"SELECT {self._NODE_COLUMNS} FROM nodes WHERE node_type = ?",
                (node_type.value,),
            ).fetchall()
        return [_row_to_node(row) for row in rows]

    def _sync_delete_by_file(self, file_path: str) -> None:
        """Delete the nodes of one file; see `_sync_delete_by_files`."""
        self._sync_delete_by_files([file_path])

    def _sync_delete_by_files(self, file_paths: list[str]) -> None:
        """Delete all nodes belonging to `file_paths` plus every edge that
        starts or ends at one of those nodes, as one transaction.
        """
        if not file_paths:
            return
        from context_engine.utils import batched_params

        with self._lock, self._conn:
            cur = self._conn.cursor()
            # Collect node IDs in batches to respect SQLite param limits.
            node_ids: list[str] = []
            for batch in batched_params(file_paths):
                marks = ",".join("?" * len(batch))
                cur.execute(
                    f"SELECT id FROM nodes WHERE file_path IN ({marks})", batch
                )
                node_ids.extend(row[0] for row in cur.fetchall())
            # Delete edges and nodes in batches.
            for batch in batched_params(node_ids):
                marks = ",".join("?" * len(batch))
                cur.execute(
                    f"DELETE FROM edges WHERE source_id IN ({marks}) "
                    f"OR target_id IN ({marks})",
                    batch + batch,  # the id list is bound twice: source, then target
                )
                cur.execute(f"DELETE FROM nodes WHERE id IN ({marks})", batch)

    # ------------------------------------------------------------------
    # Public async API
    # ------------------------------------------------------------------

    async def ingest(self, nodes: list[GraphNode], edges: list[GraphEdge]) -> None:
        """Upsert nodes and edges in a worker thread."""
        await asyncio.to_thread(self._sync_ingest, nodes, edges)

    async def get_neighbors(self, node_id: str, edge_type: EdgeType | None = None) -> list[GraphNode]:
        """Return target nodes of edges leaving `node_id`, optionally by type."""
        return await asyncio.to_thread(self._sync_get_neighbors, node_id, edge_type)

    async def get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
        """Return every node stored for `file_path`."""
        return await asyncio.to_thread(self._sync_get_nodes_by_file, file_path)

    async def neighbors_for_files(
        self,
        file_paths: list[str],
        edge_types: list[EdgeType],
        node_types: list[NodeType] | None = None,
    ) -> list[GraphNode]:
        """Return distinct edge targets for all nodes in `file_paths`."""
        return await asyncio.to_thread(
            self._sync_neighbors_for_files, file_paths, edge_types, node_types
        )

    async def get_nodes_by_type(self, node_type: NodeType) -> list[GraphNode]:
        """Return every node of the given type."""
        return await asyncio.to_thread(self._sync_get_nodes_by_type, node_type)

    async def delete_by_file(self, file_path: str) -> None:
        """Delete one file's nodes and their edges."""
        await asyncio.to_thread(self._sync_delete_by_file, file_path)

    async def delete_by_files(self, file_paths: list[str]) -> None:
        """Delete the nodes (and their edges) of several files at once."""
        await asyncio.to_thread(self._sync_delete_by_files, file_paths)

    def clear(self) -> None:
        """Remove every edge and node (synchronous)."""
        with self._lock, self._conn:
            self._conn.execute("DELETE FROM edges")
            self._conn.execute("DELETE FROM nodes")

    def close(self) -> None:
        """Close the underlying connection. Safe to call more than once."""
        with self._lock:
            self._conn.close()
@@ -0,0 +1,109 @@
1
+ """Local storage backend — LanceDB vectors + SQLite FTS + SQLite graph."""
2
+ import asyncio
3
+ from pathlib import Path
4
+
5
+ from context_engine.models import Chunk, GraphNode, GraphEdge, EdgeType
6
+ from context_engine.storage.vector_store import VectorStore
7
+ from context_engine.storage.fts_store import FTSStore
8
+ from context_engine.storage.graph_store import GraphStore
9
+
10
+
11
class LocalBackend:
    """Storage backend that fans out to three local stores.

    Under `base_path` it opens a VectorStore ("vectors"), an FTSStore ("fts")
    and a GraphStore ("graph"). Chunk lookups, counts and the compression
    cache are served by the vector store; text search by the FTS store; node
    and edge queries by the graph store.
    """

    def __init__(self, base_path: str) -> None:
        root = Path(base_path)
        self._vector_store = VectorStore(db_path=str(root / "vectors"))
        self._fts_store = FTSStore(db_path=str(root / "fts"))
        self._graph_store = GraphStore(db_path=str(root / "graph"))

    async def ingest(
        self,
        chunks: list[Chunk],
        nodes: list[GraphNode],
        edges: list[GraphEdge],
    ) -> None:
        """Write chunks to the vector and FTS stores and nodes/edges to the
        graph store; the three writes are awaited together via asyncio.gather.
        """
        await asyncio.gather(
            self._vector_store.ingest(chunks),
            self._fts_store.ingest(chunks),
            self._graph_store.ingest(nodes, edges),
        )

    async def vector_search(
        self,
        query_embedding: list[float],
        top_k: int = 10,
        filters: dict | None = None,
    ) -> list[Chunk]:
        """Delegate an embedding search to the vector store."""
        return await self._vector_store.search(query_embedding, top_k, filters)

    async def fts_search(
        self,
        query: str,
        top_k: int = 30,
    ) -> list[tuple[str, float]]:
        """Delegate a text query to the FTS store."""
        return await self._fts_store.search(query, top_k)

    async def graph_neighbors(
        self,
        node_id: str,
        edge_type: EdgeType | None = None,
    ) -> list[GraphNode]:
        """Return the graph neighbours of `node_id`, optionally by edge type."""
        return await self._graph_store.get_neighbors(node_id, edge_type)

    async def get_related_file_paths(self, file_paths: list[str]) -> list[str]:
        """Return file paths reachable via CALLS or IMPORTS edges from the given files.

        Used by the retriever for 1-hop graph expansion: if a result is in
        auth.py, also surface chunks from files that auth.py calls or imports.

        Paths already in `file_paths` and empty paths are excluded. The result
        is de-duplicated and sorted, so the order is the same on every run
        (iterating a set of strings is not stable across processes).
        """
        if not file_paths:
            return []
        # EdgeType is already imported at module level; only NodeType is
        # needed here.
        from context_engine.models import NodeType

        already_listed = set(file_paths)
        neighbors = await self._graph_store.neighbors_for_files(
            file_paths,
            edge_types=[EdgeType.CALLS, EdgeType.IMPORTS],
            node_types=[NodeType.FUNCTION, NodeType.CLASS, NodeType.FILE, NodeType.MODULE],
        )
        related = {
            node.file_path
            for node in neighbors
            if node.file_path and node.file_path not in already_listed
        }
        return sorted(related)

    async def get_chunk_by_id(self, chunk_id: str) -> Chunk | None:
        """Fetch one chunk from the vector store."""
        return await self._vector_store.get_by_id(chunk_id)

    async def get_chunks_by_ids(self, chunk_ids: list[str]) -> list[Chunk]:
        """Fetch several chunks from the vector store in one call."""
        return await self._vector_store.get_chunks_by_ids(chunk_ids)

    async def delete_by_file(self, file_path: str) -> None:
        """Remove everything stored for `file_path` from all three stores."""
        await asyncio.gather(
            self._vector_store.delete_by_file(file_path),
            self._fts_store.delete_by_file(file_path),
            self._graph_store.delete_by_file(file_path),
        )

    async def delete_by_files(self, file_paths: list[str]) -> None:
        """Batched cousin of delete_by_file. Pipeline calls this once per
        re-index batch instead of awaiting per-file deletes serially. The
        three stores still run in parallel via asyncio.gather.

        An empty `file_paths` is a no-op.
        """
        if not file_paths:
            return
        await asyncio.gather(
            self._vector_store.delete_by_files(file_paths),
            self._fts_store.delete_by_files(file_paths),
            self._graph_store.delete_by_files(file_paths),
        )

    def count_chunks(self) -> int:
        """Return the vector store's chunk count."""
        return self._vector_store.count()

    def file_chunk_counts(self) -> dict[str, int]:
        """Return the vector store's per-file chunk counts."""
        return self._vector_store.file_chunk_counts()

    def get_cached_compression(self, chunk_id: str, level: str) -> str | None:
        """Look up a cached compression of a chunk at `level`."""
        return self._vector_store.get_cached_compression(chunk_id, level)

    def put_cached_compression(self, chunk_id: str, level: str, compressed: str) -> None:
        """Store a compression of a chunk at `level` in the cache."""
        self._vector_store.put_cached_compression(chunk_id, level, compressed)

    async def clear(self) -> None:
        """Empty all three stores, in the order vector, FTS, graph.

        The stores' `clear()` methods are called directly (not awaited).
        """
        self._vector_store.clear()
        self._fts_store.clear()
        self._graph_store.clear()
@@ -0,0 +1,117 @@
1
+ """Remote storage backend — proxies DB + LLM operations to a remote server via SSH/HTTP."""
2
+ import asyncio
3
+ import httpx
4
+ from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
5
+
6
+
7
class RemoteBackend:
    """Storage backend that proxies operations to a remote server.

    Reachability is probed over SSH; all data operations are HTTP calls to
    `http://<hostname>:<port>`. `host` may be `user@hostname`, in which case
    only the hostname part is used for the HTTP base URL while the full
    string is passed to `ssh`.

    Error policy: connection failures and timeouts are swallowed (reads
    return an empty result, writes become no-ops). HTTP error responses are
    raised as `httpx.HTTPStatusError`.
    """

    def __init__(self, host: str, port: int = 8765, fallback_to_local: bool = True):
        self.host = host
        self.port = port
        self.fallback_to_local = fallback_to_local
        user, at_sign, hostname = host.partition("@")
        if at_sign:
            self._user, self._hostname = user, hostname
        else:
            self._user = None
            self._hostname = host
        self._api_base = f"http://{self._hostname}:{port}"

    async def is_reachable(self) -> bool:
        """Return True if `ssh <host> echo ok` succeeds within five seconds.

        Runs ssh non-interactively (`BatchMode=yes`, `ConnectTimeout=3`).
        Returns False when ssh cannot be started, errors out or times out.
        """
        try:
            proc = await asyncio.create_subprocess_exec(
                "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes",
                self.host, "echo", "ok",
                stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
            )
        except OSError:
            return False
        try:
            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); the child would keep
            # running, so terminate and reap it explicitly.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # exited between the timeout and the kill
            await proc.wait()
            return False
        except OSError:
            return False
        return b"ok" in stdout

    def _resource_url(self, prefix: str, identifier: str) -> str:
        """Build `<api_base>/<prefix>/<identifier>` with the identifier
        percent-encoded. "/" is left as-is so multi-segment file paths keep
        their shape, while "?", "#", "%" and spaces can no longer be
        mistaken for URL syntax.
        """
        from urllib.parse import quote

        return f"{self._api_base}/{prefix}/{quote(identifier, safe='/')}"

    async def _post_for_results(self, endpoint: str, payload: dict) -> list:
        """POST `payload` to `endpoint` and return the response's "results"
        list, or an empty list when the server cannot be reached in time.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.post(f"{self._api_base}/{endpoint}", json=payload)
                resp.raise_for_status()
                return resp.json()["results"]
        except (httpx.ConnectError, httpx.TimeoutException):
            return []

    async def vector_search(self, query_embedding, top_k=10, filters=None):
        """Return chunks from the remote `/vector_search` endpoint."""
        results = await self._post_for_results(
            "vector_search",
            {"embedding": query_embedding, "top_k": top_k, "filters": filters},
        )
        return [self._dict_to_chunk(d) for d in results]

    async def graph_neighbors(self, node_id, edge_type=None):
        """Return nodes from the remote `/graph_neighbors` endpoint."""
        results = await self._post_for_results(
            "graph_neighbors",
            {"node_id": node_id, "edge_type": edge_type.value if edge_type else None},
        )
        return [self._dict_to_node(d) for d in results]

    async def ingest(self, chunks, nodes, edges):
        """Send chunks, nodes and edges to the remote `/ingest` endpoint.

        A no-op when the server is unreachable. Raises
        `httpx.HTTPStatusError` if the server answers with an error status,
        so a rejected ingest is not mistaken for a successful one.
        """
        payload = {
            "chunks": [self._chunk_to_dict(c) for c in chunks],
            "nodes": [self._node_to_dict(n) for n in nodes],
            "edges": [self._edge_to_dict(e) for e in edges],
        }
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                resp = await client.post(f"{self._api_base}/ingest", json=payload)
                resp.raise_for_status()
        except (httpx.ConnectError, httpx.TimeoutException):
            pass

    async def get_chunk_by_id(self, chunk_id):
        """Return the chunk with `chunk_id`, or None on 404 / unreachable server."""
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.get(self._resource_url("chunk", chunk_id))
                if resp.status_code == 404:
                    return None
                resp.raise_for_status()
                return self._dict_to_chunk(resp.json())
        except (httpx.ConnectError, httpx.TimeoutException):
            return None

    async def delete_by_file(self, file_path):
        """Ask the server to delete everything stored for `file_path`.

        A no-op when the server is unreachable. A 404 is treated as "already
        gone"; any other error status raises `httpx.HTTPStatusError`.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.delete(self._resource_url("file", file_path))
                if resp.status_code != 404:
                    resp.raise_for_status()
        except (httpx.ConnectError, httpx.TimeoutException):
            pass

    async def delete_by_files(self, file_paths):
        """Delete several files, one `delete_by_file` request per path."""
        for path in file_paths:
            await self.delete_by_file(path)

    async def fts_search(self, query, top_k=30):
        """Return `(chunk_id, score)` pairs from the remote `/fts/search` endpoint."""
        results = await self._post_for_results(
            "fts/search", {"query": query, "top_k": top_k}
        )
        return [(item["chunk_id"], item["score"]) for item in results]

    async def get_chunks_by_ids(self, chunk_ids):
        """Return chunks from the remote `/chunks/batch` endpoint."""
        results = await self._post_for_results(
            "chunks/batch", {"chunk_ids": chunk_ids}
        )
        return [self._dict_to_chunk(d) for d in results]

    def _chunk_to_dict(self, chunk):
        """Serialise a Chunk to a JSON-ready dict."""
        return {
            "id": chunk.id,
            "content": chunk.content,
            "chunk_type": chunk.chunk_type.value,
            "file_path": chunk.file_path,
            "start_line": chunk.start_line,
            "end_line": chunk.end_line,
            "language": chunk.language,
            "embedding": chunk.embedding,
            "metadata": chunk.metadata,
        }

    def _dict_to_chunk(self, d):
        """Build a Chunk from a response dict; embedding and metadata are optional."""
        return Chunk(
            id=d["id"],
            content=d["content"],
            chunk_type=ChunkType(d["chunk_type"]),
            file_path=d["file_path"],
            start_line=d["start_line"],
            end_line=d["end_line"],
            language=d["language"],
            embedding=d.get("embedding"),
            metadata=d.get("metadata", {}),
        )

    def _node_to_dict(self, node):
        """Serialise a GraphNode to a JSON-ready dict.

        NOTE(review): node properties are not sent; confirm whether the
        server's ingest endpoint accepts a "properties" key before adding it.
        """
        return {
            "id": node.id,
            "node_type": node.node_type.value,
            "name": node.name,
            "file_path": node.file_path,
        }

    def _dict_to_node(self, d):
        """Build a GraphNode from a response dict, keeping "properties" when
        the server supplies them.
        """
        extra = {}
        if d.get("properties") is not None:
            extra["properties"] = d["properties"]
        return GraphNode(
            id=d["id"],
            node_type=NodeType(d["node_type"]),
            name=d["name"],
            file_path=d["file_path"],
            **extra,
        )

    def _edge_to_dict(self, edge):
        """Serialise a GraphEdge to a JSON-ready dict (properties are not sent)."""
        return {
            "source_id": edge.source_id,
            "target_id": edge.target_id,
            "edge_type": edge.edge_type.value,
        }