code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. code_context_engine-0.4.0.dist-info/METADATA +389 -0
  2. code_context_engine-0.4.0.dist-info/RECORD +63 -0
  3. code_context_engine-0.4.0.dist-info/WHEEL +5 -0
  4. code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
  5. code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
  7. context_engine/__init__.py +3 -0
  8. context_engine/cli.py +2848 -0
  9. context_engine/cli_style.py +66 -0
  10. context_engine/compression/__init__.py +0 -0
  11. context_engine/compression/compressor.py +144 -0
  12. context_engine/compression/ollama_client.py +33 -0
  13. context_engine/compression/output_rules.py +77 -0
  14. context_engine/compression/prompts.py +9 -0
  15. context_engine/compression/quality.py +37 -0
  16. context_engine/config.py +198 -0
  17. context_engine/dashboard/__init__.py +0 -0
  18. context_engine/dashboard/_page.py +1548 -0
  19. context_engine/dashboard/server.py +429 -0
  20. context_engine/editors.py +265 -0
  21. context_engine/event_bus.py +24 -0
  22. context_engine/indexer/__init__.py +0 -0
  23. context_engine/indexer/chunker.py +147 -0
  24. context_engine/indexer/embedder.py +154 -0
  25. context_engine/indexer/embedding_cache.py +168 -0
  26. context_engine/indexer/git_hooks.py +73 -0
  27. context_engine/indexer/git_indexer.py +136 -0
  28. context_engine/indexer/ignorefile.py +96 -0
  29. context_engine/indexer/manifest.py +78 -0
  30. context_engine/indexer/pipeline.py +624 -0
  31. context_engine/indexer/secrets.py +332 -0
  32. context_engine/indexer/watcher.py +109 -0
  33. context_engine/integration/__init__.py +0 -0
  34. context_engine/integration/bootstrap.py +76 -0
  35. context_engine/integration/git_context.py +132 -0
  36. context_engine/integration/mcp_server.py +1825 -0
  37. context_engine/integration/session_capture.py +306 -0
  38. context_engine/memory/__init__.py +6 -0
  39. context_engine/memory/compressor.py +344 -0
  40. context_engine/memory/db.py +922 -0
  41. context_engine/memory/extractive.py +106 -0
  42. context_engine/memory/grammar.py +419 -0
  43. context_engine/memory/hook_installer.py +258 -0
  44. context_engine/memory/hook_server.py +83 -0
  45. context_engine/memory/hooks.py +327 -0
  46. context_engine/memory/migrate.py +268 -0
  47. context_engine/models.py +96 -0
  48. context_engine/pricing.py +104 -0
  49. context_engine/project_commands.py +296 -0
  50. context_engine/retrieval/__init__.py +0 -0
  51. context_engine/retrieval/confidence.py +47 -0
  52. context_engine/retrieval/query_parser.py +105 -0
  53. context_engine/retrieval/retriever.py +199 -0
  54. context_engine/serve_http.py +208 -0
  55. context_engine/services.py +252 -0
  56. context_engine/storage/__init__.py +0 -0
  57. context_engine/storage/backend.py +39 -0
  58. context_engine/storage/fts_store.py +112 -0
  59. context_engine/storage/graph_store.py +219 -0
  60. context_engine/storage/local_backend.py +109 -0
  61. context_engine/storage/remote_backend.py +117 -0
  62. context_engine/storage/vector_store.py +357 -0
  63. context_engine/utils.py +72 -0
@@ -0,0 +1,268 @@
1
+ """Import legacy per-session JSON files into the new memory.db.
2
+
3
+ Walks each project's `sessions/` directory (and the legacy
4
+ `~/.claude-context-engine/projects/<name>/sessions/` path), parses each
5
+ *.json, imports decisions and code_areas with `source='migrated'`, then
6
+ archives the consumed files into `migrated.zip` and removes them.
7
+
8
+ Idempotent — `migrated_files` tracks what has already been imported so a
9
+ rerun is a no-op.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import sqlite3
16
+ import time
17
+ import zipfile
18
+ from dataclasses import dataclass, field
19
+ from pathlib import Path
20
+
21
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# File name of the aggregate decisions file. Unlike the per-session files it
# holds a top-level JSON list of decision dicts.
_DECISIONS_LOG_NAME = "decisions_log.json"
24
+
25
+
26
@dataclass
class MigrationSummary:
    """Totals accumulated by one call to `migrate`.

    Every counter starts at zero and each instance gets its own empty
    `sources_scanned` list.
    """

    # Decision rows inserted from directories whose import was committed.
    decisions_imported: int = 0
    # Code-area rows inserted from directories whose import was committed.
    code_areas_imported: int = 0
    # Source files that were imported and recorded in `migrated_files`.
    files_imported: int = 0
    # Files newly written to a `migrated.zip` archive.
    files_archived: int = 0
    # Files passed over because `migrated_files` already listed them.
    files_skipped: int = 0
    # String paths of the session directories that existed and were scanned.
    sources_scanned: list[str] = field(default_factory=list)
34
+
35
+
36
def candidate_session_dirs(project_name: str, primary_storage_base: Path) -> list[Path]:
    """List the directories that may hold legacy session JSON for a project.

    The result always has two entries, in scan order:

    1. ``<primary_storage_base>/sessions`` (current path)
    2. ``~/.claude-context-engine/projects/<project_name>/sessions`` (pre-rebrand)

    Neither directory is checked for existence here.
    """
    pre_rebrand_dir = Path.home().joinpath(
        ".claude-context-engine", "projects", project_name, "sessions"
    )
    current_dir = Path(primary_storage_base).joinpath("sessions")
    return [current_dir, pre_rebrand_dir]
48
+
49
+
50
def migrate(
    conn: sqlite3.Connection,
    project_name: str,
    storage_base: str | Path,
    *,
    archive: bool = True,
) -> MigrationSummary:
    """Import all legacy JSON sessions for `project_name` into the open db.

    `storage_base` is the per-project storage directory (e.g.
    ~/.cce/projects/<name>). `archive=True` zips and deletes consumed
    JSONs after successful import; pass False from tests that want to
    re-read the source files.

    Each existing candidate directory is processed as one unit: its files
    are imported, optionally archived, marked in `migrated_files`, and then
    committed. Files that cannot be read, decoded, or parsed as JSON are
    logged and left in place. Returns a `MigrationSummary` whose import
    counters only include directories that were committed.
    """
    storage_base = Path(storage_base)
    summary = MigrationSummary()

    for sessions_dir in candidate_session_dirs(project_name, storage_base):
        if not sessions_dir.exists():
            continue
        summary.sources_scanned.append(str(sessions_dir))

        consumed: list[Path] = []
        decisions_added = 0
        code_areas_added = 0
        for f in sorted(sessions_dir.glob("*.json")):
            if _already_imported(conn, f):
                summary.files_skipped += 1
                continue
            try:
                imported = _import_one(conn, f)
            except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
                # UnicodeDecodeError is a ValueError but not a JSONDecodeError:
                # without it a single mis-encoded file would abort the whole
                # migration. All three are raised while the file is being read
                # or parsed, i.e. before `_import_one` inserts any row.
                log.warning("Skipping unreadable session file %s: %s", f, exc)
                continue
            decisions_added += imported.decisions
            code_areas_added += imported.code_areas
            consumed.append(f)

        if not consumed:
            continue

        # Archive *before* marking imported. If zip-write fails we roll back
        # the directory's inserts so a rerun retries cleanly — otherwise
        # files would be permanently flagged imported but never archived.
        if archive:
            try:
                archived = _archive_and_remove(sessions_dir, consumed)
            except OSError as exc:
                log.error(
                    "Archive failed for %s: %s — rolling back imports", sessions_dir, exc
                )
                conn.rollback()
                continue
            summary.files_archived += archived

        for f in consumed:
            _mark_imported(conn, f)
        conn.commit()
        # Counters are updated only after the commit so the summary never
        # reports rows that were rolled back.
        summary.decisions_imported += decisions_added
        summary.code_areas_imported += code_areas_added
        summary.files_imported += len(consumed)

    return summary
114
+
115
+
116
@dataclass
class _ImportCounts:
    """Row counts produced by importing a single legacy file."""

    # Rows inserted into `decisions`.
    decisions: int = 0
    # Rows inserted into `code_areas`.
    code_areas: int = 0
120
+
121
+
122
def _import_one(conn: sqlite3.Connection, source: Path) -> _ImportCounts:
    """Import a single legacy JSON file. Returns counts of imported rows.

    Two layouts are understood:

    - `decisions_log.json` holding a top-level list of decision dicts, each
      optionally carrying its own `session_id`;
    - a per-session object ``{"id", "decisions": [...], "code_areas": [...]}``.

    Any other top-level shape imports nothing. Malformed pieces inside an
    otherwise valid file (non-dict entries, non-list containers, list/dict
    session ids) are skipped with a warning instead of raising part-way
    through the file, after some rows have already been inserted.

    Raises whatever `Path.read_text` / `json.loads` raise for an unreadable
    or unparsable file; nothing has been inserted at that point.
    """
    counts = _ImportCounts()
    data = json.loads(source.read_text())

    # Memoise per-session existence checks — the same session_id often
    # appears across many entries of a decisions log.
    exists_cache: dict[object, bool] = {}

    def _fk(sid):
        """Return `sid` if it names an existing `sessions` row, else None."""
        # JSON arrays/objects are unhashable and cannot be bound as SQL
        # parameters, so they can never reference a session.
        if sid is None or isinstance(sid, (list, dict)):
            return None
        if sid not in exists_cache:
            exists_cache[sid] = _session_exists(conn, sid)
        return sid if exists_cache[sid] else None

    def _dict_entries(value, kind):
        """Yield the dict items of list `value`, warning about anything else."""
        if not value:
            return
        if not isinstance(value, list):
            log.warning("Ignoring non-list %s collection in %s", kind, source)
            return
        for entry in value:
            if isinstance(entry, dict):
                yield entry
            else:
                log.warning("Skipping malformed %s entry in %s", kind, source)

    # decisions_log.json is a top-level list of decision dicts, not a session.
    if source.name == _DECISIONS_LOG_NAME and isinstance(data, list):
        for d in _dict_entries(data, "decision"):
            _insert_decision(
                conn,
                session_id=_fk(d.get("session_id")),
                decision=d.get("decision", ""),
                reason=d.get("reason", ""),
                timestamp=d.get("timestamp"),
            )
            counts.decisions += 1
        return counts

    # Per-session JSON: {"id", "decisions": [...], "code_areas": [...], ...}
    if not isinstance(data, dict):
        return counts

    # session_id is constant for the rest of this file — resolve once.
    fk_session_id = _fk(data.get("id"))
    for d in _dict_entries(data.get("decisions"), "decision"):
        _insert_decision(
            conn,
            session_id=fk_session_id,
            decision=d.get("decision", ""),
            reason=d.get("reason", ""),
            timestamp=d.get("timestamp"),
        )
        counts.decisions += 1
    for c in _dict_entries(data.get("code_areas"), "code_area"):
        _insert_code_area(
            conn,
            session_id=fk_session_id,
            file_path=c.get("file_path", ""),
            description=c.get("description", ""),
            timestamp=c.get("timestamp"),
        )
        counts.code_areas += 1
    return counts
172
+
173
+
174
def _insert_decision(conn, *, session_id, decision, reason, timestamp):
    """Insert one `decisions` row tagged ``source='migrated'``.

    `timestamp` is an epoch value; None means "stamp with the current time".
    The row stores both the integer epoch and a UTC ``YYYY-MM-DDTHH:MM:SS``
    rendering of it. `decision` and `reason` are run through
    `grammar.compress` at the default level before being stored.
    """
    # Test against None rather than truthiness so legacy rows with an explicit
    # 0/0.0 timestamp keep their original ordering instead of becoming "now".
    if timestamp is None:
        created_epoch = int(time.time())
    else:
        created_epoch = int(timestamp)
    created_iso = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(created_epoch))

    # Mirror the live record_decision write path: compress at the project
    # default level so migrated rows are stored in the same shape as freshly
    # recorded ones. Otherwise the FTS5 index would sit over a mixed corpus
    # (compressed + raw) and `_content_key` dedup would fail across it.
    from context_engine.memory.grammar import (
        compress as _grammar_compress, DEFAULT_LEVEL as _GRAMMAR_LEVEL,
    )
    stored_decision = _grammar_compress(decision or "", level=_GRAMMAR_LEVEL)
    stored_reason = _grammar_compress(reason or "", level=_GRAMMAR_LEVEL)
    row = (session_id, stored_decision, stored_reason, created_epoch, created_iso)
    conn.execute(
        "INSERT INTO decisions (session_id, decision, reason, source, "
        "created_at_epoch, created_at) VALUES (?, ?, ?, 'migrated', ?, ?)",
        row,
    )
198
+
199
+
200
def _insert_code_area(conn, *, session_id, file_path, description, timestamp):
    """Insert one `code_areas` row tagged ``source='migrated'``.

    `timestamp` is an epoch value; None means "stamp with the current time".
    `description` is compressed with `grammar.compress` at the default level;
    `file_path` is stored verbatim. A missing (None) `file_path` or
    `description` is stored as an empty string.
    """
    # Test against None rather than truthiness so an explicit 0/0.0 timestamp
    # is kept instead of being replaced by the current time.
    epoch = int(timestamp) if timestamp is not None else int(time.time())
    # Only the description is prose worth compressing; the path is a
    # structured token and is deliberately left untouched.
    from context_engine.memory.grammar import (
        compress as _grammar_compress, DEFAULT_LEVEL as _GRAMMAR_LEVEL,
    )
    conn.execute(
        "INSERT INTO code_areas (session_id, file_path, description, source, "
        "created_at_epoch) VALUES (?, ?, ?, 'migrated', ?)",
        (
            session_id,
            # A JSON null bypasses the caller's "" default; normalise it the
            # same way `description` is normalised below.
            file_path or "",
            _grammar_compress(description or "", level=_GRAMMAR_LEVEL),
            epoch,
        ),
    )
217
+
218
+
219
def _session_exists(conn, session_id) -> bool:
    """Report whether `sessions` has a row whose id equals `session_id`.

    Falsy ids (None, "", 0) return False without querying the database.
    """
    if session_id:
        cursor = conn.execute(
            "SELECT 1 FROM sessions WHERE id = ?", (session_id,)
        )
        return cursor.fetchone() is not None
    return False
226
+
227
+
228
def _already_imported(conn, source: Path) -> bool:
    """Report whether `migrated_files` already lists `source`.

    The lookup key is ``str(source)`` exactly as given; the path is not
    resolved or normalised first.
    """
    cursor = conn.execute(
        "SELECT 1 FROM migrated_files WHERE source_path = ?",
        (str(source),),
    )
    return cursor.fetchone() is not None
234
+
235
+
236
def _mark_imported(conn, source: Path) -> None:
    """Record `source` in `migrated_files`, stamped with the current epoch.

    Uses ``INSERT OR IGNORE``, so marking a path that is already recorded
    leaves the existing row (and its timestamp) untouched. Does not commit.
    """
    params = (str(source), int(time.time()))
    conn.execute(
        "INSERT OR IGNORE INTO migrated_files (source_path, imported_at_epoch) "
        "VALUES (?, ?)",
        params,
    )
242
+
243
+
244
def _archive_and_remove(sessions_dir: Path, files: list[Path]) -> int:
    """Append `files` to `sessions_dir/migrated.zip`, then delete the originals.

    Entries are stored under their bare file name. A file whose name is
    already present in the archive (left there by an earlier run) is not
    written again but is still deleted. Deletion only starts once the
    archive has been closed; a file that cannot be deleted is logged and
    skipped.

    Returns the number of files actually written to the zip. An empty
    `files` list returns 0 without creating the archive.
    """
    if not files:
        return 0

    written = 0
    zip_path = sessions_dir / "migrated.zip"
    with zipfile.ZipFile(zip_path, mode="a", compression=zipfile.ZIP_DEFLATED) as zf:
        already_archived = frozenset(zf.namelist())
        for path in files:
            if path.name in already_archived:
                # Archived by a previous run; only the delete below is needed.
                continue
            zf.write(path, arcname=path.name)
            written += 1

    for path in files:
        try:
            path.unlink()
        except OSError as exc:
            log.warning("Could not remove migrated file %s: %s", path, exc)
    return written
@@ -0,0 +1,96 @@
1
+ """Shared data models for the context engine."""
2
+ from dataclasses import dataclass, field
3
+ from enum import Enum
4
+
5
+
6
class ChunkType(Enum):
    """Closed set of values for `Chunk.chunk_type`.

    Each member's value is its lower-case name.
    """

    # Source-code chunks.
    FUNCTION = "function"
    CLASS = "class"
    MODULE = "module"
    # Text chunks.
    DOC = "doc"
    COMMENT = "comment"
    # History and memory chunks.
    COMMIT = "commit"
    SESSION = "session"
    DECISION = "decision"
15
+
16
+
17
class NodeType(Enum):
    """Closed set of values for `GraphNode.node_type`.

    Each member's value is its lower-case name.
    """

    # Source-code nodes.
    FUNCTION = "function"
    CLASS = "class"
    FILE = "file"
    MODULE = "module"
    # Text nodes.
    DOC = "doc"
    # History and memory nodes.
    COMMIT = "commit"
    SESSION = "session"
    DECISION = "decision"
26
+
27
+
28
class EdgeType(Enum):
    """Closed set of values for `GraphEdge.edge_type`.

    Each member's value is its lower-case name.
    """

    CALLS = "calls"
    IMPORTS = "imports"
    DEFINES = "defines"
    MODIFIES = "modifies"
    DISCUSSED_IN = "discussed_in"
    DECIDED = "decided"
35
+
36
+
37
class ConfidenceLevel(Enum):
    """Three-step confidence bucket derived from a numeric score."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"

    @staticmethod
    def from_score(score: float) -> "ConfidenceLevel":
        """Bucket `score` into a confidence level.

        Strictly above 0.8 is HIGH, 0.5 up to and including 0.8 is MEDIUM,
        and everything else (including NaN) is LOW.
        """
        level = ConfidenceLevel.LOW
        if score > 0.8:
            level = ConfidenceLevel.HIGH
        elif score >= 0.5:
            level = ConfidenceLevel.MEDIUM
        return level
49
+
50
+
51
@dataclass
class Chunk:
    """One chunk of content together with its location and optional extras.

    The first seven fields are required; the rest default to "not set".
    """

    id: str
    content: str
    chunk_type: ChunkType
    file_path: str
    start_line: int
    end_line: int
    language: str
    metadata: dict = field(default_factory=dict)
    embedding: list[float] | None = None
    confidence_score: float = 0.0
    compressed_content: str | None = None

    # Characters-per-token ratio used by `token_count`. It is unannotated, so
    # it is a plain class attribute rather than a dataclass field.
    _CHARS_PER_TOKEN_CODE = 3.3

    @property
    def token_count(self) -> int:
        """Estimate the token count from text length; never less than 1.

        Measures `compressed_content` when it is non-empty, otherwise
        `content`.
        """
        measured = self.compressed_content if self.compressed_content else self.content
        estimate = int(len(measured) / self._CHARS_PER_TOKEN_CODE)
        return estimate if estimate > 1 else 1
71
+
72
+
73
@dataclass
class GraphNode:
    """A node of the graph: a typed, named entity tied to a file path."""

    id: str
    node_type: NodeType
    name: str
    file_path: str
    # Free-form extra attributes; each instance gets its own dict.
    properties: dict = field(default_factory=dict)
80
+
81
+
82
@dataclass
class GraphEdge:
    """A typed edge between two graph nodes, referenced by their ids."""

    source_id: str
    target_id: str
    edge_type: EdgeType
    # Free-form extra attributes; each instance gets its own dict.
    properties: dict = field(default_factory=dict)
88
+
89
+
90
@dataclass
class RetrievalResult:
    """Everything returned for one retrieval query."""

    chunks: list[Chunk]
    graph_nodes: list[GraphNode]
    graph_edges: list[GraphEdge]
    # The query text this result belongs to.
    query: str
    # Scores keyed by string; each instance gets its own dict.
    confidence_scores: dict[str, float] = field(default_factory=dict)
@@ -0,0 +1,104 @@
1
+ """Dynamic model pricing — fetched from Anthropic docs, cached locally."""
2
+ import json
3
+ import re
4
+ import time
5
+ from pathlib import Path
6
+
7
# The pricing cache lives under ~/.cce.
_CCE_HOME = Path.home().joinpath(".cce")
_CACHE_PATH = _CCE_HOME.joinpath("pricing_cache.json")
_CACHE_TTL = 7 * 24 * 60 * 60  # seven days, in seconds
_DOCS_URL = "https://docs.anthropic.com/en/docs/about-claude/models"

# Returned (as a copy) by get_model_pricing when there is neither a fresh
# cache entry nor a successful fetch.
_FALLBACK: dict[str, float] = dict(opus=5.0, sonnet=3.0, haiku=1.0)
18
+
19
+
20
def _parse_html(html: str) -> dict[str, float] | None:
    """Extract per-family input prices from an HTML table.

    A row in which at least two cells mention "Claude Opus/Sonnet/Haiku" is
    taken as a header and fixes which column belongs to which family. The
    next row containing a cell that mentions both "input" and "tok" is read
    as the price row: the first ``$<number>`` in each family column becomes
    that family's price. The header is then forgotten, and a family priced
    once is never overwritten by a later table.

    Returns ``{family: price}`` with lower-case family names, or None when
    no price was found.
    """
    row_re = re.compile(r"<tr[^>]*>(.*?)</tr>", re.DOTALL | re.IGNORECASE)
    cell_re = re.compile(r"<t[hd][^>]*>(.*?)</t[hd]>", re.DOTALL | re.IGNORECASE)
    family_re = re.compile(r"Claude\s+(Opus|Sonnet|Haiku)", re.IGNORECASE)
    price_re = re.compile(r"\$(\d+(?:\.\d+)?)")

    prices: dict[str, float] = {}
    header: list[str | None] = []

    for row in row_re.finditer(html):
        cells = cell_re.findall(row.group(1))

        # Header row: map each column to the family it names, if any.
        hits = [family_re.search(cell) for cell in cells]
        named = [hit.group(1).lower() if hit else None for hit in hits]
        if sum(name is not None for name in named) >= 2:
            header = named
            continue

        # Pricing row: only meaningful once a header has been seen.
        if not header:
            continue
        lowered = [cell.lower() for cell in cells]
        if not any("input" in text and "tok" in text for text in lowered):
            continue

        # zip() stops at the shorter list, so extra cells or extra header
        # columns are ignored.
        for family, cell in zip(header, cells):
            if family is None:
                continue
            amount = price_re.search(cell)
            if amount:
                prices.setdefault(family, float(amount.group(1)))
        header = []

    return prices or None
61
+
62
+
63
def _fetch() -> dict[str, float] | None:
    """Download the docs page and parse pricing from it; best effort.

    Returns the parsed pricing, or None on a non-200 response, on a page
    with no recognisable pricing, or on any exception (including `httpx`
    being unavailable).
    """
    try:
        import httpx

        response = httpx.get(_DOCS_URL, follow_redirects=True, timeout=5.0)
        return _parse_html(response.text) if response.status_code == 200 else None
    except Exception:
        return None
73
+
74
+
75
def _load_cache() -> dict[str, float] | None:
    """Return cached pricing if the cache file is present, fresh and well-formed.

    The cache is a JSON object ``{"ts": <epoch seconds>, "pricing": {...}}``.
    None is returned when the file is missing or unreadable, when it is not
    valid JSON of that shape, when `ts` is older than `_CACHE_TTL` or lies in
    the future, or when `pricing` is not a non-empty mapping of numbers.
    Never raises: reading the cache is best effort.
    """
    try:
        if not _CACHE_PATH.exists():
            return None
        data = json.loads(_CACHE_PATH.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            return None

        # A timestamp in the future (clock change, hand-edited file) would
        # otherwise give a negative age and keep the entry fresh forever.
        age = time.time() - data.get("ts", 0)
        if not 0 <= age < _CACHE_TTL:
            return None

        # Only hand back something callers can use as {family: price}.
        # bool is excluded explicitly because it is a subclass of int.
        pricing = data.get("pricing")
        if (
            isinstance(pricing, dict)
            and pricing
            and all(
                isinstance(price, (int, float)) and not isinstance(price, bool)
                for price in pricing.values()
            )
        ):
            return pricing
    except Exception:
        # Deliberately broad: a damaged cache must never break pricing
        # lookup; the caller falls through to a fresh fetch.
        pass
    return None
85
+
86
+
87
def _save_cache(pricing: dict[str, float]) -> None:
    """Write `pricing` to the cache file with the current time; best effort.

    Creates the cache directory if needed and stores
    ``{"ts": <now>, "pricing": pricing}`` as JSON. Any failure is ignored.
    """
    try:
        cache_dir = _CACHE_PATH.parent
        cache_dir.mkdir(parents=True, exist_ok=True)
        payload = {"ts": time.time(), "pricing": pricing}
        _CACHE_PATH.write_text(json.dumps(payload))
    except Exception:
        return
93
+
94
+
95
def get_model_pricing() -> dict[str, float]:
    """Return {family: input_price_per_1M_tokens}. Cached 7 days.

    Sources are tried in order: a fresh cache entry, then a live fetch
    (which is written back to the cache), then a copy of the built-in
    fallback table.
    """
    pricing = _load_cache()
    if not pricing:
        pricing = _fetch()
        if pricing:
            _save_cache(pricing)
        else:
            # Copy so callers cannot mutate the module-level fallback.
            pricing = dict(_FALLBACK)
    return pricing