PyPI - code-context-engine - Versions diffs - 0.4.0__py3-none-any.whl - Mend

code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

code_context_engine-0.4.0.dist-info/METADATA +389 -0
code_context_engine-0.4.0.dist-info/RECORD +63 -0
code_context_engine-0.4.0.dist-info/WHEEL +5 -0
code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
context_engine/__init__.py +3 -0
context_engine/cli.py +2848 -0
context_engine/cli_style.py +66 -0
context_engine/compression/__init__.py +0 -0
context_engine/compression/compressor.py +144 -0
context_engine/compression/ollama_client.py +33 -0
context_engine/compression/output_rules.py +77 -0
context_engine/compression/prompts.py +9 -0
context_engine/compression/quality.py +37 -0
context_engine/config.py +198 -0
context_engine/dashboard/__init__.py +0 -0
context_engine/dashboard/_page.py +1548 -0
context_engine/dashboard/server.py +429 -0
context_engine/editors.py +265 -0
context_engine/event_bus.py +24 -0
context_engine/indexer/__init__.py +0 -0
context_engine/indexer/chunker.py +147 -0
context_engine/indexer/embedder.py +154 -0
context_engine/indexer/embedding_cache.py +168 -0
context_engine/indexer/git_hooks.py +73 -0
context_engine/indexer/git_indexer.py +136 -0
context_engine/indexer/ignorefile.py +96 -0
context_engine/indexer/manifest.py +78 -0
context_engine/indexer/pipeline.py +624 -0
context_engine/indexer/secrets.py +332 -0
context_engine/indexer/watcher.py +109 -0
context_engine/integration/__init__.py +0 -0
context_engine/integration/bootstrap.py +76 -0
context_engine/integration/git_context.py +132 -0
context_engine/integration/mcp_server.py +1825 -0
context_engine/integration/session_capture.py +306 -0
context_engine/memory/__init__.py +6 -0
context_engine/memory/compressor.py +344 -0
context_engine/memory/db.py +922 -0
context_engine/memory/extractive.py +106 -0
context_engine/memory/grammar.py +419 -0
context_engine/memory/hook_installer.py +258 -0
context_engine/memory/hook_server.py +83 -0
context_engine/memory/hooks.py +327 -0
context_engine/memory/migrate.py +268 -0
context_engine/models.py +96 -0
context_engine/pricing.py +104 -0
context_engine/project_commands.py +296 -0
context_engine/retrieval/__init__.py +0 -0
context_engine/retrieval/confidence.py +47 -0
context_engine/retrieval/query_parser.py +105 -0
context_engine/retrieval/retriever.py +199 -0
context_engine/serve_http.py +208 -0
context_engine/services.py +252 -0
context_engine/storage/__init__.py +0 -0
context_engine/storage/backend.py +39 -0
context_engine/storage/fts_store.py +112 -0
context_engine/storage/graph_store.py +219 -0
context_engine/storage/local_backend.py +109 -0
context_engine/storage/remote_backend.py +117 -0
context_engine/storage/vector_store.py +357 -0
context_engine/utils.py +72 -0

context_engine/memory/migrate.py ADDED Viewed

@@ -0,0 +1,268 @@
+"""Import legacy per-session JSON files into the new memory.db.
+Walks each project's `sessions/` directory (and the legacy
+`~/.claude-context-engine/projects/<name>/sessions/` path), parses each
+*.json, imports decisions and code_areas with `source='migrated'`, then
+archives the consumed files into `migrated.zip` and removes them.
+Idempotent — `migrated_files` tracks what has already been imported so a
+rerun is a no-op.
+"""
+from __future__ import annotations
+import json
+import logging
+import sqlite3
+import time
+import zipfile
+from dataclasses import dataclass, field
+from pathlib import Path
+log = logging.getLogger(__name__)
+_DECISIONS_LOG_NAME = "decisions_log.json"
@dataclass
class MigrationSummary:
    """Totals accumulated by `migrate` across every directory it scans.

    All counters start at zero and `sources_scanned` starts empty.
    """

    # Rows inserted into the `decisions` table and committed.
    decisions_imported: int = 0
    # Rows inserted into the `code_areas` table and committed.
    code_areas_imported: int = 0
    # JSON files whose contents were imported and committed.
    files_imported: int = 0
    # Files newly written into a `migrated.zip` archive.
    files_archived: int = 0
    # Files skipped because `migrated_files` already lists them.
    files_skipped: int = 0
    # String form of each sessions directory that existed and was scanned.
    sources_scanned: list[str] = field(default_factory=list)
def candidate_session_dirs(project_name: str, primary_storage_base: Path) -> list[Path]:
    """List the directories that may hold legacy session JSON for a project.

    Two locations are returned, in scan order:
      1. <primary_storage_base>/sessions/  (current path)
      2. ~/.claude-context-engine/projects/<project_name>/sessions/
         (pre-rebrand path)

    Neither path is checked for existence here.
    """
    pre_rebrand_sessions = Path.home().joinpath(
        ".claude-context-engine", "projects", project_name, "sessions"
    )
    current_sessions = Path(primary_storage_base).joinpath("sessions")
    return [current_sessions, pre_rebrand_sessions]
def migrate(
    conn: sqlite3.Connection,
    project_name: str,
    storage_base: str | Path,
    *,
    archive: bool = True,
) -> MigrationSummary:
    """Import all legacy JSON sessions for `project_name` into the open db.

    `storage_base` is the per-project storage directory (e.g.
    ~/.cce/projects/<name>). `archive=True` zips and deletes consumed
    JSONs after successful import; pass False from tests that want to
    re-read the source files.

    Each candidate sessions directory is processed as one unit: its files
    are imported, optionally archived, recorded in `migrated_files`, and
    committed. If the archive step fails, that directory's inserts are
    rolled back and it is left for a later run.

    Returns:
        A MigrationSummary holding the totals for everything committed.
    """
    storage_base = Path(storage_base)
    summary = MigrationSummary()
    for sessions_dir in candidate_session_dirs(project_name, storage_base):
        if not sessions_dir.exists():
            continue
        summary.sources_scanned.append(str(sessions_dir))
        json_files = sorted(sessions_dir.glob("*.json"))
        consumed: list[Path] = []
        decisions_added = 0
        code_areas_added = 0
        for f in json_files:
            if _already_imported(conn, f):
                summary.files_skipped += 1
                continue
            try:
                imported = _import_one(conn, f)
            except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
                # All three are raised while the file is being read or
                # parsed, i.e. before any row is inserted, so skipping the
                # file leaves no partial data behind. UnicodeDecodeError is
                # listed explicitly: it is a ValueError, not an OSError.
                log.warning("Skipping unreadable session file %s: %s", f, exc)
                continue
            decisions_added += imported.decisions
            code_areas_added += imported.code_areas
            consumed.append(f)
        if not consumed:
            continue
        # Archive *before* marking imported. If zip-write fails we roll back
        # the directory's inserts so a rerun retries cleanly — otherwise
        # files would be permanently flagged imported but never archived.
        if archive:
            try:
                archived = _archive_and_remove(sessions_dir, consumed)
            except (
                OSError,
                ValueError,
                zipfile.BadZipFile,
                zipfile.LargeZipFile,
            ) as exc:
                # zipfile reports some failures outside the OSError family
                # (e.g. ValueError for a pre-1980 mtime); they need the same
                # rollback so the directory is retried on the next run.
                log.error(
                    "Archive failed for %s: %s — rolling back imports", sessions_dir, exc
                )
                conn.rollback()
                continue
            summary.files_archived += archived
        for f in consumed:
            _mark_imported(conn, f)
        conn.commit()
        summary.decisions_imported += decisions_added
        summary.code_areas_imported += code_areas_added
        summary.files_imported += len(consumed)
    return summary
@dataclass
class _ImportCounts:
    """Row counts produced by importing a single legacy JSON file."""

    # Rows inserted into `decisions`.
    decisions: int = 0
    # Rows inserted into `code_areas`.
    code_areas: int = 0
def _dict_entries(value, source: Path, key: str) -> list[dict]:
    """Return the dict members of `value`, logging whatever is dropped."""
    if not value:
        return []
    if not isinstance(value, list):
        log.warning("Ignoring non-list %s in %s", key, source)
        return []
    entries = [entry for entry in value if isinstance(entry, dict)]
    dropped = len(value) - len(entries)
    if dropped:
        log.warning("Ignoring %d malformed %s entries in %s", dropped, key, source)
    return entries


def _import_one(conn: sqlite3.Connection, source: Path) -> _ImportCounts:
    """Import a single legacy JSON file. Returns counts of imported rows.

    Two layouts are recognised:
      - `decisions_log.json`: a top-level list of decision dicts, each
        optionally carrying its own `session_id`.
      - per-session files: a dict with `id`, `decisions` and `code_areas`.

    Any other top-level shape imports nothing. Entries that are not dicts
    are skipped with a warning instead of aborting the file part-way.

    Raises:
        OSError: the file could not be read.
        UnicodeDecodeError: the file could not be decoded.
        json.JSONDecodeError: the contents are not valid JSON.
    """
    counts = _ImportCounts()
    # JSON is UTF-8 by specification, so do not depend on the platform's
    # default encoding. Fall back to that default only for files that are
    # not valid UTF-8, so anything readable before stays readable.
    try:
        text = source.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        text = source.read_text()
    data = json.loads(text)
    # decisions_log.json is a top-level list of decision dicts, not a session.
    if source.name == _DECISIONS_LOG_NAME and isinstance(data, list):
        # Memoise per-session existence checks within this archive — the same
        # session_id often appears across many entries.
        exists_cache: dict[str, bool] = {}
        for d in _dict_entries(data, source, "decision"):
            sid = d.get("session_id")
            if isinstance(sid, (list, dict)):
                # Unhashable and not bindable as a SQL parameter; treat the
                # decision as having no session.
                sid = None
            if sid is not None and sid not in exists_cache:
                exists_cache[sid] = _session_exists(conn, sid)
            _insert_decision(
                conn,
                session_id=sid if sid is not None and exists_cache.get(sid) else None,
                decision=d.get("decision", ""),
                reason=d.get("reason", ""),
                timestamp=d.get("timestamp"),
            )
            counts.decisions += 1
        return counts
    # Per-session JSON: {"id", "decisions": [...], "code_areas": [...], ...}
    if not isinstance(data, dict):
        return counts
    session_id = data.get("id")
    if isinstance(session_id, (list, dict)):
        session_id = None
    # session_id is constant for the rest of this file — resolve once.
    fk_session_id = session_id if _session_exists(conn, session_id) else None
    for d in _dict_entries(data.get("decisions"), source, "decisions"):
        _insert_decision(
            conn,
            session_id=fk_session_id,
            decision=d.get("decision", ""),
            reason=d.get("reason", ""),
            timestamp=d.get("timestamp"),
        )
        counts.decisions += 1
    for c in _dict_entries(data.get("code_areas"), source, "code_areas"):
        _insert_code_area(
            conn,
            session_id=fk_session_id,
            file_path=c.get("file_path", ""),
            description=c.get("description", ""),
            timestamp=c.get("timestamp"),
        )
        counts.code_areas += 1
    return counts
def _insert_decision(conn, *, session_id, decision, reason, timestamp):
    """Insert one migrated row into `decisions` (source='migrated').

    Args:
        conn: Open sqlite3 connection; the caller owns commit/rollback.
        session_id: Value for the `session_id` column, or None.
        decision: Decision text; None or "" is stored as compressed "".
        reason: Reason text; None or "" is stored as compressed "".
        timestamp: Legacy epoch value. None, or a value that cannot be
            turned into a valid epoch, is replaced by the current time.
    """
    # Use `is not None` so legacy rows with an explicit 0/0.0 timestamp keep
    # their original ordering instead of being stamped to "now".
    now = int(time.time())
    try:
        epoch = int(timestamp) if timestamp is not None else now
        iso = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(epoch))
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric, NaN/inf or out-of-range legacy timestamps must not
        # abort the migration mid-file; stamp the row like a missing one.
        epoch = now
        iso = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(epoch))
    # Mirror the live record_decision write path: pass through grammar.compress
    # at the project default level so migrated rows land in storage at the
    # same shape as freshly recorded ones. Without this the FTS5 index sits
    # over a heterogeneous corpus (compressed-with-articles-dropped + raw),
    # and `_content_key` dedup would fail across the same boundary.
    from context_engine.memory.grammar import (
        compress as _grammar_compress, DEFAULT_LEVEL as _GRAMMAR_LEVEL,
    )
    conn.execute(
        "INSERT INTO decisions (session_id, decision, reason, source, "
        "created_at_epoch, created_at) VALUES (?, ?, ?, 'migrated', ?, ?)",
        (
            session_id,
            _grammar_compress(decision or "", level=_GRAMMAR_LEVEL),
            _grammar_compress(reason or "", level=_GRAMMAR_LEVEL),
            epoch,
            iso,
        ),
    )
def _insert_code_area(conn, *, session_id, file_path, description, timestamp):
    """Insert one migrated row into `code_areas` (source='migrated').

    Args:
        conn: Open sqlite3 connection; the caller owns commit/rollback.
        session_id: Value for the `session_id` column, or None.
        file_path: Stored exactly as given.
        description: Prose; None or "" is stored as compressed "".
        timestamp: Legacy epoch value. None, or a value that cannot be
            converted to an integer, is replaced by the current time.
    """
    try:
        epoch = int(timestamp) if timestamp is not None else int(time.time())
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or NaN/inf legacy timestamps must not abort the
        # migration mid-file; stamp the row like a missing one.
        epoch = int(time.time())
    # Only the description is compressed: it is prose, whereas file_path is
    # a structured token that is stored verbatim.
    from context_engine.memory.grammar import (
        compress as _grammar_compress, DEFAULT_LEVEL as _GRAMMAR_LEVEL,
    )
    conn.execute(
        "INSERT INTO code_areas (session_id, file_path, description, source, "
        "created_at_epoch) VALUES (?, ?, ?, 'migrated', ?)",
        (
            session_id,
            file_path,  # path is a structured token; no point compressing
            _grammar_compress(description or "", level=_GRAMMAR_LEVEL),
            epoch,
        ),
    )
def _session_exists(conn, session_id) -> bool:
    """Report whether the `sessions` table has a row with this id.

    A falsy id (None, "", 0) returns False without touching the database.
    """
    if session_id:
        cursor = conn.execute(
            "SELECT 1 FROM sessions WHERE id = ?", (session_id,)
        )
        return cursor.fetchone() is not None
    return False
def _already_imported(conn, source: Path) -> bool:
    """Report whether `migrated_files` already lists this path.

    The lookup key is `str(source)`, compared against `source_path`.
    """
    cursor = conn.execute(
        "SELECT 1 FROM migrated_files WHERE source_path = ?",
        (str(source),),
    )
    return cursor.fetchone() is not None
def _mark_imported(conn, source: Path) -> None:
    """Record `source` in `migrated_files`, stamped with the current epoch.

    Uses INSERT OR IGNORE, so a path that is already recorded is left as is.
    Nothing is committed here.
    """
    row = (str(source), int(time.time()))
    conn.execute(
        "INSERT OR IGNORE INTO migrated_files (source_path, imported_at_epoch) "
        "VALUES (?, ?)",
        row,
    )
def _archive_and_remove(sessions_dir: Path, files: list[Path]) -> int:
    """Add `files` to `sessions_dir/migrated.zip`, then delete the originals.

    A file whose name is already present in the archive is not written
    again, but it is still deleted. Deletion failures are logged and do not
    raise. Returns how many files were newly written to the zip.
    """
    if not files:
        return 0
    newly_archived = 0
    zip_path = sessions_dir / "migrated.zip"
    with zipfile.ZipFile(zip_path, mode="a", compression=zipfile.ZIP_DEFLATED) as bundle:
        already_there = set(bundle.namelist())
        for path in files:
            if path.name in already_there:
                # Archived by a previous run; only the deletion remains.
                continue
            bundle.write(path, arcname=path.name)
            newly_archived += 1
    # Originals are removed only after the archive has been closed.
    for path in files:
        try:
            path.unlink()
        except OSError as exc:
            log.warning("Could not remove migrated file %s: %s", path, exc)
    return newly_archived

context_engine/models.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""Shared data models for the context engine."""
+from dataclasses import dataclass, field
+from enum import Enum
class ChunkType(Enum):
    """Kinds of chunk; each member's value is its lowercase name."""

    # Chunks taken from source code.
    FUNCTION = "function"
    CLASS = "class"
    MODULE = "module"
    # Chunks taken from documentation and comments.
    DOC = "doc"
    COMMENT = "comment"
    # Chunks taken from history and memory.
    COMMIT = "commit"
    SESSION = "session"
    DECISION = "decision"
class NodeType(Enum):
    """Kinds of graph node; each member's value is its lowercase name."""

    # Nodes for code entities.
    FUNCTION = "function"
    CLASS = "class"
    FILE = "file"
    MODULE = "module"
    # Nodes for documentation, history and memory.
    DOC = "doc"
    COMMIT = "commit"
    SESSION = "session"
    DECISION = "decision"
class EdgeType(Enum):
    """Kinds of graph edge; each member's value is its lowercase name."""

    CALLS = "calls"
    IMPORTS = "imports"
    DEFINES = "defines"
    MODIFIES = "modifies"
    DISCUSSED_IN = "discussed_in"
    DECIDED = "decided"
class ConfidenceLevel(Enum):
    """Three-step confidence bucket derived from a numeric score."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"

    @staticmethod
    def from_score(score: float) -> "ConfidenceLevel":
        """Map a score onto a bucket.

        Above 0.8 is HIGH, from 0.5 up to and including 0.8 is MEDIUM, and
        everything else (NaN included) is LOW.
        """
        level = ConfidenceLevel.LOW
        if score >= 0.5:
            level = ConfidenceLevel.MEDIUM
        if score > 0.8:
            level = ConfidenceLevel.HIGH
        return level
@dataclass
class Chunk:
    """One unit of indexed content together with its location.

    `embedding`, `confidence_score` and `compressed_content` default to
    None, 0.0 and None respectively; `metadata` defaults to a fresh dict.
    """

    id: str
    content: str
    chunk_type: ChunkType
    file_path: str
    start_line: int
    end_line: int
    language: str
    metadata: dict = field(default_factory=dict)
    embedding: list[float] | None = None
    confidence_score: float = 0.0
    compressed_content: str | None = None

    # Divisor for the token estimate. It has no annotation, so it is a plain
    # class attribute and not a dataclass field.
    _CHARS_PER_TOKEN_CODE = 3.3

    @property
    def token_count(self) -> int:
        """Estimated token count, never less than 1.

        Measures `compressed_content` when it is non-empty, else `content`.
        """
        measured = self.compressed_content if self.compressed_content else self.content
        estimate = int(len(measured) / self._CHARS_PER_TOKEN_CODE)
        return estimate if estimate > 1 else 1
@dataclass
class GraphNode:
    """A node in the graph: an id, a kind, a name and a file path."""

    id: str
    node_type: NodeType
    name: str
    file_path: str
    # Free-form extra attributes; every instance gets its own dict.
    properties: dict = field(default_factory=dict)
@dataclass
class GraphEdge:
    """A directed, typed link from `source_id` to `target_id`."""

    source_id: str
    target_id: str
    edge_type: EdgeType
    # Free-form extra attributes; every instance gets its own dict.
    properties: dict = field(default_factory=dict)
@dataclass
class RetrievalResult:
    """Bundle of chunks, graph nodes and graph edges returned for a query."""

    chunks: list[Chunk]
    graph_nodes: list[GraphNode]
    graph_edges: list[GraphEdge]
    query: str
    # Float score per string key; every instance gets its own dict.
    confidence_scores: dict[str, float] = field(default_factory=dict)

context_engine/pricing.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""Dynamic model pricing — fetched from Anthropic docs, cached locally."""
+import json
+import re
+import time
+from pathlib import Path
+_CCE_HOME = Path.home() / ".cce"
+_CACHE_PATH = _CCE_HOME / "pricing_cache.json"
+_CACHE_TTL = 7 * 24 * 3600  # 7 days
+_DOCS_URL = "https://docs.anthropic.com/en/docs/about-claude/models"
+# Used only when fetch fails and no cache exists
+_FALLBACK: dict[str, float] = {
+    "opus": 5.0,
+    "sonnet": 3.0,
+    "haiku": 1.0,
+}
+def _parse_html(html: str) -> dict[str, float] | None:
+    """Parse per-family input pricing from Anthropic docs HTML table."""
+    pricing: dict[str, float] = {}
+    rows = re.findall(r"<tr[^>]*>(.*?)</tr>", html, re.DOTALL | re.IGNORECASE)
+    col_families: list[str | None] = []
+    for row_html in rows:
+        cells = re.findall(
+            r"<t[hd][^>]*>(.*?)</t[hd]>", row_html, re.DOTALL | re.IGNORECASE
+        )
+        # Header row: extract column → family mapping
+        families_in_row: list[str | None] = []
+        has_model = False
+        for cell in cells:
+            m = re.search(r"Claude\s+(Opus|Sonnet|Haiku)", cell, re.IGNORECASE)
+            if m:
+                families_in_row.append(m.group(1).lower())
+                has_model = True
+            else:
+                families_in_row.append(None)
+        if has_model and sum(1 for f in families_in_row if f) >= 2:
+            col_families = families_in_row
+            continue
+        # Pricing row: extract $ amounts per column
+        if col_families and any(
+            "input" in c.lower() and "tok" in c.lower() for c in cells
+        ):
+            for i, cell in enumerate(cells):
+                if i < len(col_families) and col_families[i]:
+                    m = re.search(r"\$(\d+(?:\.\d+)?)", cell)
+                    if m:
+                        family = col_families[i]
+                        if family not in pricing:
+                            pricing[family] = float(m.group(1))
+            col_families = []
+    return pricing if pricing else None
def _fetch() -> dict[str, float] | None:
    """Download the docs page and parse pricing from it.

    Returns None on any failure: `httpx` missing, a request error, a
    non-200 status, or a page that yields no prices.
    """
    try:
        import httpx

        response = httpx.get(_DOCS_URL, follow_redirects=True, timeout=5.0)
        return _parse_html(response.text) if response.status_code == 200 else None
    except Exception:
        # Deliberately broad: a failed fetch only makes the caller use its
        # fallback.
        return None
def _load_cache() -> dict[str, float] | None:
    """Return the cached pricing table if it is present, fresh and well-formed.

    The cache file is JSON of the form {"ts": <epoch>, "pricing": {...}}.
    None is returned when the file is missing, unreadable or not valid
    JSON, when its timestamp is non-numeric, in the future or older than
    `_CACHE_TTL`, or when `pricing` is not a mapping of family name to a
    number.
    """
    try:
        # Reading directly (no exists() check) avoids a race with deletion;
        # a missing file surfaces as FileNotFoundError, which is an OSError.
        payload = json.loads(_CACHE_PATH.read_text())
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return None
    if not isinstance(payload, dict):
        return None
    stamp = payload.get("ts", 0)
    if isinstance(stamp, bool) or not isinstance(stamp, (int, float)):
        return None
    age = time.time() - stamp
    # A timestamp from the future (clock change, hand-edited file) would
    # otherwise keep the cache "fresh" indefinitely. NaN fails this too.
    if not 0 <= age < _CACHE_TTL:
        return None
    pricing = payload.get("pricing")
    if not isinstance(pricing, dict):
        return None
    for family, price in pricing.items():
        if not isinstance(family, str):
            return None
        if isinstance(price, bool) or not isinstance(price, (int, float)):
            return None
    return pricing
def _save_cache(pricing: dict[str, float]) -> None:
    """Write `pricing` to the cache file with the current timestamp.

    Best effort: the parent directory is created if needed, and any failure
    is ignored.
    """
    record = {"ts": time.time(), "pricing": pricing}
    try:
        cache_dir = _CACHE_PATH.parent
        cache_dir.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(record)
        _CACHE_PATH.write_text(serialized)
    except Exception:
        # Deliberately ignored: failing to cache must not break the caller.
        pass
def get_model_pricing() -> dict[str, float]:
    """Return {family: input_price_per_1M_tokens}. Cached 7 days.

    Sources are tried in order: the local cache, a live fetch (saved to the
    cache when it succeeds), then a copy of the built-in fallback table.
    """
    pricing = _load_cache()
    if not pricing:
        pricing = _fetch()
        if pricing:
            _save_cache(pricing)
        else:
            # Copy so callers cannot mutate the module-level table.
            pricing = dict(_FALLBACK)
    return pricing