code-context-engine 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. code_context_engine-0.4.0.dist-info/METADATA +389 -0
  2. code_context_engine-0.4.0.dist-info/RECORD +63 -0
  3. code_context_engine-0.4.0.dist-info/WHEEL +5 -0
  4. code_context_engine-0.4.0.dist-info/entry_points.txt +4 -0
  5. code_context_engine-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. code_context_engine-0.4.0.dist-info/top_level.txt +1 -0
  7. context_engine/__init__.py +3 -0
  8. context_engine/cli.py +2848 -0
  9. context_engine/cli_style.py +66 -0
  10. context_engine/compression/__init__.py +0 -0
  11. context_engine/compression/compressor.py +144 -0
  12. context_engine/compression/ollama_client.py +33 -0
  13. context_engine/compression/output_rules.py +77 -0
  14. context_engine/compression/prompts.py +9 -0
  15. context_engine/compression/quality.py +37 -0
  16. context_engine/config.py +198 -0
  17. context_engine/dashboard/__init__.py +0 -0
  18. context_engine/dashboard/_page.py +1548 -0
  19. context_engine/dashboard/server.py +429 -0
  20. context_engine/editors.py +265 -0
  21. context_engine/event_bus.py +24 -0
  22. context_engine/indexer/__init__.py +0 -0
  23. context_engine/indexer/chunker.py +147 -0
  24. context_engine/indexer/embedder.py +154 -0
  25. context_engine/indexer/embedding_cache.py +168 -0
  26. context_engine/indexer/git_hooks.py +73 -0
  27. context_engine/indexer/git_indexer.py +136 -0
  28. context_engine/indexer/ignorefile.py +96 -0
  29. context_engine/indexer/manifest.py +78 -0
  30. context_engine/indexer/pipeline.py +624 -0
  31. context_engine/indexer/secrets.py +332 -0
  32. context_engine/indexer/watcher.py +109 -0
  33. context_engine/integration/__init__.py +0 -0
  34. context_engine/integration/bootstrap.py +76 -0
  35. context_engine/integration/git_context.py +132 -0
  36. context_engine/integration/mcp_server.py +1825 -0
  37. context_engine/integration/session_capture.py +306 -0
  38. context_engine/memory/__init__.py +6 -0
  39. context_engine/memory/compressor.py +344 -0
  40. context_engine/memory/db.py +922 -0
  41. context_engine/memory/extractive.py +106 -0
  42. context_engine/memory/grammar.py +419 -0
  43. context_engine/memory/hook_installer.py +258 -0
  44. context_engine/memory/hook_server.py +83 -0
  45. context_engine/memory/hooks.py +327 -0
  46. context_engine/memory/migrate.py +268 -0
  47. context_engine/models.py +96 -0
  48. context_engine/pricing.py +104 -0
  49. context_engine/project_commands.py +296 -0
  50. context_engine/retrieval/__init__.py +0 -0
  51. context_engine/retrieval/confidence.py +47 -0
  52. context_engine/retrieval/query_parser.py +105 -0
  53. context_engine/retrieval/retriever.py +199 -0
  54. context_engine/serve_http.py +208 -0
  55. context_engine/services.py +252 -0
  56. context_engine/storage/__init__.py +0 -0
  57. context_engine/storage/backend.py +39 -0
  58. context_engine/storage/fts_store.py +112 -0
  59. context_engine/storage/graph_store.py +219 -0
  60. context_engine/storage/local_backend.py +109 -0
  61. context_engine/storage/remote_backend.py +117 -0
  62. context_engine/storage/vector_store.py +357 -0
  63. context_engine/utils.py +72 -0
@@ -0,0 +1,136 @@
1
+ """Parse git log into searchable chunks."""
2
+ import asyncio
3
+ import logging
4
+ import re
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ from context_engine.models import (
9
+ Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType,
10
+ )
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+ _SHA_RE = re.compile(r"^[0-9a-f]{40}$")
15
+
16
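+ # Marker originally meant to delimit commit records. NOTE: nothing in this module references it — the git format string below terminates each record with a NUL byte (%x00) and the parser splits on that instead.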
17
+ _RECORD_START = "---CCE_START---"
18
+
19
+
20
async def index_commits(
    project_dir: Path,
    since_sha: str | None = None,
    max_commits: int = 200,
) -> tuple[list[Chunk], list[GraphNode], list[GraphEdge]]:
    """Parse recent git history into searchable chunks.

    Runs ``git log`` twice in a worker thread: once for commit metadata and
    once with ``--name-only`` for the files each commit touched, then hands
    both outputs to ``_parse_meta``.

    Args:
        project_dir: Directory the git commands are run in.
        since_sha: If given, only ``<since_sha>..HEAD`` is read. A value
            starting with ``-`` is ignored so git can never interpret it as
            a command-line option.
        max_commits: Number of most recent commits to read. Only applied
            when ``since_sha`` is not used.

    Returns:
        ``(chunks, nodes, edges)``. All three lists are empty when git
        cannot be started or the metadata call exits non-zero.
    """
    if since_sha and since_sha.startswith("-"):
        # The value may come from persisted state; never let it be parsed
        # as an option by git.
        log.warning("Ignoring option-like since_sha %r", since_sha)
        since_sha = None
    range_arg = f"{since_sha}..HEAD" if since_sha else f"-{max_commits}"

    # core.quotePath=false keeps non-ASCII file names verbatim instead of
    # octal-escaped and double-quoted, so edge targets carry the real path.
    git_log = ["git", "-c", "core.quotePath=false", "log", range_arg]
    # Decode explicitly as UTF-8 rather than with the locale codec, and
    # replace undecodable bytes so one odd commit cannot abort indexing.
    run_opts = {
        "cwd": project_dir,
        "capture_output": True,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
        "check": False,
    }

    # Call 1: commit metadata, one NUL-terminated record per commit.
    try:
        meta_result = await asyncio.to_thread(
            subprocess.run,
            [*git_log, "--format=%H%n%an%n%ai%n%s%n%b%x00"],
            **run_opts,
        )
    except OSError as exc:
        # git not installed, or project_dir missing / not a directory.
        log.debug("git log skipped: %s", exc)
        return [], [], []

    if meta_result.returncode != 0:
        log.debug("git log skipped: %s", meta_result.stderr.strip())
        return [], [], []

    # Call 2: changed files per commit. Failure here is non-fatal; commits
    # are still indexed, just without MODIFIES edges.
    changed_files_by_hash: dict[str, list[str]] = {}
    try:
        files_result = await asyncio.to_thread(
            subprocess.run,
            [*git_log, "--name-only", "--format=%H"],
            **run_opts,
        )
    except OSError as exc:
        log.debug("git log --name-only skipped: %s", exc)
    else:
        if files_result.returncode == 0:
            changed_files_by_hash = _parse_name_only(files_result.stdout)

    return _parse_meta(meta_result.stdout, changed_files_by_hash)
52
+
53
+
54
def _parse_name_only(output: str) -> dict[str, list[str]]:
    """Group `git log --name-only --format=%H` output into {hash: [files]}.

    Blank lines are ignored. A line that is a full 40-character lowercase
    hex hash opens (or re-opens) that commit's entry; any other line is
    recorded as a file of the most recently seen commit. Lines that appear
    before the first hash are dropped.
    """
    files_by_commit: dict[str, list[str]] = {}
    active_files: list[str] | None = None
    for raw_line in output.splitlines():
        entry = raw_line.strip()
        if not entry:
            continue
        if _SHA_RE.match(entry):
            # setdefault keeps files already collected for a repeated hash.
            active_files = files_by_commit.setdefault(entry, [])
        elif active_files is not None:
            active_files.append(entry)
    return files_by_commit
68
+
69
+
70
def _parse_meta(
    output: str,
    changed_files_by_hash: dict[str, list[str]],
) -> tuple[list[Chunk], list[GraphNode], list[GraphEdge]]:
    """Turn NUL-separated commit records into chunks, graph nodes and edges.

    Each record is read line by line as: full hash, author, date, subject,
    then any number of body lines. Records with fewer than four lines, or
    whose first line is not a 40-character lowercase hex hash, are skipped.

    For every accepted commit one ``Chunk`` and one ``GraphNode`` are
    produced (both keyed by the 7-character abbreviated hash), plus one
    ``MODIFIES`` edge per file listed for that commit in
    ``changed_files_by_hash``.
    """
    chunks: list[Chunk] = []
    nodes: list[GraphNode] = []
    edges: list[GraphEdge] = []

    for raw_record in output.split("\x00"):
        fields = raw_record.strip().splitlines()
        # Covers empty records as well as truncated ones.
        if len(fields) < 4:
            continue

        sha = fields[0].strip()
        if not _SHA_RE.match(sha):
            continue

        author, date, subject = (field.strip() for field in fields[1:4])
        body = "\n".join(fields[4:]).strip()

        abbrev = sha[:7]
        commit_id = f"commit_{abbrev}"
        location = f"git:{abbrev}"

        chunks.append(
            Chunk(
                id=commit_id,
                # Outer strip drops the trailing blank lines when body is empty.
                content=f"{subject}\n\n{body}".strip(),
                chunk_type=ChunkType.COMMIT,
                file_path=location,
                start_line=0,
                end_line=0,
                language="git",
                metadata={
                    "author": author,
                    "date": date,
                    "hash": sha,
                    "chunk_kind": "commit",
                },
            )
        )
        nodes.append(
            GraphNode(
                id=commit_id,
                node_type=NodeType.COMMIT,
                name=subject,
                file_path=location,
            )
        )
        # Files are looked up by the full hash, not the abbreviation.
        edges.extend(
            GraphEdge(
                source_id=commit_id,
                target_id=f"file_{fname}",
                edge_type=EdgeType.MODIFIES,
            )
            for fname in changed_files_by_hash.get(sha, [])
        )

    return chunks, nodes, edges
@@ -0,0 +1,96 @@
1
+ """`.cceignore` parser — gitignore-style patterns for the indexer.
2
+
3
+ Supports the practical subset of `.gitignore` syntax that covers ~95% of
4
+ real-world use:
5
+
6
+ · Glob patterns: `*.log`, `temp/*`, `**/build/`
7
+ · Directory matches: `node_modules/` (trailing slash)
8
+ · Comments: lines starting with `#`
9
+ · Blank lines: ignored
10
+
11
+ Deliberate deviation from strict gitignore: `*` here matches across
12
+ path separators (fnmatch behaviour), so `temp/*` excludes everything
13
+ under `temp/`, not just direct children. In our experience that's what
14
+ users actually want from an indexer ignore file.
15
+
16
+ NOT supported (intentionally — adds dependency and complexity for
17
+ diminishing returns):
18
+
19
+ · Negation patterns (`!keep.log`)
20
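+ · gitignore's anchoring rules (leading `/`) — a leading `/` is simply stripped; slash-free patterns match a basename anywhere in the tree, while patterns containing a `/` are matched against the whole root-relative path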
21
+ · Character classes beyond what `fnmatch` provides
22
+
23
+ Users who need full gitignore semantics can add `pathspec` to their
24
+ project and wire a custom matcher; this module covers the common case
25
+ without a third-party dependency.
26
+ """
27
+ from __future__ import annotations
28
+
29
+ import fnmatch
30
+ from pathlib import Path
31
+
32
CCEIGNORE_FILENAME = ".cceignore"


def load_ignore_patterns(project_dir: Path) -> list[str]:
    """Read `.cceignore` from `project_dir` and return its non-comment,
    non-blank lines. Returns an empty list if the file doesn't exist or
    cannot be read.

    Patterns are returned verbatim (whitespace stripped); matching is
    delegated to `matches_any`.

    The file is decoded as UTF-8. A leading byte-order mark is dropped so
    it cannot glue itself onto the first pattern, and undecodable bytes are
    replaced rather than raised on, so the remaining patterns still apply.
    """
    path = project_dir / CCEIGNORE_FILENAME
    if not path.is_file():
        return []
    try:
        # "utf-8-sig" reads plain UTF-8 too; it only strips an optional BOM.
        raw = path.read_text(encoding="utf-8-sig", errors="replace")
    except OSError:
        return []
    stripped_lines = (line.strip() for line in raw.splitlines())
    return [s for s in stripped_lines if s and not s.startswith("#")]
56
+
57
+
58
def matches_any(rel_path: str, is_dir: bool, patterns: list[str]) -> bool:
    """True if `rel_path` matches any of the given patterns.

    `rel_path` is the path relative to the project root; backslashes are
    converted to forward slashes and leading `./` or `/` prefixes are
    removed before matching. `is_dir` distinguishes directories so
    trailing-slash patterns (e.g. `build/`) only match directories.

    Matching rules:
      * pattern without `/`  -> compared with the basename, so it matches
        at any depth;
      * pattern with `/`     -> compared with the whole relative path (a
        leading `/` on the pattern is dropped);
      * leading `**/`        -> additionally tried with the prefix removed
        and with `*/` in its place, so `**/build/foo` matches both
        `build/foo` and `src/build/foo`.
    """
    if not patterns:
        return False

    # Normalise: forward slashes, no leading "./" or "/".
    # Prefixes are removed explicitly: str.lstrip("./") strips a *set* of
    # characters and would also eat the dot of ".env" or ".github/...".
    rel = rel_path.replace("\\", "/")
    while True:
        if rel.startswith("./"):
            rel = rel[2:]
        elif rel.startswith("/"):
            rel = rel[1:]
        else:
            break
    if rel == ".":
        # The project root itself has no name; keep it from matching ".*".
        rel = ""
    name = rel.rsplit("/", 1)[-1]

    for pat in patterns:
        # Trailing slash -> directory-only pattern.
        is_dir_pat = pat.endswith("/")
        if is_dir_pat and not is_dir:
            continue
        p = pat[:-1] if is_dir_pat else pat

        if "/" not in p:
            # Basename match, anywhere in the tree.
            if fnmatch.fnmatchcase(name, p):
                return True
            continue

        # Path match from the project root; a leading "/" adds nothing.
        anchored = p.lstrip("/")
        if fnmatch.fnmatchcase(rel, anchored):
            return True
        # fnmatch treats `**` like `*`, which still demands the following
        # "/", so also try the zero-directory and any-directory forms.
        if anchored.startswith("**/"):
            tail = anchored[3:]
            if fnmatch.fnmatchcase(rel, tail):
                return True
            if fnmatch.fnmatchcase(rel, f"*/{tail}"):
                return True
    return False
@@ -0,0 +1,78 @@
1
+ """Content hash manifest for incremental indexing."""
2
+ import json
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ from context_engine.utils import atomic_write_text
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+ CURRENT_SCHEMA_VERSION = 2
11
+
12
+
13
+ class Manifest:
14
+ def __init__(self, manifest_path: Path) -> None:
15
+ self._path = manifest_path
16
+ self._entries: dict[str, str] = {}
17
+ self._schema_version: int = CURRENT_SCHEMA_VERSION
18
+ self._last_git_sha: str | None = None
19
+
20
+ if self._path.exists():
21
+ try:
22
+ with open(self._path) as f:
23
+ loaded = json.load(f)
24
+ if isinstance(loaded, dict):
25
+ if "__schema_version" in loaded:
26
+ # New versioned format
27
+ self._schema_version = loaded["__schema_version"]
28
+ self._entries = loaded.get("files", {})
29
+ self._last_git_sha = loaded.get("last_git_sha")
30
+ else:
31
+ # Old plain-dict format (pre-v0.2) — treat as version 1
32
+ self._schema_version = 1
33
+ self._entries = loaded
34
+ else:
35
+ log.warning(
36
+ "Manifest at %s was not a dict (got %s); starting empty.",
37
+ self._path,
38
+ type(loaded).__name__,
39
+ )
40
+ except (json.JSONDecodeError, OSError) as exc:
41
+ log.warning("Manifest at %s unreadable (%s); starting empty.", self._path, exc)
42
+ self._entries = {}
43
+
44
+ @property
45
+ def schema_version(self) -> int:
46
+ return self._schema_version
47
+
48
+ @property
49
+ def needs_reindex(self) -> bool:
50
+ return self._schema_version != CURRENT_SCHEMA_VERSION
51
+
52
+ @property
53
+ def last_git_sha(self) -> str | None:
54
+ return self._last_git_sha
55
+
56
+ @last_git_sha.setter
57
+ def last_git_sha(self, value: str | None) -> None:
58
+ self._last_git_sha = value
59
+
60
+ def get_hash(self, file_path: str) -> str | None:
61
+ return self._entries.get(file_path)
62
+
63
+ def update(self, file_path: str, content_hash: str) -> None:
64
+ self._entries[file_path] = content_hash
65
+
66
+ def remove(self, file_path: str) -> None:
67
+ self._entries.pop(file_path, None)
68
+
69
+ def has_changed(self, file_path: str, content_hash: str) -> bool:
70
+ return self._entries.get(file_path) != content_hash
71
+
72
+ def save(self) -> None:
73
+ payload = {
74
+ "__schema_version": CURRENT_SCHEMA_VERSION,
75
+ "files": self._entries,
76
+ "last_git_sha": self._last_git_sha,
77
+ }
78
+ atomic_write_text(self._path, json.dumps(payload))