code-context-mcp 1.0.0 (code_context_mcp-1.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. code_context/__init__.py +3 -0
  2. code_context/_background.py +93 -0
  3. code_context/_composition.py +425 -0
  4. code_context/_watcher.py +89 -0
  5. code_context/adapters/__init__.py +0 -0
  6. code_context/adapters/driven/__init__.py +0 -0
  7. code_context/adapters/driven/chunker_dispatcher.py +43 -0
  8. code_context/adapters/driven/chunker_line.py +54 -0
  9. code_context/adapters/driven/chunker_treesitter.py +215 -0
  10. code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
  11. code_context/adapters/driven/code_source_fs.py +122 -0
  12. code_context/adapters/driven/embeddings_local.py +111 -0
  13. code_context/adapters/driven/embeddings_openai.py +58 -0
  14. code_context/adapters/driven/git_source_cli.py +211 -0
  15. code_context/adapters/driven/introspector_fs.py +224 -0
  16. code_context/adapters/driven/keyword_index_sqlite.py +206 -0
  17. code_context/adapters/driven/reranker_crossencoder.py +61 -0
  18. code_context/adapters/driven/symbol_index_sqlite.py +264 -0
  19. code_context/adapters/driven/vector_store_numpy.py +119 -0
  20. code_context/adapters/driving/__init__.py +0 -0
  21. code_context/adapters/driving/mcp_server.py +365 -0
  22. code_context/cli.py +161 -0
  23. code_context/config.py +114 -0
  24. code_context/domain/__init__.py +0 -0
  25. code_context/domain/index_bus.py +52 -0
  26. code_context/domain/models.py +140 -0
  27. code_context/domain/ports.py +205 -0
  28. code_context/domain/use_cases/__init__.py +0 -0
  29. code_context/domain/use_cases/explain_diff.py +98 -0
  30. code_context/domain/use_cases/find_definition.py +30 -0
  31. code_context/domain/use_cases/find_references.py +22 -0
  32. code_context/domain/use_cases/get_file_tree.py +36 -0
  33. code_context/domain/use_cases/get_summary.py +24 -0
  34. code_context/domain/use_cases/indexer.py +336 -0
  35. code_context/domain/use_cases/recent_changes.py +36 -0
  36. code_context/domain/use_cases/search_repo.py +131 -0
  37. code_context/server.py +151 -0
  38. code_context_mcp-1.0.0.dist-info/METADATA +181 -0
  39. code_context_mcp-1.0.0.dist-info/RECORD +43 -0
  40. code_context_mcp-1.0.0.dist-info/WHEEL +5 -0
  41. code_context_mcp-1.0.0.dist-info/entry_points.txt +3 -0
  42. code_context_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  43. code_context_mcp-1.0.0.dist-info/top_level.txt +1 -0
code_context/domain/use_cases/indexer.py ADDED
@@ -0,0 +1,336 @@
+ """IndexerUseCase — orchestrates the 5 ports for full + incremental reindex."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import logging
+ from dataclasses import dataclass
+ from datetime import UTC, datetime
+ from pathlib import Path
+
+ from code_context.domain.models import IndexEntry, StaleSet, SymbolDef
+ from code_context.domain.ports import (
+     Chunker,
+     CodeSource,
+     EmbeddingsProvider,
+     GitSource,
+     KeywordIndex,
+     SymbolIndex,
+     VectorStore,
+ )
+
+ log = logging.getLogger(__name__)
+
+ _BATCH_SIZE = 64
+ _CURRENT_FILE = "current.json"
+ # v1: original schema (no file_hashes).
+ # v2: Sprint 6 — adds file_hashes for incremental reindex.
+ _VERSION = 2
+
+
+ @dataclass
+ class IndexerUseCase:
+     cache_dir: Path
+     repo_root: Path
+     embeddings: EmbeddingsProvider
+     vector_store: VectorStore
+     keyword_index: KeywordIndex
+     symbol_index: SymbolIndex
+     chunker: Chunker
+     code_source: CodeSource
+     git_source: GitSource
+     include_extensions: list[str]
+     max_file_bytes: int = 1_048_576
+
+     # ---------- public ----------
+
+     def dirty_set(self) -> StaleSet:
+         """Verdict that drives Sprint 6's incremental reindex.
+
+         Returns a StaleSet whose `full_reindex_required` is True for any
+         of these blow-it-all-away conditions: no current index, no git
+         repo, metadata schema older than v2 (i.e. file_hashes absent),
+         or any global version (embeddings model id, chunker version,
+         keyword/symbol index version) changed since the last index.
+         Otherwise compares the per-file content SHA of every
+         currently-indexable file against `metadata.file_hashes`;
+         mismatches go to `dirty_files`, vanished entries go to
+         `deleted_files`. Both empty + flag False = "no work" steady state.
+         """
+         active = self._current_metadata()
+         if active is None:
+             return StaleSet(full_reindex_required=True, reason="no current index")
+         if not self.git_source.is_repo(self.repo_root):
+             return StaleSet(full_reindex_required=True, reason="not a git repo")
+         if active.get("version", 1) < _VERSION:
+             return StaleSet(
+                 full_reindex_required=True,
+                 reason="metadata schema upgrade (v1 → v2)",
+             )
+         if active.get("embeddings_model") != self.embeddings.model_id:
+             return StaleSet(full_reindex_required=True, reason="embeddings_model changed")
+         if active.get("chunker_version") != self.chunker.version:
+             return StaleSet(full_reindex_required=True, reason="chunker_version changed")
+         if active.get("keyword_version") != self.keyword_index.version:
+             return StaleSet(full_reindex_required=True, reason="keyword_version changed")
+         if active.get("symbol_version") != self.symbol_index.version:
+             return StaleSet(full_reindex_required=True, reason="symbol_version changed")
+
+         prior_hashes: dict[str, str] = active.get("file_hashes") or {}
+         files = self.code_source.list_files(
+             self.repo_root, self.include_extensions, self.max_file_bytes
+         )
+         current_paths_rel: set[str] = set()
+         dirty: list[Path] = []
+         for f in files:
+             rel = f.relative_to(self.repo_root).as_posix()
+             current_paths_rel.add(rel)
+             try:
+                 content = self.code_source.read(f)
+             except (OSError, UnicodeDecodeError):
+                 # Unreadable now — skip; if it was indexed before, the next
+                 # full reindex picks it up. Don't mark as dirty (avoids a
+                 # poison-pill loop where a permanently-broken file forces
+                 # repeated incremental runs).
+                 continue
+             sha = hashlib.sha256(content.encode("utf-8")).hexdigest()
+             if prior_hashes.get(rel) != sha:
+                 dirty.append(f)
+
+         deleted = tuple(p for p in prior_hashes if p not in current_paths_rel)
+
+         return StaleSet(
+             full_reindex_required=False,
+             reason=f"{len(dirty)} dirty, {len(deleted)} deleted",
+             dirty_files=tuple(dirty),
+             deleted_files=deleted,
+         )
+
+     def is_stale(self) -> bool:
+         """Thin wrapper kept so existing CLI / composition callers work.
+
+         Returns True when dirty_set's verdict is anything other than
+         the steady-state "no work". Sprint 6 retired the head_sha
+         global invalidator: changing HEAD without modifying any indexed
+         file no longer triggers a reindex (per-file SHA tracks content
+         truth, not commit position).
+         """
+         s = self.dirty_set()
+         return s.full_reindex_required or bool(s.dirty_files) or bool(s.deleted_files)
+
+     def run_incremental(self, stale: StaleSet) -> Path:
+         """Re-embed dirty files; purge deleted files; persist a new index dir.
+
+         The caller (composition root) is responsible for the atomic swap
+         of current.json after this returns — same contract as run().
+
+         When `stale.full_reindex_required` is True, falls back to
+         `self.run()` (the file lists are advisory in that mode).
+         Otherwise:
+         1. Loads the active index into the three stores. Mutations stay
+            in-memory (the SQLite adapters' load() copies disk → :memory:
+            specifically so this step is safe).
+         2. Drops every row whose path is in `stale.deleted_files`.
+         3. For each path in `stale.dirty_files`: drops its old rows from
+            every store, then re-chunks + re-embeds + re-extracts symbols
+            from the current content.
+         4. Persists every store to a fresh index dir.
+         5. Stamps metadata: file_hashes copied forward from the prior
+            run, with deletes removed and dirties updated. n_files derives
+            from len(file_hashes) so the count stays honest.
+         """
+         if stale.full_reindex_required:
+             return self.run()
+
+         active = self.current_index_dir()
+         prior = self._current_metadata()
+         if active is None or prior is None:
+             return self.run()
+
+         log.info("indexer-incremental: %s", stale.reason)
+
+         self.vector_store.load(active)
+         self.keyword_index.load(active)
+         self.symbol_index.load(active)
+
+         for path in stale.deleted_files:
+             self.vector_store.delete_by_path(path)
+             self.keyword_index.delete_by_path(path)
+             self.symbol_index.delete_by_path(path)
+
+         new_file_hashes: dict[str, str] = dict(prior.get("file_hashes") or {})
+         for path in stale.deleted_files:
+             new_file_hashes.pop(path, None)
+
+         new_chunks: list = []
+         new_defs: list[SymbolDef] = []
+         for f in stale.dirty_files:
+             rel = f.relative_to(self.repo_root).as_posix()
+             self.vector_store.delete_by_path(rel)
+             self.keyword_index.delete_by_path(rel)
+             self.symbol_index.delete_by_path(rel)
+             try:
+                 content = self.code_source.read(f)
+             except (OSError, UnicodeDecodeError) as exc:
+                 log.warning("indexer-incremental: skipping %s (%s)", rel, exc)
+                 new_file_hashes.pop(rel, None)
+                 continue
+             new_file_hashes[rel] = hashlib.sha256(content.encode("utf-8")).hexdigest()
+             for chunk in self.chunker.chunk(content, rel):
+                 new_chunks.append(chunk)
+             extractor = getattr(self.chunker, "extract_definitions", None)
+             if extractor is not None:
+                 try:
+                     new_defs.extend(extractor(content, rel))
+                 except Exception as exc:  # noqa: BLE001 - same policy as run()
+                     log.warning(
+                         "indexer-incremental: symbol extract failed for %s (%s)",
+                         rel,
+                         exc,
+                     )
+
+         new_entries: list[IndexEntry] = []
+         for i in range(0, len(new_chunks), _BATCH_SIZE):
+             batch = new_chunks[i : i + _BATCH_SIZE]
+             vectors = self.embeddings.embed([c.snippet for c in batch])
+             for chunk, vec in zip(batch, vectors, strict=True):
+                 new_entries.append(IndexEntry(chunk=chunk, vector=vec))
+
+         self.vector_store.add(new_entries)
+         self.keyword_index.add(new_entries)
+         self.symbol_index.add_definitions(new_defs)
+         ref_rows = [(c.path, c.line_start, c.snippet) for c in new_chunks]
+         self.symbol_index.add_references(ref_rows)
+
+         head = self.git_source.head_sha(self.repo_root) or "no-git"
+         new_dir_name = f"index-{head[:12]}-{datetime.now(UTC).strftime('%Y%m%dT%H%M%S%f')}"
+         new_dir = self.cache_dir / new_dir_name
+         new_dir.mkdir(parents=True, exist_ok=True)
+
+         self.vector_store.persist(new_dir)
+         self.keyword_index.persist(new_dir)
+         self.symbol_index.persist(new_dir)
+
+         meta = {
+             "version": _VERSION,
+             "head_sha": head,
+             "indexed_at": datetime.now(UTC).isoformat(),
+             "embeddings_model": self.embeddings.model_id,
+             "embeddings_dimension": self.embeddings.dimension,
+             "chunker_version": self.chunker.version,
+             "keyword_version": self.keyword_index.version,
+             "symbol_version": self.symbol_index.version,
+             # n_chunks_added only counts what changed in this run; the
+             # store's true total is opaque from the use case's vantage
+             # point. Sprint 7 can wire a richer accounting if needed.
+             "n_chunks_added": len(new_entries),
+             "n_files": len(new_file_hashes),
+             "file_hashes": new_file_hashes,
+         }
+         (new_dir / "metadata.json").write_text(json.dumps(meta, indent=2))
+
+         return new_dir
+
+     def run(self) -> Path:
+         """Full reindex. Returns the new index directory path.
+
+         The caller (composition root) is responsible for the atomic swap
+         of current.json after this returns.
+         """
+         files = self.code_source.list_files(
+             self.repo_root, self.include_extensions, self.max_file_bytes
+         )
+         log.info("indexer: reindexing %d files", len(files))
+
+         all_entries: list[IndexEntry] = []
+         all_defs: list[SymbolDef] = []
+         # Collect chunks first so we can batch-embed.
+         chunks_with_paths: list = []
+         # Per-file SHA stamped into metadata so dirty_set() has a baseline
+         # for the next run. Computed inline so we don't re-read every file.
+         file_hashes: dict[str, str] = {}
+         for f in files:
+             try:
+                 content = self.code_source.read(f)
+             except (OSError, UnicodeDecodeError) as exc:
+                 log.warning("indexer: skipping %s (%s)", f, exc)
+                 continue
+             rel = f.relative_to(self.repo_root).as_posix()
+             file_hashes[rel] = hashlib.sha256(content.encode("utf-8")).hexdigest()
+             for chunk in self.chunker.chunk(content, rel):
+                 chunks_with_paths.append(chunk)
+             # Symbol extraction — only chunkers that expose it (TreeSitterChunker).
+             extractor = getattr(self.chunker, "extract_definitions", None)
+             if extractor is not None:
+                 try:
+                     all_defs.extend(extractor(content, rel))
+                 except Exception as exc:  # noqa: BLE001 - extractor failure must not abort indexing
+                     log.warning("indexer: symbol extract failed for %s (%s)", rel, exc)
+
+         # Batch-embed.
+         for i in range(0, len(chunks_with_paths), _BATCH_SIZE):
+             batch = chunks_with_paths[i : i + _BATCH_SIZE]
+             vectors = self.embeddings.embed([c.snippet for c in batch])
+             for chunk, vec in zip(batch, vectors, strict=True):
+                 all_entries.append(IndexEntry(chunk=chunk, vector=vec))
+
+         # Reset and add.
+         head = self.git_source.head_sha(self.repo_root) or "no-git"
+         new_dir_name = f"index-{head[:12]}-{datetime.now(UTC).strftime('%Y%m%dT%H%M%S%f')}"
+         new_dir = self.cache_dir / new_dir_name
+         new_dir.mkdir(parents=True, exist_ok=True)
+
+         self.vector_store.add(all_entries)
+         self.vector_store.persist(new_dir)
+
+         self.keyword_index.add(all_entries)
+         self.keyword_index.persist(new_dir)
+
+         self.symbol_index.add_definitions(all_defs)
+         # Feed chunk snippets to the references FTS5 table so find_references
+         # has rows to match against (definitions alone are not enough — a
+         # symbol's call sites live in the chunk text, not in the defs table).
+         ref_rows = [(c.path, c.line_start, c.snippet) for c in chunks_with_paths]
+         self.symbol_index.add_references(ref_rows)
+         self.symbol_index.persist(new_dir)
+
+         meta = {
+             "version": _VERSION,
+             "head_sha": head,
+             "indexed_at": datetime.now(UTC).isoformat(),
+             "embeddings_model": self.embeddings.model_id,
+             "embeddings_dimension": self.embeddings.dimension,
+             "chunker_version": self.chunker.version,
+             "keyword_version": self.keyword_index.version,
+             "symbol_version": self.symbol_index.version,
+             "n_chunks": len(all_entries),
+             "n_files": len(file_hashes),
+             "file_hashes": file_hashes,
+         }
+         (new_dir / "metadata.json").write_text(json.dumps(meta, indent=2))
+
+         return new_dir
+
+     def current_index_dir(self) -> Path | None:
+         current = self._read_current()
+         if current is None:
+             return None
+         return self.cache_dir / current["active"]
+
+     # ---------- internal ----------
+
+     def _read_current(self) -> dict | None:
+         cur = self.cache_dir / _CURRENT_FILE
+         if not cur.exists():
+             return None
+         return json.loads(cur.read_text())
+
+     def _current_metadata(self) -> dict | None:
+         cur = self._read_current()
+         if cur is None:
+             return None
+         meta_path = self.cache_dir / cur["active"] / "metadata.json"
+         if not meta_path.exists():
+             return None
+         return json.loads(meta_path.read_text())
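The run() / run_incremental() contract above leaves the current.json swap to the caller. A minimal sketch of one dirty-check cycle as a composition root might drive it: swap_current here is a hypothetical stand-in for the wheel's atomic_swap_current (imported by server.py further down in this diff), while the IndexerUseCase methods and StaleSet fields are the ones defined above.

import json
from pathlib import Path


def swap_current(cache_dir: Path, new_dir: Path) -> None:
    # Hypothetical stand-in for _composition.atomic_swap_current: write the
    # pointer to a temp file, then rename it into place. Path.replace() is
    # atomic on POSIX, so readers never observe a half-written current.json.
    tmp = cache_dir / "current.json.tmp"
    tmp.write_text(json.dumps({"active": new_dir.name}))
    tmp.replace(cache_dir / "current.json")


def reindex_cycle(indexer) -> Path | None:
    # indexer: an IndexerUseCase wired with real adapters (not shown here).
    stale = indexer.dirty_set()
    if not (stale.full_reindex_required or stale.dirty_files or stale.deleted_files):
        return None  # steady state: nothing to do
    new_dir = (
        indexer.run() if stale.full_reindex_required else indexer.run_incremental(stale)
    )
    swap_current(indexer.cache_dir, new_dir)  # publish only after persist succeeded
    return new_dir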
code_context/domain/use_cases/recent_changes.py ADDED
@@ -0,0 +1,36 @@
+ """RecentChangesUseCase — direct delegation to GitSource with no-repo fallback."""
+
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass
+ from datetime import UTC, datetime, timedelta
+ from pathlib import Path
+
+ from code_context.domain.models import Change
+ from code_context.domain.ports import GitSource
+
+ log = logging.getLogger(__name__)
+
+ _DEFAULT_LOOKBACK_DAYS = 7
+
+
+ @dataclass
+ class RecentChangesUseCase:
+     git_source: GitSource
+     repo_root: Path
+
+     def run(
+         self,
+         since: datetime | None = None,
+         paths: list[str] | None = None,
+         max_count: int = 20,
+     ) -> list[Change]:
+         if not self.git_source.is_repo(self.repo_root):
+             log.warning("recent_changes: %s is not a git repo; returning []", self.repo_root)
+             return []
+         if since is None:
+             since = datetime.now(UTC) - timedelta(days=_DEFAULT_LOOKBACK_DAYS)
+         return self.git_source.commits(
+             self.repo_root, since=since, paths=paths, max_count=max_count
+         )
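For orientation, a hypothetical call pattern against this use case. The git_cli instance is assumed to be whichever GitSource adapter the composition root wires in (this wheel ships a CLI-backed one in adapters/driven/git_source_cli.py); its construction is not shown in this diff.

from datetime import UTC, datetime, timedelta
from pathlib import Path

# git_cli: a GitSource adapter instance (assumed; construction not shown).
uc = RecentChangesUseCase(git_source=git_cli, repo_root=Path("/work/myrepo"))

# Default window: last 7 days, capped at 20 commits.
changes = uc.run()

# Narrowed: last 48 hours, commits touching src/ only, at most 5.
recent = uc.run(
    since=datetime.now(UTC) - timedelta(hours=48),
    paths=["src/"],
    max_count=5,
)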
code_context/domain/use_cases/search_repo.py ADDED
@@ -0,0 +1,131 @@
+ """SearchRepoUseCase — hybrid retrieval pipeline.
+
+ vector + keyword are fused via Reciprocal Rank Fusion (RRF). If a
+ reranker is supplied, it re-scores the fused top-N. Returns top_k
+ SearchResults with the fused or reranked score.
+
+ Sprint 7: optional `bus` + `reload_callback` give the use case a
+ "stale-aware" mode. On each `.run()` call, if the bus' generation has
+ advanced since the last reload, the callback fires (typically
+ re-loading the vector / keyword / symbol stores from `current.json`'s
+ new active dir) before serving the query. Implemented as a single
+ int compare in the hot path; legacy callers (no bus, no callback)
+ incur zero overhead.
+ """
+
+ from __future__ import annotations
+
+ import re
+ from collections.abc import Callable
+ from dataclasses import dataclass, field
+
+ from code_context.domain.index_bus import IndexUpdateBus
+ from code_context.domain.models import IndexEntry, SearchResult
+ from code_context.domain.ports import EmbeddingsProvider, KeywordIndex, Reranker, VectorStore
+
+ _STRUCTURAL_RE = re.compile(
+     r"^\s*(def |class |function |func |fn |export |const |interface |type |struct )"
+ )
+ _WHY_MAX_LEN = 80
+ # Bumped from 2 in v0.1.x — RRF benefits from a wider pool because
+ # entries unique to one ranker still feed the fusion.
+ _OVER_FETCH_MULTIPLIER = 3
+ # Canonical Reciprocal Rank Fusion constant from the original paper.
+ _RRF_K = 60
+
+
+ @dataclass
+ class SearchRepoUseCase:
+     embeddings: EmbeddingsProvider
+     vector_store: VectorStore
+     keyword_index: KeywordIndex
+     reranker: Reranker | None = None
+     bus: IndexUpdateBus | None = None
+     reload_callback: Callable[[], None] | None = None
+     # Initialized to -1 so the very first call (bus.generation == 0)
+     # also triggers a reload — covers the cold-start case where the
+     # bg indexer hasn't yet published a swap but the active index dir
+     # might already be on disk and unloaded.
+     _last_seen_generation: int = field(default=-1, init=False, repr=False)
+
+     def run(
+         self,
+         query: str,
+         top_k: int = 5,
+         scope: str | None = None,
+     ) -> list[SearchResult]:
+         self._reload_if_swapped()
+         pool = top_k * _OVER_FETCH_MULTIPLIER
+         # 1. vector
+         query_vec = self.embeddings.embed([query])[0]
+         v_hits = self.vector_store.search(query_vec, k=pool)
+         # 2. keyword
+         k_hits = self.keyword_index.search(query, k=pool)
+         # 3. fuse via RRF
+         fused = _rrf_fuse(v_hits, k_hits, k_constant=_RRF_K)
+         if scope:
+             fused = [(entry, score) for entry, score in fused if entry.chunk.path.startswith(scope)]
+         # 4. optional rerank on the top of the fused pool
+         if self.reranker is not None and fused:
+             rerank_pool = fused[:pool]  # re-score the whole over-fetched pool
+             fused = self.reranker.rerank(query, rerank_pool, k=top_k)
+         else:
+             fused = fused[:top_k]
+         return [self._to_result(e, s) for e, s in fused]
+
+     def _reload_if_swapped(self) -> None:
+         """Refresh in-memory store handles if the bg indexer published a
+         new index dir since our last call. No-op for legacy callers
+         (bus is None). Reload exceptions propagate up — better to fail
+         loud than silently serve stale results — and the failed reload
+         does NOT update `_last_seen_generation`, so the next call retries.
+         """
+         if self.bus is None or self.reload_callback is None:
+             return
+         gen = self.bus.generation
+         if gen == self._last_seen_generation:
+             return
+         self.reload_callback()
+         # Only mark as seen AFTER a successful reload, so a transient
+         # failure (e.g. disk hiccup) gets retried on the next query.
+         self._last_seen_generation = gen
+
+     @staticmethod
+     def _to_result(entry: IndexEntry, score: float) -> SearchResult:
+         return SearchResult(
+             path=entry.chunk.path,
+             lines=(entry.chunk.line_start, entry.chunk.line_end),
+             snippet=entry.chunk.snippet,
+             score=float(score),
+             why=_compute_why(entry.chunk.snippet),
+         )
+
+
+ def _rrf_fuse(
+     a: list[tuple[IndexEntry, float]],
+     b: list[tuple[IndexEntry, float]],
+     k_constant: int = 60,
+ ) -> list[tuple[IndexEntry, float]]:
+     """Reciprocal Rank Fusion. Identifies entries by chunk.path + line range."""
+     scores: dict[tuple[str, int, int], float] = {}
+     entry_by_key: dict[tuple[str, int, int], IndexEntry] = {}
+     for hits in (a, b):
+         for rank, (entry, _) in enumerate(hits):
+             c = entry.chunk
+             key = (c.path, c.line_start, c.line_end)
+             scores[key] = scores.get(key, 0.0) + 1.0 / (k_constant + rank + 1)
+             entry_by_key.setdefault(key, entry)
+     items = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
+     return [(entry_by_key[key], score) for key, score in items]
+
+
+ def _compute_why(snippet: str) -> str:
+     """Pick a one-line description from the snippet."""
+     for line in snippet.splitlines():
+         if _STRUCTURAL_RE.match(line):
+             return line.strip()[:_WHY_MAX_LEN]
+     for line in snippet.splitlines():
+         stripped = line.strip()
+         if stripped:
+             return stripped[:_WHY_MAX_LEN]
+     return ""
code_context/server.py ADDED
@@ -0,0 +1,151 @@
+ """code-context-server entry: composition root + MCP stdio runner.
+
+ Sprint 7 changes the startup shape:
+
+ - **Foreground**: build the runtime, fast-load whatever index exists
+   on disk (no synchronous reindex), register MCP tools, run stdio.
+   Total time on a previously-indexed repo: ~1 s (model load + npy +
+   2× sqlite-to-memory). On a cache-cold repo: <100 ms (the foreground
+   has nothing to load yet; first queries return empty until bg
+   finishes).
+ - **Background**: a BackgroundIndexer daemon thread runs dirty_set +
+   run_incremental (or full reindex) and publishes swap events to the
+   IndexUpdateBus. SearchRepoUseCase reloads its store handles on the
+   next query after each swap, transparently.
+
+ The user pays the cold-reindex cost only on first install (or after
+ a model upgrade); ongoing edit cycles are sub-10 s and run while
+ Claude is asking other questions.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import sys
+
+ from mcp.server import Server
+ from mcp.server.stdio import stdio_server
+
+ from code_context._background import BackgroundIndexer
+ from code_context._composition import (
+     atomic_swap_current,
+     build_indexer_and_store,
+     build_use_cases,
+     ensure_index,
+     fast_load_existing_index,
+     make_reload_callback,
+     setup_logging,
+ )
+ from code_context._watcher import RepoWatcher
+ from code_context.adapters.driving.mcp_server import register
+ from code_context.config import Config, load_config
+ from code_context.domain.index_bus import IndexUpdateBus
+
+ log = logging.getLogger("code_context")
+
+
+ async def _run_server(cfg: Config) -> None:
+     indexer, store, embeddings, keyword_index, symbol_index = build_indexer_and_store(cfg)
+     bus = IndexUpdateBus()
+
+     # Foreground: load whatever index exists right now. No reindex. If the
+     # cache is empty, queries return [] until the bg thread finishes the
+     # first reindex; SearchRepoUseCase's bus-driven reload makes that
+     # transition transparent.
+     loaded = fast_load_existing_index(indexer, store, keyword_index, symbol_index)
+     if loaded:
+         log.info("loaded existing index from %s", indexer.current_index_dir())
+     elif not cfg.bg_reindex:
+         # Background reindex disabled (CC_BG_REINDEX=off) AND no index on
+         # disk. Fall back to the v0.7-style synchronous reindex so the
+         # server is functional after startup.
+         log.info("no existing index and bg_reindex=off; running synchronous reindex")
+         ensure_index(cfg, indexer, store, keyword_index, symbol_index)
+     else:
+         log.info(
+             "no existing index — first queries will return [] until the "
+             "background reindex finishes (~%d s on a typical repo)",
+             60,
+         )
+
+     reload_cb = make_reload_callback(indexer, store, keyword_index, symbol_index)
+     search, recent, summary, find_def, find_ref, file_tree, explain_diff = build_use_cases(
+         cfg,
+         indexer,
+         store,
+         embeddings,
+         keyword_index,
+         symbol_index,
+         bus=bus,
+         reload_callback=reload_cb,
+     )
+
+     bg = None
+     if cfg.bg_reindex:
+         bg = BackgroundIndexer(
+             indexer=indexer,
+             swap=lambda new_dir: atomic_swap_current(cfg, new_dir),
+             bus=bus,
+             idle_seconds=cfg.bg_idle_seconds,
+         )
+         bg.start()
+         bg.trigger()  # kick off initial dirty_set + (full or incremental) reindex
+         log.info("background indexer started (idle=%.2fs)", cfg.bg_idle_seconds)
+
+     watcher = None
+     if cfg.watch and bg is not None:
+         watcher = RepoWatcher(
+             root=cfg.repo_root,
+             on_change=bg.trigger,
+             debounce_ms=cfg.watch_debounce_ms,
+         )
+         watcher.start()
+         log.info(
+             "repo watcher armed (CC_WATCH=on, debounce=%dms)",
+             cfg.watch_debounce_ms,
+         )
+     elif cfg.watch and bg is None:
+         log.warning(
+             "CC_WATCH=on requires CC_BG_REINDEX=on; watcher not started "
+             "(without the bg thread there's nothing to trigger)"
+         )
+
+     server = Server("code-context")
+     register(
+         server,
+         search_repo=search,
+         recent_changes=recent,
+         get_summary=summary,
+         find_definition=find_def,
+         find_references=find_ref,
+         get_file_tree=file_tree,
+         explain_diff=explain_diff,
+     )
+
+     try:
+         async with stdio_server() as (read_stream, write_stream):
+             await server.run(read_stream, write_stream, server.create_initialization_options())
+     finally:
+         if watcher is not None:
+             log.info("stopping repo watcher")
+             watcher.stop()
+         if bg is not None:
+             log.info("stopping background indexer")
+             bg.stop(timeout=10.0)
+
+
+ def main() -> int:
+     cfg = load_config()
+     setup_logging(cfg)
+     log.info("starting code-context-server (repo=%s)", cfg.repo_root)
+     try:
+         asyncio.run(_run_server(cfg))
+         return 0
+     except KeyboardInterrupt:
+         log.info("server interrupted; exiting")
+         return 130
+
+
+ if __name__ == "__main__":
+     sys.exit(main())
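code_context/domain/index_bus.py is part of this wheel (+52 lines) but not expanded in this diff. The contract the two sides above rely on is small: BackgroundIndexer publishes after each swap, and SearchRepoUseCase compares bus.generation as a single int read. A minimal sketch of that contract, assuming nothing beyond what the call sites show; the real index_bus.py may carry more (listeners, event payloads), and publish_swap is a hypothetical method name.

import threading


class IndexUpdateBus:
    """Monotonic swap counter: the publisher bumps it, readers compare it.

    Sketch only; the packaged implementation is not shown in this diff.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._generation = 0

    @property
    def generation(self) -> int:
        # Single int read under a lock; cheap enough for the query hot path.
        with self._lock:
            return self._generation

    def publish_swap(self) -> None:
        # Hypothetical name: called by the background indexer after
        # atomic_swap_current() repoints current.json at the new index dir.
        with self._lock:
            self._generation += 1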