code-context-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. code_context/__init__.py +3 -0
  2. code_context/_background.py +93 -0
  3. code_context/_composition.py +425 -0
  4. code_context/_watcher.py +89 -0
  5. code_context/adapters/__init__.py +0 -0
  6. code_context/adapters/driven/__init__.py +0 -0
  7. code_context/adapters/driven/chunker_dispatcher.py +43 -0
  8. code_context/adapters/driven/chunker_line.py +54 -0
  9. code_context/adapters/driven/chunker_treesitter.py +215 -0
  10. code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
  11. code_context/adapters/driven/code_source_fs.py +122 -0
  12. code_context/adapters/driven/embeddings_local.py +111 -0
  13. code_context/adapters/driven/embeddings_openai.py +58 -0
  14. code_context/adapters/driven/git_source_cli.py +211 -0
  15. code_context/adapters/driven/introspector_fs.py +224 -0
  16. code_context/adapters/driven/keyword_index_sqlite.py +206 -0
  17. code_context/adapters/driven/reranker_crossencoder.py +61 -0
  18. code_context/adapters/driven/symbol_index_sqlite.py +264 -0
  19. code_context/adapters/driven/vector_store_numpy.py +119 -0
  20. code_context/adapters/driving/__init__.py +0 -0
  21. code_context/adapters/driving/mcp_server.py +365 -0
  22. code_context/cli.py +161 -0
  23. code_context/config.py +114 -0
  24. code_context/domain/__init__.py +0 -0
  25. code_context/domain/index_bus.py +52 -0
  26. code_context/domain/models.py +140 -0
  27. code_context/domain/ports.py +205 -0
  28. code_context/domain/use_cases/__init__.py +0 -0
  29. code_context/domain/use_cases/explain_diff.py +98 -0
  30. code_context/domain/use_cases/find_definition.py +30 -0
  31. code_context/domain/use_cases/find_references.py +22 -0
  32. code_context/domain/use_cases/get_file_tree.py +36 -0
  33. code_context/domain/use_cases/get_summary.py +24 -0
  34. code_context/domain/use_cases/indexer.py +336 -0
  35. code_context/domain/use_cases/recent_changes.py +36 -0
  36. code_context/domain/use_cases/search_repo.py +131 -0
  37. code_context/server.py +151 -0
  38. code_context_mcp-1.0.0.dist-info/METADATA +181 -0
  39. code_context_mcp-1.0.0.dist-info/RECORD +43 -0
  40. code_context_mcp-1.0.0.dist-info/WHEEL +5 -0
  41. code_context_mcp-1.0.0.dist-info/entry_points.txt +3 -0
  42. code_context_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  43. code_context_mcp-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3 @@
1
+ """code-context: MCP server with local RAG for Claude Code repo context."""
2
+
3
+ __version__ = "1.0.0"
@@ -0,0 +1,93 @@
1
+ """BackgroundIndexer — runs reindex on a worker thread, posts to the bus.
2
+
3
+ Single-threaded coordinator. External code calls `.trigger()` to ask
4
+ for a reindex; the thread coalesces multiple triggers into one job
5
+ (an `Event` is set/cleared, not a queue), so a 5-event burst from a
6
+ file watcher saving in rapid succession produces ONE reindex, not
7
+ five. On completion, the configured `swap` callback runs first
8
+ (typically `_atomic_swap_current` from the composition root) and
9
+ then `bus.publish_swap(new_dir)` notifies any subscriber.
10
+
11
+ Errors in the indexer are caught and logged at ERROR level; the
12
+ worker keeps running so the next trigger has a chance. This matches
13
+ the philosophy of "background reindex must never crash the MCP
14
+ server."
15
+
16
+ The thread is daemonic so it doesn't block process exit if `.stop()`
17
+ is missed (e.g., a hard SIGINT before the main loop's finally
18
+ block). `.stop()` itself sets a flag and joins with a 5 s timeout
19
+ by default; longer for the ~1 s default `idle_seconds`.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ import threading
26
+ from collections.abc import Callable
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ from code_context.domain.index_bus import IndexUpdateBus
31
+
32
+ log = logging.getLogger(__name__)
33
+
34
+
35
class BackgroundIndexer(threading.Thread):
    """Worker thread that coalesces reindex requests and publishes swaps.

    `trigger()` sets a sticky Event; the worker consumes it, reindexes
    (full or incremental depending on the dirty set), invokes the swap
    callback, then notifies the bus. Failures are logged and swallowed
    so the thread survives to service the next trigger.
    """

    def __init__(
        self,
        *,
        indexer: Any,  # IndexerUseCase; kept untyped to avoid a circular import
        swap: Callable[[Path], None],
        bus: IndexUpdateBus,
        idle_seconds: float = 1.0,
    ) -> None:
        super().__init__(name="code-context-bg-indexer", daemon=True)
        self._indexer = indexer
        self._swap = swap
        self._bus = bus
        self._idle = idle_seconds
        self._wake = threading.Event()
        self._stop_event = threading.Event()

    def trigger(self) -> None:
        """Request a reindex; bursts within one idle window collapse into one job."""
        self._wake.set()

    def stop(self, timeout: float = 5.0) -> None:
        """Ask the worker to exit, then join for at most `timeout` seconds."""
        self._stop_event.set()
        self._wake.set()  # unblock the wait() inside run()
        self.join(timeout=timeout)

    def run(self) -> None:
        while not self._stop_event.is_set():
            self._wake.wait()
            self._wake.clear()
            if self._stop_event.is_set():
                return
            try:
                self._reindex_once()
            except Exception:  # noqa: BLE001 - bg failure must not kill the thread
                log.exception("background reindex failed; will retry on next trigger")
            # Sleep out the idle window (interruptible by stop) so rapid
            # triggers coalesce into a single follow-up job.
            self._stop_event.wait(self._idle)

    def _reindex_once(self) -> None:
        """Run one full or incremental reindex if the dirty set shows work."""
        stale = self._indexer.dirty_set()
        if not (stale.full_reindex_required or stale.dirty_files or stale.deleted_files):
            return  # nothing changed since the last index
        if stale.full_reindex_required:
            fresh = self._indexer.run()
        else:
            fresh = self._indexer.run_incremental(stale)
        self._swap(fresh)
        self._bus.publish_swap(str(fresh))
        log.info("background reindex complete (%s) -> %s", stale.reason, fresh)
@@ -0,0 +1,425 @@
1
+ """Composition helpers shared by server.py and cli.py."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import sys
9
+ from collections.abc import Callable
10
+ from pathlib import Path
11
+
12
+ from code_context.adapters.driven.chunker_dispatcher import ChunkerDispatcher
13
+ from code_context.adapters.driven.chunker_line import LineChunker
14
+ from code_context.adapters.driven.chunker_treesitter import TreeSitterChunker
15
+ from code_context.adapters.driven.code_source_fs import FilesystemSource
16
+ from code_context.adapters.driven.embeddings_local import LocalST
17
+ from code_context.adapters.driven.git_source_cli import GitCliSource
18
+ from code_context.adapters.driven.introspector_fs import FilesystemIntrospector
19
+ from code_context.adapters.driven.keyword_index_sqlite import SqliteFTS5Index
20
+ from code_context.adapters.driven.reranker_crossencoder import CrossEncoderReranker
21
+ from code_context.adapters.driven.symbol_index_sqlite import SymbolIndexSqlite
22
+ from code_context.adapters.driven.vector_store_numpy import NumPyParquetStore
23
+ from code_context.config import Config
24
+ from code_context.domain.index_bus import IndexUpdateBus
25
+ from code_context.domain.models import StaleSet
26
+ from code_context.domain.ports import (
27
+ Chunker,
28
+ EmbeddingsProvider,
29
+ KeywordIndex,
30
+ Reranker,
31
+ SymbolIndex,
32
+ )
33
+ from code_context.domain.use_cases.explain_diff import ExplainDiffUseCase
34
+ from code_context.domain.use_cases.find_definition import FindDefinitionUseCase
35
+ from code_context.domain.use_cases.find_references import FindReferencesUseCase
36
+ from code_context.domain.use_cases.get_file_tree import GetFileTreeUseCase
37
+ from code_context.domain.use_cases.get_summary import GetSummaryUseCase
38
+ from code_context.domain.use_cases.indexer import IndexerUseCase
39
+ from code_context.domain.use_cases.recent_changes import RecentChangesUseCase
40
+ from code_context.domain.use_cases.search_repo import SearchRepoUseCase
41
+
42
+ log = logging.getLogger("code_context")
43
+
44
+
45
+ class _NullKeywordIndex:
46
+ """No-op keyword index for users who set CC_KEYWORD_INDEX=none.
47
+
48
+ Implements the KeywordIndex Protocol with search returning []. Lets the
49
+ hybrid pipeline degrade gracefully to vector-only without special-casing
50
+ in SearchRepoUseCase.
51
+ """
52
+
53
+ @property
54
+ def version(self) -> str:
55
+ return "null-v1"
56
+
57
+ def add(self, entries) -> None:
58
+ pass
59
+
60
+ def search(self, query: str, k: int):
61
+ return []
62
+
63
+ def delete_by_path(self, path: str) -> int:
64
+ return 0
65
+
66
+ def persist(self, path) -> None:
67
+ pass
68
+
69
+ def load(self, path) -> None:
70
+ pass
71
+
72
+
73
+ class _NullSymbolIndex:
74
+ """No-op symbol index for users who set CC_SYMBOL_INDEX=none.
75
+
76
+ Implements the SymbolIndex Protocol; find_definition/find_references
77
+ return []. Lets users disable the symbol pipeline without breaking
78
+ composition (e.g., on platforms where SQLite FTS5 misbehaves).
79
+ """
80
+
81
+ @property
82
+ def version(self) -> str:
83
+ return "null-symbol-v1"
84
+
85
+ def add_definitions(self, defs) -> None:
86
+ pass
87
+
88
+ def add_references(self, refs) -> None:
89
+ pass
90
+
91
+ def find_definition(self, name, language=None, max_count=5):
92
+ return []
93
+
94
+ def find_references(self, name, max_count=50):
95
+ return []
96
+
97
+ def delete_by_path(self, path: str) -> int:
98
+ return 0
99
+
100
+ def persist(self, path) -> None:
101
+ pass
102
+
103
+ def load(self, path) -> None:
104
+ pass
105
+
106
+
107
def build_embeddings(cfg: Config) -> EmbeddingsProvider:
    """Select the embeddings backend from config.

    "openai" requires OPENAI_API_KEY (hard exit with a logged error when
    missing) and is imported lazily so the optional dependency is only
    touched when requested. Any other value yields the local
    sentence-transformers provider.
    """
    if cfg.embeddings_provider != "openai":
        return LocalST(
            model_name=cfg.embeddings_model or "all-MiniLM-L6-v2",
            trust_remote_code=cfg.trust_remote_code,
        )
    if not cfg.openai_api_key:
        log.error("CC_EMBEDDINGS=openai but OPENAI_API_KEY is unset")
        sys.exit(1)
    from code_context.adapters.driven.embeddings_openai import OpenAIProvider

    return OpenAIProvider(
        model=cfg.embeddings_model or "text-embedding-3-small",
        api_key=cfg.openai_api_key,
    )
122
+
123
+
124
def build_chunker(cfg: Config) -> Chunker:
    """Pick the chunking strategy from cfg.chunker_strategy.

    "treesitter" (the v0.2.0+ default) wraps TreeSitterChunker with a
    LineChunker fallback for unsupported languages and parse errors;
    "line" is the legacy line-window chunker alone. Unknown values are
    logged as errors and degrade to "line" so a bad config value never
    crashes the composition root.
    """
    fallback = LineChunker(chunk_lines=cfg.chunk_lines, overlap=cfg.chunk_overlap)
    strategy = cfg.chunker_strategy
    if strategy == "treesitter":
        return ChunkerDispatcher(treesitter=TreeSitterChunker(), line=fallback)
    if strategy != "line":
        log.error("unknown CC_CHUNKER=%r; falling back to line", strategy)
    return fallback
139
+
140
+
141
def build_keyword_index(cfg: Config) -> KeywordIndex:
    """Pick the keyword-index backend: "sqlite", "none", or logged fallback.

    Unknown values are logged as errors and degrade to the SQLite FTS5
    implementation so a typo in CC_KEYWORD_INDEX never aborts startup.
    """
    strategy = cfg.keyword_strategy
    if strategy == "none":
        return _NullKeywordIndex()
    if strategy != "sqlite":
        log.error("unknown CC_KEYWORD_INDEX=%r; falling back to sqlite", strategy)
    return SqliteFTS5Index()
151
+
152
+
153
def build_symbol_index(cfg: Config) -> SymbolIndex:
    """Pick the symbol-index backend: "sqlite", "none", or logged fallback.

    Mirrors build_keyword_index: unknown values are logged and degrade
    to the SQLite implementation rather than crashing composition.
    """
    strategy = cfg.symbol_index_strategy
    if strategy == "none":
        return _NullSymbolIndex()
    if strategy != "sqlite":
        log.error("unknown CC_SYMBOL_INDEX=%r; falling back to sqlite", strategy)
    return SymbolIndexSqlite()
163
+
164
+
165
def build_reranker(cfg: Config) -> Reranker | None:
    """Return a cross-encoder reranker, or None when reranking is disabled."""
    if cfg.rerank:
        model = cfg.rerank_model or "cross-encoder/ms-marco-MiniLM-L-6-v2"
        return CrossEncoderReranker(model_name=model)
    return None
171
+
172
+
173
def build_indexer_and_store(
    cfg: Config,
) -> tuple[
    IndexerUseCase,
    NumPyParquetStore,
    EmbeddingsProvider,
    KeywordIndex,
    SymbolIndex,
]:
    """Construct the write-side object graph: the indexer plus its stores.

    Returns the indexer together with the vector/keyword/symbol stores
    and the embeddings provider so the caller can wire the read side to
    the very same instances. Ensures the per-repo cache directory exists
    before anything else touches it.
    """
    cfg.repo_cache_subdir().mkdir(parents=True, exist_ok=True)

    provider = build_embeddings(cfg)
    splitter = build_chunker(cfg)
    files = FilesystemSource()
    git = GitCliSource()
    vectors = NumPyParquetStore()
    keywords = build_keyword_index(cfg)
    symbols = build_symbol_index(cfg)
    writer = IndexerUseCase(
        cache_dir=cfg.repo_cache_subdir(),
        repo_root=cfg.repo_root,
        embeddings=provider,
        vector_store=vectors,
        keyword_index=keywords,
        symbol_index=symbols,
        chunker=splitter,
        code_source=files,
        git_source=git,
        include_extensions=cfg.include_extensions,
        max_file_bytes=cfg.max_file_bytes,
    )
    return writer, vectors, provider, keywords, symbols
205
+
206
+
207
def build_use_cases(
    cfg: Config,
    indexer: IndexerUseCase,
    store: NumPyParquetStore,
    embeddings: EmbeddingsProvider,
    keyword_index: KeywordIndex,
    symbol_index: SymbolIndex,
    bus: IndexUpdateBus | None = None,
    reload_callback: Callable[[], None] | None = None,
) -> tuple[
    SearchRepoUseCase,
    RecentChangesUseCase,
    GetSummaryUseCase,
    FindDefinitionUseCase,
    FindReferencesUseCase,
    GetFileTreeUseCase,
    ExplainDiffUseCase,
]:
    """Wire the seven read-side use cases from shared adapters.

    NOTE(review): `indexer` is accepted but not referenced by any use
    case below — presumably kept for signature stability; confirm with
    callers before removing.
    """
    git = GitCliSource()
    introspector = FilesystemIntrospector()
    files = FilesystemSource()
    splitter = build_chunker(cfg)
    cross_encoder = build_reranker(cfg)

    search = SearchRepoUseCase(
        embeddings=embeddings,
        vector_store=store,
        keyword_index=keyword_index,
        reranker=cross_encoder,
        bus=bus,
        reload_callback=reload_callback,
    )
    recent = RecentChangesUseCase(git_source=git, repo_root=cfg.repo_root)
    summary = GetSummaryUseCase(introspector=introspector, repo_root=cfg.repo_root)
    definitions = FindDefinitionUseCase(symbol_index=symbol_index)
    references = FindReferencesUseCase(symbol_index=symbol_index)
    tree = GetFileTreeUseCase(code_source=files, repo_root=cfg.repo_root)
    diff = ExplainDiffUseCase(
        chunker=splitter,
        code_source=files,
        git_source=git,
        repo_root=cfg.repo_root,
    )
    return search, recent, summary, definitions, references, tree, diff
251
+
252
+
253
def make_reload_callback(
    indexer: IndexerUseCase,
    store: NumPyParquetStore,
    keyword_index: KeywordIndex,
    symbol_index: SymbolIndex,
) -> Callable[[], None]:
    """Create the drift-reload closure that SearchRepoUseCase fires on bus ticks.

    The returned callable (which itself returns None) re-loads the
    vector, keyword, and symbol stores from whatever index directory is
    currently active. It is a no-op during cold start (no index has been
    published yet), and it tolerates a partially-written index: a
    FileNotFoundError from the keyword or symbol store is logged and
    left for the next swap notification to retry.
    """

    def _reload() -> None:
        target = indexer.current_index_dir()
        if target is None or not target.exists():
            # Cold start: the background indexer hasn't produced
            # anything yet, so there is nothing to load.
            return
        store.load(target)
        try:
            keyword_index.load(target)
            symbol_index.load(target)
        except FileNotFoundError:
            # Race between persist and swap: the swap was announced but
            # one of the store files isn't on disk yet; the next bus
            # tick will reload again.
            log.warning(
                "reload: keyword/symbol index missing in %s; will retry next swap",
                target,
            )

    return _reload
285
+
286
+
287
def fast_load_existing_index(
    indexer: IndexerUseCase,
    store: NumPyParquetStore,
    keyword_index: KeywordIndex,
    symbol_index: SymbolIndex,
) -> bool:
    """Reuse the on-disk index without triggering a reindex (Sprint 7).

    Returns True when all three stores load from the active directory;
    False when there is no active index or any store file is missing —
    the caller should then fall back to `ensure_index` or let the
    background indexer populate a fresh one.
    """
    target = indexer.current_index_dir()
    if target is None or not target.exists():
        return False
    try:
        # Same load order as ensure_index: vectors, then keyword, then symbol.
        for component in (store, keyword_index, symbol_index):
            component.load(target)
    except FileNotFoundError:
        return False
    return True
309
+
310
+
311
def atomic_swap_current(cfg: Config, new_dir: Path) -> None:
    """Atomically repoint current.json at `new_dir.name`.

    Writes the payload to a sibling ".json.tmp" file first, then
    os.replace()s it over current.json so a concurrent reader never
    observes a half-written file. Used as the background indexer's
    swap callback.
    """
    target = cfg.repo_cache_subdir() / "current.json"
    staging = target.with_suffix(".json.tmp")
    staging.write_text(json.dumps({"active": new_dir.name, "version": 1}))
    os.replace(staging, target)
325
+
326
+
327
+ def _lock_path(cfg: Config) -> Path:
328
+ cfg.repo_cache_subdir().mkdir(parents=True, exist_ok=True)
329
+ return cfg.repo_cache_subdir() / ".lock"
330
+
331
+
332
def safe_reindex(
    cfg: Config,
    indexer: IndexerUseCase,
    stale: StaleSet | None = None,
) -> Path:
    """Run reindex (full or incremental) protected by a cross-platform file lock.

    Acquires the lock or blocks for up to 5 min. Returns the path of the
    new index dir AND atomically swaps current.json to point at it.

    If `stale` is omitted or has `full_reindex_required=True`, runs the
    legacy full `indexer.run()`. Otherwise dispatches to
    `indexer.run_incremental(stale)` so only `stale.dirty_files` get
    re-embedded — the Sprint 6 win that turns a 1-2 minute edit-cycle
    reindex into <10s on a typical repo.

    Raises RuntimeError (chained from filelock.Timeout) when the lock
    cannot be acquired within 5 minutes.
    """
    from filelock import FileLock, Timeout

    # Hoist the path: _lock_path() also mkdirs the cache dir, which only
    # needs to happen once per call.
    lock_file = _lock_path(cfg)
    lock = FileLock(str(lock_file), timeout=300)
    try:
        with lock:
            log.info("acquired reindex lock at %s", lock_file)
            if stale is not None and not stale.full_reindex_required:
                new_dir = indexer.run_incremental(stale)
            else:
                new_dir = indexer.run()
            # Consistency fix: reuse the shared helper instead of
            # duplicating the tmp-write + os.replace swap inline.
            atomic_swap_current(cfg, new_dir)
            return new_dir
    except Timeout as exc:
        raise RuntimeError(
            f"could not acquire reindex lock at {lock_file} after 5 min; "
            "is another reindex running? if not, delete the .lock file and retry."
        ) from exc
368
+
369
+
370
def ensure_index(
    cfg: Config,
    indexer: IndexerUseCase,
    store: NumPyParquetStore,
    keyword_index: KeywordIndex,
    symbol_index: SymbolIndex,
) -> None:
    """Ensure the on-disk index is fresh, reusing it when possible.

    Sprint 6 routing — consult `indexer.dirty_set()` once, then:
    - nothing dirty and an active index exists → load it and return;
    - full reindex required → full `indexer.run()` via safe_reindex;
    - otherwise → `indexer.run_incremental(stale)`; only the
      `dirty_files` pay the embedding cost.

    Older caches missing keyword.sqlite (pre-Sprint-3) or symbols.sqlite
    (pre-Sprint-4) self-heal: the load raises FileNotFoundError, which
    triggers a full backfill reindex.
    """
    stale = indexer.dirty_set()
    has_work = (
        stale.full_reindex_required or bool(stale.dirty_files) or bool(stale.deleted_files)
    )
    if not has_work:
        current = indexer.current_index_dir()
        if current is not None:
            log.info("loading existing index from %s", current)
            store.load(current)
            try:
                keyword_index.load(current)
                symbol_index.load(current)
            except FileNotFoundError:
                # Old cache layout: rebuild everything to backfill the
                # missing keyword/symbol store.
                log.info(
                    "keyword or symbol index missing in %s; reindexing to backfill",
                    current,
                )
                fresh = safe_reindex(cfg, indexer)  # full
                store.load(fresh)
                keyword_index.load(fresh)
                symbol_index.load(fresh)
            return
        # No active index at all: fall through and build one.
    mode = "full" if stale.full_reindex_required else "incremental"
    log.info("ensure_index: %s — running %s reindex", stale.reason, mode)
    fresh = safe_reindex(cfg, indexer, stale=stale)
    store.load(fresh)
    keyword_index.load(fresh)
    symbol_index.load(fresh)
418
+
419
+
420
def setup_logging(cfg: Config) -> None:
    """Configure root logging to stderr at cfg.log_level."""
    logging.basicConfig(
        stream=sys.stderr,
        level=cfg.log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
@@ -0,0 +1,89 @@
1
+ """RepoWatcher — debounced file-system watcher that triggers reindex.
2
+
3
+ Lazily imports `watchdog` (it's an optional `[watch]` extra). Listens
4
+ for created/modified/deleted/moved events under `cfg.repo_root`,
5
+ debounces them with a configurable delay, and calls `on_change()`
6
+ once per quiet window.
7
+
8
+ If `watchdog` isn't installed, `start()` logs a warning and becomes a
9
+ no-op so users who set `CC_WATCH=on` without the extra get a clear
10
+ signal instead of a hard crash.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import threading
17
+ from collections.abc import Callable
18
+ from pathlib import Path
19
+
20
+ log = logging.getLogger(__name__)
21
+
22
+
23
class RepoWatcher:
    """Debounced filesystem watcher: one `on_change()` call per quiet window.

    `watchdog` is imported lazily inside start(); when the `[watch]`
    extra isn't installed, start() logs a warning and the watcher stays
    inert instead of crashing.
    """

    def __init__(
        self,
        root: Path,
        on_change: Callable[[], None],
        debounce_ms: int = 1000,
    ) -> None:
        self._root = root
        self._on_change = on_change
        # Clamp to >= 1 ms so a zero/negative config value can't break the math.
        self._debounce_s = max(debounce_ms, 1) / 1000.0
        self._timer_lock = threading.Lock()
        self._timer: threading.Timer | None = None
        self._observer = None  # watchdog Observer, created lazily in start()
        self._stopped = False

    def start(self) -> None:
        """Begin watching `root` recursively; no-op with a warning if watchdog is absent."""
        try:
            from watchdog.events import FileSystemEventHandler
            from watchdog.observers import Observer
        except ImportError as exc:
            log.warning(
                "watchdog not installed; CC_WATCH=on is a no-op (%s). "
                "Install code-context[watch] to enable live reindex on save.",
                exc,
            )
            return

        outer = self  # the handler below needs a reference back to this watcher

        class _Handler(FileSystemEventHandler):
            def on_any_event(self, _event) -> None:
                outer._on_event()

        observer = Observer()
        observer.schedule(_Handler(), str(self._root), recursive=True)
        observer.start()
        self._observer = observer
        log.info("repo watcher started for %s (debounce=%.2fs)", self._root, self._debounce_s)

    def stop(self) -> None:
        """Stop the observer and cancel any pending debounce timer."""
        self._stopped = True
        observer, self._observer = self._observer, None
        if observer is not None:
            observer.stop()
            observer.join(timeout=2.0)
        with self._timer_lock:
            pending, self._timer = self._timer, None
            if pending is not None:
                pending.cancel()

    def _on_event(self) -> None:
        """Restart the debounce timer — only the last event of a burst fires."""
        if self._stopped:
            return
        with self._timer_lock:
            if self._timer is not None:
                self._timer.cancel()
            timer = threading.Timer(self._debounce_s, self._fire)
            timer.daemon = True
            timer.start()
            self._timer = timer

    def _fire(self) -> None:
        """Invoke the callback, swallowing (and logging) any exception it raises."""
        if self._stopped:
            return
        try:
            self._on_change()
        except Exception:  # noqa: BLE001 - watcher must survive callback bugs
            log.exception("RepoWatcher on_change callback failed; will keep watching")
File without changes
File without changes
@@ -0,0 +1,43 @@
1
+ """ChunkerDispatcher — routes chunking by file extension.
2
+
3
+ Tree-sitter languages → TreeSitterChunker. Everything else → LineChunker.
4
+ If TreeSitterChunker returns [] (unsupported or parse error), LineChunker
5
+ takes over so we don't lose the file from the index.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+
13
+ from code_context.domain.models import Chunk, SymbolDef
14
+ from code_context.domain.ports import Chunker
15
+
16
# Extensions the tree-sitter chunker is expected to handle.
_TREESITTER_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".rs", ".cs"}


@dataclass
class ChunkerDispatcher:
    """Composite chunker: tree-sitter for known languages, line fallback.

    A tree-sitter result of [] (unsupported content or parse failure)
    falls through to the line chunker so no file drops out of the index.
    """

    treesitter: Chunker
    line: Chunker

    @property
    def version(self) -> str:
        # Embed both sub-chunker versions so changing either invalidates the cache.
        return f"dispatcher({self.treesitter.version}|{self.line.version})-v1"

    def chunk(self, content: str, path: str) -> list[Chunk]:
        if Path(path).suffix.lower() in _TREESITTER_EXTS:
            parsed = self.treesitter.chunk(content, path)
            if parsed:
                return parsed
        # Non-tree-sitter extension, or an empty parse: use the line chunker.
        return self.line.chunk(content, path)

    def extract_definitions(self, content: str, path: str) -> list[SymbolDef]:
        """Forward symbol extraction to the tree-sitter sub-chunker when it supports it."""
        extractor = getattr(self.treesitter, "extract_definitions", None)
        return [] if extractor is None else extractor(content, path)