code-context-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. code_context/__init__.py +3 -0
  2. code_context/_background.py +93 -0
  3. code_context/_composition.py +425 -0
  4. code_context/_watcher.py +89 -0
  5. code_context/adapters/__init__.py +0 -0
  6. code_context/adapters/driven/__init__.py +0 -0
  7. code_context/adapters/driven/chunker_dispatcher.py +43 -0
  8. code_context/adapters/driven/chunker_line.py +54 -0
  9. code_context/adapters/driven/chunker_treesitter.py +215 -0
  10. code_context/adapters/driven/chunker_treesitter_queries.py +111 -0
  11. code_context/adapters/driven/code_source_fs.py +122 -0
  12. code_context/adapters/driven/embeddings_local.py +111 -0
  13. code_context/adapters/driven/embeddings_openai.py +58 -0
  14. code_context/adapters/driven/git_source_cli.py +211 -0
  15. code_context/adapters/driven/introspector_fs.py +224 -0
  16. code_context/adapters/driven/keyword_index_sqlite.py +206 -0
  17. code_context/adapters/driven/reranker_crossencoder.py +61 -0
  18. code_context/adapters/driven/symbol_index_sqlite.py +264 -0
  19. code_context/adapters/driven/vector_store_numpy.py +119 -0
  20. code_context/adapters/driving/__init__.py +0 -0
  21. code_context/adapters/driving/mcp_server.py +365 -0
  22. code_context/cli.py +161 -0
  23. code_context/config.py +114 -0
  24. code_context/domain/__init__.py +0 -0
  25. code_context/domain/index_bus.py +52 -0
  26. code_context/domain/models.py +140 -0
  27. code_context/domain/ports.py +205 -0
  28. code_context/domain/use_cases/__init__.py +0 -0
  29. code_context/domain/use_cases/explain_diff.py +98 -0
  30. code_context/domain/use_cases/find_definition.py +30 -0
  31. code_context/domain/use_cases/find_references.py +22 -0
  32. code_context/domain/use_cases/get_file_tree.py +36 -0
  33. code_context/domain/use_cases/get_summary.py +24 -0
  34. code_context/domain/use_cases/indexer.py +336 -0
  35. code_context/domain/use_cases/recent_changes.py +36 -0
  36. code_context/domain/use_cases/search_repo.py +131 -0
  37. code_context/server.py +151 -0
  38. code_context_mcp-1.0.0.dist-info/METADATA +181 -0
  39. code_context_mcp-1.0.0.dist-info/RECORD +43 -0
  40. code_context_mcp-1.0.0.dist-info/WHEEL +5 -0
  41. code_context_mcp-1.0.0.dist-info/entry_points.txt +3 -0
  42. code_context_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  43. code_context_mcp-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,365 @@
1
+ """MCP driving adapter: registers the 7 contract tools on an mcp Server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from mcp.server import Server
13
+ from mcp.types import TextContent, Tool
14
+
15
+ from code_context.domain.use_cases.explain_diff import ExplainDiffUseCase
16
+ from code_context.domain.use_cases.find_definition import FindDefinitionUseCase
17
+ from code_context.domain.use_cases.find_references import FindReferencesUseCase
18
+ from code_context.domain.use_cases.get_file_tree import GetFileTreeUseCase
19
+ from code_context.domain.use_cases.get_summary import GetSummaryUseCase
20
+ from code_context.domain.use_cases.recent_changes import RecentChangesUseCase
21
+ from code_context.domain.use_cases.search_repo import SearchRepoUseCase
22
+
23
+ log = logging.getLogger(__name__)
24
+
25
+
26
def register(
    server: Server,
    *,
    search_repo: SearchRepoUseCase,
    recent_changes: RecentChangesUseCase,
    get_summary: GetSummaryUseCase,
    find_definition: FindDefinitionUseCase,
    find_references: FindReferencesUseCase,
    get_file_tree: GetFileTreeUseCase,
    explain_diff: ExplainDiffUseCase,
) -> None:
    """Register the 7 contract tools on the given mcp Server instance.

    All use cases are injected keyword-only so the composition root wires
    the dependencies explicitly. The nested handlers below close over them.
    """

    # Advertises the tool catalog. Descriptions are written for the calling
    # LLM ("use INSTEAD of Grep/Bash"), so they are part of the contract —
    # do not edit them casually.
    @server.list_tools()
    async def list_tools() -> list[Tool]:
        return [
            Tool(
                name="search_repo",
                description=(
                    "Semantic search over the indexed codebase. Use this INSTEAD of Grep "
                    "when the query is conceptual (e.g. 'where do we validate input', "
                    "'how is caching implemented', 'authentication flow'). Returns ranked "
                    "code fragments with file path, line range, snippet, score and a "
                    "one-line `why` excerpt. For exact-string lookup, Grep is still better."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {"type": "string"},
                        "top_k": {"type": "integer", "default": 5},
                        "scope": {
                            "type": "string",
                            "description": (
                                "Optional repo-relative path prefix to constrain results."
                            ),
                        },
                    },
                    "required": ["query"],
                },
            ),
            Tool(
                name="recent_changes",
                description=(
                    "Recent git commits with structured fields (sha, ISO date, author, "
                    "paths, summary). Use INSTEAD of `git log` shell calls — the output "
                    "is already parsed and filterable by `since` and `paths`. Defaults "
                    "to the last 7 days when `since` is omitted."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "since": {
                            "type": "string",
                            "description": "ISO 8601 cutoff; defaults to 7 days ago.",
                        },
                        "paths": {"type": "array", "items": {"type": "string"}},
                        "max": {"type": "integer", "default": 20},
                    },
                },
            ),
            Tool(
                name="get_summary",
                description=(
                    "Structured snapshot of the project or a module: name, purpose "
                    "(README first paragraph), stack (Python/Node/Rust/Go/Java), "
                    "entry_points, key_modules, stats (files, loc, languages). Useful "
                    "at session start for orientation; prefer it over reading "
                    "README/CLAUDE.md when you need machine-readable fields."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "scope": {"type": "string", "enum": ["project", "module"]},
                        "path": {
                            "type": "string",
                            "description": "Required when scope='module'; repo-relative path.",
                        },
                    },
                },
            ),
            Tool(
                name="find_definition",
                description=(
                    "Locate the definition site of a named symbol (function, class, "
                    "method, type, struct, enum, interface, record). Use this INSTEAD of "
                    'shelling out to grep when the user asks "where is X defined?" — '
                    "returns SymbolDef[] with path, line range, kind, and language. "
                    "Faster and more accurate than grepping for `def X` / `class X` / "
                    "`function X` / etc., because it consults a tree-sitter-indexed "
                    "symbol table built at reindex time, not the raw text."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "Exact identifier to locate.",
                        },
                        "language": {
                            "type": "string",
                            "enum": [
                                "python",
                                "javascript",
                                "typescript",
                                "go",
                                "rust",
                                "csharp",
                            ],
                            "description": ("Optional language hint for same-name disambiguation."),
                        },
                        "max": {"type": "integer", "default": 5},
                    },
                    "required": ["name"],
                },
            ),
            Tool(
                name="find_references",
                description=(
                    "List every textual occurrence of a named symbol in the indexed "
                    'corpus. Use INSTEAD of `grep -n "X"` when the user asks "who '
                    'calls X?" or "where is X used?". Returns SymbolRef[] with path, '
                    "line, snippet. Word-boundary matched, so 'log' won't return "
                    "'logger' or 'log_format'."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "Exact identifier to find references for.",
                        },
                        "max": {"type": "integer", "default": 50},
                    },
                    "required": ["name"],
                },
            ),
            Tool(
                name="get_file_tree",
                description=(
                    "Repo-relative directory tree, gitignore-aware. Use INSTEAD of "
                    "shelling out to `Bash: ls -R` or `Bash: tree` when the user "
                    "asks for the project structure or for orientation in an "
                    "unfamiliar module. Returns a hierarchical FileTreeNode with "
                    "files (with byte sizes) and directories (with recursive "
                    "children, capped at max_depth). Honors .gitignore; skips "
                    "hidden files unless include_hidden=true."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": (
                                "Optional repo-relative subdirectory; defaults to root."
                            ),
                        },
                        "max_depth": {
                            "type": "integer",
                            "default": 4,
                            "description": "Cap on tree depth.",
                        },
                        "include_hidden": {
                            "type": "boolean",
                            "default": False,
                            "description": "Include dot-files / dot-directories.",
                        },
                    },
                },
            ),
            Tool(
                name="explain_diff",
                description=(
                    "AST-aligned chunks affected by the diff at `ref`. Use INSTEAD "
                    'of `Bash: git show <sha>` when the user asks "what does this '
                    'commit do?" or "what changed in HEAD~3?". The chunker resolves '
                    "which whole functions / classes were touched, not just raw line "
                    "additions — much easier for an LLM to reason about. Returns "
                    "DiffChunk[] with path, lines, snippet, kind, and change "
                    '("added"|"modified"|"deleted").'
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "ref": {
                            "type": "string",
                            "description": (
                                "Git ref: full SHA, short SHA, HEAD, HEAD~N, branch name."
                            ),
                        },
                        "max_chunks": {"type": "integer", "default": 50},
                    },
                    "required": ["ref"],
                },
            ),
        ]

    # Dispatch by tool name. Each synchronous use case runs via
    # asyncio.to_thread so a slow search/index read does not block the
    # server's event loop.
    @server.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
        if name == "search_repo":
            return await asyncio.to_thread(_handle_search, search_repo, arguments)
        if name == "recent_changes":
            return await asyncio.to_thread(_handle_recent, recent_changes, arguments)
        if name == "get_summary":
            return await asyncio.to_thread(_handle_summary, get_summary, arguments)
        if name == "find_definition":
            return await asyncio.to_thread(_handle_find_definition, find_definition, arguments)
        if name == "find_references":
            return await asyncio.to_thread(_handle_find_references, find_references, arguments)
        if name == "get_file_tree":
            return await asyncio.to_thread(_handle_file_tree, get_file_tree, arguments)
        if name == "explain_diff":
            return await asyncio.to_thread(_handle_explain_diff, explain_diff, arguments)
        # Unknown names indicate a client/catalog mismatch; surface loudly.
        raise ValueError(f"unknown tool: {name}")
239
+
240
+
241
def _handle_search(uc: SearchRepoUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run the search_repo use case and serialize the hits as JSON text."""
    hits = uc.run(
        query=args["query"],
        top_k=int(args.get("top_k", 5)),
        scope=args.get("scope"),
    )
    rows: list[dict[str, Any]] = []
    for hit in hits:
        rows.append(
            {
                "path": hit.path,
                "lines": list(hit.lines),
                "snippet": hit.snippet,
                "score": hit.score,
                "why": hit.why,
            }
        )
    return [TextContent(type="text", text=_to_json(rows))]
258
+
259
+
260
def _handle_recent(uc: RecentChangesUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run the recent_changes use case; `since` is parsed from ISO 8601 when present."""
    raw_since = args.get("since")
    cutoff = datetime.fromisoformat(raw_since) if raw_since else None
    commits = uc.run(
        since=cutoff,
        paths=args.get("paths"),
        max_count=int(args.get("max", 20)),
    )
    rows = [
        {
            "sha": commit.sha,
            "date": commit.date.isoformat(),
            "author": commit.author,
            "paths": commit.paths,
            "summary": commit.summary,
        }
        for commit in commits
    ]
    return [TextContent(type="text", text=_to_json(rows))]
280
+
281
+
282
def _handle_summary(uc: GetSummaryUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run get_summary; `path` only matters when scope='module'."""
    raw_path = args.get("path")
    summary = uc.run(
        scope=args.get("scope", "project"),
        path=Path(raw_path) if raw_path else None,
    )
    # Same field order as the Summary model so the JSON stays stable.
    fields = ("name", "purpose", "stack", "entry_points", "key_modules", "stats")
    payload = {field: getattr(summary, field) for field in fields}
    return [TextContent(type="text", text=_to_json(payload))]
295
+
296
+
297
def _handle_find_definition(uc: FindDefinitionUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run find_definition and serialize the SymbolDef results."""
    matches = uc.run(
        name=args["name"],
        language=args.get("language"),
        max_count=int(args.get("max", 5)),
    )
    rows: list[dict[str, Any]] = []
    for match in matches:
        rows.append(
            {
                "name": match.name,
                "path": match.path,
                "lines": list(match.lines),
                "kind": match.kind,
                "language": match.language,
            }
        )
    return [TextContent(type="text", text=_to_json(rows))]
314
+
315
+
316
def _handle_find_references(uc: FindReferencesUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run find_references and serialize the SymbolRef results."""
    refs = uc.run(name=args["name"], max_count=int(args.get("max", 50)))
    rows = [
        {"path": ref.path, "line": ref.line, "snippet": ref.snippet}
        for ref in refs
    ]
    return [TextContent(type="text", text=_to_json(rows))]
323
+
324
+
325
def _handle_file_tree(uc: GetFileTreeUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run get_file_tree and serialize the node hierarchy as nested JSON."""
    root = uc.run(
        path=args.get("path"),
        max_depth=int(args.get("max_depth", 4)),
        include_hidden=bool(args.get("include_hidden", False)),
    )
    return [TextContent(type="text", text=_to_json(_serialize_tree_node(root)))]
333
+
334
+
335
+ def _serialize_tree_node(node) -> dict[str, Any]:
336
+ """Recursively flatten a FileTreeNode tuple to plain JSON dicts."""
337
+ out: dict[str, Any] = {
338
+ "path": node.path,
339
+ "kind": node.kind,
340
+ "size": node.size,
341
+ "children": [_serialize_tree_node(c) for c in node.children],
342
+ }
343
+ return out
344
+
345
+
346
def _handle_explain_diff(uc: ExplainDiffUseCase, args: dict[str, Any]) -> list[TextContent]:
    """Run explain_diff and serialize the DiffChunk results."""
    chunks = uc.run(ref=args["ref"], max_chunks=int(args.get("max_chunks", 50)))
    rows: list[dict[str, Any]] = []
    for chunk in chunks:
        rows.append(
            {
                "path": chunk.path,
                "lines": list(chunk.lines),
                "snippet": chunk.snippet,
                "kind": chunk.kind,
                "change": chunk.change,
            }
        )
    return [TextContent(type="text", text=_to_json(rows))]
362
+
363
+
364
+ def _to_json(obj: Any) -> str:
365
+ return json.dumps(obj, ensure_ascii=False, indent=2)
code_context/cli.py ADDED
@@ -0,0 +1,161 @@
1
+ """code-context CLI: reindex, status, query, clear."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import logging
8
+ import shutil
9
+ import sys
10
+
11
+ from code_context._composition import (
12
+ build_indexer_and_store,
13
+ build_use_cases,
14
+ safe_reindex,
15
+ setup_logging,
16
+ )
17
+ from code_context.config import load_config
18
+
19
+ log = logging.getLogger("code_context")
20
+
21
+
22
def _cmd_reindex(args: argparse.Namespace) -> int:
    """Reindex the repo: full when --force, otherwise per the dirty_set verdict."""
    cfg = load_config()
    setup_logging(cfg)
    indexer, _, _, _, _ = build_indexer_and_store(cfg)
    if args.force:
        log.info("reindexing %s (forced full)", cfg.repo_root)
        new_dir = safe_reindex(cfg, indexer)
        print(f"reindexed (full, forced) -> {new_dir}")
    else:
        stale = indexer.dirty_set()
        log.info("reindexing %s (%s)", cfg.repo_root, stale.reason)
        new_dir = safe_reindex(cfg, indexer, stale=stale)
        mode = "full" if stale.full_reindex_required else "incremental"
        print(f"reindexed ({mode}: {stale.reason}) -> {new_dir}")
    return 0
37
+
38
+
39
def _cmd_status(args: argparse.Namespace) -> int:
    """Print index health: location, metadata fields, and staleness verdict.

    Returns 0 on success (including the "no index yet" case), 1 when the
    index directory exists but its metadata is missing or unreadable.
    """
    cfg = load_config()
    setup_logging(cfg)
    indexer, _, _, _, _ = build_indexer_and_store(cfg)
    current = indexer.current_index_dir()
    print(f"repo_root: {cfg.repo_root}")
    print(f"cache_dir: {cfg.repo_cache_subdir()}")
    if current is None:
        print("status: no index yet")
        return 0
    meta_path = current / "metadata.json"
    if not meta_path.exists():
        print("status: index dir present but metadata missing")
        return 1
    # A truncated/corrupt metadata.json should yield a diagnostic like the
    # missing-file case above, not an unhandled traceback.
    try:
        meta = json.loads(meta_path.read_text())
    except (OSError, json.JSONDecodeError) as err:
        print(f"status: index dir present but metadata unreadable ({err})")
        return 1
    print(f"index_dir: {current}")
    print(f"head_sha: {meta.get('head_sha')}")
    print(f"indexed_at: {meta.get('indexed_at')}")
    print(f"n_chunks: {meta.get('n_chunks')}")
    print(f"n_files: {meta.get('n_files')}")
    print(f"model: {meta.get('embeddings_model')}")
    print(f"chunker: {meta.get('chunker_version')}")
    # Older index layouts predate the keyword/symbol legs; show a marker.
    print(f"keyword: {meta.get('keyword_version', '<not indexed — pre-v0.4.0>')}")
    print(f"symbol: {meta.get('symbol_version', '<not indexed — pre-v0.5.0>')}")
    stale = indexer.dirty_set()
    print(f"dirty: {len(stale.dirty_files)}")
    print(f"deleted: {len(stale.deleted_files)}")
    print(f"full_reindex_required: {stale.full_reindex_required}")
    print(f"reason: {stale.reason}")
    return 0
69
+
70
+
71
def _cmd_query(args: argparse.Namespace) -> int:
    """Search the current index from the CLI, bypassing MCP entirely."""
    cfg = load_config()
    setup_logging(cfg)
    indexer, store, embeddings, keyword_index, symbol_index = build_indexer_and_store(cfg)
    index_dir = indexer.current_index_dir()
    if index_dir is None:
        print("error: no index. run `code-context reindex` first.", file=sys.stderr)
        return 1
    if indexer.is_stale():
        print(
            "warning: index is stale (HEAD/files/model/chunker changed since last reindex). "
            "Results may be out of date. Run `code-context reindex` to refresh.",
            file=sys.stderr,
        )
    store.load(index_dir)
    # Older indexes may predate the keyword/symbol legs; degrade gracefully.
    try:
        keyword_index.load(index_dir)
    except FileNotFoundError:
        log.warning(
            "keyword index missing in %s — search will fall back to vector-only. "
            "Run `code-context reindex` to backfill the keyword leg.",
            index_dir,
        )
    try:
        symbol_index.load(index_dir)
    except FileNotFoundError:
        log.warning(
            "symbol index missing in %s — find_definition/find_references unavailable. "
            "Run `code-context reindex` to backfill the symbol leg.",
            index_dir,
        )
    # Only the search use case is needed here; discard the other six.
    search, *_ = build_use_cases(
        cfg,
        indexer,
        store,
        embeddings,
        keyword_index,
        symbol_index,
    )
    for hit in search.run(query=args.text, top_k=args.k or cfg.top_k_default):
        print(f"{hit.score:.3f} {hit.path}:{hit.lines[0]}-{hit.lines[1]} ({hit.why})")
    return 0
114
+
115
+
116
def _cmd_clear(args: argparse.Namespace) -> int:
    """Delete this repo's cache directory; requires --yes as confirmation."""
    cfg = load_config()
    setup_logging(cfg)
    target = cfg.repo_cache_subdir()
    if not target.exists():
        print("nothing to clear")
        return 0
    if args.yes:
        shutil.rmtree(target)
        print(f"cleared {target}")
        return 0
    print(f"this will delete {target}. pass --yes to confirm.", file=sys.stderr)
    return 1
129
+
130
+
131
def main() -> int:
    """Entry point for the `code-context` console script: parse args, dispatch."""
    parser = argparse.ArgumentParser(prog="code-context", description="code-context CLI")
    sub = parser.add_subparsers(dest="cmd", required=True)

    reindex_p = sub.add_parser(
        "reindex",
        help="Reindex now (incremental by default; --force for full)",
    )
    reindex_p.add_argument(
        "--force",
        action="store_true",
        help="Force a full reindex regardless of dirty_set verdict.",
    )
    reindex_p.set_defaults(func=_cmd_reindex)

    status_p = sub.add_parser("status", help="Show index health")
    status_p.set_defaults(func=_cmd_status)

    query_p = sub.add_parser("query", help="Run a search query without MCP")
    query_p.add_argument("text")
    query_p.add_argument("-k", type=int, default=None, help="Override top_k")
    query_p.set_defaults(func=_cmd_query)

    clear_p = sub.add_parser("clear", help="Delete the cache for this repo")
    clear_p.add_argument("--yes", action="store_true", help="Confirm deletion")
    clear_p.set_defaults(func=_cmd_clear)

    parsed = parser.parse_args()
    return int(parsed.func(parsed))


if __name__ == "__main__":
    sys.exit(main())
code_context/config.py ADDED
@@ -0,0 +1,114 @@
1
+ """Configuration: env vars + defaults, frozen dataclass."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import os
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ import platformdirs
11
+
12
+ _DEFAULT_EXTENSIONS = [
13
+ ".py",
14
+ ".js",
15
+ ".ts",
16
+ ".jsx",
17
+ ".tsx",
18
+ ".go",
19
+ ".rs",
20
+ ".cs",
21
+ ".java",
22
+ ".c",
23
+ ".cpp",
24
+ ".h",
25
+ ".hpp",
26
+ ".md",
27
+ ".yaml",
28
+ ".yml",
29
+ ".json",
30
+ ]
31
+
32
+
33
@dataclass(frozen=True, slots=True)
class Config:
    """Immutable runtime configuration, resolved once by load_config().

    Frozen + slots: instances are hashable-by-identity, cheap, and cannot be
    mutated after composition — every consumer sees the same settings.
    """

    repo_root: Path  # resolved (absolute) root of the repo being indexed
    embeddings_provider: str  # "local" or "openai"
    embeddings_model: str | None  # model name; provider-specific default applied in load_config
    openai_api_key: str | None  # taken from OPENAI_API_KEY; may be absent for local provider
    include_extensions: list[str]  # file extensions to index, each with leading dot
    max_file_bytes: int  # files larger than this are skipped (default 1 MiB)
    cache_dir: Path  # base cache dir; per-repo subdir via repo_cache_subdir()
    log_level: str  # stdlib logging level name, e.g. "INFO"
    top_k_default: int  # default result count for search when caller omits top_k
    chunk_lines: int  # line-chunker window size
    chunk_overlap: int  # line-chunker overlap between consecutive windows
    chunker_strategy: str  # "treesitter" (default) or "line"
    keyword_strategy: str  # "sqlite" (default) or "none"
    rerank: bool  # enable cross-encoder reranking leg
    rerank_model: str | None  # reranker model override; None = adapter default
    symbol_index_strategy: str  # "sqlite" (default) or "none"
    trust_remote_code: bool  # Off by default. Required for some HF models that ship custom Python.
    # Sprint 7 — background reindex thread (default ON). Coalesce window
    # for trigger storms.
    bg_reindex: bool = True
    bg_idle_seconds: float = 1.0
    # Sprint 7 — optional file-system watcher (off by default; needs
    # the [watch] extra installed).
    watch: bool = False
    watch_debounce_ms: int = 1000

    def repo_cache_subdir(self) -> Path:
        """Cache subdir specific to this repo (hashed for collision safety)."""
        # Hash the resolved absolute path so distinct checkouts never share a
        # cache dir; 16 hex chars keep the path short while avoiding collisions.
        h = hashlib.sha256(str(self.repo_root.resolve()).encode("utf-8")).hexdigest()[:16]
        return self.cache_dir / h
65
+
66
+
67
def _env_flag(name: str, default: str = "off") -> bool:
    """Interpret env var *name* as a boolean switch.

    "on"/"true"/"1" (case-insensitive) mean True; anything else — including
    the fallback *default* when the variable is unset — means False.
    """
    return os.environ.get(name, default).lower() in ("on", "true", "1")


def load_config(default_repo_root: Path | None = None) -> Config:
    """Build a frozen Config from `CC_*` environment variables plus defaults.

    Args:
        default_repo_root: Fallback repo root used when CC_REPO_ROOT is
            unset/empty; when also None, the current working directory wins.

    Returns:
        A fully-populated Config with repo_root resolved to an absolute path.
    """
    repo_root = Path(os.environ.get("CC_REPO_ROOT") or default_repo_root or Path.cwd())
    embeddings = os.environ.get("CC_EMBEDDINGS", "local")

    # The sensible default model differs per provider.
    default_model = "all-MiniLM-L6-v2" if embeddings == "local" else "text-embedding-3-small"
    model = os.environ.get("CC_EMBEDDINGS_MODEL", default_model)

    cache_override = os.environ.get("CC_CACHE_DIR")
    cache_dir = (
        Path(cache_override)
        if cache_override
        else Path(platformdirs.user_cache_dir("code-context"))
    )

    # CC_INCLUDE_EXTENSIONS is comma-separated; entries may omit the leading
    # dot. Strip whitespace BEFORE testing for the dot — the previous check
    # ran startswith(".") on the raw entry, turning " .py" into "..py".
    exts_raw = os.environ.get("CC_INCLUDE_EXTENSIONS")
    if exts_raw:
        entries = [e.strip() for e in exts_raw.split(",")]
        exts = [e if e.startswith(".") else f".{e}" for e in entries if e]
    else:
        exts = list(_DEFAULT_EXTENSIONS)

    return Config(
        repo_root=repo_root.resolve(),
        embeddings_provider=embeddings,
        embeddings_model=model,
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        include_extensions=exts,
        max_file_bytes=int(os.environ.get("CC_MAX_FILE_BYTES", "1048576")),  # 1 MiB
        cache_dir=cache_dir,
        log_level=os.environ.get("CC_LOG_LEVEL", "INFO"),
        top_k_default=int(os.environ.get("CC_TOP_K_DEFAULT", "5")),
        chunk_lines=int(os.environ.get("CC_CHUNK_LINES", "50")),
        chunk_overlap=int(os.environ.get("CC_CHUNK_OVERLAP", "10")),
        chunker_strategy=os.environ.get("CC_CHUNKER", "treesitter"),
        keyword_strategy=os.environ.get("CC_KEYWORD_INDEX", "sqlite"),
        rerank=_env_flag("CC_RERANK"),
        rerank_model=os.environ.get("CC_RERANK_MODEL"),
        symbol_index_strategy=os.environ.get("CC_SYMBOL_INDEX", "sqlite"),
        trust_remote_code=_env_flag("CC_TRUST_REMOTE_CODE"),
        bg_reindex=_env_flag("CC_BG_REINDEX", "on"),
        bg_idle_seconds=float(os.environ.get("CC_BG_IDLE_SECONDS", "1.0")),
        watch=_env_flag("CC_WATCH"),
        watch_debounce_ms=int(os.environ.get("CC_WATCH_DEBOUNCE_MS", "1000")),
    )
File without changes