context-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +464 -0
  2. package/codegraph/__init__.py +0 -0
  3. package/codegraph/__main__.py +24 -0
  4. package/codegraph/__pycache__/__init__.cpython-313.pyc +0 -0
  5. package/codegraph/__pycache__/__main__.cpython-313.pyc +0 -0
  6. package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
  7. package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
  8. package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
  9. package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
  10. package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
  11. package/codegraph/cache.py +137 -0
  12. package/codegraph/config.py +31 -0
  13. package/codegraph/extractors/__init__.py +0 -0
  14. package/codegraph/extractors/__pycache__/__init__.cpython-313.pyc +0 -0
  15. package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
  16. package/codegraph/extractors/__pycache__/audio_extractor.cpython-313.pyc +0 -0
  17. package/codegraph/extractors/__pycache__/doc_extractor.cpython-313.pyc +0 -0
  18. package/codegraph/extractors/__pycache__/image_extractor.cpython-313.pyc +0 -0
  19. package/codegraph/extractors/ast_extractor.py +222 -0
  20. package/codegraph/extractors/audio_extractor.py +8 -0
  21. package/codegraph/extractors/doc_extractor.py +34 -0
  22. package/codegraph/extractors/image_extractor.py +26 -0
  23. package/codegraph/graph/__init__.py +0 -0
  24. package/codegraph/graph/__pycache__/__init__.cpython-313.pyc +0 -0
  25. package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
  26. package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
  27. package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
  28. package/codegraph/graph/builder.py +145 -0
  29. package/codegraph/graph/clustering.py +40 -0
  30. package/codegraph/graph/query.py +283 -0
  31. package/codegraph/report.py +115 -0
  32. package/codegraph/scanner.py +92 -0
  33. package/codegraph/server.py +514 -0
  34. package/package.json +62 -0
  35. package/src/cli.js +1010 -0
  36. package/src/config.js +89 -0
  37. package/src/db.js +786 -0
  38. package/src/guard.js +20 -0
  39. package/src/hooks/autoContext.js +17 -0
  40. package/src/hooks/autoLink.js +7 -0
  41. package/src/http.js +765 -0
  42. package/src/index.js +47 -0
  43. package/src/search.js +50 -0
  44. package/src/server.js +80 -0
  45. package/src/summarizer.js +124 -0
  46. package/src/templates/AGENTS.md +76 -0
  47. package/src/templates/CLAUDE.md +94 -0
  48. package/src/templates/GEMINI.md +76 -0
  49. package/src/templates/cursor-rules.mdc +41 -0
  50. package/src/templates/windsurf-rules.md +35 -0
  51. package/src/tools/codegraph.js +215 -0
  52. package/src/tools/context.js +188 -0
  53. package/src/tools/discussion.js +123 -0
  54. package/src/tools/errorCheck.js +65 -0
  55. package/src/tools/fileTools.js +185 -0
  56. package/src/tools/gitTools.js +259 -0
  57. package/src/tools/search.js +55 -0
  58. package/src/vector.js +153 -0
package/README.md ADDED
@@ -0,0 +1,464 @@
1
+ # context-mcp
2
+
3
+ Persistent memory and codebase knowledge graph for AI coding assistants — delivered as a single MCP server.
4
+
5
+ One shared context store. Works across Claude Code, Cursor, Gemini CLI, Codex, Windsurf, VS Code Copilot, Claude.ai, and ChatGPT. Save context from one AI, pick it up in another. Your memory follows the project, not the tool.
6
+
7
+ ---
8
+
9
+ ## The Problem
10
+
11
+ Every conversation with an AI assistant starts from zero. The AI re-reads files it already read yesterday, re-discovers architecture it already understood, re-derives decisions that were already made. You repeat context. You paste the same background. You explain the same things.
12
+
13
+ This gets worse as projects grow. A codebase with 50 files means the AI either reads all of them every time (burning thousands of tokens) or misses context and gives wrong answers.
14
+
15
+ ---
16
+
17
+ ## What context-mcp Solves
18
+
19
+ **1. You lose context between conversations.**
20
+ AI assistants have no memory. Every new chat is a blank slate. context-mcp gives the AI a persistent store of decisions, bugs, notes, and architecture — loaded automatically at conversation start.
21
+
22
+ **2. Context is siloed to one tool.**
23
+ You fix a bug with Claude Code, then open Cursor and it knows nothing about it. context-mcp stores everything in `~/.context-mcp/` — a single shared store on your machine. Any AI that connects reads and writes the same store.
24
+
25
+ **3. Structural understanding costs too many tokens.**
26
+ Reading 20 files to answer "what calls this function?" is wasteful. context-mcp builds a knowledge graph of your codebase once, then answers structural questions in ~500 tokens instead of ~50,000.
27
+
28
+ **4. Repeated enrichment is expensive.**
29
+ AI-written descriptions of your code nodes are computed once and stored permanently. They survive file changes, rebuilds, and new conversations — never paid for twice.
30
+
31
+ ---
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ npm install -g context-mcp-server
37
+ ```
38
+
39
+ That's it. One command installs everything — the MCP server, HTTP server, and `ctx` CLI.
40
+
41
+ Then run from your project root:
42
+
43
+ ```bash
44
+ ctx install --all
45
+ ```
46
+
47
+ This writes MCP config + AI instruction files for every platform **and** automatically sets up the Python codegraph environment if [uv](https://docs.astral.sh/uv/) is installed.
48
+
49
+ > **CodeGraph requires uv.** Install it first if you want graph features:
50
+ > ```bash
51
+ > curl -Ls https://astral.sh/uv/install.sh | sh # macOS / Linux
52
+ > winget install astral-sh.uv # Windows
53
+ > ```
54
+ > Memory tools work with npm alone — uv is only needed for `codegraph_build` and graph queries.
55
+
56
+ Requires Node.js ≥ 18.
57
+
58
+ Installs three commands:
59
+
60
+ | Command | What it runs |
61
+ |---------|-------------|
62
+ | `context-mcp` | Stdio MCP server (for local AI clients) |
63
+ | `context-mcp-http` | HTTP MCP server with OAuth 2.0 (for web clients) |
64
+ | `ctx` | Interactive CLI — browse, search, manage context |
65
+
66
+ ---
67
+
68
+ ## Platform Setup
69
+
70
+ ```bash
71
+ ctx install --claude # Claude Code
72
+ ctx install --cursor # Cursor
73
+ ctx install --vscode # VS Code Copilot
74
+ ctx install --gemini # Gemini CLI
75
+ ctx install --codex # Codex CLI
76
+ ctx install --windsurf # Windsurf
77
+ ctx install --all # all platforms + Python setup at once
78
+ ```
79
+
80
+ Run these from your project root. Each command writes the MCP config file and AI instruction file for that platform, then checks for uv and, if it is installed, sets up the Python codegraph environment.
81
+
82
+ ---
83
+
84
+ ### Claude Code
85
+
86
+ `ctx install --claude` writes:
87
+ - `.claude/mcp.json` — MCP server config
88
+ - `CLAUDE.md` — instructions Claude reads automatically at conversation start
89
+
90
+ Manual config — add to `.claude/mcp.json`:
91
+
92
+ ```json
93
+ {
94
+ "mcpServers": {
95
+ "context-mcp": {
96
+ "command": "npx",
97
+ "args": ["-y", "context-mcp-server@latest"]
98
+ }
99
+ }
100
+ }
101
+ ```
102
+
103
+ ---
104
+
105
+ ### Cursor
106
+
107
+ `ctx install --cursor` writes:
108
+ - `.cursor/mcp.json` — MCP server config
109
+ - `.cursor/rules/context-mcp.mdc` — Cursor rules file
110
+
111
+ Manual config — add to `.cursor/mcp.json`:
112
+
113
+ ```json
114
+ {
115
+ "mcpServers": {
116
+ "context-mcp": {
117
+ "command": "npx",
118
+ "args": ["-y", "context-mcp-server@latest"]
119
+ }
120
+ }
121
+ }
122
+ ```
123
+
124
+ ---
125
+
126
+ ### VS Code Copilot
127
+
128
+ `ctx install --vscode` writes:
129
+ - `.vscode/mcp.json` — MCP server config
130
+ - `CLAUDE.md` — instruction file
131
+
132
+ Manual config — add to `.vscode/mcp.json`:
133
+
134
+ ```json
135
+ {
136
+ "servers": {
137
+ "context-mcp": {
138
+ "type": "stdio",
139
+ "command": "npx",
140
+ "args": ["-y", "context-mcp-server@latest"]
141
+ }
142
+ }
143
+ }
144
+ ```
145
+
146
+ ---
147
+
148
+ ### Gemini CLI
149
+
150
+ `ctx install --gemini` writes:
151
+ - `.gemini/settings.json` — MCP server config
152
+ - `GEMINI.md` — instructions Gemini reads automatically
153
+
154
+ Manual config — add to `.gemini/settings.json`:
155
+
156
+ ```json
157
+ {
158
+ "mcpServers": {
159
+ "context-mcp": {
160
+ "command": "npx",
161
+ "args": ["-y", "context-mcp-server@latest"]
162
+ }
163
+ }
164
+ }
165
+ ```
166
+
167
+ ---
168
+
169
+ ### Codex CLI
170
+
171
+ `ctx install --codex` writes:
172
+ - `.codex/config.toml` — MCP server config
173
+ - `AGENTS.md` — instructions Codex reads automatically
174
+
175
+ Manual config — add to `.codex/config.toml`:
176
+
177
+ ```toml
178
+ [[mcp_servers]]
179
+ name = "context-mcp"
180
+ command = "npx"
181
+ args = ["-y", "context-mcp-server@latest"]
182
+ ```
183
+
184
+ ---
185
+
186
+ ### Windsurf
187
+
188
+ `ctx install --windsurf` writes:
189
+ - `.windsurf/rules/context-mcp.md` — local rules file (project scope)
190
+ - `~/.codeium/windsurf/mcp_config.json` — global MCP config (merged, not overwritten)
191
+
192
+ Manual config — add to `~/.codeium/windsurf/mcp_config.json`:
193
+
194
+ ```json
195
+ {
196
+ "mcpServers": {
197
+ "context-mcp": {
198
+ "command": "npx",
199
+ "args": ["-y", "context-mcp-server@latest"]
200
+ }
201
+ }
202
+ }
203
+ ```
204
+
205
+ ---
206
+
207
+ ### Claude.ai / ChatGPT (HTTP mode)
208
+
209
+ Web-based clients connect over HTTP with OAuth 2.0. Use `ctx online` to start the HTTP server.
210
+
211
+ **Step 1 — Start the server:**
212
+
213
+ ```bash
214
+ ctx online
215
+ ```
216
+
217
+ Starts the server in the background, shows your OAuth credentials, and prints the endpoint URL. Safe to re-run — won't start a second copy.
218
+
219
+ ```bash
220
+ ctx online --restart # force restart
221
+ ctx online --port 3200 # use a different port
222
+ ```
223
+
224
+ Or start directly:
225
+
226
+ ```bash
227
+ context-mcp-http --port 3100 --host localhost --access-git
228
+ ```
229
+
230
+ **Step 2 — Add as a remote MCP connector:**
231
+
232
+ 1. Go to Claude.ai → Settings → Integrations → Add MCP Connector
233
+ 2. Enter your server URL (e.g. `http://localhost:3100`)
234
+ 3. Use the **Client ID** and **Client Secret** from `~/.context-mcp/contextconfig.json`
235
+
236
+ **View or edit config:**
237
+
238
+ ```bash
239
+ ctx settings
240
+ ```
241
+
242
+ ---
243
+
244
+ ## Path Sandboxing (Security)
245
+
246
+ File and git tools are sandboxed to your project root. Pass `rootPath` when calling `context.resume` to register it:
247
+
248
+ ```json
249
+ { "action": "resume", "project": "my-app", "rootPath": "/home/user/my-app" }
250
+ ```
251
+
252
+ The root is stored permanently with the project. Any file or git operation outside that directory is rejected. This applies to all HTTP-connected clients (Claude.ai, ChatGPT) — they can only access files within the registered project root.
253
+
254
+ ---
255
+
256
+ ## CLI Reference
257
+
258
+ ```bash
259
+ ctx # open interactive mode
260
+
261
+ # Context
262
+ ctx list [project] # list entries, discussions, graphs
263
+ ctx projects # all projects with IDs, graph status, recent entries
264
+ ctx search "query" # keyword → semantic fallback search
265
+ ctx add # add entry interactively
266
+ ctx summary [project] # summarize recent entries
267
+
268
+ # Delete
269
+ ctx delete <id-prefix> # delete one entry by ID prefix
270
+ ctx delete project <name|id> # delete all entries for a project
271
+
272
+ # Server
273
+ ctx online # start HTTP server (idempotent)
274
+ ctx online --restart # force stop + restart
275
+ ctx online --port 3200 # use a different port
276
+ ctx settings # view and edit config interactively
277
+
278
+ # Setup
279
+ ctx install --claude # write MCP config for Claude Code
280
+ ctx install --cursor # write MCP config for Cursor
281
+ ctx install --vscode # write MCP config for VS Code
282
+ ctx install --gemini # write MCP config for Gemini CLI
283
+ ctx install --codex # write MCP config for Codex CLI
284
+ ctx install --windsurf # write MCP config for Windsurf
285
+ ctx install --all # all platforms + Python setup
286
+
287
+ # Tools
288
+ ctx benchmark # real token savings report (memory + graph)
289
+ ctx discuss [project] # view discussions
290
+ ```
291
+
292
+ ---
293
+
294
+ ## Server Flags
295
+
296
+ ### `context-mcp` (stdio)
297
+
298
+ ```
299
+ context-mcp [options]
300
+
301
+ Options:
302
+ --data-dir <path> Override storage directory (default: ~/.context-mcp)
303
+ Also via env: CONTEXT_MCP_DIR=<path>
304
+ --help, -h Show help
305
+ ```
306
+
307
+ ### `context-mcp-http` (HTTP + OAuth)
308
+
309
+ ```
310
+ context-mcp-http [options]
311
+
312
+ Options:
313
+ --port <number> HTTP listen port (default: 3100)
314
+ --host <string> Bind address (default: localhost)
315
+ --access-git Enable git tools for connected clients
316
+ --data-dir <path> Override storage directory (default: ~/.context-mcp)
317
+ Also via env: CONTEXT_MCP_DIR=<path>
318
+ --help, -h Show help
319
+ ```
320
+
321
+ ---
322
+
323
+ ## Config Reference
324
+
325
+ Config lives at `~/.context-mcp/contextconfig.json` — auto-created on first run:
326
+
327
+ ```json
328
+ {
329
+ "client_id": "context-mcp",
330
+ "client_secret": "<auto-generated>",
331
+ "port": 3100,
332
+ "host": "localhost",
333
+ "access_git": false,
334
+ "public_url": null,
335
+ "allowed_redirect_uris": ["https://claude.ai"],
336
+ "allowed_origins": []
337
+ }
338
+ ```
339
+
340
+ | Field | Default | Description |
341
+ |-------|---------|-------------|
342
+ | `client_id` | `"context-mcp"` | OAuth client ID |
343
+ | `client_secret` | auto-generated | OAuth signing secret — keep private |
344
+ | `port` | `3100` | HTTP server port |
345
+ | `host` | `"localhost"` | HTTP bind host |
346
+ | `access_git` | `false` | Enable git tools for HTTP clients |
347
+ | `public_url` | `null` | Public URL shown in `ctx online` output |
348
+ | `allowed_redirect_uris` | `["https://claude.ai"]` | OAuth redirect URI whitelist |
349
+ | `allowed_origins` | `[]` | Extra CORS origins beyond `claude.ai` and `localhost` |
350
+
351
+ Edit any field interactively with `ctx settings`.
352
+
353
+ ---
354
+
355
+ ## Features
356
+
357
+ ### Memory
358
+ - `context.resume` — loads recent entries, active discussions, and graph status. Pass `rootPath` to sandbox file/git tools to your project directory.
359
+ - `context.save` — store decisions, bugs, notes, code snippets, architecture with type tags
360
+ - `context.get` / `context.update` / `context.delete` — full CRUD
361
+ - `search` — keyword-first, semantic fallback, searches all past context
362
+ - `discussion` — threaded plans with steps, status tracking, cross-session continuity
363
+ - Auto-deduplication on save
364
+ - Auto-compact at 50 entries (oldest entries summarized into a digest)
365
+ - Per-project isolation with stable UUIDs
366
+
367
+ ### File & Git Tools (HTTP mode)
368
+ Available to web clients (Claude.ai, ChatGPT) only — local AI clients use their native IDE tools directly.
369
+
370
+ - `read_file`, `write_file`, `patch_file`, `create_dir`, `list_dir`, `delete_file`
371
+ - `git_status`, `git_diff`, `git_log`, `git_add`, `git_commit`, `git_push`, `git_pull`, `git_branch`, `git_stash`, `git_reset`, `git_show`
372
+
373
+ All file and git operations are sandboxed to the registered project root. Enable git tools with the `--access-git` flag or by setting `"access_git": true` in `~/.context-mcp/contextconfig.json`.
374
+
375
+ ### CodeGraph
376
+ - `codegraph_build` — AST scan: functions, classes, imports, edges. Runs locally, no API cost.
377
+ - `codegraph_extract` — returns changed files with node lists for AI enrichment
378
+ - `codegraph_add_nodes` — stores AI-written descriptions in permanent semantic cache
379
+ - `codegraph_query` — natural language structural question → NODE/EDGE subgraph with `token_budget` control
380
+ - `codegraph_explain` — single node: description, dependencies, usages
381
+ - `codegraph_path` — shortest path between two concepts
382
+ - `codegraph_nodes` — list all nodes of a given type
383
+ - `codegraph_report` — full graph analysis: god nodes, clusters, surprising connections
384
+
385
+ ### Multi-AI Support
386
+
387
+ | AI | Config File | Instruction File |
388
+ |----|------------|-----------------|
389
+ | Claude Code | `.claude/mcp.json` | `CLAUDE.md` |
390
+ | VS Code Copilot | `.vscode/mcp.json` | `CLAUDE.md` |
391
+ | Cursor | `.cursor/mcp.json` | `.cursor/rules/context-mcp.mdc` |
392
+ | Gemini CLI | `.gemini/settings.json` | `GEMINI.md` |
393
+ | Codex CLI | `.codex/config.toml` | `AGENTS.md` |
394
+ | Windsurf | `~/.codeium/windsurf/mcp_config.json` | `.windsurf/rules/context-mcp.md` |
395
+ | Claude.ai / ChatGPT | HTTP (`ctx online`) | — |
396
+
397
+ > The context store lives at `~/.context-mcp/` — not inside any tool, IDE, or session. A decision saved in Claude Code is visible in Cursor. A bug logged from Gemini CLI shows up when you resume in Codex.
398
+
399
+ ---
400
+
401
+ ## Token Reduction
402
+
403
+ | Scenario | Without context-mcp | With context-mcp |
404
+ |----------|-------------------|--------------------|
405
+ | Start of conversation | Paste background, re-explain project | `context.resume` → 15 entries, ~750 tokens |
406
+ | "What calls function X?" | Read 10 files to trace callers | `codegraph_query` → subgraph, ~400 tokens |
407
+ | "What does module Y depend on?" | Read module + all imports | `codegraph_explain` → node + edges, ~200 tokens |
408
+ | Understand architecture | Read 20+ files | Graph built once, queried forever |
409
+ | Remember last session's decision | Ask user or re-derive | `context.resume` loads it automatically |
410
+
411
+ Real measured reduction on this project: **162× fewer tokens**, **99.38% reduction** per conversation.
412
+
413
+ ---
414
+
415
+ ## Architecture
416
+
417
+ ```
418
+ context-mcp/
419
+ ├── src/
420
+ │ ├── index.js Stdio MCP server entrypoint
421
+ │ ├── server.js MCP server — registers all tools
422
+ │ ├── db.js JSON store — in-memory cache, debounced writes, project registry
423
+ │ ├── guard.js Path sandboxing — enforces project root on all file/git ops
424
+ │ ├── search.js Keyword + semantic search
425
+ │ ├── summarizer.js Auto-compact summarization
426
+ │ ├── cli.js Interactive CLI (ctx)
427
+ │ ├── http.js HTTP server — OAuth 2.0 + Streamable HTTP transport
428
+ │ ├── config.js Config loader — contextconfig.json + keytar
429
+ │ ├── vector.js Embedding helpers
430
+ │ └── tools/
431
+ │ ├── context.js Memory tool (resume/save/get/update/delete)
432
+ │ ├── discussion.js Discussion tool (threaded plans + steps)
433
+ │ ├── codegraph.js CodeGraph tool — bridge to Python subprocess
434
+ │ ├── search.js Search tool
435
+ │ ├── fileTools.js File read/write (HTTP mode, sandboxed to project root)
436
+ │ ├── gitTools.js Git integration (HTTP mode, sandboxed to project root)
437
+ │ └── errorCheck.js Error checking tool
438
+ ├── codegraph/ Python package — AST extraction + graph queries
439
+ │ ├── server.py Dispatcher — reads JSON from stdin, routes to tools
440
+ │ ├── scanner.py File walker + classifier
441
+ │ ├── cache.py Two-layer cache (ast.json + semantic.json)
442
+ │ ├── report.py Graph report generator
443
+ │ ├── extractors/
444
+ │ │ ├── ast_extractor.py
445
+ │ │ ├── doc_extractor.py
446
+ │ │ ├── image_extractor.py
447
+ │ │ └── audio_extractor.py
448
+ │ └── graph/
449
+ │ ├── builder.py NetworkX graph construction
450
+ │ ├── query.py Natural language → subgraph traversal
451
+ │ └── clustering.py Community detection
452
+ └── ~/.context-mcp/ Data directory (outside repo, never committed)
453
+ ├── contexts.json
454
+ ├── discussions.json
455
+ ├── projects.json Project registry — includes rootPath per project
456
+ ├── graphs.json
457
+ └── contextconfig.json OAuth config + server settings
458
+ ```
459
+
460
+ ---
461
+
462
+ ## License
463
+
464
+ MIT
File without changes
@@ -0,0 +1,24 @@
1
+ """
2
+ codegraph/__main__.py — stdin/stdout dispatcher for Node.js integration.
3
+ Reads {"tool": "codegraph_build", "args": {...}} from stdin, writes result JSON to stdout.
4
+ """
5
+
6
+ import json
7
+ import sys
8
+ import asyncio
9
+
10
+ from .server import _dispatch
11
+
12
+
13
def main():
    """Handle one tool request read from stdin and print its JSON result.

    The request is a JSON object with "tool" and "args" keys; both are passed
    to ``_dispatch`` and the awaited result is printed as JSON on stdout.
    Any exception is reported as ``{"error": "<message>"}`` on stdout and the
    process exits with status 1.
    """
    try:
        request = json.loads(sys.stdin.read())
        tool_name, tool_args = request["tool"], request["args"]
        outcome = asyncio.run(_dispatch(tool_name, tool_args))
        print(json.dumps(outcome))
    except Exception as e:
        # Errors go to stdout as JSON, the same channel used for results.
        print(json.dumps({"error": str(e)}))
        sys.exit(1)
21
+
22
+
23
+ if __name__ == "__main__":
24
+ main()
@@ -0,0 +1,137 @@
1
+ """
2
+ cache.py — SHA-256 file hash cache for codegraph.
3
+
4
+ Two separate caches:
5
+ codegraph-cache/ast.json — AST-extracted nodes (overwritten on rebuild)
6
+ codegraph-cache/semantic.json — AI-written descriptions (never overwritten by rebuild)
7
+
8
+ Format: { "rel/path": { "hash": "...", "nodes": [...], "extracted_at": "..." } }
9
+ """
10
+
11
+ import hashlib
12
+ import json
13
+ from datetime import datetime, timezone
14
+ from pathlib import Path
15
+
16
+
17
def _ast_path(project_root: str) -> Path:
    """Return the path of the AST cache file under *project_root*."""
    return Path(project_root).joinpath("codegraph-cache", "ast.json")
19
+
20
+
21
def _semantic_path(project_root: str) -> Path:
    """Return the path of the semantic cache file under *project_root*."""
    return Path(project_root).joinpath("codegraph-cache", "semantic.json")
23
+
24
+
25
def _read(p: Path) -> dict:
    """Load a JSON cache file, falling back to an empty dict when unusable.

    Args:
        p: Path of the JSON file to load.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing or
        unreadable, is not valid UTF-8 JSON, or its top-level value is not
        a JSON object.
    """
    try:
        # Read directly instead of checking exists() first, so a file that
        # disappears between the check and the read is still handled.
        data = json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # invalid UTF-8 (both decode errors subclass it). RecursionError:
        # pathologically nested JSON. The cache is best-effort, so start empty.
        return {}
    # Callers use dict operations on the result; reject lists, strings, etc.
    return data if isinstance(data, dict) else {}
32
+
33
+
34
def _write(p: Path, data: dict) -> None:
    """Write *data* to *p* as indented JSON by way of a sibling temp file.

    Parent directories are created when missing. The JSON text is first
    written to the same path with its suffix replaced by ".tmp", and that
    file is then renamed over *p*.
    """
    target_dir = p.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    staging = p.with_suffix(".tmp")
    serialized = json.dumps(data, indent=2)
    staging.write_text(serialized, encoding="utf-8")
    staging.replace(p)
39
+
40
+
41
def load_cache(project_root: str) -> dict:
    """Return the AST cache with semantic descriptions overlaid.

    Every path present in either cache file gets an entry with "hash",
    "nodes" and "extracted_at" keys. "hash" and "extracted_at" come from the
    AST entry when it has them, otherwise from the semantic entry, otherwise
    they are "".
    """
    ast_entries = _read(_ast_path(project_root))
    sem_entries = _read(_semantic_path(project_root))

    def merged_entry(rel_path):
        from_ast = ast_entries.get(rel_path, {})
        from_sem = sem_entries.get(rel_path, {})
        return {
            # The AST hash is preferred: it is the one used for change detection.
            "hash": from_ast.get("hash", from_sem.get("hash", "")),
            "nodes": _merge_nodes(from_ast.get("nodes", []), from_sem.get("nodes", [])),
            "extracted_at": from_ast.get("extracted_at", from_sem.get("extracted_at", "")),
        }

    return {
        rel_path: merged_entry(rel_path)
        for rel_path in set(ast_entries) | set(sem_entries)
    }
59
+
60
+
61
def _merge_nodes(ast_nodes: list, sem_nodes: list) -> list:
    """Combine AST nodes with semantic nodes, matching them by "name".

    An AST node whose name also appears in *sem_nodes* is copied, and the
    copy takes the semantic node's "description" when that is non-empty.
    Other AST nodes are passed through unchanged. Semantic nodes whose name
    matches no AST node are appended after the AST nodes.
    """
    described = {}
    for node in sem_nodes:
        if node.get("name"):
            described[node.get("name")] = node

    combined = []
    for node in ast_nodes:
        label = node.get("name")
        sem_node = described.get(label) if label else None
        if sem_node is None:
            combined.append(node)
            continue
        enriched = dict(node)
        text = sem_node.get("description", "")
        if text:
            enriched["description"] = text
        combined.append(enriched)

    # Semantic-only nodes (e.g. from doc files) follow the AST nodes.
    known = {node.get("name") for node in ast_nodes}
    combined.extend(node for node in sem_nodes if node.get("name") not in known)
    return combined
81
+
82
+
83
def save_cache(project_root: str, cache: dict) -> None:
    """Persist *cache* to the AST cache file; the semantic cache is not written."""
    destination = _ast_path(project_root)
    _write(destination, cache)
86
+
87
+
88
def save_semantic_cache(project_root: str, updates: dict[str, list]) -> None:
    """
    Merge AI-written node descriptions into the semantic cache file.

    updates: { rel_path: [nodes_with_descriptions] }
    For each path, incoming nodes are merged by "name" into the nodes already
    stored for that path: only truthy fields of an incoming node overwrite
    stored fields, and incoming nodes without a name are skipped. The path's
    "extracted_at" is set to the current UTC time.
    """
    target = _semantic_path(project_root)
    stored = _read(target)

    for rel_path, incoming in updates.items():
        by_name = {}
        for prior in stored.get(rel_path, {}).get("nodes", []):
            by_name[prior.get("name")] = prior

        for node in incoming:
            key = node.get("name")
            if not key:
                continue
            refreshed = dict(by_name.get(key, {}))
            # Empty/falsy incoming values never overwrite stored ones.
            refreshed.update((field, value) for field, value in node.items() if value)
            by_name[key] = refreshed

        stored[rel_path] = {
            "nodes": list(by_name.values()),
            "extracted_at": datetime.now(timezone.utc).isoformat(),
        }

    _write(target, stored)
106
+
107
+
108
def file_hash(path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in binary mode, 65536 bytes at a time.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while piece := stream.read(65536):
            digest.update(piece)
    return digest.hexdigest()
114
+
115
+
116
+ def get_cached_nodes(cache: dict, rel_path: str, current_hash: str) -> list | None:
117
+ """Return cached nodes if hash matches, else None."""
118
+ entry = cache.get(rel_path)
119
+ if entry and entry.get("hash") == current_hash:
120
+ return entry.get("nodes", [])
121
+ return None
122
+
123
+
124
def set_cached_nodes(cache: dict, rel_path: str, file_hash_val: str, nodes: list) -> None:
    """Store *nodes* for *rel_path* in *cache*, replacing any previous entry.

    The entry records the given hash, the node list, and the current UTC
    time as an ISO-8601 string under "extracted_at".
    """
    stamp = datetime.now(timezone.utc).isoformat()
    cache[rel_path] = dict(hash=file_hash_val, nodes=nodes, extracted_at=stamp)
130
+
131
+
132
def remove_deleted(cache: dict, existing_rel_paths: set) -> list:
    """Drop every cache key that is not in *existing_rel_paths*.

    Mutates *cache* in place and returns the removed keys in their original
    cache order.
    """
    stale = [key for key in cache if key not in existing_rel_paths]
    for key in stale:
        cache.pop(key)
    return stale
@@ -0,0 +1,31 @@
1
+ """
2
+ config.py — codegraph settings.
3
+ """
4
+
5
+ import os
6
+
7
# Names of files and directories that scanning ignores.
DEFAULT_IGNORE = {
    # version control / OS metadata
    ".git", ".DS_Store",
    # dependency and virtual-environment directories
    "node_modules", ".venv", "venv", "env", ".env",
    # build output
    "dist", "build", ".next", "coverage",
    # caches (including codegraph's own)
    "__pycache__", ".pytest_cache", ".mypy_cache", "codegraph-cache",
}

# File extensions, grouped by the kind of content they hold.
CODE_EXTENSIONS = {
    ".py", ".rb",
    ".js", ".jsx", ".mjs", ".cjs", ".ts", ".tsx",
    ".go", ".rs", ".java",
    ".c", ".h", ".cpp", ".hpp",
}
SQL_EXTENSIONS = {".sql"}
CONFIG_EXTENSIONS = {".yaml", ".yml", ".toml", ".ini", ".cfg", ".env"}
DOC_EXTENSIONS = {".md", ".mdx", ".rst", ".txt"}
PDF_EXTENSIONS = {".pdf"}
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"}
AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".ogg", ".flac"}
VIDEO_EXTENSIONS = {".mp4", ".mov", ".avi", ".mkv", ".webm"}

# Size ceiling in bytes for a file to be processed; larger (typically
# generated) files are skipped.
MAX_FILE_BYTES = 500_000

# Upper bound on the characters of document text handed to the AI per file.
DOC_MAX_CHARS = 8_000
File without changes