context-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +464 -0
  2. package/codegraph/__init__.py +0 -0
  3. package/codegraph/__main__.py +24 -0
  4. package/codegraph/__pycache__/__init__.cpython-313.pyc +0 -0
  5. package/codegraph/__pycache__/__main__.cpython-313.pyc +0 -0
  6. package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
  7. package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
  8. package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
  9. package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
  10. package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
  11. package/codegraph/cache.py +137 -0
  12. package/codegraph/config.py +31 -0
  13. package/codegraph/extractors/__init__.py +0 -0
  14. package/codegraph/extractors/__pycache__/__init__.cpython-313.pyc +0 -0
  15. package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
  16. package/codegraph/extractors/__pycache__/audio_extractor.cpython-313.pyc +0 -0
  17. package/codegraph/extractors/__pycache__/doc_extractor.cpython-313.pyc +0 -0
  18. package/codegraph/extractors/__pycache__/image_extractor.cpython-313.pyc +0 -0
  19. package/codegraph/extractors/ast_extractor.py +222 -0
  20. package/codegraph/extractors/audio_extractor.py +8 -0
  21. package/codegraph/extractors/doc_extractor.py +34 -0
  22. package/codegraph/extractors/image_extractor.py +26 -0
  23. package/codegraph/graph/__init__.py +0 -0
  24. package/codegraph/graph/__pycache__/__init__.cpython-313.pyc +0 -0
  25. package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
  26. package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
  27. package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
  28. package/codegraph/graph/builder.py +145 -0
  29. package/codegraph/graph/clustering.py +40 -0
  30. package/codegraph/graph/query.py +283 -0
  31. package/codegraph/report.py +115 -0
  32. package/codegraph/scanner.py +92 -0
  33. package/codegraph/server.py +514 -0
  34. package/package.json +62 -0
  35. package/src/cli.js +1010 -0
  36. package/src/config.js +89 -0
  37. package/src/db.js +786 -0
  38. package/src/guard.js +20 -0
  39. package/src/hooks/autoContext.js +17 -0
  40. package/src/hooks/autoLink.js +7 -0
  41. package/src/http.js +765 -0
  42. package/src/index.js +47 -0
  43. package/src/search.js +50 -0
  44. package/src/server.js +80 -0
  45. package/src/summarizer.js +124 -0
  46. package/src/templates/AGENTS.md +76 -0
  47. package/src/templates/CLAUDE.md +94 -0
  48. package/src/templates/GEMINI.md +76 -0
  49. package/src/templates/cursor-rules.mdc +41 -0
  50. package/src/templates/windsurf-rules.md +35 -0
  51. package/src/tools/codegraph.js +215 -0
  52. package/src/tools/context.js +188 -0
  53. package/src/tools/discussion.js +123 -0
  54. package/src/tools/errorCheck.js +65 -0
  55. package/src/tools/fileTools.js +185 -0
  56. package/src/tools/gitTools.js +259 -0
  57. package/src/tools/search.js +55 -0
  58. package/src/vector.js +153 -0
@@ -0,0 +1,222 @@
1
+ """
2
+ ast_extractor.py — extract nodes from code files using tree-sitter AST.
3
+
4
+ Falls back to regex if tree-sitter grammars aren't installed.
5
+ Each node: { id, name, type, file, line, docstring?, calls?, imports? }
6
+ """
7
+
8
+ import re
9
+ from pathlib import Path
10
+
11
+
12
+ # ── tree-sitter setup (optional — graceful fallback) ─────────────────────────
13
+
14
+ def _try_load_ts():
15
+ try:
16
+ import tree_sitter_python as tspython
17
+ import tree_sitter_javascript as tsjavascript
18
+ from tree_sitter import Language, Parser
19
+ PY_LANG = Language(tspython.language())
20
+ JS_LANG = Language(tsjavascript.language())
21
+ return {"python": (PY_LANG, Parser(PY_LANG)), "javascript": (JS_LANG, Parser(JS_LANG))}
22
+ except ImportError:
23
+ return {}
24
+
25
+ _TS_PARSERS = _try_load_ts()
26
+
27
+
28
+ # ── tree-sitter queries ───────────────────────────────────────────────────────
29
+
30
+ _PY_CLASS_QUERY = """
31
+ (class_definition name: (identifier) @name) @class
32
+ """
33
+
34
+ _PY_FUNC_QUERY = """
35
+ (function_definition name: (identifier) @name) @func
36
+ """
37
+
38
+ _JS_CLASS_QUERY = """
39
+ (class_declaration name: (identifier) @name) @class
40
+ """
41
+
42
+ _JS_FUNC_QUERY = """
43
+ [
44
+ (function_declaration name: (identifier) @name)
45
+ (method_definition name: (property_identifier) @name)
46
+ ] @func
47
+ """
48
+
49
+
50
def _ts_extract(source: bytes, lang_key: str, rel_path: str) -> list:
    """Extract class and function nodes from *source* using tree-sitter.

    Args:
        source: Raw file contents.
        lang_key: Key into ``_TS_PARSERS`` ("python" or "javascript").
        rel_path: Path stored on every node and used as the node-id prefix.

    Returns:
        List of node dicts (``id``, ``name``, ``type``, ``file``, ``line``),
        classes first, then functions/methods. The list is empty when no
        parser is loaded for *lang_key* or when querying fails; the caller
        treats an empty result as "use the regex fallback".
    """
    if lang_key not in _TS_PARSERS:
        return []
    lang, parser = _TS_PARSERS[lang_key]
    tree = parser.parse(source)
    is_python = lang_key == "python"

    def _node(kind, ts_node):
        # Decode leniently so one invalid byte sequence in an identifier
        # does not abort extraction for the rest of the file.
        name = ts_node.text.decode("utf-8", errors="replace")
        return {
            "id": f"{rel_path}::{kind}::{name}",
            "name": name,
            "type": kind,
            "file": rel_path,
            "line": ts_node.start_point[0] + 1,  # tree-sitter rows are 0-based
        }

    def _compile(query_src):
        # Prefer Language.query(); fall back to the Query constructor for
        # runtimes that no longer provide the method.
        if hasattr(lang, "query"):
            return lang.query(query_src)
        from tree_sitter import Query
        return Query(lang, query_src)

    def _iter_captures(query, root):
        """Yield (capture_name, tree_node) pairs for *query* run on *root*."""
        try:
            runner = query
            if not hasattr(runner, "matches"):
                # Newer py-tree-sitter releases expose matches() on
                # QueryCursor instead of on Query.
                from tree_sitter import QueryCursor
                runner = QueryCursor(query)
            # matches() yields (pattern_index, {capture_name: node-or-list}).
            for _pat_idx, caps in runner.matches(root):
                for cap_name, cap_nodes in caps.items():
                    if not isinstance(cap_nodes, list):
                        cap_nodes = [cap_nodes]
                    for ts_node in cap_nodes:
                        yield cap_name, ts_node
        except Exception:
            # Best effort: an incompatible query API simply yields nothing.
            return

    def _collect(kind, query_src, name_types):
        found = []
        try:
            query = _compile(query_src)
            for cap_name, ts_node in _iter_captures(query, tree.root_node):
                if cap_name == "name" and ts_node.type in name_types:
                    found.append(_node(kind, ts_node))
        except Exception:
            # Keep whatever was collected before the failure.
            pass
        return found

    nodes = _collect(
        "class",
        _PY_CLASS_QUERY if is_python else _JS_CLASS_QUERY,
        ("identifier",),
    )
    nodes += _collect(
        "function",
        _PY_FUNC_QUERY if is_python else _JS_FUNC_QUERY,
        ("identifier", "property_identifier"),
    )
    return nodes
99
+
100
+
101
+ # ── Regex fallback ────────────────────────────────────────────────────────────
102
+
103
+ _PATTERNS = {
104
+ "python": {
105
+ "class": re.compile(r"^class\s+(\w+)", re.MULTILINE),
106
+ "function": re.compile(r"^def\s+(\w+)", re.MULTILINE),
107
+ "import": re.compile(r"^(?:import|from)\s+([\w.]+)", re.MULTILINE),
108
+ },
109
+ "javascript": {
110
+ "class": re.compile(r"\bclass\s+(\w+)", re.MULTILINE),
111
+ "function": re.compile(r"\bfunction\s+(\w+)", re.MULTILINE),
112
+ "import": re.compile(r"^import\s+.*?from\s+['\"](.+?)['\"]", re.MULTILINE),
113
+ },
114
+ "go": {
115
+ "function": re.compile(r"^func\s+(?:\(\w+\s+\*?\w+\)\s+)?(\w+)", re.MULTILINE),
116
+ "struct": re.compile(r"^type\s+(\w+)\s+struct", re.MULTILINE),
117
+ "import": re.compile(r'"([\w./]+)"', re.MULTILINE),
118
+ },
119
+ "rust": {
120
+ "function": re.compile(r"^(?:pub\s+)?fn\s+(\w+)", re.MULTILINE),
121
+ "struct": re.compile(r"^(?:pub\s+)?struct\s+(\w+)", re.MULTILINE),
122
+ "import": re.compile(r"^use\s+([\w:]+)", re.MULTILINE),
123
+ },
124
+ "java": {
125
+ "class": re.compile(r"\bclass\s+(\w+)", re.MULTILINE),
126
+ "function": re.compile(r"(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+(\w+)\s*\(", re.MULTILINE),
127
+ "import": re.compile(r"^import\s+([\w.]+);", re.MULTILINE),
128
+ },
129
+ "ruby": {
130
+ "class": re.compile(r"^class\s+(\w+)", re.MULTILINE),
131
+ "function": re.compile(r"^\s*def\s+(\w+)", re.MULTILINE),
132
+ },
133
+ "sql": {
134
+ "table": re.compile(r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?", re.IGNORECASE),
135
+ "index": re.compile(r"CREATE\s+(?:UNIQUE\s+)?INDEX\s+\w+\s+ON\s+[`\"]?(\w+)[`\"]?", re.IGNORECASE),
136
+ },
137
+ # config files: no node extraction — file node created by scanner
138
+ }
139
+
140
+
141
+ def _ext_to_lang(ext: str) -> str:
142
+ return {
143
+ ".py": "python", ".pyw": "python",
144
+ ".js": "javascript", ".mjs": "javascript", ".cjs": "javascript",
145
+ ".jsx": "javascript", ".ts": "javascript", ".tsx": "javascript",
146
+ ".go": "go", ".rs": "rust",
147
+ ".java": "java", ".rb": "ruby",
148
+ ".sql": "sql",
149
+ ".yaml": "config", ".yml": "config", ".toml": "config",
150
+ ".env": "config", ".ini": "config", ".cfg": "config",
151
+ }.get(ext, "")
152
+
153
+
154
def _regex_extract(text: str, lang: str, rel_path: str) -> list:
    """Extract nodes from *text* with the regex patterns registered for *lang*.

    Args:
        text: Decoded file contents.
        lang: Key into ``_PATTERNS``; an unknown key yields only the module node.
        rel_path: Path stored on every node and used as the node-id prefix.

    Returns:
        The module node followed by one node per pattern match, as produced
        by ``_wrap_in_module``. At most 30 imports are recorded on the module.
    """
    patterns = _PATTERNS.get(lang, {})
    children = []
    for kind, pat in patterns.items():
        if kind == "import":
            continue
        for m in pat.finditer(text):
            name = m.group(1)
            # Locate the line from the captured name, not the match start:
            # some patterns consume leading whitespace (including newlines),
            # so the match itself can begin on an earlier line.
            line = text.count("\n", 0, m.start(1)) + 1
            children.append({
                "id": f"{rel_path}::{kind}::{name}",
                "name": name,
                "type": kind,
                "file": rel_path,
                "line": line,
            })

    imp_pat = patterns.get("import")
    imports = [m.group(1) for m in imp_pat.finditer(text)][:30] if imp_pat else []

    return _wrap_in_module(rel_path, children, imports)
174
+
175
+
176
+ # ── Module wrapper ───────────────────────────────────────────────────────────
177
+
178
+ def _wrap_in_module(rel_path: str, children: list, imports: list) -> list:
179
+ """
180
+ Create a module node for the file, link all child nodes to it via defined_in.
181
+ The module node carries the imports so builder can create file-to-file edges.
182
+ """
183
+ from pathlib import Path as _Path
184
+ stem = _Path(rel_path).stem
185
+ mod_id = f"{rel_path}::module::{stem}"
186
+ module = {
187
+ "id": mod_id,
188
+ "name": stem,
189
+ "type": "module",
190
+ "file": rel_path,
191
+ "line": 1,
192
+ "imports": imports,
193
+ }
194
+ for child in children:
195
+ child["relations"] = [{"id": mod_id, "relation": "defined-in", "confidence": "EXTRACTED"}]
196
+ return [module] + children
197
+
198
+
199
+ # ── Public API ────────────────────────────────────────────────────────────────
200
+
201
def extract(abs_path: str, rel_path: str) -> list:
    """Extract nodes from a code/sql/config file.

    Args:
        abs_path: Path used to read the file; its lower-cased suffix selects
            the language.
        rel_path: Path recorded on the nodes and used as their id prefix.

    Returns:
        List of node dicts with the module node first. Empty when the
        suffix is not recognised or the file cannot be read.
    """
    ext = Path(abs_path).suffix.lower()
    lang = _ext_to_lang(ext)
    if not lang:
        return []

    try:
        # Context manager so the handle is closed promptly on every interpreter.
        with open(abs_path, "rb") as fh:
            raw = fh.read()
    except OSError:
        return []
    text = raw.decode("utf-8", errors="replace")

    # tree-sitter for Python and JS/TS if available
    if lang in ("python", "javascript") and lang in _TS_PARSERS:
        children = _ts_extract(raw, lang, rel_path)
        if children:
            # tree-sitter only supplies definitions; imports still come
            # from the regex table, capped at 30 per file.
            imp_pat = _PATTERNS.get(lang, {}).get("import")
            imports = [m.group(1) for m in imp_pat.finditer(text)][:30] if imp_pat else []
            return _wrap_in_module(rel_path, children, imports)

    # No parser, or tree-sitter found nothing: use the regex fallback.
    return _regex_extract(text, lang, rel_path)
@@ -0,0 +1,8 @@
1
+ """
2
+ audio_extractor.py — audio files are not supported without faster-whisper.
3
+ Stub kept so imports don't break.
4
+ """
5
+
6
+
7
def transcribe(_path: str) -> str:
    """Stub transcription: the argument is ignored and "" is always returned."""
    transcript = ""
    return transcript
@@ -0,0 +1,34 @@
1
+ """
2
+ doc_extractor.py — extract plain text from doc and PDF files.
3
+ PDF extraction uses pymupdf if installed; falls back to label-only otherwise.
4
+ """
5
+
6
+ from pathlib import Path
7
+
8
+
9
def extract_text(path: str) -> str:
    """Return the text content of a doc/PDF file, truncated at DOC_MAX_CHARS.

    Paths ending in ".pdf" (case-insensitive) are delegated to
    ``_extract_pdf``. Any other file is decoded as UTF-8 with undecodable
    bytes replaced. An unreadable file yields "".
    """
    from ..config import DOC_MAX_CHARS
    if path.lower().endswith(".pdf"):
        return _extract_pdf(path, DOC_MAX_CHARS)
    try:
        # Read only as many characters as will be kept rather than loading
        # the whole file and slicing it afterwards.
        # NOTE(review): assumes DOC_MAX_CHARS is a non-negative int — confirm in config.
        with open(path, encoding="utf-8", errors="replace") as fh:
            return fh.read(DOC_MAX_CHARS)
    except OSError:
        return ""
18
+
19
+
20
+ def _extract_pdf(path: str, max_chars: int) -> str:
21
+ try:
22
+ import pymupdf # optional dep
23
+ doc = pymupdf.open(path)
24
+ parts = []
25
+ for page in doc:
26
+ parts.append(page.get_text())
27
+ if sum(len(p) for p in parts) >= max_chars:
28
+ break
29
+ doc.close()
30
+ return "".join(parts)[:max_chars]
31
+ except ImportError:
32
+ return f"[PDF: {Path(path).name} — install pymupdf to extract text]"
33
+ except Exception:
34
+ return ""
@@ -0,0 +1,26 @@
1
+ """
2
+ image_extractor.py — encode images as base64 for AI vision.
3
+ No external deps — stdlib only.
4
+ """
5
+
6
+ import base64
7
+ import mimetypes
8
+ from pathlib import Path
9
+
10
+
11
+ def extract_image_b64(path: str) -> dict | None:
12
+ """Return {"data": base64_str, "media_type": "image/png"} or None on failure."""
13
+ try:
14
+ data = Path(path).read_bytes()
15
+ media_type = mimetypes.guess_type(path)[0] or "image/png"
16
+ return {"data": base64.b64encode(data).decode(), "media_type": media_type}
17
+ except OSError:
18
+ return None
19
+
20
+
21
def extract_svg_text(path: str) -> str:
    """Return the first 4000 characters of an SVG file read as UTF-8 text.

    Undecodable bytes are replaced. An unreadable file yields "".
    """
    svg_file = Path(path)
    try:
        markup = svg_file.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    return markup[:4000]
File without changes
@@ -0,0 +1,145 @@
1
+ """
2
+ graph/builder.py — build a NetworkX directed graph from extracted nodes.
3
+
4
+ Node attributes: id, name, type, file, line, community
5
+ Edge attributes: relation, confidence (EXTRACTED | INFERRED | AMBIGUOUS)
6
+ """
7
+
8
+ import json
9
+ from pathlib import Path
10
+
11
+ try:
12
+ import networkx as nx
13
+ _HAS_NX = True
14
+ except ImportError:
15
+ _HAS_NX = False
16
+
17
+
18
def build(all_nodes: list[dict]) -> "nx.DiGraph | dict":
    """
    Build the graph from a flat node list.

    Returns an ``nx.DiGraph`` when networkx is installed, otherwise the
    plain-dict fallback from ``_dict_graph``.

    Graph nodes carry every field of the input dict except ``imports``,
    ``calls`` and ``relations``. Edges come from two sources:

    * ``relations`` entries on any node (for example the defined-in links
      added by the extractors); confidence defaults to "INFERRED" when an
      entry does not state one.
    * ``imports`` on module nodes, resolved to other nodes by path, file
      name, stem, or node name; these get relation "imports" and
      confidence "EXTRACTED".

    Input nodes without an id are ignored entirely.
    """
    if not _HAS_NX:
        return _dict_graph(all_nodes)

    G = nx.DiGraph()
    internal_fields = ("imports", "calls", "relations")

    node_by_name: dict[str, str] = {}    # name -> id (last one wins)
    module_by_file: dict[str, str] = {}  # rel_path -> module node id

    for node in all_nodes:
        nid = node.get("id", "")
        if not nid:
            continue
        G.add_node(nid, **{k: v for k, v in node.items() if k not in internal_fields})
        name = node.get("name", "")
        if name:
            # Nameless nodes are not registered, so a relation that has
            # neither id nor name cannot resolve to one of them.
            node_by_name[name] = nid
        if node.get("type") == "module":
            module_by_file[node.get("file", "")] = nid

    # Lookup for import resolution: stem, file name and full path of every
    # module file. The first module to claim a key keeps it.
    file_node: dict[str, str] = {}
    for rel_path, mod_id in module_by_file.items():
        p = rel_path.replace("\\", "/")
        base = p.split("/")[-1]
        stem = base.split(".")[0]
        for key in (stem, base, p):
            file_node.setdefault(key, mod_id)

    # Relation edges (defined-in links and any other explicit relations).
    for node in all_nodes:
        nid = node.get("id", "")
        if not nid:
            continue  # skipped above; must not reappear as an edge source
        for rel in node.get("relations", []):
            target_id = rel.get("id") or node_by_name.get(rel.get("name", ""))
            if target_id and target_id != nid:
                G.add_edge(nid, target_id,
                           relation=rel.get("relation", "relates-to"),
                           confidence=rel.get("confidence", "INFERRED"))

    # Import edges: module -> first candidate that resolves to another node.
    for node in all_nodes:
        if node.get("type") != "module":
            continue
        src_id = node.get("id", "")
        if not src_id:
            continue
        for imp in node.get("imports", []):
            clean = imp.lstrip(".")
            last = clean.replace("\\", "/").split("/")[-1]
            stem = last.split(".")[0]
            for candidate in (clean, last, stem):
                if not candidate:
                    continue
                target = file_node.get(candidate) or node_by_name.get(candidate)
                if target and target != src_id:
                    # An explicit relation between the same pair keeps its
                    # attributes; repeated imports add nothing new.
                    if not G.has_edge(src_id, target):
                        G.add_edge(src_id, target, relation="imports", confidence="EXTRACTED")
                    break

    return G
92
+
93
+
94
+ def _dict_graph(all_nodes: list[dict]) -> dict:
95
+ """Fallback when networkx not installed."""
96
+ nodes = []
97
+ edges = []
98
+ seen = set()
99
+ for node in all_nodes:
100
+ nid = node.get("id", "")
101
+ if nid in seen:
102
+ continue
103
+ seen.add(nid)
104
+ nodes.append({k: v for k, v in node.items() if k not in ("imports", "calls", "relations")})
105
+ for imp in node.get("imports", []):
106
+ edges.append({"from": nid, "to": imp, "relation": "imports", "confidence": "EXTRACTED"})
107
+ return {"nodes": nodes, "edges": edges, "communities": [], "god_nodes": []}
108
+
109
+
110
def to_json_dict(G) -> dict:
    """Serialize a graph to the graph.json schema.

    A dict argument (the fallback produced without networkx) is returned
    unchanged. Otherwise the result holds ``nodes``, ``edges``,
    ``communities`` (read from ``G.graph``), ``god_nodes`` and a UTC
    ``generated_at`` timestamp ending in "Z".
    """
    if isinstance(G, dict):
        return G  # fallback path

    from datetime import datetime, timezone

    nodes = [{"id": nid, **data} for nid, data in G.nodes(data=True)]
    edges = [{"from": u, "to": v, **data} for u, v, data in G.edges(data=True)]

    # God nodes: up to five highest-degree nodes, each with degree above 2.
    degrees = sorted(G.degree(), key=lambda pair: pair[1], reverse=True)
    god_nodes = [nid for nid, degree in degrees[:5] if degree > 2]

    # Take an aware UTC time and drop tzinfo so the text keeps the
    # "<iso>Z" form without relying on the deprecated utcnow().
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "nodes": nodes,
        "edges": edges,
        "communities": G.graph.get("communities", []),
        "god_nodes": god_nodes,
        "generated_at": generated_at,
    }
129
+
130
+
131
def save_graph(project_root: str, graph_dict: dict) -> str:
    """Write *graph_dict* to <project_root>/codegraph-cache/graph.json.

    The cache directory is created when missing. The JSON is written with
    2-space indentation as UTF-8. Returns the path of the written file.
    """
    cache_dir = Path(project_root) / "codegraph-cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    target = cache_dir / "graph.json"
    serialized = json.dumps(graph_dict, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return str(target)
136
+
137
+
138
+ def load_graph(project_root: str) -> dict | None:
139
+ p = Path(project_root) / "codegraph-cache" / "graph.json"
140
+ if not p.exists():
141
+ return None
142
+ try:
143
+ return json.loads(p.read_text(encoding="utf-8"))
144
+ except Exception:
145
+ return None
@@ -0,0 +1,40 @@
1
+ """
2
+ graph/clustering.py — community detection using NetworkX connected-components.
3
+ No external deps beyond networkx (optional — no communities are reported when it is missing).
4
+ """
5
+
6
+
7
def detect_communities(G) -> list[dict]:
    """Assign community IDs to graph nodes and return the communities.

    A community is one connected component of the graph with edge
    direction ignored. Each node gets a ``community`` attribute holding its
    component's id, and the full list is also stored in
    ``G.graph["communities"]``.

    Returns:
        List of ``{"id", "label", "members"}`` dicts, with ``members``
        sorted so repeated runs over the same graph give identical output.
        Empty when networkx is not installed or the graph has no nodes.
    """
    try:
        import networkx as nx
    except ImportError:
        return []

    if G.number_of_nodes() == 0:
        return []

    # Weak connectivity on a directed graph yields the same components as
    # connectivity on its undirected copy, without building that copy.
    if G.is_directed():
        components = nx.weakly_connected_components(G)
    else:
        components = nx.connected_components(G)

    communities = []
    for comm_id, component in enumerate(components):
        # Components are sets; sort so the member order does not depend on
        # set iteration order.
        member_ids = sorted(component, key=str)
        label = _community_label(G, member_ids)
        communities.append({"id": comm_id, "label": label, "members": member_ids})
        for nid in member_ids:
            G.nodes[nid]["community"] = comm_id

    G.graph["communities"] = communities
    return communities
29
+
30
+
31
+ def _community_label(G, member_ids: list) -> str:
32
+ files = []
33
+ for nid in member_ids:
34
+ if G.has_node(nid):
35
+ f = G.nodes[nid].get("file", "")
36
+ if f:
37
+ files.append(f.split("/")[0])
38
+ if not files:
39
+ return "misc"
40
+ return max(set(files), key=files.count)