@oriro/orirocli 0.1.9 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/README.md +16 -18
  2. package/dist/cli.js +4776 -2964
  3. package/package.json +2 -2
  4. package/skills/craft/ai-engineering/SKILL.md +2 -2
  5. package/skills/graphify/SKILL.md +0 -619
  6. package/skills/graphify/__init__.py +0 -28
  7. package/skills/graphify/__main__.py +0 -4582
  8. package/skills/graphify/affected.py +0 -154
  9. package/skills/graphify/always_on/agents-md.md +0 -12
  10. package/skills/graphify/always_on/antigravity-rules.md +0 -14
  11. package/skills/graphify/always_on/claude-md.md +0 -9
  12. package/skills/graphify/always_on/gemini-md.md +0 -9
  13. package/skills/graphify/always_on/kiro-steering.md +0 -5
  14. package/skills/graphify/always_on/vscode-instructions.md +0 -17
  15. package/skills/graphify/analyze.py +0 -724
  16. package/skills/graphify/benchmark.py +0 -155
  17. package/skills/graphify/build.py +0 -487
  18. package/skills/graphify/cache.py +0 -417
  19. package/skills/graphify/callflow_html.py +0 -2020
  20. package/skills/graphify/cluster.py +0 -272
  21. package/skills/graphify/command-kilo.md +0 -15
  22. package/skills/graphify/dedup.py +0 -429
  23. package/skills/graphify/detect.py +0 -1379
  24. package/skills/graphify/diagnostics.py +0 -390
  25. package/skills/graphify/export.py +0 -1408
  26. package/skills/graphify/extract.py +0 -11570
  27. package/skills/graphify/global_graph.py +0 -159
  28. package/skills/graphify/google_workspace.py +0 -223
  29. package/skills/graphify/hooks.py +0 -457
  30. package/skills/graphify/ingest.py +0 -331
  31. package/skills/graphify/llm.py +0 -1896
  32. package/skills/graphify/manifest.py +0 -4
  33. package/skills/graphify/mcp_ingest.py +0 -392
  34. package/skills/graphify/multigraph_compat.py +0 -212
  35. package/skills/graphify/pg_introspect.py +0 -142
  36. package/skills/graphify/prs.py +0 -748
  37. package/skills/graphify/querylog.py +0 -70
  38. package/skills/graphify/report.py +0 -218
  39. package/skills/graphify/scip_ingest.py +0 -363
  40. package/skills/graphify/security.py +0 -336
  41. package/skills/graphify/semantic_cleanup.py +0 -319
  42. package/skills/graphify/serve.py +0 -1309
  43. package/skills/graphify/skill-aider.md +0 -1246
  44. package/skills/graphify/skill-amp.md +0 -613
  45. package/skills/graphify/skill-claw.md +0 -616
  46. package/skills/graphify/skill-codex.md +0 -613
  47. package/skills/graphify/skill-copilot.md +0 -616
  48. package/skills/graphify/skill-devin.md +0 -1372
  49. package/skills/graphify/skill-droid.md +0 -613
  50. package/skills/graphify/skill-kilo.md +0 -625
  51. package/skills/graphify/skill-kiro.md +0 -615
  52. package/skills/graphify/skill-opencode.md +0 -608
  53. package/skills/graphify/skill-pi.md +0 -615
  54. package/skills/graphify/skill-trae.md +0 -614
  55. package/skills/graphify/skill-vscode.md +0 -612
  56. package/skills/graphify/skill-windows.md +0 -651
  57. package/skills/graphify/skills/amp/references/add-watch.md +0 -56
  58. package/skills/graphify/skills/amp/references/exports.md +0 -71
  59. package/skills/graphify/skills/amp/references/extraction-spec.md +0 -68
  60. package/skills/graphify/skills/amp/references/github-and-merge.md +0 -46
  61. package/skills/graphify/skills/amp/references/hooks.md +0 -33
  62. package/skills/graphify/skills/amp/references/query.md +0 -249
  63. package/skills/graphify/skills/amp/references/transcribe.md +0 -48
  64. package/skills/graphify/skills/amp/references/update.md +0 -179
  65. package/skills/graphify/skills/claude/references/add-watch.md +0 -56
  66. package/skills/graphify/skills/claude/references/exports.md +0 -71
  67. package/skills/graphify/skills/claude/references/extraction-spec.md +0 -68
  68. package/skills/graphify/skills/claude/references/github-and-merge.md +0 -46
  69. package/skills/graphify/skills/claude/references/hooks.md +0 -33
  70. package/skills/graphify/skills/claude/references/query.md +0 -103
  71. package/skills/graphify/skills/claude/references/transcribe.md +0 -48
  72. package/skills/graphify/skills/claude/references/update.md +0 -179
  73. package/skills/graphify/skills/claw/references/add-watch.md +0 -56
  74. package/skills/graphify/skills/claw/references/exports.md +0 -71
  75. package/skills/graphify/skills/claw/references/extraction-spec.md +0 -29
  76. package/skills/graphify/skills/claw/references/github-and-merge.md +0 -46
  77. package/skills/graphify/skills/claw/references/hooks.md +0 -33
  78. package/skills/graphify/skills/claw/references/query.md +0 -249
  79. package/skills/graphify/skills/claw/references/transcribe.md +0 -48
  80. package/skills/graphify/skills/claw/references/update.md +0 -179
  81. package/skills/graphify/skills/codex/references/add-watch.md +0 -56
  82. package/skills/graphify/skills/codex/references/exports.md +0 -71
  83. package/skills/graphify/skills/codex/references/extraction-spec.md +0 -29
  84. package/skills/graphify/skills/codex/references/github-and-merge.md +0 -46
  85. package/skills/graphify/skills/codex/references/hooks.md +0 -33
  86. package/skills/graphify/skills/codex/references/query.md +0 -249
  87. package/skills/graphify/skills/codex/references/transcribe.md +0 -48
  88. package/skills/graphify/skills/codex/references/update.md +0 -179
  89. package/skills/graphify/skills/copilot/references/add-watch.md +0 -56
  90. package/skills/graphify/skills/copilot/references/exports.md +0 -71
  91. package/skills/graphify/skills/copilot/references/extraction-spec.md +0 -68
  92. package/skills/graphify/skills/copilot/references/github-and-merge.md +0 -46
  93. package/skills/graphify/skills/copilot/references/hooks.md +0 -33
  94. package/skills/graphify/skills/copilot/references/query.md +0 -249
  95. package/skills/graphify/skills/copilot/references/transcribe.md +0 -48
  96. package/skills/graphify/skills/copilot/references/update.md +0 -179
  97. package/skills/graphify/skills/droid/references/add-watch.md +0 -56
  98. package/skills/graphify/skills/droid/references/exports.md +0 -71
  99. package/skills/graphify/skills/droid/references/extraction-spec.md +0 -68
  100. package/skills/graphify/skills/droid/references/github-and-merge.md +0 -46
  101. package/skills/graphify/skills/droid/references/hooks.md +0 -33
  102. package/skills/graphify/skills/droid/references/query.md +0 -249
  103. package/skills/graphify/skills/droid/references/transcribe.md +0 -48
  104. package/skills/graphify/skills/droid/references/update.md +0 -179
  105. package/skills/graphify/skills/kilo/references/add-watch.md +0 -56
  106. package/skills/graphify/skills/kilo/references/exports.md +0 -71
  107. package/skills/graphify/skills/kilo/references/extraction-spec.md +0 -68
  108. package/skills/graphify/skills/kilo/references/github-and-merge.md +0 -46
  109. package/skills/graphify/skills/kilo/references/hooks.md +0 -33
  110. package/skills/graphify/skills/kilo/references/query.md +0 -249
  111. package/skills/graphify/skills/kilo/references/transcribe.md +0 -48
  112. package/skills/graphify/skills/kilo/references/update.md +0 -179
  113. package/skills/graphify/skills/kiro/references/add-watch.md +0 -56
  114. package/skills/graphify/skills/kiro/references/exports.md +0 -71
  115. package/skills/graphify/skills/kiro/references/extraction-spec.md +0 -29
  116. package/skills/graphify/skills/kiro/references/github-and-merge.md +0 -46
  117. package/skills/graphify/skills/kiro/references/hooks.md +0 -33
  118. package/skills/graphify/skills/kiro/references/query.md +0 -249
  119. package/skills/graphify/skills/kiro/references/transcribe.md +0 -48
  120. package/skills/graphify/skills/kiro/references/update.md +0 -179
  121. package/skills/graphify/skills/opencode/references/add-watch.md +0 -56
  122. package/skills/graphify/skills/opencode/references/exports.md +0 -71
  123. package/skills/graphify/skills/opencode/references/extraction-spec.md +0 -68
  124. package/skills/graphify/skills/opencode/references/github-and-merge.md +0 -46
  125. package/skills/graphify/skills/opencode/references/hooks.md +0 -33
  126. package/skills/graphify/skills/opencode/references/query.md +0 -249
  127. package/skills/graphify/skills/opencode/references/transcribe.md +0 -48
  128. package/skills/graphify/skills/opencode/references/update.md +0 -179
  129. package/skills/graphify/skills/pi/references/add-watch.md +0 -56
  130. package/skills/graphify/skills/pi/references/exports.md +0 -71
  131. package/skills/graphify/skills/pi/references/extraction-spec.md +0 -29
  132. package/skills/graphify/skills/pi/references/github-and-merge.md +0 -46
  133. package/skills/graphify/skills/pi/references/hooks.md +0 -33
  134. package/skills/graphify/skills/pi/references/query.md +0 -249
  135. package/skills/graphify/skills/pi/references/transcribe.md +0 -48
  136. package/skills/graphify/skills/pi/references/update.md +0 -179
  137. package/skills/graphify/skills/trae/references/add-watch.md +0 -56
  138. package/skills/graphify/skills/trae/references/exports.md +0 -71
  139. package/skills/graphify/skills/trae/references/extraction-spec.md +0 -68
  140. package/skills/graphify/skills/trae/references/github-and-merge.md +0 -46
  141. package/skills/graphify/skills/trae/references/hooks.md +0 -35
  142. package/skills/graphify/skills/trae/references/query.md +0 -249
  143. package/skills/graphify/skills/trae/references/transcribe.md +0 -48
  144. package/skills/graphify/skills/trae/references/update.md +0 -179
  145. package/skills/graphify/skills/vscode/references/add-watch.md +0 -56
  146. package/skills/graphify/skills/vscode/references/exports.md +0 -71
  147. package/skills/graphify/skills/vscode/references/extraction-spec.md +0 -68
  148. package/skills/graphify/skills/vscode/references/github-and-merge.md +0 -46
  149. package/skills/graphify/skills/vscode/references/hooks.md +0 -33
  150. package/skills/graphify/skills/vscode/references/query.md +0 -249
  151. package/skills/graphify/skills/vscode/references/transcribe.md +0 -48
  152. package/skills/graphify/skills/vscode/references/update.md +0 -179
  153. package/skills/graphify/skills/windows/references/add-watch.md +0 -56
  154. package/skills/graphify/skills/windows/references/exports.md +0 -71
  155. package/skills/graphify/skills/windows/references/extraction-spec.md +0 -68
  156. package/skills/graphify/skills/windows/references/github-and-merge.md +0 -46
  157. package/skills/graphify/skills/windows/references/hooks.md +0 -33
  158. package/skills/graphify/skills/windows/references/query.md +0 -249
  159. package/skills/graphify/skills/windows/references/transcribe.md +0 -48
  160. package/skills/graphify/skills/windows/references/update.md +0 -179
  161. package/skills/graphify/symbol_resolution.py +0 -538
  162. package/skills/graphify/transcribe.py +0 -184
  163. package/skills/graphify/tree_html.py +0 -582
  164. package/skills/graphify/validate.py +0 -72
  165. package/skills/graphify/watch.py +0 -898
  166. package/skills/graphify/wiki.py +0 -282
@@ -1,1309 +0,0 @@
1
- # MCP stdio server - exposes graph query tools to Claude and other agents
2
- from __future__ import annotations
3
- import json
4
- import math
5
- import re
6
- import sys
7
- from pathlib import Path
8
- import networkx as nx
9
- from networkx.readwrite import json_graph
10
- from graphify.security import sanitize_label, check_graph_file_size_cap
11
- from graphify.build import edge_data
12
-
13
- try:
14
- import jieba as _jieba # type: ignore[import-untyped]
15
- except ImportError:
16
- _jieba = None
17
-
18
-
19
def _load_graph(graph_path: str) -> nx.Graph:
    """Load a node-link JSON graph file and return it as a networkx graph.

    The path is resolved, must carry a ``.json`` suffix and must exist;
    ``check_graph_file_size_cap`` is called on it before the file is read.
    A payload that has ``edges`` but no ``links`` gets ``links`` aliased to
    ``edges``, and ``directed`` is always forced to ``True`` before the data
    is handed to ``json_graph.node_link_graph``.

    Args:
        graph_path: Path to the graph JSON file.

    Returns:
        The graph built by ``json_graph.node_link_graph``.

    Raises:
        SystemExit: With status 1 after printing a message to stderr when the
            path is not a ``.json`` file, the file is missing, a ``ValueError``
            is raised while loading, or the file is not valid JSON.
    """
    try:
        resolved = Path(graph_path).resolve()
        if resolved.suffix != ".json":
            raise ValueError(f"Graph path must be a .json file, got: {graph_path!r}")
        if not resolved.exists():
            raise FileNotFoundError(f"Graph file not found: {resolved}")
        check_graph_file_size_cap(resolved)
        data = json.loads(resolved.read_text(encoding="utf-8"))
        if "links" not in data and "edges" in data:
            data = dict(data, links=data["edges"])
        data = {**data, "directed": True}
        try:
            return json_graph.node_link_graph(data, edges="links")
        except TypeError:
            # Presumably for networkx versions whose node_link_graph does not
            # accept the ``edges`` keyword -- retry with the default signature.
            return json_graph.node_link_graph(data)
    except json.JSONDecodeError as exc:
        # json.JSONDecodeError is a subclass of ValueError, so this handler
        # must come before the generic one or it can never run.
        print(f"error: graph.json is corrupted ({exc}). Re-run /graphify to rebuild.", file=sys.stderr)
        sys.exit(1)
    except (ValueError, FileNotFoundError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        sys.exit(1)
42
-
43
-
44
def _communities_from_graph(G: nx.Graph) -> dict[int, list[str]]:
    """Group node IDs by the ``community`` attribute stored on each node.

    Nodes whose ``community`` attribute is missing or ``None`` are left out.
    The attribute value is passed through ``int`` to form the dictionary key.
    """
    grouped: dict[int, list[str]] = {}
    for nid, attrs in G.nodes(data=True):
        community = attrs.get("community")
        if community is None:
            continue
        key = int(community)
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(nid)
    return grouped
52
-
53
-
54
def _strip_diacritics(text: str) -> str:
    """Return *text* NFKD-normalized with every combining character dropped."""
    import unicodedata

    decomposed = unicodedata.normalize("NFKD", text)
    base_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(base_chars)
58
-
59
-
60
def _search_tokens(text: str) -> list[str]:
    """Return the lower-cased word tokens of *text* with diacritics removed.

    *text* is coerced with ``str`` first, so non-string values are accepted.
    """
    folded = _strip_diacritics(str(text)).lower()
    return re.findall(r"\w+", folded)
63
-
64
-
65
def _has_chinese(text: str) -> bool:
    """Return True when any character of *text* lies in the range "一".."鿿"."""
    for ch in text:
        if "一" <= ch <= "鿿":
            return True
    return False
67
-
68
-
69
def _segment_chinese(text: str) -> list[str]:
    """Split Chinese *text* into search segments, keeping the full term too.

    When the module-level ``_jieba`` is available its ``cut`` output is used
    (whitespace-only pieces dropped). Otherwise the text is split into
    overlapping two-character windows, or kept whole when it is too short to
    produce one. The original text is appended when it is longer than one
    character and not already among the segments.
    """
    if _jieba is None:
        pieces = [text[start:start + 2] for start in range(len(text) - 1)]
        if not pieces:
            pieces = [text]
    else:
        pieces = [word for word in _jieba.cut(text) if word.strip()]
    if len(text) > 1 and text not in pieces:
        pieces.append(text)
    return pieces
78
-
79
-
80
def _is_searchable(term: str) -> bool:
    """Decide whether *term* is worth searching for.

    A term made only of the characters ``a``-``z`` must be longer than two
    characters; any term containing some other character is always accepted.
    """
    only_ascii_lowercase = all("a" <= ch <= "z" for ch in term)
    return len(term) > 2 if only_ascii_lowercase else True
85
-
86
-
87
def _query_terms(question: str) -> list[str]:
    """Break a query into searchable terms.

    The question is split on whitespace. Words containing Chinese characters
    are lower-cased and segmented with ``_segment_chinese``; every other word
    is lower-cased and tokenized with ``\\w+``. Only terms accepted by
    ``_is_searchable`` are returned, in order of appearance.
    """
    found: list[str] = []
    for word in question.split():
        if not _has_chinese(word):
            # Plain \w+ tokenization only: no NFKD step here, so characters of
            # non-Latin scripts are left exactly as typed.
            found.extend(
                token
                for token in re.findall(r"\w+", word.lower())
                if _is_searchable(token)
            )
            continue
        for piece in _segment_chinese(word.lower().strip()):
            piece = piece.strip()
            if piece and _is_searchable(piece):
                found.append(piece)
    return found
102
-
103
-
104
# Per-term score bonuses used when ranking nodes against a query. Each one is
# multiplied by the term's IDF weight before being added to a node's score.
_EXACT_MATCH_BONUS = 1000.0     # term equals the node label
_PREFIX_MATCH_BONUS = 100.0     # node label starts with the term
_SUBSTRING_MATCH_BONUS = 1.0    # term occurs somewhere inside the label
_SOURCE_MATCH_BONUS = 0.5       # term occurs in the node's source_file
108
-
109
-
110
def _compute_idf(G: nx.Graph, terms: list[str]) -> dict[str, float]:
    """Return IDF weights for *terms*, cached in ``G.graph['_idf_cache']``.

    A term's document frequency is the number of nodes whose lower-cased
    ``norm_label`` (or diacritic-stripped ``label`` when ``norm_label`` is
    missing or empty) contains the term as a substring. The weight is
    ``log(1 + N / (1 + df))`` with ``N`` the node count (at least 1), so terms
    that match many nodes weigh less than rare ones.

    The cache lives on the graph object, so a new graph object starts with an
    empty cache.

    Args:
        G: Graph whose node labels are scanned.
        terms: Query terms; duplicates are allowed and counted only once.

    Returns:
        Mapping of each term in *terms* to its weight.
    """
    cache: dict[str, float] = G.graph.setdefault("_idf_cache", {})
    node_total = G.number_of_nodes() or 1
    # De-duplicate while keeping order: a term repeated in the query must not
    # have its document frequency incremented once per repetition.
    pending = list(dict.fromkeys(t for t in terms if t not in cache))
    if pending:
        doc_freq: dict[str, int] = dict.fromkeys(pending, 0)
        for _, data in G.nodes(data=True):
            norm_label = (
                data.get("norm_label") or _strip_diacritics(data.get("label") or "")
            ).lower()
            for term in pending:
                if term in norm_label:
                    doc_freq[term] += 1
        for term in pending:
            cache[term] = math.log(1 + node_total / (1 + doc_freq[term]))
    return {t: cache.get(t, math.log(1 + node_total)) for t in terms}
133
-
134
-
135
def _score_nodes(G: nx.Graph, terms: list[str]) -> list[tuple[float, str]]:
    """Score every node against the query terms and return them best-first.

    Returns ``(score, node_id)`` pairs for nodes with a positive score, sorted
    by descending score, then by shorter label, then by node ID.
    """
    query_tokens = [tok for term in terms for tok in _search_tokens(term)]
    weights = _compute_idf(G, query_tokens)
    # The whole query as one string, for matching against complete labels.
    phrase = " ".join(query_tokens)
    # The whole-query bonus is weighted by the largest IDF among the tokens
    # (1.0 when there are no tokens at all).
    phrase_weight = max((weights.get(tok, 1.0) for tok in query_tokens), default=1.0)

    ranked: list[tuple[float, str]] = []
    for nid, attrs in G.nodes(data=True):
        label_norm = attrs.get("norm_label") or _strip_diacritics(attrs.get("label") or "").lower()
        label_bare = label_norm.rstrip("()")
        # Label run through the same tokenizer as the query, so punctuation in
        # the label (":" or "-", say) cannot prevent a whole-query match.
        label_tokenized = " ".join(_search_tokens(attrs.get("label") or ""))
        source_path = (attrs.get("source_file") or "").lower()
        total = 0.0

        # Whole-query tier: scaled by 10 so a query that equals or prefixes the
        # full label outranks the per-token sums accumulated below.
        if phrase:
            if phrase in (label_norm, label_bare, label_tokenized, nid.lower()):
                total += _EXACT_MATCH_BONUS * 10 * phrase_weight
            elif any(
                form.startswith(phrase)
                for form in (label_norm, label_bare, label_tokenized)
            ):
                total += _PREFIX_MATCH_BONUS * 10 * phrase_weight

        for tok in query_tokens:
            weight = weights.get(tok, 1.0)
            # Only the strongest label tier counts for a token:
            # exact > prefix > substring.
            if tok in (label_norm, label_bare):
                bonus = _EXACT_MATCH_BONUS
            elif label_norm.startswith(tok) or label_bare.startswith(tok):
                bonus = _PREFIX_MATCH_BONUS
            elif tok in label_norm:
                bonus = _SUBSTRING_MATCH_BONUS
            else:
                bonus = None
            if bonus is not None:
                total += bonus * weight
            # The source-file bonus is independent of the label tiers.
            if tok in source_path:
                total += _SOURCE_MATCH_BONUS * weight

        if total > 0:
            ranked.append((total, nid))

    def _rank_key(entry: tuple[float, str]):
        score, node_id = entry
        # Ties go to the shorter label (node ID when there is no label).
        return (-score, len(G.nodes[node_id].get("label") or node_id), node_id)

    ranked.sort(key=_rank_key)
    return ranked
189
-
190
-
191
def _pick_seeds(scored: list[tuple[float, str]], max_k: int = 3, gap_ratio: float = 0.2) -> list[str]:
    """Choose traversal seed nodes from a best-first scored list.

    At most ``max_k`` leading entries are considered. The first one is always
    taken; selection stops at the first later entry whose score is below
    ``gap_ratio`` times the top score. With the defaults, a node scoring 1000
    followed by nodes scoring 1.0 yields a single seed.
    """
    if not scored:
        return []
    cutoff = scored[0][0] * gap_ratio
    chosen: list[str] = []
    for score, nid in scored[:max_k]:
        too_weak = score < cutoff
        if chosen and too_weak:
            break
        chosen.append(nid)
    return chosen
208
-
209
-
210
# (context name, words that suggest it) pairs, in the order the contexts are
# reported when inferred from a question.
_CONTEXT_HINTS: tuple[tuple[str, tuple[str, ...]], ...] = (
    (
        "call",
        ("call", "calls", "called", "invoke", "invokes", "invoked"),
    ),
    (
        "import",
        ("import", "imports", "imported", "module", "modules"),
    ),
    (
        "field",
        ("field", "fields", "member", "members", "property", "properties"),
    ),
    (
        "parameter_type",
        ("parameter", "parameters", "param", "params", "argument", "arguments"),
    ),
    (
        "return_type",
        ("return", "returns", "returned"),
    ),
    (
        "generic_arg",
        ("generic", "generics", "template", "templates"),
    ),
)
218
-
219
-
220
# Maps alternative spellings of a context filter to its canonical name.
# Names absent from this table are used unchanged by the normalizer.
_CONTEXT_FILTER_ALIASES: dict[str, str] = {
    # parameter_type
    "param": "parameter_type",
    "params": "parameter_type",
    "parameter": "parameter_type",
    "parameters": "parameter_type",
    "argument": "parameter_type",
    "arguments": "parameter_type",
    "arg": "parameter_type",
    "args": "parameter_type",
    # return_type
    "return": "return_type",
    "returns": "return_type",
    "returned": "return_type",
    # generic_arg
    "generic": "generic_arg",
    "generics": "generic_arg",
    "template": "generic_arg",
    "templates": "generic_arg",
    # attribute
    "annotation": "attribute",
    "annotations": "attribute",
    "decorator": "attribute",
    "decorators": "attribute",
    # call
    "calls": "call",
    "called": "call",
    "invoke": "call",
    # "invokes"/"invoked" are recognized as call hints when a context is
    # inferred from a question, so explicit filters accept them as well.
    "invokes": "call",
    "invoked": "call",
    "invocation": "call",
    # field
    "fields": "field",
    "property": "field",
    "properties": "field",
    "member": "field",
    "members": "field",
    # import
    "imports": "import",
    "imported": "import",
    "module": "import",
    "modules": "import",
    # export
    "exports": "export",
    "exported": "export",
}
256
-
257
-
258
def _normalize_context_filters(filters: list[str] | None) -> list[str]:
    """Canonicalize a list of context filter names.

    Each value is stringified, stripped of diacritics and surrounding
    whitespace, lower-cased, and mapped through ``_CONTEXT_FILTER_ALIASES``.
    Empty values are dropped and duplicates removed, keeping first-seen order.
    ``None`` or an empty list yields ``[]``.
    """
    if not filters:
        return []
    # A dict is used as an insertion-ordered set.
    canonical: dict[str, None] = {}
    for raw in filters:
        name = _strip_diacritics(str(raw)).strip().lower()
        if name:
            canonical.setdefault(_CONTEXT_FILTER_ALIASES.get(name, name), None)
    return list(canonical)
272
-
273
-
274
def _infer_context_filters(question: str) -> list[str]:
    """Guess context filters from hint words that appear in *question*.

    ``?`` and ``,`` are treated as separators; the remaining whitespace-split
    words are diacritic-stripped and lower-cased. A context from
    ``_CONTEXT_HINTS`` is returned when at least one of its hint words is
    among them, in ``_CONTEXT_HINTS`` order.
    """
    cleaned = question.replace("?", " ").replace(",", " ")
    words = {_strip_diacritics(word).lower() for word in cleaned.split()}
    return [
        context
        for context, hints in _CONTEXT_HINTS
        if not words.isdisjoint(hints)
    ]
284
-
285
-
286
def _resolve_context_filters(question: str, explicit_filters: list[str] | None = None) -> tuple[list[str], str | None]:
    """Pick the context filters to apply and report where they came from.

    Explicit filters win when any survive normalization (source
    ``"explicit"``); otherwise filters inferred from the question are used
    (source ``"heuristic"``). With neither, ``([], None)`` is returned.
    """
    explicit = _normalize_context_filters(explicit_filters)
    if explicit:
        return explicit, "explicit"
    guessed = _infer_context_filters(question)
    return (guessed, "heuristic") if guessed else ([], None)
294
-
295
-
296
def _filter_graph_by_context(G: nx.Graph, context_filters: list[str] | None) -> nx.Graph:
    """Return a graph keeping only edges whose ``context`` is in the filters.

    With no effective filters, *G* itself is returned. Otherwise a new graph
    of the same class is built containing every node of *G* (with data) and
    only the matching edges; multigraph edge keys are preserved.
    """
    wanted = set(_normalize_context_filters(context_filters))
    if not wanted:
        return G

    filtered = G.__class__()
    filtered.add_nodes_from(G.nodes(data=True))

    if not isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
        for u, v, attrs in G.edges(data=True):
            if attrs.get("context") not in wanted:
                continue
            filtered.add_edge(u, v, **attrs)
        return filtered

    for u, v, key, attrs in G.edges(keys=True, data=True):
        if attrs.get("context") not in wanted:
            continue
        filtered.add_edge(u, v, key=key, **attrs)
    return filtered
311
-
312
-
313
def _bfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], list[tuple]]:
    """Breadth-first expansion from *start_nodes* for up to *depth* levels.

    Returns the set of reached nodes (seeds included) and the list of
    ``(from, to)`` pairs through which new nodes were discovered. Non-seed
    nodes whose degree reaches the hub threshold are reached but not expanded.
    """
    # Hub threshold: the degree at the 99th-percentile index of the sorted
    # degree list, never lower than 50.
    degree_list = sorted(G.degree(n) for n in G.nodes())
    if degree_list:
        hub_threshold = max(50, degree_list[int(len(degree_list) * 0.99)])
    else:
        hub_threshold = 50

    seeds = set(start_nodes)
    reached: set[str] = set(start_nodes)
    current = set(start_nodes)
    discovered_edges: list[tuple] = []
    for _level in range(depth):
        discovered: set[str] = set()
        for node in current:
            # A seed is always expanded, even when it is itself a hub.
            is_blocked_hub = node not in seeds and G.degree(node) >= hub_threshold
            if is_blocked_hub:
                continue
            for neighbor in G.neighbors(node):
                if neighbor in reached:
                    continue
                discovered.add(neighbor)
                discovered_edges.append((node, neighbor))
        # Nodes found at this level only count as reached once it is finished.
        reached |= discovered
        current = discovered
    return reached, discovered_edges
341
-
342
-
343
def _dfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], list[tuple]]:
    """Depth-first expansion from *start_nodes*, limited to *depth* hops.

    Returns the set of visited nodes and the list of ``(from, to)`` pairs
    pushed while expanding them. Non-seed nodes whose degree reaches the hub
    threshold are visited but not expanded.
    """
    # Hub threshold: the degree at the 99th-percentile index of the sorted
    # degree list, never lower than 50.
    degree_list = sorted(G.degree(n) for n in G.nodes())
    if degree_list:
        hub_threshold = max(50, degree_list[int(len(degree_list) * 0.99)])
    else:
        hub_threshold = 50

    seeds = set(start_nodes)
    seen: set[str] = set()
    pushed_edges: list[tuple] = []
    # Reversed so that the first start node is the first one popped.
    pending = [(start, 0) for start in reversed(start_nodes)]
    while pending:
        node, hops = pending.pop()
        if hops > depth or node in seen:
            continue
        seen.add(node)
        # A seed is always expanded, even when it is itself a hub.
        if node not in seeds and G.degree(node) >= hub_threshold:
            continue
        for neighbor in G.neighbors(node):
            if neighbor in seen:
                continue
            pending.append((neighbor, hops + 1))
            pushed_edges.append((node, neighbor))
    return seen, pushed_edges
367
-
368
-
369
def _subgraph_to_text(G: nx.Graph, nodes: set[str], edges: list[tuple], token_budget: int = 2000, *, seeds: list[str] | None = None) -> str:
    """Render a subgraph as ``NODE``/``EDGE`` text lines within a size budget.

    Nodes listed in *seeds* (and present in *nodes*) come first, in the given
    order; the remaining nodes follow sorted by descending degree. Edges are
    rendered only when both endpoints are in *nodes*. The output is limited to
    ``token_budget * 3`` characters; when it is longer it is cut at the last
    line break inside the budget and a truncation notice is appended.
    """
    is_multi = isinstance(G, (nx.MultiGraph, nx.MultiDiGraph))
    seed_order = list(seeds or [])
    leading = [nid for nid in seed_order if nid in nodes]
    trailing = sorted(nodes - set(seed_order), key=G.degree, reverse=True)

    rendered: list[str] = []
    for nid in leading + trailing:
        attrs = G.nodes[nid]
        # Every field goes through sanitize_label before it is placed in tool
        # output, so graph content derived from an attacker-controlled corpus
        # document cannot inject escape sequences or prompt-injection markup.
        label = sanitize_label(attrs.get('label', nid))
        src = sanitize_label(str(attrs.get('source_file', '')))
        loc = sanitize_label(str(attrs.get('source_location', '')))
        community = sanitize_label(str(attrs.get('community', '')))
        rendered.append(f"NODE {label} [src={src} loc={loc} community={community}]")

    for u, v in edges:
        if u not in nodes or v not in nodes:
            continue
        edge_attrs = G[u][v]
        if is_multi:
            # Multigraphs map edge keys to attribute dicts; use the first one.
            edge_attrs = next(iter(edge_attrs.values()), {})
        context = edge_attrs.get("context")
        context_suffix = f" context={sanitize_label(str(context))}" if context else ""
        from_label = sanitize_label(G.nodes[u].get('label', u))
        relation = sanitize_label(str(edge_attrs.get('relation', '')))
        confidence = sanitize_label(str(edge_attrs.get('confidence', '')))
        to_label = sanitize_label(G.nodes[v].get('label', v))
        rendered.append(
            f"EDGE {from_label} --{relation} [{confidence}{context_suffix}]--> {to_label}"
        )

    text = "\n".join(rendered)
    char_budget = token_budget * 3
    if len(text) <= char_budget:
        return text

    # Cut on a line boundary when one exists inside the budget.
    cut_at = text[:char_budget].rfind("\n")
    if cut_at <= 0:
        cut_at = char_budget
    kept = text[:cut_at]
    total_nodes = sum(1 for entry in rendered if entry.startswith("NODE "))
    shown_nodes = kept.count("\nNODE ") + (1 if text.startswith("NODE ") else 0)
    cut_count = total_nodes - shown_nodes
    notice = (
        f"\n... (truncated — {cut_count} more nodes cut by ~{token_budget}-token budget."
        f" Narrow with context_filter=['call'] or use get_node for a specific symbol)"
    )
    return kept + notice
420
-
421
-
422
def _query_graph_text(
    G: nx.Graph,
    question: str,
    *,
    mode: str = "bfs",
    depth: int = 3,
    token_budget: int = 2000,
    context_filters: list[str] | None = None,
) -> str:
    """Answer a query as text: seed selection, traversal, then rendering.

    Seeds are picked from the nodes scored against the question. The traversal
    (DFS when *mode* is ``"dfs"``, BFS otherwise) runs on the graph filtered by
    the explicit or inferred context filters. The result is a one-line header
    followed by the rendered subgraph, or ``"No matching nodes found."`` when
    nothing scores.
    """
    seeds = _pick_seeds(_score_nodes(G, _query_terms(question)))
    if not seeds:
        return "No matching nodes found."

    active_filters, filter_origin = _resolve_context_filters(question, context_filters)
    search_graph = _filter_graph_by_context(G, active_filters)
    traverse = _dfs if mode == "dfs" else _bfs
    nodes, edges = traverse(search_graph, seeds, depth)

    seed_labels = [G.nodes[n].get('label', n) for n in seeds]
    header_fields = [
        f"Traversal: {mode.upper()} depth={depth}",
        f"Start: {seed_labels}",
    ]
    if active_filters:
        header_fields.append(f"Context: {', '.join(active_filters)} ({filter_origin})")
    header_fields.append(f"{len(nodes)} nodes found")

    body = _subgraph_to_text(search_graph, nodes, edges, token_budget)
    return " | ".join(header_fields) + "\n\n" + body
448
-
449
-
450
def _find_node(G: nx.Graph, label: str) -> list[str]:
    """Find node IDs whose label or ID matches *label* (diacritic-insensitive).

    The search term is the tokenized form of *label* joined with spaces; an
    empty term yields ``[]``. Matches are returned in three tiers, in this
    order: exact (label, label without trailing parentheses, or node ID),
    prefix (same three forms), then substring of the label.
    """
    needle = " ".join(_search_tokens(label))
    if not needle:
        return []

    exact_hits: list[str] = []
    prefix_hits: list[str] = []
    substring_hits: list[str] = []
    for nid, attrs in G.nodes(data=True):
        label_norm = attrs.get("norm_label") or _strip_diacritics(attrs.get("label") or "").lower()
        label_bare = label_norm.rstrip("()")
        candidates = (label_norm, label_bare, nid.lower())
        if needle in candidates:
            exact_hits.append(nid)
        elif any(form.startswith(needle) for form in candidates):
            prefix_hits.append(nid)
        elif needle in label_norm:
            substring_hits.append(nid)
    return [*exact_hits, *prefix_hits, *substring_hits]
473
-
474
-
475
def _filter_blank_stdin() -> None:
    """Replace stdin with a pipe that carries only the non-blank input lines.

    Some MCP clients send blank lines between JSON messages, and the stdio
    transport tries to parse every line as a message, so a bare newline
    triggers a validation error. A daemon thread copies the real stdin into a
    pipe, skipping blank lines, and the pipe's read end is installed as file
    descriptor 0.
    """
    import os
    import threading

    read_end, write_end = os.pipe()
    # Keep a duplicate of the real stdin before descriptor 0 is overwritten.
    real_stdin_fd = os.dup(sys.stdin.fileno())

    def _pump() -> None:
        try:
            with open(real_stdin_fd, "rb") as incoming, open(write_end, "wb") as outgoing:
                for raw_line in incoming:
                    if not raw_line.strip():
                        continue
                    outgoing.write(raw_line)
                    outgoing.flush()
        except Exception:
            # Best effort: a failure in the relay is deliberately ignored.
            pass

    pump_thread = threading.Thread(target=_pump, daemon=True)
    pump_thread.start()
    os.dup2(read_end, sys.stdin.fileno())
    os.close(read_end)
    sys.stdin = open(0, "r", closefd=False)
503
-
504
-
505
def _build_server(graph_path: str):
    """Build the configured low-level MCP Server (shared by every transport).

    All graph query tools and resources are registered here over a single
    ``mcp.server.Server`` instance; the caller picks the transport (stdio or
    Streamable HTTP) and runs it. Hot-reload of graph.json works the same way
    regardless of transport, since reloads happen inside the tool handlers.

    Args:
        graph_path: Path to the graph.json file to load and watch.

    Returns:
        The ``mcp.server.Server`` with every tool and resource registered.

    Raises:
        ImportError: If the ``mcp`` package is not installed.
    """
    import threading

    try:
        from mcp.server import Server
        from mcp import types
        from mcp.types import AnyUrl
    except ImportError as e:
        raise ImportError('mcp not installed. Run: pip install "graphifyy[mcp]"') from e

    G = _load_graph(graph_path)
    communities = _communities_from_graph(G)

    def _stat_key():
        """Return (mtime_ns, size) for graph.json, or None when it is missing."""
        try:
            s = Path(graph_path).stat()
        except FileNotFoundError:
            return None
        return (s.st_mtime_ns, s.st_size)

    # Hot-reload state: mtime+size key lets us detect graph.json changes without
    # polling. Initialised from the file stat at startup so the first tool call
    # never triggers a redundant reload.
    _reload_lock = threading.Lock()
    _initial_key = _stat_key()
    if _initial_key is None:
        _reload_state: dict = {"mtime_ns": 0, "size": -1}
    else:
        _reload_state = {"mtime_ns": _initial_key[0], "size": _initial_key[1]}

    def _maybe_reload() -> None:
        """Reload the graph when graph.json changed on disk since the last load."""
        nonlocal G, communities
        key = _stat_key()
        if key is None or key == (_reload_state["mtime_ns"], _reload_state["size"]):
            return
        with _reload_lock:
            # Re-check under the lock so concurrent callers reload only once.
            key = _stat_key()
            if key is None or key == (_reload_state["mtime_ns"], _reload_state["size"]):
                return  # another thread already reloaded
            try:
                new_G = _load_graph(graph_path)
            except (SystemExit, Exception):
                # Keep serving the stale graph on a transient read error, e.g.
                # a half-written file. The state key is left untouched so the
                # next call retries the reload.
                return
            G = new_G
            communities = _communities_from_graph(new_G)
            _reload_state["mtime_ns"], _reload_state["size"] = key

    def _confidence_breakdown() -> tuple:
        """Return (edge_count, {confidence level: count}) for the current graph."""
        confs = [d.get("confidence", "EXTRACTED") for _, _, d in G.edges(data=True)]
        counts = {level: confs.count(level) for level in ("EXTRACTED", "INFERRED", "AMBIGUOUS")}
        return len(confs), counts

    def _percent(count: int, edge_count: int) -> int:
        """Rounded percentage that is safe for an edge-less graph."""
        return round(count / (edge_count or 1) * 100)

    def _display(nid) -> str:
        """Sanitised display label for a node (F-010)."""
        return sanitize_label(str(G.nodes[nid].get("label", nid)))

    server = Server("graphify")

    @server.list_tools()
    async def list_tools() -> list[types.Tool]:
        return [
            types.Tool(
                name="query_graph",
                description="Search the knowledge graph using BFS or DFS. Returns relevant nodes and edges as text context.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "question": {"type": "string", "description": "Natural language question or keyword search"},
                        "mode": {"type": "string", "enum": ["bfs", "dfs"], "default": "bfs",
                                 "description": "bfs=broad context, dfs=trace a specific path"},
                        "depth": {"type": "integer", "default": 3, "description": "Traversal depth (1-6)"},
                        "token_budget": {"type": "integer", "default": 2000, "description": "Max output tokens"},
                        "context_filter": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Optional explicit edge-context filter, e.g. ['call', 'field']",
                        },
                    },
                    "required": ["question"],
                },
            ),
            types.Tool(
                name="get_node",
                description="Get full details for a specific node by label or ID.",
                inputSchema={
                    "type": "object",
                    "properties": {"label": {"type": "string", "description": "Node label or ID to look up"}},
                    "required": ["label"],
                },
            ),
            types.Tool(
                name="get_neighbors",
                description="Get all direct neighbors of a node with edge details.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "label": {"type": "string"},
                        "relation_filter": {"type": "string", "description": "Optional: filter by relation type"},
                    },
                    "required": ["label"],
                },
            ),
            types.Tool(
                name="get_community",
                description="Get all nodes in a community by community ID.",
                inputSchema={
                    "type": "object",
                    "properties": {"community_id": {"type": "integer", "description": "Community ID (0-indexed by size)"}},
                    "required": ["community_id"],
                },
            ),
            types.Tool(
                name="god_nodes",
                description="Return the most connected nodes - the core abstractions of the knowledge graph.",
                inputSchema={"type": "object", "properties": {"top_n": {"type": "integer", "default": 10}}},
            ),
            types.Tool(
                name="graph_stats",
                description="Return summary statistics: node count, edge count, communities, confidence breakdown.",
                inputSchema={"type": "object", "properties": {}},
            ),
            types.Tool(
                name="shortest_path",
                description="Find the shortest path between two concepts in the knowledge graph.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "source": {"type": "string", "description": "Source concept label or keyword"},
                        "target": {"type": "string", "description": "Target concept label or keyword"},
                        "max_hops": {"type": "integer", "default": 8, "description": "Maximum hops to consider"},
                    },
                    "required": ["source", "target"],
                },
            ),
            types.Tool(
                name="list_prs",
                description=(
                    "List open GitHub PRs with CI status, review state, and graph impact "
                    "(which communities each PR touches, blast radius). Use this before starting "
                    "work to check if a PR already covers the area you're about to change."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "base": {"type": "string", "description": "Base branch to filter PRs by (auto-detected if omitted)"},
                        "repo": {"type": "string", "description": "GitHub repo (owner/repo). Defaults to current repo."},
                    },
                },
            ),
            types.Tool(
                name="get_pr_impact",
                description=(
                    "Get detailed graph impact for a specific PR: which files it changes, "
                    "which knowledge-graph communities are affected, and how many nodes are touched. "
                    "Use this to assess merge risk or check for overlap with your current work."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "pr_number": {"type": "integer", "description": "PR number to analyse"},
                        "repo": {"type": "string", "description": "GitHub repo (owner/repo). Defaults to current repo."},
                    },
                    "required": ["pr_number"],
                },
            ),
            types.Tool(
                name="triage_prs",
                description=(
                    "Return all actionable open PRs (correct base, not stale) with full graph impact data "
                    "so you can reason about review priority, merge order, and conflict risk. "
                    "Call this when the user asks 'what PRs should I review?' or 'what's ready to merge?'"
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "base": {"type": "string", "description": "Base branch to filter PRs by (auto-detected if omitted)"},
                        "repo": {"type": "string", "description": "GitHub repo (owner/repo). Defaults to current repo."},
                    },
                },
            ),
        ]

    def _tool_query_graph(arguments: dict) -> str:
        """Run a BFS/DFS text query over the graph and record it in the query log."""
        import time as _time
        from graphify import querylog
        question = arguments["question"]
        mode = arguments.get("mode", "bfs")
        depth = min(int(arguments.get("depth", 3)), 6)
        budget = int(arguments.get("token_budget", 2000))
        context_filter = arguments.get("context_filter")
        _t0 = _time.perf_counter()
        result = _query_graph_text(
            G,
            question,
            mode=mode,
            depth=depth,
            token_budget=budget,
            context_filters=context_filter,
        )
        querylog.log_query(
            kind="mcp_query",
            question=question,
            corpus=str(graph_path),
            result=result,
            mode=mode,
            depth=depth,
            token_budget=budget,
            duration_ms=(_time.perf_counter() - _t0) * 1000,
        )
        return result

    def _tool_get_node(arguments: dict) -> str:
        """Describe one node; an exact label/ID match wins over a substring hit."""
        label = arguments["label"].lower()
        exact = None
        partial = None
        for nid, d in G.nodes(data=True):
            node_label = (d.get("label") or "").lower()
            if label == node_label or label == str(nid).lower():
                exact = (nid, d)
                break
            if partial is None and label in node_label:
                # Remember the first substring hit as a fallback only.
                partial = (nid, d)
        match = exact or partial
        if match is None:
            return f"No node matching '{label}' found."
        nid, d = match
        # Sanitise every LLM-derived field before concatenation (F-010).
        return "\n".join([
            f"Node: {sanitize_label(d.get('label', nid))}",
            f" ID: {sanitize_label(nid)}",
            f" Source: {sanitize_label(str(d.get('source_file', '')))} {sanitize_label(str(d.get('source_location', '')))}",
            f" Type: {sanitize_label(str(d.get('file_type', '')))}",
            f" Community: {sanitize_label(str(d.get('community', '')))}",
            f" Degree: {G.degree(nid)}",
        ])

    def _tool_get_neighbors(arguments: dict) -> str:
        """List outgoing and incoming edges of a node, optionally filtered by relation."""
        label = arguments["label"].lower()
        # A client may send an explicit null for the optional filter.
        rel_filter = (arguments.get("relation_filter") or "").lower()
        matches = _find_node(G, label)
        if not matches:
            return f"No node matching '{label}' found."
        nid = matches[0]
        lines = [f"Neighbors of {sanitize_label(G.nodes[nid].get('label', nid))}:"]
        directions = (
            ("-->", G.successors(nid), True),
            ("<--", G.predecessors(nid), False),
        )
        for arrow, neighbours, outgoing in directions:
            for nb in neighbours:
                d = edge_data(G, nid, nb) if outgoing else edge_data(G, nb, nid)
                rel = d.get("relation", "")
                # Tolerate edges whose relation is missing or None.
                if rel_filter and rel_filter not in str(rel or "").lower():
                    continue
                lines.append(
                    f" {arrow} {sanitize_label(G.nodes[nb].get('label', nb))} "
                    f"[{sanitize_label(str(rel))}] [{sanitize_label(str(d.get('confidence', '')))}]"
                )
        return "\n".join(lines)

    def _tool_get_community(arguments: dict) -> str:
        """List every node in the community with the given ID."""
        cid = int(arguments["community_id"])
        nodes = communities.get(cid, [])
        if not nodes:
            return f"Community {cid} not found."
        lines = [f"Community {cid} ({len(nodes)} nodes):"]
        for n in nodes:
            d = G.nodes[n]
            # Sanitise label and source_file (F-010).
            lines.append(
                f" {sanitize_label(d.get('label', n))} "
                f"[{sanitize_label(str(d.get('source_file', '')))}]"
            )
        return "\n".join(lines)

    def _tool_god_nodes(arguments: dict) -> str:
        """Return the ``top_n`` most connected nodes as a numbered list."""
        from graphify.analyze import god_nodes as _god_nodes
        nodes = _god_nodes(G, top_n=int(arguments.get("top_n", 10)))
        lines = ["God nodes (most connected):"]
        # Labels are graph-derived, so sanitise them like the other tools (F-010).
        lines += [
            f" {i}. {sanitize_label(str(n['label']))} - {n['degree']} edges"
            for i, n in enumerate(nodes, 1)
        ]
        return "\n".join(lines)

    def _tool_graph_stats(_: dict) -> str:
        """Return node/edge/community counts and the confidence percentages."""
        edge_count, counts = _confidence_breakdown()
        return (
            f"Nodes: {G.number_of_nodes()}\n"
            f"Edges: {G.number_of_edges()}\n"
            f"Communities: {len(communities)}\n"
            f"EXTRACTED: {_percent(counts['EXTRACTED'], edge_count)}%\n"
            f"INFERRED: {_percent(counts['INFERRED'], edge_count)}%\n"
            f"AMBIGUOUS: {_percent(counts['AMBIGUOUS'], edge_count)}%\n"
        )

    def _tool_shortest_path(arguments: dict) -> str:
        """Find and render the shortest undirected path between two concepts."""
        src_scored = _score_nodes(G, [t.lower() for t in arguments["source"].split()])
        tgt_scored = _score_nodes(G, [t.lower() for t in arguments["target"].split()])
        if not src_scored:
            return f"No node matching source '{arguments['source']}' found."
        if not tgt_scored:
            return f"No node matching target '{arguments['target']}' found."
        src_nid, tgt_nid = src_scored[0][1], tgt_scored[0][1]
        # Ambiguity guard: when both queries resolve to the same node, the
        # shortest path is trivially zero hops, which is almost never what the
        # caller wanted (see bug #828).
        if src_nid == tgt_nid:
            return (
                f"'{arguments['source']}' and '{arguments['target']}' both resolved to "
                f"the same node '{src_nid}'. Use a more specific label or the exact node ID."
            )
        warnings: list[str] = []
        for name, scored in (("source", src_scored), ("target", tgt_scored)):
            if len(scored) >= 2:
                top, runner = scored[0][0], scored[1][0]
                # Warn when the runner-up scored within 10% of the winner.
                if top > 0 and (top - runner) / top < 0.10:
                    warnings.append(
                        f"warning: {name} match was ambiguous "
                        f"(top score {top:g}, runner-up {runner:g})"
                    )
        max_hops = int(arguments.get("max_hops", 8))
        try:
            # Use undirected view for path-finding (works regardless of query src/tgt order)
            path_nodes = nx.shortest_path(G.to_undirected(as_view=True), src_nid, tgt_nid)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            return f"No path found between '{_display(src_nid)}' and '{_display(tgt_nid)}'."
        hops = len(path_nodes) - 1
        if hops > max_hops:
            return f"Path exceeds max_hops={max_hops} ({hops} hops found)."
        segments = [_display(path_nodes[0])]
        for u, v in zip(path_nodes, path_nodes[1:]):
            # The path came from the undirected view, so recover the real
            # edge direction to draw the arrow the right way round.
            forward = G.has_edge(u, v)
            edata = edge_data(G, u, v) if forward else edge_data(G, v, u)
            rel = sanitize_label(str(edata.get("relation", "")))
            conf = edata.get("confidence", "")
            conf_str = f" [{sanitize_label(str(conf))}]" if conf else ""
            if forward:
                segments.append(f"--{rel}{conf_str}--> {_display(v)}")
            else:
                segments.append(f"<--{rel}{conf_str}-- {_display(v)}")
        prefix = ("\n".join(warnings) + "\n") if warnings else ""
        return prefix + f"Shortest path ({hops} hops):\n " + " ".join(segments)

    def _tool_list_prs(arguments: dict) -> str:
        """List open PRs for the base branch, annotated with local worktrees."""
        from graphify.prs import fetch_prs, fetch_worktrees, format_prs_text, _detect_default_branch
        repo = arguments.get("repo") or None
        base = arguments.get("base") or _detect_default_branch(repo)
        try:
            prs = fetch_prs(repo=repo, base=base)
        except RuntimeError as e:
            return f"Error: {e}"
        worktrees = fetch_worktrees()
        for pr in prs:
            pr.worktree_path = worktrees.get(pr.branch)
        return format_prs_text(prs, base)

    def _tool_get_pr_impact(arguments: dict) -> str:
        """Summarise which files, communities and nodes a single PR touches."""
        from graphify.prs import fetch_pr_files, compute_pr_impact, _gh, _parse_ci
        number = int(arguments["pr_number"])
        repo = arguments.get("repo") or None
        # Use gh pr view directly — works for any base branch, not just the default
        view_args = ["pr", "view", str(number), "--json",
                     "title,headRefName,baseRefName,author,isDraft,reviewDecision,statusCheckRollup,updatedAt"]
        if repo:
            view_args += ["--repo", repo]
        pr_data = _gh(*view_args)
        if pr_data is None:
            return f"PR #{number} not found or gh not authenticated."
        files = fetch_pr_files(number, repo)
        if not files:
            return f"PR #{number}: no changed files found (may require gh auth)."
        comms, nodes = compute_pr_impact(files, G)
        ci = _parse_ci(pr_data.get("statusCheckRollup") or [])
        lines = [
            f"PR #{number}: {pr_data['title']}",
            f"CI: {ci} Review: {pr_data.get('reviewDecision') or 'none'}",
            f"Base: {pr_data['baseRefName']} Author: {(pr_data.get('author') or {}).get('login', '?')}",
            f"\nGraph impact: {nodes} nodes across {len(comms)} communities",
            f"Communities touched: {comms}",
            f"Files changed ({len(files)}):",
        ]
        lines += [f" {f}" for f in files[:20]]
        if len(files) > 20:
            lines.append(f" … and {len(files) - 20} more")
        return "\n".join(lines)

    def _tool_triage_prs(arguments: dict) -> str:
        """Return actionable PRs with graph impact, ordered by status priority."""
        from concurrent.futures import ThreadPoolExecutor, as_completed
        from graphify.prs import fetch_prs, fetch_worktrees, fetch_pr_files, compute_pr_impact, _STATUS_ORDER, _detect_default_branch
        repo = arguments.get("repo") or None
        base = arguments.get("base") or _detect_default_branch(repo)
        try:
            prs = fetch_prs(repo=repo, base=base)
        except RuntimeError as e:
            return f"Error: {e}"
        worktrees = fetch_worktrees()
        for pr in prs:
            pr.worktree_path = worktrees.get(pr.branch)
        actionable = [p for p in prs if p.base_branch == base and p.status not in ("WRONG-BASE", "STALE")]
        if not actionable:
            return f"No actionable PRs targeting {base}."
        # Fetch diffs concurrently then compute graph impact using in-memory G
        workers = min(8, len(actionable))
        with ThreadPoolExecutor(max_workers=workers) as pool:
            future_to_pr = {pool.submit(fetch_pr_files, pr.number, repo): pr for pr in actionable}
            for fut in as_completed(future_to_pr):
                pr = future_to_pr[fut]
                try:
                    files = fut.result()
                except Exception:
                    # A failed fetch leaves this PR without impact data.
                    files = []
                if files:
                    pr.files_changed = files
                    pr.communities_touched, pr.nodes_affected = compute_pr_impact(files, G)
        header = (
            f"Actionable PRs targeting {base}: {len(actionable)}\n"
            "Rank these by review priority. Higher blast_radius = more graph communities affected = higher merge risk.\n"
        )
        lines = [header]
        for p in sorted(actionable, key=lambda x: (_STATUS_ORDER.index(x.status) if x.status in _STATUS_ORDER else 99)):
            impact = f" blast_radius={p.blast_radius}" if p.blast_radius else ""
            wt = f" worktree={p.worktree_path}" if p.worktree_path else ""
            lines.append(
                f"PR #{p.number} [{p.status}] CI={p.ci_status} review={p.review_decision or 'none'} "
                f"age={p.days_old}d author={p.author}{impact}{wt}\n title: {p.title}"
            )
        return "\n\n".join(lines)

    # Tool name -> synchronous handler; looked up by call_tool below.
    _handlers = {
        "query_graph": _tool_query_graph,
        "get_node": _tool_get_node,
        "get_neighbors": _tool_get_neighbors,
        "get_community": _tool_get_community,
        "god_nodes": _tool_god_nodes,
        "graph_stats": _tool_graph_stats,
        "shortest_path": _tool_shortest_path,
        "list_prs": _tool_list_prs,
        "get_pr_impact": _tool_get_pr_impact,
        "triage_prs": _tool_triage_prs,
    }

    def _load_community_labels() -> dict[int, str]:
        """Read .graphify_labels.json next to the graph, else use generic names."""
        labels_path = Path(graph_path).parent / ".graphify_labels.json"
        if labels_path.exists():
            try:
                return {int(k): v for k, v in json.loads(labels_path.read_text(encoding="utf-8")).items()}
            except Exception:
                # Unreadable or malformed labels file: fall back to defaults.
                pass
        return {cid: f"Community {cid}" for cid in communities}

    @server.list_resources()
    async def list_resources() -> list[types.Resource]:
        return [
            types.Resource(uri=AnyUrl("graphify://report"), name="Graph Report", description="Full GRAPH_REPORT.md", mimeType="text/markdown"),
            types.Resource(uri=AnyUrl("graphify://stats"), name="Graph Stats", description="Node/edge/community counts and confidence breakdown", mimeType="text/plain"),
            types.Resource(uri=AnyUrl("graphify://god-nodes"), name="God Nodes", description="Top 10 most-connected nodes", mimeType="text/plain"),
            types.Resource(uri=AnyUrl("graphify://surprises"), name="Surprising Connections", description="Cross-community surprising connections", mimeType="text/plain"),
            types.Resource(uri=AnyUrl("graphify://audit"), name="Confidence Audit", description="EXTRACTED/INFERRED/AMBIGUOUS edge breakdown", mimeType="text/plain"),
            types.Resource(uri=AnyUrl("graphify://questions"), name="Suggested Questions", description="Suggested questions for this codebase", mimeType="text/plain"),
        ]

    @server.read_resource()
    async def read_resource(uri: AnyUrl) -> str:
        _maybe_reload()
        uri_str = str(uri)
        if uri_str == "graphify://report":
            report_path = Path(graph_path).parent / "GRAPH_REPORT.md"
            if report_path.exists():
                return report_path.read_text(encoding="utf-8")
            return "GRAPH_REPORT.md not found. Run graphify extract first."
        if uri_str == "graphify://stats":
            return _tool_graph_stats({})
        if uri_str == "graphify://god-nodes":
            return _tool_god_nodes({"top_n": 10})
        if uri_str == "graphify://surprises":
            try:
                from graphify.analyze import surprising_connections
                surprises = surprising_connections(G, communities, top_n=10)
                if not surprises:
                    return "No surprising connections found."
                lines = ["Surprising cross-community connections:"]
                for s in surprises:
                    # Graph-derived text is sanitised before output (F-010).
                    lines.append(
                        f" {sanitize_label(str(s.get('source', '')))} <-> "
                        f"{sanitize_label(str(s.get('target', '')))} "
                        f"[{sanitize_label(str(s.get('relation', '')))}]"
                    )
                return "\n".join(lines)
            except Exception as exc:
                return f"Could not compute surprising connections: {exc}"
        if uri_str == "graphify://audit":
            # Report the real edge count; the zero-division guard lives in
            # _percent and must not leak into the displayed total.
            edge_count, counts = _confidence_breakdown()
            return (
                f"Total edges: {edge_count}\n"
                f"EXTRACTED: {counts['EXTRACTED']} ({_percent(counts['EXTRACTED'], edge_count)}%)\n"
                f"INFERRED: {counts['INFERRED']} ({_percent(counts['INFERRED'], edge_count)}%)\n"
                f"AMBIGUOUS: {counts['AMBIGUOUS']} ({_percent(counts['AMBIGUOUS'], edge_count)}%)\n"
            )
        if uri_str == "graphify://questions":
            try:
                from graphify.analyze import suggest_questions
                community_labels = _load_community_labels()
                questions = suggest_questions(G, communities, community_labels, top_n=10)
                if not questions:
                    return "No suggested questions available."
                lines = ["Suggested questions:"]
                for q in questions:
                    if isinstance(q, dict):
                        lines.append(f" - {q.get('question', '')}")
                    else:
                        lines.append(f" - {q}")
                return "\n".join(lines)
            except Exception as exc:
                return f"Could not generate questions: {exc}"
        raise ValueError(f"Unknown resource: {uri_str}")

    @server.call_tool()
    async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
        _maybe_reload()
        handler = _handlers.get(name)
        if not handler:
            return [types.TextContent(type="text", text=f"Unknown tool: {name}")]
        try:
            return [types.TextContent(type="text", text=handler(arguments))]
        except Exception as exc:
            # Report handler failures as tool output rather than raising.
            return [types.TextContent(type="text", text=f"Error executing {name}: {exc}")]

    return server
1031
-
1032
-
1033
def serve(graph_path: str = "graphify-out/graph.json") -> None:
    """Run the MCP server on stdio, the default per-developer transport.

    Args:
        graph_path: Path to the graph.json file to serve.

    Raises:
        ImportError: If the ``mcp`` package is not installed.
    """
    try:
        from mcp.server.stdio import stdio_server
    except ImportError as e:
        raise ImportError('mcp not installed. Run: pip install "graphifyy[mcp]"') from e
    import asyncio

    server = _build_server(graph_path)

    async def main() -> None:
        async with stdio_server() as streams:
            reader, writer = streams[0], streams[1]
            init_options = server.create_initialization_options()
            await server.run(reader, writer, init_options)

    # Install the blank-line filter on stdin before the transport reads from it.
    _filter_blank_stdin()
    asyncio.run(main())
1049
-
1050
-
1051
class _MCPASGIApp:
    """Raw-ASGI adapter in front of the Streamable HTTP session manager.

    It is handed to a Starlette ``Route`` as a class instance rather than a
    function so that Starlette treats it as an ASGI app: the exact mount path
    is served for every method (GET/POST/DELETE) with no request/response
    wrapping and no trailing-slash redirect — mirroring how FastMCP mounts the
    same manager.
    """

    def __init__(self, manager) -> None:
        # The session manager that actually handles each request.
        self._manager = manager

    async def __call__(self, scope, receive, send) -> None:
        """Forward the ASGI triple unchanged to the session manager."""
        handle = self._manager.handle_request
        await handle(scope, receive, send)
1065
-
1066
-
1067
class _ApiKeyMiddleware:
    """Pure-ASGI API-key gate for the HTTP transport.

    Implemented as raw ASGI (not Starlette's BaseHTTPMiddleware) on purpose:
    BaseHTTPMiddleware buffers responses and breaks the Streamable HTTP SSE
    stream. This short-circuits with 401 before the request ever reaches the
    session manager, leaving the streaming path untouched for authorized calls.

    The key is accepted from ``X-API-Key: <key>`` or, when that header is
    absent, from ``Authorization: Bearer <key>``.
    """

    def __init__(self, app, api_key: str) -> None:
        """Wrap ``app`` so HTTP requests must present ``api_key``.

        Raises:
            ValueError: If ``api_key`` is empty. An empty expected key would
                compare equal to an empty ``X-API-Key:`` header and silently
                authorize the request, so the gate refuses to be built.
        """
        if not api_key:
            raise ValueError("api_key must be a non-empty string")
        self.app = app
        self._expected = api_key.encode("utf-8")

    async def __call__(self, scope, receive, send) -> None:
        """Pass authorized HTTP requests through; answer the rest with 401."""
        # Non-HTTP scopes (anything whose type is not "http") are not gated.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return
        import hmac
        headers = dict(scope.get("headers") or [])
        provided = headers.get(b"x-api-key")
        if provided is None:
            # RFC 6750: the auth scheme token is case-insensitive.
            scheme, _, token = headers.get(b"authorization", b"").partition(b" ")
            if scheme.lower() == b"bearer" and token:
                provided = token.strip()
        # Constant-time compare; reject when no key was supplied at all.
        if provided is None or not hmac.compare_digest(provided, self._expected):
            body = b'{"error": "unauthorized"}'
            await send({
                "type": "http.response.start",
                "status": 401,
                "headers": [
                    (b"content-type", b"application/json"),
                    (b"content-length", str(len(body)).encode("ascii")),
                ],
            })
            await send({"type": "http.response.body", "body": body})
            return
        await self.app(scope, receive, send)
1106
-
1107
-
1108
- def _build_http_app(
1109
- graph_path: str,
1110
- *,
1111
- host: str = "127.0.0.1",
1112
- port: int = 8080,
1113
- api_key: str | None = None,
1114
- path: str = "/mcp",
1115
- json_response: bool = False,
1116
- stateless: bool = False,
1117
- session_timeout: float | None = 3600.0,
1118
- ):
1119
- """Build the Starlette ASGI app for the Streamable HTTP transport.
1120
-
1121
- Split out from :func:`serve_http` (which blocks on uvicorn) so the wiring
1122
- can be exercised with an in-process ASGI test client.
1123
-
1124
- ``session_timeout`` reaps stateful sessions idle for that many seconds so a
1125
- long-running shared server does not leak memory when IDE clients disconnect
1126
- without sending a DELETE. ``None`` (or <= 0) disables reaping; it is forced
1127
- to ``None`` in stateless mode, which has no sessions to reap.
1128
- """
1129
- try:
1130
- import contextlib
1131
-
1132
- from starlette.applications import Starlette
1133
- from starlette.middleware import Middleware
1134
- from starlette.routing import Route
1135
-
1136
- from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
1137
- from mcp.server.transport_security import TransportSecuritySettings
1138
- except ImportError as e:
1139
- raise ImportError(
1140
- 'HTTP transport needs the mcp extra (mcp + starlette + uvicorn). '
1141
- 'Run: pip install "graphifyy[mcp]"'
1142
- ) from e
1143
-
1144
- # A blank key (e.g. --api-key "" or an empty GRAPHIFY_API_KEY) must not be
1145
- # mistaken for "auth on" — normalize it to None so the gate is unambiguous.
1146
- api_key = (api_key or "").strip() or None
1147
-
1148
- server = _build_server(graph_path)
1149
-
1150
- # DNS-rebinding protection. When the operator binds a wildcard address they
1151
- # are intentionally exposing the server, so accept any Host header; for a
1152
- # loopback/specific bind, restrict Host to that address (with and without
1153
- # the port) plus the localhost aliases.
1154
- if host in ("0.0.0.0", "::", ""):
1155
- security = TransportSecuritySettings(enable_dns_rebinding_protection=False)
1156
- else:
1157
- allowed = {host, "localhost", "127.0.0.1"}
1158
- allowed |= {f"{h}:{port}" for h in list(allowed)}
1159
- security = TransportSecuritySettings(allowed_hosts=sorted(allowed))
1160
-
1161
- # The SDK rejects a non-positive timeout and forbids one in stateless mode.
1162
- idle_timeout = None if (stateless or not session_timeout or session_timeout <= 0) else session_timeout
1163
-
1164
- manager = StreamableHTTPSessionManager(
1165
- app=server,
1166
- json_response=json_response,
1167
- stateless=stateless,
1168
- security_settings=security,
1169
- session_idle_timeout=idle_timeout,
1170
- )
1171
-
1172
- @contextlib.asynccontextmanager
1173
- async def lifespan(_app):
1174
- # The session manager owns an anyio task group that must wrap the whole
1175
- # server lifetime, so enter it here rather than per-request.
1176
- async with manager.run():
1177
- yield
1178
-
1179
- middleware = []
1180
- if api_key:
1181
- middleware.append(Middleware(_ApiKeyMiddleware, api_key=api_key))
1182
-
1183
- return Starlette(
1184
- routes=[Route(path, endpoint=_MCPASGIApp(manager))],
1185
- middleware=middleware,
1186
- lifespan=lifespan,
1187
- )
1188
-
1189
-
1190
def serve_http(
    graph_path: str = "graphify-out/graph.json",
    *,
    host: str = "127.0.0.1",
    port: int = 8080,
    api_key: str | None = None,
    path: str = "/mcp",
    json_response: bool = False,
    stateless: bool = False,
    session_timeout: float | None = 3600.0,
) -> None:
    """Run the MCP server over Streamable HTTP (MCP spec 2025-03-26).

    The HTTP transport exposes the same tools and resources as stdio, which
    lets one shared process host the graph for a whole team. IDE MCP configs
    point at ``http://<host>:<port><path>`` (default ``/mcp``).

    Passing ``api_key`` (the CLI also reads ``GRAPHIFY_API_KEY``) turns on a
    simple header check: ``Authorization: Bearer <key>`` or
    ``X-API-Key: <key>``. OAuth is a deliberate follow-up. Binding
    ``0.0.0.0`` exposes the server beyond localhost — set an api_key when
    you do.

    Raises:
        ImportError: If uvicorn (part of the mcp extra) is not installed.
    """
    try:
        import uvicorn
    except ImportError as e:
        raise ImportError(
            'HTTP transport needs the mcp extra (mcp + starlette + uvicorn). '
            'Run: pip install "graphifyy[mcp]"'
        ) from e

    # Blank keys count as "no key": api_key is None or a non-empty string below.
    api_key = (api_key or "").strip() or None

    asgi_app = _build_http_app(
        graph_path,
        host=host,
        port=port,
        api_key=api_key,
        path=path,
        json_response=json_response,
        stateless=stateless,
        session_timeout=session_timeout,
    )

    if api_key:
        auth_note = "api-key required"
    else:
        auth_note = "no auth (set --api-key to require one)"
    banner = f"graphify MCP server (streamable-http) on http://{host}:{port}{path} - {auth_note}"
    print(banner, file=sys.stderr)

    # Warn loudly when a wildcard bind is combined with no authentication.
    wildcard_bind = host in ("0.0.0.0", "::", "")
    if wildcard_bind and api_key is None:
        print(
            f"WARNING: binding {host or '0.0.0.0'} with no api-key exposes the graph "
            "unauthenticated on the network. Set --api-key (or GRAPHIFY_API_KEY).",
            file=sys.stderr,
        )
    uvicorn.run(asgi_app, host=host, port=port)
1245
-
1246
-
1247
- def _main(argv: list[str] | None = None) -> None:
1248
- import argparse
1249
- import os
1250
-
1251
- parser = argparse.ArgumentParser(
1252
- prog="python -m graphify.serve",
1253
- description="Serve a graphify knowledge graph over MCP (stdio or Streamable HTTP).",
1254
- )
1255
- parser.add_argument(
1256
- "graph_path",
1257
- nargs="?",
1258
- default="graphify-out/graph.json",
1259
- help="Path to graph.json (default: graphify-out/graph.json)",
1260
- )
1261
- parser.add_argument(
1262
- "--transport",
1263
- choices=["stdio", "http"],
1264
- default="stdio",
1265
- help="Transport to serve on (default: stdio)",
1266
- )
1267
- parser.add_argument("--host", default="127.0.0.1", help="HTTP bind host (default: 127.0.0.1)")
1268
- parser.add_argument("--port", type=int, default=8080, help="HTTP bind port (default: 8080)")
1269
- parser.add_argument(
1270
- "--api-key",
1271
- default=os.environ.get("GRAPHIFY_API_KEY"),
1272
- help="Require this key on the HTTP transport (env: GRAPHIFY_API_KEY)",
1273
- )
1274
- parser.add_argument("--path", default="/mcp", help="HTTP mount path (default: /mcp)")
1275
- parser.add_argument(
1276
- "--json-response",
1277
- action="store_true",
1278
- help="Return plain JSON responses instead of SSE streams",
1279
- )
1280
- parser.add_argument(
1281
- "--stateless",
1282
- action="store_true",
1283
- help="Run without per-session state (for load-balanced / CI deployments)",
1284
- )
1285
- parser.add_argument(
1286
- "--session-timeout",
1287
- type=float,
1288
- default=3600.0,
1289
- help="Reap stateful sessions idle this many seconds (default: 3600; 0 disables)",
1290
- )
1291
- args = parser.parse_args(argv)
1292
-
1293
- if args.transport == "http":
1294
- serve_http(
1295
- args.graph_path,
1296
- host=args.host,
1297
- port=args.port,
1298
- api_key=args.api_key,
1299
- path=args.path,
1300
- json_response=args.json_response,
1301
- stateless=args.stateless,
1302
- session_timeout=args.session_timeout,
1303
- )
1304
- else:
1305
- serve(args.graph_path)
1306
-
1307
-
1308
# Script entry point: parse the command line and start the chosen transport.
if __name__ == "__main__":
    _main()