@oriro/orirocli 0.1.9 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/README.md +16 -18
  2. package/dist/cli.js +4776 -2964
  3. package/package.json +2 -2
  4. package/skills/craft/ai-engineering/SKILL.md +2 -2
  5. package/skills/graphify/SKILL.md +0 -619
  6. package/skills/graphify/__init__.py +0 -28
  7. package/skills/graphify/__main__.py +0 -4582
  8. package/skills/graphify/affected.py +0 -154
  9. package/skills/graphify/always_on/agents-md.md +0 -12
  10. package/skills/graphify/always_on/antigravity-rules.md +0 -14
  11. package/skills/graphify/always_on/claude-md.md +0 -9
  12. package/skills/graphify/always_on/gemini-md.md +0 -9
  13. package/skills/graphify/always_on/kiro-steering.md +0 -5
  14. package/skills/graphify/always_on/vscode-instructions.md +0 -17
  15. package/skills/graphify/analyze.py +0 -724
  16. package/skills/graphify/benchmark.py +0 -155
  17. package/skills/graphify/build.py +0 -487
  18. package/skills/graphify/cache.py +0 -417
  19. package/skills/graphify/callflow_html.py +0 -2020
  20. package/skills/graphify/cluster.py +0 -272
  21. package/skills/graphify/command-kilo.md +0 -15
  22. package/skills/graphify/dedup.py +0 -429
  23. package/skills/graphify/detect.py +0 -1379
  24. package/skills/graphify/diagnostics.py +0 -390
  25. package/skills/graphify/export.py +0 -1408
  26. package/skills/graphify/extract.py +0 -11570
  27. package/skills/graphify/global_graph.py +0 -159
  28. package/skills/graphify/google_workspace.py +0 -223
  29. package/skills/graphify/hooks.py +0 -457
  30. package/skills/graphify/ingest.py +0 -331
  31. package/skills/graphify/llm.py +0 -1896
  32. package/skills/graphify/manifest.py +0 -4
  33. package/skills/graphify/mcp_ingest.py +0 -392
  34. package/skills/graphify/multigraph_compat.py +0 -212
  35. package/skills/graphify/pg_introspect.py +0 -142
  36. package/skills/graphify/prs.py +0 -748
  37. package/skills/graphify/querylog.py +0 -70
  38. package/skills/graphify/report.py +0 -218
  39. package/skills/graphify/scip_ingest.py +0 -363
  40. package/skills/graphify/security.py +0 -336
  41. package/skills/graphify/semantic_cleanup.py +0 -319
  42. package/skills/graphify/serve.py +0 -1309
  43. package/skills/graphify/skill-aider.md +0 -1246
  44. package/skills/graphify/skill-amp.md +0 -613
  45. package/skills/graphify/skill-claw.md +0 -616
  46. package/skills/graphify/skill-codex.md +0 -613
  47. package/skills/graphify/skill-copilot.md +0 -616
  48. package/skills/graphify/skill-devin.md +0 -1372
  49. package/skills/graphify/skill-droid.md +0 -613
  50. package/skills/graphify/skill-kilo.md +0 -625
  51. package/skills/graphify/skill-kiro.md +0 -615
  52. package/skills/graphify/skill-opencode.md +0 -608
  53. package/skills/graphify/skill-pi.md +0 -615
  54. package/skills/graphify/skill-trae.md +0 -614
  55. package/skills/graphify/skill-vscode.md +0 -612
  56. package/skills/graphify/skill-windows.md +0 -651
  57. package/skills/graphify/skills/amp/references/add-watch.md +0 -56
  58. package/skills/graphify/skills/amp/references/exports.md +0 -71
  59. package/skills/graphify/skills/amp/references/extraction-spec.md +0 -68
  60. package/skills/graphify/skills/amp/references/github-and-merge.md +0 -46
  61. package/skills/graphify/skills/amp/references/hooks.md +0 -33
  62. package/skills/graphify/skills/amp/references/query.md +0 -249
  63. package/skills/graphify/skills/amp/references/transcribe.md +0 -48
  64. package/skills/graphify/skills/amp/references/update.md +0 -179
  65. package/skills/graphify/skills/claude/references/add-watch.md +0 -56
  66. package/skills/graphify/skills/claude/references/exports.md +0 -71
  67. package/skills/graphify/skills/claude/references/extraction-spec.md +0 -68
  68. package/skills/graphify/skills/claude/references/github-and-merge.md +0 -46
  69. package/skills/graphify/skills/claude/references/hooks.md +0 -33
  70. package/skills/graphify/skills/claude/references/query.md +0 -103
  71. package/skills/graphify/skills/claude/references/transcribe.md +0 -48
  72. package/skills/graphify/skills/claude/references/update.md +0 -179
  73. package/skills/graphify/skills/claw/references/add-watch.md +0 -56
  74. package/skills/graphify/skills/claw/references/exports.md +0 -71
  75. package/skills/graphify/skills/claw/references/extraction-spec.md +0 -29
  76. package/skills/graphify/skills/claw/references/github-and-merge.md +0 -46
  77. package/skills/graphify/skills/claw/references/hooks.md +0 -33
  78. package/skills/graphify/skills/claw/references/query.md +0 -249
  79. package/skills/graphify/skills/claw/references/transcribe.md +0 -48
  80. package/skills/graphify/skills/claw/references/update.md +0 -179
  81. package/skills/graphify/skills/codex/references/add-watch.md +0 -56
  82. package/skills/graphify/skills/codex/references/exports.md +0 -71
  83. package/skills/graphify/skills/codex/references/extraction-spec.md +0 -29
  84. package/skills/graphify/skills/codex/references/github-and-merge.md +0 -46
  85. package/skills/graphify/skills/codex/references/hooks.md +0 -33
  86. package/skills/graphify/skills/codex/references/query.md +0 -249
  87. package/skills/graphify/skills/codex/references/transcribe.md +0 -48
  88. package/skills/graphify/skills/codex/references/update.md +0 -179
  89. package/skills/graphify/skills/copilot/references/add-watch.md +0 -56
  90. package/skills/graphify/skills/copilot/references/exports.md +0 -71
  91. package/skills/graphify/skills/copilot/references/extraction-spec.md +0 -68
  92. package/skills/graphify/skills/copilot/references/github-and-merge.md +0 -46
  93. package/skills/graphify/skills/copilot/references/hooks.md +0 -33
  94. package/skills/graphify/skills/copilot/references/query.md +0 -249
  95. package/skills/graphify/skills/copilot/references/transcribe.md +0 -48
  96. package/skills/graphify/skills/copilot/references/update.md +0 -179
  97. package/skills/graphify/skills/droid/references/add-watch.md +0 -56
  98. package/skills/graphify/skills/droid/references/exports.md +0 -71
  99. package/skills/graphify/skills/droid/references/extraction-spec.md +0 -68
  100. package/skills/graphify/skills/droid/references/github-and-merge.md +0 -46
  101. package/skills/graphify/skills/droid/references/hooks.md +0 -33
  102. package/skills/graphify/skills/droid/references/query.md +0 -249
  103. package/skills/graphify/skills/droid/references/transcribe.md +0 -48
  104. package/skills/graphify/skills/droid/references/update.md +0 -179
  105. package/skills/graphify/skills/kilo/references/add-watch.md +0 -56
  106. package/skills/graphify/skills/kilo/references/exports.md +0 -71
  107. package/skills/graphify/skills/kilo/references/extraction-spec.md +0 -68
  108. package/skills/graphify/skills/kilo/references/github-and-merge.md +0 -46
  109. package/skills/graphify/skills/kilo/references/hooks.md +0 -33
  110. package/skills/graphify/skills/kilo/references/query.md +0 -249
  111. package/skills/graphify/skills/kilo/references/transcribe.md +0 -48
  112. package/skills/graphify/skills/kilo/references/update.md +0 -179
  113. package/skills/graphify/skills/kiro/references/add-watch.md +0 -56
  114. package/skills/graphify/skills/kiro/references/exports.md +0 -71
  115. package/skills/graphify/skills/kiro/references/extraction-spec.md +0 -29
  116. package/skills/graphify/skills/kiro/references/github-and-merge.md +0 -46
  117. package/skills/graphify/skills/kiro/references/hooks.md +0 -33
  118. package/skills/graphify/skills/kiro/references/query.md +0 -249
  119. package/skills/graphify/skills/kiro/references/transcribe.md +0 -48
  120. package/skills/graphify/skills/kiro/references/update.md +0 -179
  121. package/skills/graphify/skills/opencode/references/add-watch.md +0 -56
  122. package/skills/graphify/skills/opencode/references/exports.md +0 -71
  123. package/skills/graphify/skills/opencode/references/extraction-spec.md +0 -68
  124. package/skills/graphify/skills/opencode/references/github-and-merge.md +0 -46
  125. package/skills/graphify/skills/opencode/references/hooks.md +0 -33
  126. package/skills/graphify/skills/opencode/references/query.md +0 -249
  127. package/skills/graphify/skills/opencode/references/transcribe.md +0 -48
  128. package/skills/graphify/skills/opencode/references/update.md +0 -179
  129. package/skills/graphify/skills/pi/references/add-watch.md +0 -56
  130. package/skills/graphify/skills/pi/references/exports.md +0 -71
  131. package/skills/graphify/skills/pi/references/extraction-spec.md +0 -29
  132. package/skills/graphify/skills/pi/references/github-and-merge.md +0 -46
  133. package/skills/graphify/skills/pi/references/hooks.md +0 -33
  134. package/skills/graphify/skills/pi/references/query.md +0 -249
  135. package/skills/graphify/skills/pi/references/transcribe.md +0 -48
  136. package/skills/graphify/skills/pi/references/update.md +0 -179
  137. package/skills/graphify/skills/trae/references/add-watch.md +0 -56
  138. package/skills/graphify/skills/trae/references/exports.md +0 -71
  139. package/skills/graphify/skills/trae/references/extraction-spec.md +0 -68
  140. package/skills/graphify/skills/trae/references/github-and-merge.md +0 -46
  141. package/skills/graphify/skills/trae/references/hooks.md +0 -35
  142. package/skills/graphify/skills/trae/references/query.md +0 -249
  143. package/skills/graphify/skills/trae/references/transcribe.md +0 -48
  144. package/skills/graphify/skills/trae/references/update.md +0 -179
  145. package/skills/graphify/skills/vscode/references/add-watch.md +0 -56
  146. package/skills/graphify/skills/vscode/references/exports.md +0 -71
  147. package/skills/graphify/skills/vscode/references/extraction-spec.md +0 -68
  148. package/skills/graphify/skills/vscode/references/github-and-merge.md +0 -46
  149. package/skills/graphify/skills/vscode/references/hooks.md +0 -33
  150. package/skills/graphify/skills/vscode/references/query.md +0 -249
  151. package/skills/graphify/skills/vscode/references/transcribe.md +0 -48
  152. package/skills/graphify/skills/vscode/references/update.md +0 -179
  153. package/skills/graphify/skills/windows/references/add-watch.md +0 -56
  154. package/skills/graphify/skills/windows/references/exports.md +0 -71
  155. package/skills/graphify/skills/windows/references/extraction-spec.md +0 -68
  156. package/skills/graphify/skills/windows/references/github-and-merge.md +0 -46
  157. package/skills/graphify/skills/windows/references/hooks.md +0 -33
  158. package/skills/graphify/skills/windows/references/query.md +0 -249
  159. package/skills/graphify/skills/windows/references/transcribe.md +0 -48
  160. package/skills/graphify/skills/windows/references/update.md +0 -179
  161. package/skills/graphify/symbol_resolution.py +0 -538
  162. package/skills/graphify/transcribe.py +0 -184
  163. package/skills/graphify/tree_html.py +0 -582
  164. package/skills/graphify/validate.py +0 -72
  165. package/skills/graphify/watch.py +0 -898
  166. package/skills/graphify/wiki.py +0 -282
@@ -1,724 +0,0 @@
1
- """Graph analysis: god nodes (most connected), surprising connections (cross-community), suggested questions."""
2
- from __future__ import annotations
3
- from pathlib import Path
4
- import networkx as nx
5
-
6
- from graphify.build import edge_data
7
-
8
- # Builtin/mock names that can appear as annotation-derived nodes in pre-existing
9
- # graphs. Excluded from god-node ranking so they don't displace real abstractions
10
- # even if they weren't filtered at extraction time (#1147).
11
- _BUILTIN_NOISE_LABELS = frozenset({
12
- "str", "int", "float", "bool", "bytes", "bytearray", "complex", "object",
13
- "True", "False",
14
- "MagicMock", "Mock", "AsyncMock", "NonCallableMock",
15
- "NonCallableMagicMock", "PropertyMock", "patch", "sentinel",
16
- })
17
-
18
- # Language families — extensions sharing a runtime can legitimately call each other
19
- _LANG_FAMILY: dict[str, str] = {
20
- **{e: "python" for e in (".py", ".pyw")},
21
- **{e: "js" for e in (".js", ".jsx", ".mjs", ".ejs", ".ts", ".tsx", ".vue", ".svelte")},
22
- **{e: "go" for e in (".go",)},
23
- **{e: "rust" for e in (".rs",)},
24
- **{e: "jvm" for e in (".java", ".kt", ".kts", ".scala")},
25
- **{e: "c" for e in (".c", ".h", ".cpp", ".cc", ".cxx", ".hpp")},
26
- **{e: "ruby" for e in (".rb",)},
27
- **{e: "swift" for e in (".swift",)},
28
- **{e: "dotnet" for e in (".cs",)},
29
- **{e: "php" for e in (".php",)},
30
- **{e: "r" for e in (".r",)},
31
- }
32
-
33
-
34
def _cross_language(src_a: str, src_b: str) -> bool:
    """Tell whether two source paths belong to different language families.

    Each path's suffix is lower-cased and looked up in ``_LANG_FAMILY``.
    If either extension is missing from that table the comparison is
    inconclusive and ``False`` is returned.
    """
    families = [
        _LANG_FAMILY.get(Path(source).suffix.lower())
        for source in (src_a, src_b)
    ]
    if any(family is None for family in families):
        return False
    return families[0] != families[1]
43
-
44
-
45
- def _node_community_map(communities: dict[int, list[str]]) -> dict[str, int]:
46
- """Invert communities dict: node_id -> community_id."""
47
- return {n: cid for cid, nodes in communities.items() for n in nodes}
48
-
49
-
50
- def _is_file_node(G: nx.Graph, node_id: str) -> bool:
51
- """
52
- Return True if this node is a file-level hub node (e.g. 'client', 'models')
53
- or an AST method stub (e.g. '.auth_flow()', '.__init__()').
54
-
55
- These are synthetic nodes created by the AST extractor and should be excluded
56
- from god nodes, surprising connections, and knowledge gap reporting.
57
- """
58
- attrs = G.nodes[node_id]
59
- label = attrs.get("label", "")
60
- if not label:
61
- return False
62
- # File-level hub: label matches the actual source filename (not just any label ending in .py)
63
- source_file = attrs.get("source_file", "")
64
- if source_file:
65
- from pathlib import Path as _Path
66
- if label == _Path(source_file).name:
67
- return True
68
- # Method stub: AST extractor labels methods as '.method_name()'
69
- if label.startswith(".") and label.endswith("()"):
70
- return True
71
- # Module-level function stub: labeled 'function_name()' - only has a contains edge
72
- # These are real functions but structurally isolated by definition; not a gap worth flagging
73
- if label.endswith("()") and G.degree(node_id) <= 1:
74
- return True
75
- return False
76
-
77
-
78
- _JSON_NOISE_LABELS: frozenset[str] = frozenset({
79
- "start", "end", "name", "id", "type", "properties",
80
- "value", "key", "data", "items", "title", "description", "version",
81
- "dependencies", "devdependencies", "peerdependencies",
82
- "optionaldependencies", "bundleddependencies", "bundledependencies",
83
- })
84
-
85
-
86
def _is_json_key_node(G: nx.Graph, node_id: str) -> bool:
    """Return True when the node is a generic key from a ``.json`` file.

    Both conditions must hold: the node's ``source_file`` ends in ``.json``
    (case-insensitive), and its stripped, lower-cased label is listed in
    ``_JSON_NOISE_LABELS``. Missing or ``None`` attributes count as empty.
    """
    attrs = G.nodes[node_id]
    source = attrs.get("source_file") or ""
    if not source.lower().endswith(".json"):
        return False
    label = attrs.get("label") or ""
    return label.strip().lower() in _JSON_NOISE_LABELS
93
-
94
-
95
def god_nodes(G: nx.Graph, top_n: int = 10) -> list[dict]:
    """Return the top_n most-connected real entities - the core abstractions.

    Nodes are visited in descending degree order. A node is skipped when it
    is a file-level hub or AST stub, a concept node, a JSON key node, or when
    its label is in ``_BUILTIN_NOISE_LABELS``. File-level hubs are excluded
    because they accumulate import/contains edges mechanically and don't
    represent meaningful architectural abstractions.

    Args:
        G: Graph whose nodes may carry ``label`` and ``source_file`` attributes.
        top_n: Maximum number of entries to return. Zero or a negative value
            yields an empty list.

    Returns:
        A list of ``{"id", "label", "degree"}`` dicts, highest degree first.
        ``label`` falls back to the node id when the attribute is missing.
    """
    # The loop below checks the limit only after appending, so without this
    # guard a non-positive top_n would still return one entry.
    if top_n <= 0:
        return []

    degree = dict(G.degree())
    ranked = sorted(degree.items(), key=lambda item: item[1], reverse=True)

    result: list[dict] = []
    for node_id, deg in ranked:
        if (
            _is_file_node(G, node_id)
            or _is_concept_node(G, node_id)
            or _is_json_key_node(G, node_id)
        ):
            continue
        attrs = G.nodes[node_id]
        if attrs.get("label", "") in _BUILTIN_NOISE_LABELS:
            continue
        result.append({
            "id": node_id,
            "label": attrs.get("label", node_id),
            "degree": deg,
        })
        if len(result) >= top_n:
            break
    return result
117
-
118
-
119
def surprising_connections(
    G: nx.Graph,
    communities: dict[int, list[str]] | None = None,
    top_n: int = 5,
) -> list[dict]:
    """
    Pick connections that are non-obvious from the file structure alone.

    The strategy depends on how many distinct, non-empty ``source_file``
    values appear on the graph's nodes:

    - More than one: delegate to ``_cross_file_surprises`` (cross-file edges
      between real entities, ranked by a composite surprise score).
    - One or none: delegate to ``_cross_community_surprises`` (edges that
      bridge communities, or high-betweenness edges when there is no
      community information).

    A missing ``communities`` argument is passed on as an empty dict.
    """
    distinct_sources = set()
    for _, attrs in G.nodes(data=True):
        source = attrs.get("source_file", "")
        if source:  # ignore empty/null source_file
            distinct_sources.add(source)

    groups = communities or {}
    if len(distinct_sources) > 1:
        return _cross_file_surprises(G, groups, top_n)
    return _cross_community_surprises(G, groups, top_n)
149
-
150
-
151
- def _is_concept_node(G: nx.Graph, node_id: str) -> bool:
152
- """
153
- Return True if this node is a manually-injected semantic concept node
154
- rather than a real entity found in source code.
155
-
156
- Signals:
157
- - Empty source_file
158
- - source_file doesn't look like a real file path (no extension)
159
- """
160
- data = G.nodes[node_id]
161
- source = data.get("source_file", "")
162
- if not source:
163
- return True
164
- # Has no file extension → probably a concept label, not a real file
165
- if "." not in source.split("/")[-1]:
166
- return True
167
- return False
168
-
169
-
170
- from graphify.detect import CODE_EXTENSIONS, DOC_EXTENSIONS, PAPER_EXTENSIONS, IMAGE_EXTENSIONS
171
-
172
-
173
def _file_category(path: str) -> str:
    """Classify a path as ``"code"``, ``"paper"``, ``"image"`` or ``"doc"``.

    The extension is everything after the last ``.`` in the whole path,
    lower-cased and prefixed with a dot. It is checked against the code,
    paper and image extension sets in that order; anything else, including
    a path with no dot at all, is ``"doc"``.
    """
    _, dot, tail = path.rpartition(".")
    ext = "." + tail.lower() if dot else ""
    for known_extensions, category in (
        (CODE_EXTENSIONS, "code"),
        (PAPER_EXTENSIONS, "paper"),
        (IMAGE_EXTENSIONS, "image"),
    ):
        if ext in known_extensions:
            return category
    return "doc"
182
-
183
-
184
- def _top_level_dir(path: str) -> str:
185
- """Return the first path component - used to detect cross-repo edges."""
186
- return path.split("/")[0] if "/" in path else path
187
-
188
-
189
def _surprise_score(
    G: nx.Graph,
    u: str,
    v: str,
    data: dict,
    node_community: dict[str, int],
    u_source: str,
    v_source: str,
    degrees: dict[str, int] | None = None,
) -> tuple[int, list[str]]:
    """Rate how surprising a cross-file edge is. Returns (score, reasons).

    Args:
        G: Graph containing nodes ``u`` and ``v``.
        u, v: Endpoints of the edge.
        data: Edge attributes; ``confidence`` and ``relation`` are read.
        node_community: Mapping of node id to community id.
        u_source, v_source: Source file paths of the two endpoints.
        degrees: Optional precomputed degree lookup. When omitted,
            ``G.degree`` is queried for each endpoint.

    Returns:
        The integer score and the human-readable reasons, in the order the
        bonuses were applied.
    """
    confidence = data.get("confidence", "EXTRACTED")
    relation = data.get("relation", "")
    kind_u = _file_category(u_source)
    kind_v = _file_category(v_source)

    # INFERRED calls/uses that cross a language boundary or link code to a doc
    # file are resolver pollution: label matching fires across language
    # families in monorepos, and code->doc "calls" edges are extraction
    # artefacts. These edges get no confidence or structural bonus.
    # `semantically_similar_to` and all AMBIGUOUS/EXTRACTED edges never match.
    suppressed = (
        confidence == "INFERRED"
        and relation in ("calls", "uses")
        and (_cross_language(u_source, v_source) or {kind_u, kind_v} == {"code", "doc"})
    )

    why: list[str] = []

    # 1. Confidence weight - uncertain connections are more noteworthy
    if suppressed:
        total = 0
    else:
        total = {"AMBIGUOUS": 3, "INFERRED": 2, "EXTRACTED": 1}.get(confidence, 1)
    if confidence in ("AMBIGUOUS", "INFERRED"):
        why.append(f"{confidence.lower()} connection - not explicitly stated in source")

    if not suppressed:
        # 2. Cross file-type bonus - code<->paper or code<->image is non-obvious
        if kind_u != kind_v:
            total += 2
            why.append(f"crosses file types ({kind_u} ↔ {kind_v})")

        # 3. Cross-repo bonus - different top-level directory
        if _top_level_dir(u_source) != _top_level_dir(v_source):
            total += 2
            why.append("connects across different repos/directories")

        # 4. Cross-community bonus - Leiden says these are structurally distant
        comm_u = node_community.get(u)
        comm_v = node_community.get(v)
        if comm_u is not None and comm_v is not None and comm_u != comm_v:
            total += 1
            why.append("bridges separate communities")

    # 4b. Semantic similarity bonus - scales everything accumulated so far
    if relation == "semantically_similar_to":
        total = int(total * 1.5)
        why.append("semantically similar concepts with no structural link")

    # 5. Peripheral->hub: a low-degree node connecting to a high-degree one
    if degrees is None:
        deg_u, deg_v = G.degree(u), G.degree(v)
    else:
        deg_u, deg_v = degrees[u], degrees[v]
    low, high = sorted((deg_u, deg_v))
    if low <= 2 and high >= 5:
        total += 1
        leaf, core = (u, v) if deg_u <= 2 else (v, u)
        peripheral = G.nodes[leaf].get("label", leaf)
        hub = G.nodes[core].get("label", core)
        why.append(f"peripheral node `{peripheral}` unexpectedly reaches hub `{hub}`")

    return total, why
261
-
262
-
263
def _cross_file_surprises(G: nx.Graph, communities: dict[int, list[str]], top_n: int) -> list[dict]:
    """
    Rank cross-file edges between real code/doc entities by the composite
    score from ``_surprise_score`` rather than by confidence alone.

    An edge is skipped when:
    - its relation is imports / imports_from / contains / method,
    - either endpoint is a concept node or a file/stub node,
    - either endpoint has no ``source_file``, or both share the same one.

    Each result carries source, target, source_files, confidence, relation
    and a 'why' field explaining what makes it non-obvious. When no edge
    qualifies, the result of ``_cross_community_surprises`` is returned.
    """
    structural_relations = ("imports", "imports_from", "contains", "method")
    node_community = _node_community_map(communities)
    degrees = dict(G.degree())

    def _endpoint(edge_attrs: dict, key: str, fallback: str) -> str:
        # Use the recorded direction if it names a node that is in the graph.
        recorded = edge_attrs.get(key, fallback)
        return recorded if recorded in G.nodes else fallback

    scored: list[tuple[int, dict]] = []
    for u, v, data in G.edges(data=True):
        relation = data.get("relation", "")
        if relation in structural_relations:
            continue
        if _is_concept_node(G, u) or _is_concept_node(G, v):
            continue
        if _is_file_node(G, u) or _is_file_node(G, v):
            continue

        u_source = G.nodes[u].get("source_file", "")
        v_source = G.nodes[v].get("source_file", "")
        if not (u_source and v_source) or u_source == v_source:
            continue

        score, reasons = _surprise_score(
            G, u, v, data, node_community, u_source, v_source, degrees
        )
        src_id = _endpoint(data, "_src", u)
        tgt_id = _endpoint(data, "_tgt", v)
        entry = {
            "source": G.nodes[src_id].get("label", src_id),
            "target": G.nodes[tgt_id].get("label", tgt_id),
            "source_files": [
                G.nodes[src_id].get("source_file", ""),
                G.nodes[tgt_id].get("source_file", ""),
            ],
            "confidence": data.get("confidence", "EXTRACTED"),
            "relation": relation,
            "why": "; ".join(reasons) if reasons else "cross-file semantic connection",
        }
        scored.append((score, entry))

    if not scored:
        return _cross_community_surprises(G, communities, top_n)

    # Stable sort: edges with equal scores keep their graph iteration order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in scored[:top_n]]
324
-
325
-
326
def _cross_community_surprises(
    G: nx.Graph,
    communities: dict[int, list[str]],
    top_n: int,
) -> list[dict]:
    """
    For single-source corpora: find edges that bridge different communities.
    These are surprising because Leiden grouped everything else tightly -
    these edges cut across the natural structure.

    With community info, every edge whose endpoints sit in two different
    communities is a candidate, unless an endpoint is a file/stub node or the
    relation is imports / imports_from / contains / method. Candidates are
    ordered AMBIGUOUS, INFERRED, EXTRACTED, then reduced to one edge per
    unordered community pair.

    Without community info, falls back to the ``top_n`` edges by edge
    betweenness centrality. That fallback returns an empty list when the
    graph has no edges or more than 5000 nodes.

    Returns:
        At most ``top_n`` dicts with source, target, source_files,
        confidence, relation and note.
    """
    if not communities:
        # No community info - use edge betweenness centrality
        if G.number_of_edges() == 0:
            return []
        if G.number_of_nodes() > 5000:
            return []
        betweenness = nx.edge_betweenness_centrality(G)
        top_edges = sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[:top_n]
        result = []
        for (u, v), score in top_edges:
            data = edge_data(G, u, v)
            result.append({
                "source": G.nodes[u].get("label", u),
                "target": G.nodes[v].get("label", v),
                "source_files": [
                    G.nodes[u].get("source_file", ""),
                    G.nodes[v].get("source_file", ""),
                ],
                "confidence": data.get("confidence", "EXTRACTED"),
                "relation": data.get("relation", ""),
                "note": f"Bridges graph structure (betweenness={score:.3f})",
            })
        return result

    # Build node → community map
    node_community = _node_community_map(communities)

    surprises = []
    for u, v, data in G.edges(data=True):
        cid_u = node_community.get(u)
        cid_v = node_community.get(v)
        if cid_u is None or cid_v is None or cid_u == cid_v:
            continue
        # Skip file hub nodes and plain structural edges
        if _is_file_node(G, u) or _is_file_node(G, v):
            continue
        relation = data.get("relation", "")
        if relation in ("imports", "imports_from", "contains", "method"):
            continue

        # This edge crosses community boundaries - interesting.
        # Prefer the recorded direction when it names nodes in the graph.
        src_id = data.get("_src", u)
        if src_id not in G.nodes:
            src_id = u
        tgt_id = data.get("_tgt", v)
        if tgt_id not in G.nodes:
            tgt_id = v

        # Keep the arrow in the note pointing the same way as the reported
        # source/target: if the recorded direction is (v -> u), swap the ids.
        if src_id == v and tgt_id == u:
            from_cid, to_cid = cid_v, cid_u
        else:
            from_cid, to_cid = cid_u, cid_v

        surprises.append({
            "source": G.nodes[src_id].get("label", src_id),
            "target": G.nodes[tgt_id].get("label", tgt_id),
            "source_files": [
                G.nodes[src_id].get("source_file", ""),
                G.nodes[tgt_id].get("source_file", ""),
            ],
            "confidence": data.get("confidence", "EXTRACTED"),
            "relation": relation,
            "note": f"Bridges community {from_cid} → community {to_cid}",
            "_pair": tuple(sorted([cid_u, cid_v])),
        })

    # Sort: AMBIGUOUS first, then INFERRED, then EXTRACTED, then anything else
    order = {"AMBIGUOUS": 0, "INFERRED": 1, "EXTRACTED": 2}
    surprises.sort(key=lambda x: order.get(x["confidence"], 3))

    # Deduplicate by community pair - one representative edge per (A→B) boundary.
    # Without this, a single high-betweenness god node dominates all results.
    seen_pairs: set[tuple] = set()
    deduped = []
    for s in surprises:
        pair = s.pop("_pair")
        if pair not in seen_pairs:
            seen_pairs.add(pair)
            deduped.append(s)
    return deduped[:top_n]
412
-
413
-
414
def suggest_questions(
    G: nx.Graph,
    communities: dict[int, list[str]],
    community_labels: dict[int, str],
    top_n: int = 7,
) -> list[dict]:
    """
    Generate questions the graph is uniquely positioned to answer.

    Questions are collected in this order and then cut to ``top_n``:
    1. one per AMBIGUOUS edge,
    2. up to three bridge nodes (highest betweenness centrality),
    3. high-degree nodes with at least two INFERRED edges,
    4. one summary question for isolated / weakly-connected nodes,
    5. one per low-cohesion community with at least five nodes.

    Args:
        G: Graph to analyse.
        communities: Mapping of community id to its member node ids.
        community_labels: Mapping of community id to a display label. String
            keys are coerced to ``int``; ``None`` is treated as empty.
        top_n: Maximum number of questions returned.

    Returns:
        Dicts with 'type', 'question' and 'why'. When nothing is found, a
        single ``no_signal`` entry whose 'question' is ``None``.
    """
    # Coerce string keys to int so lookups by community id succeed, and
    # tolerate a missing mapping instead of failing on `.get` later.
    community_labels = {
        (int(key) if isinstance(key, str) else key): text
        for key, text in (community_labels or {}).items()
    }

    questions = []
    node_community = _node_community_map(communities)

    # 1. AMBIGUOUS edges → unresolved relationship questions
    for u, v, data in G.edges(data=True):
        if data.get("confidence") != "AMBIGUOUS":
            continue
        ul = G.nodes[u].get("label", u)
        vl = G.nodes[v].get("label", v)
        relation = data.get("relation", "related to")
        questions.append({
            "type": "ambiguous_edge",
            "question": f"What is the exact relationship between `{ul}` and `{vl}`?",
            "why": f"Edge tagged AMBIGUOUS (relation: {relation}) - confidence is low.",
        })

    # 2. Bridge nodes (high betweenness) → cross-cutting concern questions
    if G.number_of_edges() > 0:
        # Above 1000 nodes, pass k=100 so betweenness is estimated from a
        # sample of pivots; the fixed seed keeps the estimate reproducible.
        k = min(100, G.number_of_nodes()) if G.number_of_nodes() > 1000 else None
        betweenness = nx.betweenness_centrality(G, k=k, seed=42)
        # Top bridge nodes that are NOT file-level hubs or concept nodes
        bridges = sorted(
            [
                (n, s) for n, s in betweenness.items()
                if not _is_file_node(G, n) and not _is_concept_node(G, n) and s > 0
            ],
            key=lambda x: x[1],
            reverse=True,
        )[:3]
        for node_id, score in bridges:
            label = G.nodes[node_id].get("label", node_id)
            cid = node_community.get(node_id)
            comm_label = community_labels.get(cid, f"Community {cid}") if cid is not None else "unknown"
            # Neighbours with no community are ignored; otherwise they would
            # show up in the question as "Community None".
            neighbor_comms = {
                c
                for c in (node_community.get(n) for n in G.neighbors(node_id))
                if c is not None and c != cid
            }
            if neighbor_comms:
                other_labels = [community_labels.get(c, f"Community {c}") for c in neighbor_comms]
                joined = ", ".join(f"`{other}`" for other in other_labels)
                questions.append({
                    "type": "bridge_node",
                    "question": f"Why does `{label}` connect `{comm_label}` to {joined}?",
                    "why": f"High betweenness centrality ({score:.3f}) - this node is a cross-community bridge.",
                })

    # 3. God nodes with many INFERRED edges → verification questions
    degree = dict(G.degree())
    top_nodes = sorted(
        [(n, d) for n, d in degree.items() if not _is_file_node(G, n)],
        key=lambda x: x[1],
        reverse=True,
    )[:5]
    for node_id, _ in top_nodes:
        inferred = [
            (u, v, d) for u, v, d in G.edges(node_id, data=True)
            if d.get("confidence") == "INFERRED"
        ]
        if len(inferred) < 2:
            continue
        label = G.nodes[node_id].get("label", node_id)
        # Use _src/_tgt to get the correct direction; fall back to v (the other node)
        others = []
        for u, v, d in inferred[:2]:
            src_id = d.get("_src", u)
            if src_id not in G.nodes:
                src_id = u
            tgt_id = d.get("_tgt", v)
            if tgt_id not in G.nodes:
                tgt_id = v
            other_id = tgt_id if src_id == node_id else src_id
            others.append(G.nodes[other_id].get("label", other_id))
        questions.append({
            "type": "verify_inferred",
            "question": f"Are the {len(inferred)} inferred relationships involving `{label}` (e.g. with `{others[0]}` and `{others[1]}`) actually correct?",
            "why": f"`{label}` has {len(inferred)} INFERRED edges - model-reasoned connections that need verification.",
        })

    # 4. Isolated or weakly-connected nodes → exploration questions
    isolated = [
        n for n in G.nodes()
        if G.degree(n) <= 1 and not _is_file_node(G, n) and not _is_concept_node(G, n)
    ]
    if isolated:
        sample = ", ".join(f"`{G.nodes[n].get('label', n)}`" for n in isolated[:3])
        questions.append({
            "type": "isolated_nodes",
            "question": f"What connects {sample} to the rest of the system?",
            "why": f"{len(isolated)} weakly-connected nodes found - possible documentation gaps or missing edges.",
        })

    # 5. Low-cohesion communities → structural questions
    from .cluster import cohesion_score
    for cid, nodes in communities.items():
        # Communities under five nodes are never reported, so skip scoring them.
        if len(nodes) < 5:
            continue
        score = cohesion_score(G, nodes)
        if score < 0.15:
            label = community_labels.get(cid, f"Community {cid}")
            questions.append({
                "type": "low_cohesion",
                "question": f"Should `{label}` be split into smaller, more focused modules?",
                "why": f"Cohesion score {score} - nodes in this community are weakly interconnected.",
            })

    if not questions:
        return [{
            "type": "no_signal",
            "question": None,
            "why": (
                "Not enough signal to generate questions. "
                "This usually means the corpus has no AMBIGUOUS edges, no bridge nodes, "
                "no INFERRED relationships, and all communities are tightly cohesive. "
                "Add more files or run with --mode deep to extract richer edges."
            ),
        }]

    return questions[:top_n]
537
-
538
-
539
def graph_diff(G_old: nx.Graph, G_new: nx.Graph) -> dict:
    """Compare two graph snapshots and return what changed.

    Nodes are compared by id. Edges are compared by
    ``(source, target, relation)``; other edge attributes such as
    ``confidence`` are reported but do not take part in the comparison.
    Edge direction is only significant when *both* snapshots are directed;
    otherwise the endpoints are order-normalised so that a directed and an
    undirected snapshot of the same edge compare equal.

    Args:
        G_old: The earlier snapshot.
        G_new: The later snapshot.

    Returns:
        {
            "new_nodes": [{"id": ..., "label": ...}],
            "removed_nodes": [{"id": ..., "label": ...}],
            "new_edges": [{"source": ..., "target": ..., "relation": ..., "confidence": ...}],
            "removed_edges": [...],
            "summary": "3 new nodes, 5 new edges, 1 node removed"
        }
        Node and edge lists follow the iteration order of the graph they
        come from, so the result does not depend on set ordering.
    """
    old_node_ids = set(G_old.nodes())
    new_node_ids = set(G_new.nodes())

    # Iterate the graphs rather than the set differences: set iteration order
    # varies between runs, which would make the diff output unstable.
    new_nodes_list = [
        {"id": n, "label": G_new.nodes[n].get("label", n)}
        for n in G_new.nodes()
        if n not in old_node_ids
    ]
    removed_nodes_list = [
        {"id": n, "label": G_old.nodes[n].get("label", n)}
        for n in G_old.nodes()
        if n not in new_node_ids
    ]

    # Direction can only be compared when both snapshots carry it. Using a
    # per-graph rule would make identical edges look added *and* removed when
    # one snapshot is directed and the other is not.
    directed = G_old.is_directed() and G_new.is_directed()

    def edge_key(u: str, v: str, data: dict) -> tuple:
        relation = data.get("relation", "")
        if directed:
            return (u, v, relation)
        return (min(u, v), max(u, v), relation)

    def edge_record(u: str, v: str, data: dict) -> dict:
        return {
            "source": u,
            "target": v,
            "relation": data.get("relation", ""),
            "confidence": data.get("confidence", ""),
        }

    # Compute each key once and keep it next to its edge.
    old_edges = [(edge_key(u, v, d), u, v, d) for u, v, d in G_old.edges(data=True)]
    new_edges = [(edge_key(u, v, d), u, v, d) for u, v, d in G_new.edges(data=True)]
    old_edge_keys = {key for key, _, _, _ in old_edges}
    new_edge_keys = {key for key, _, _, _ in new_edges}

    new_edges_list = [
        edge_record(u, v, d)
        for key, u, v, d in new_edges
        if key not in old_edge_keys
    ]
    removed_edges_list = [
        edge_record(u, v, d)
        for key, u, v, d in old_edges
        if key not in new_edge_keys
    ]

    def plural(count: int) -> str:
        return "s" if count != 1 else ""

    parts = []
    if new_nodes_list:
        parts.append(f"{len(new_nodes_list)} new node{plural(len(new_nodes_list))}")
    if new_edges_list:
        parts.append(f"{len(new_edges_list)} new edge{plural(len(new_edges_list))}")
    if removed_nodes_list:
        parts.append(f"{len(removed_nodes_list)} node{plural(len(removed_nodes_list))} removed")
    if removed_edges_list:
        parts.append(f"{len(removed_edges_list)} edge{plural(len(removed_edges_list))} removed")
    summary = ", ".join(parts) if parts else "no changes"

    return {
        "new_nodes": new_nodes_list,
        "removed_nodes": removed_nodes_list,
        "new_edges": new_edges_list,
        "removed_edges": removed_edges_list,
        "summary": summary,
    }
621
-
622
-
623
def find_import_cycles(
    G: nx.Graph,
    max_cycle_length: int = 5,
    top_n: int = 20,
) -> list[dict]:
    """Detect circular import dependencies at the file level.

    Every ``imports_from`` / ``re_exports`` edge is collapsed to a
    file-to-file edge using the ``source_file`` attribute of the edge and of
    its endpoint nodes (labels and ids are never used to guess a file). The
    resulting directed file graph is then searched for simple cycles.

    Args:
        G: The full knowledge graph (may be undirected or directed).
        max_cycle_length: Only report cycles with at most this many files.
        top_n: Maximum number of cycles to return (shortest first).

    Returns:
        List of cycle records with stable structure:
        {
            "cycle": ["a.ts", "b.ts"],
            "length": 2,
            "why": "circular dependency"
        }
        Each cycle is rotated to start at its lexicographically smallest
        file; cycles are ordered by length, then lexicographically. An empty
        list is returned when ``top_n`` or ``max_cycle_length`` is not
        positive.
    """
    import inspect  # local: only needed to probe the installed networkx API

    # A non-positive limit can never admit a cycle. Returning early also keeps
    # a negative bound away from networkx, which rejects it.
    if top_n <= 0 or max_cycle_length <= 0:
        return []

    def _endpoint_source_file(node_id: str) -> str:
        attrs = G.nodes.get(node_id, {})
        src_file = attrs.get("source_file", "")
        return src_file if isinstance(src_file, str) else ""

    # Step 1: Build a directed file-level graph from import/re-export edges.
    # IMPORTANT: resolve endpoints using source_file only; never infer from label/id.
    file_graph = nx.DiGraph()

    for u, v, data in G.edges(data=True):
        if data.get("relation", "") not in ("imports_from", "re_exports"):
            continue

        src_file_attr = data.get("source_file", "")
        if not isinstance(src_file_attr, str) or not src_file_attr:
            continue

        u_file = _endpoint_source_file(u)
        v_file = _endpoint_source_file(v)

        # Works for both DiGraph and Graph inputs:
        # orient edge from edge.source_file endpoint to the opposite endpoint.
        if u_file == src_file_attr:
            tgt_file = v_file
        elif v_file == src_file_attr:
            tgt_file = u_file
        else:
            # Neither endpoint matches edge.source_file: still treat it as the
            # importer and take whichever endpoint has a real source_file.
            tgt_file = v_file or u_file

        if not tgt_file:
            continue

        file_graph.add_edge(src_file_attr, tgt_file)

    if not file_graph.edges():
        return []

    # Step 2: Find simple cycles, bounded by length. Newer networkx releases
    # can prune the search at the bound instead of enumerating every cycle and
    # filtering afterwards; fall back to plain enumeration when unavailable.
    try:
        supports_bound = (
            "length_bound" in inspect.signature(nx.simple_cycles).parameters
        )
    except (TypeError, ValueError):
        supports_bound = False

    if supports_bound:
        candidates = nx.simple_cycles(file_graph, length_bound=max_cycle_length)
    else:
        candidates = nx.simple_cycles(file_graph)

    collect_cap = top_n * 10
    cycles: list[list[str]] = []
    for cycle in candidates:
        if len(cycle) <= max_cycle_length:
            cycles.append(list(cycle))
            if len(cycles) >= collect_cap:
                # Stop early to avoid combinatorial explosion
                break

    # Step 3: Deduplicate rotations by normalizing each cycle to start from
    # its lexicographically smallest element.
    seen: set[tuple[str, ...]] = set()
    for core in cycles:
        if not core:
            continue
        min_idx = core.index(min(core))
        seen.add(tuple(core[min_idx:] + core[:min_idx]))

    # Shortest first (tightest coupling); ties broken lexicographically so the
    # output order does not depend on enumeration order.
    ordered = sorted(seen, key=lambda c: (len(c), c))[:top_n]

    return [
        {
            "cycle": list(cycle),
            "length": len(cycle),
            "why": "circular dependency",
        }
        for cycle in ordered
    ]