graphifyy 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. graphifyy-0.3.0/LICENSE +21 -0
  2. {graphifyy-0.2.2 → graphifyy-0.3.0}/PKG-INFO +43 -9
  3. {graphifyy-0.2.2 → graphifyy-0.3.0}/README.md +16 -5
  4. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/__main__.py +62 -23
  5. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/build.py +30 -2
  6. graphifyy-0.3.0/graphify/extract.py +1588 -0
  7. graphifyy-0.3.0/graphify/skill-claw.md +1164 -0
  8. graphifyy-0.3.0/graphify/skill-codex.md +1221 -0
  9. graphifyy-0.3.0/graphify/skill-opencode.md +1216 -0
  10. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/skill.md +44 -33
  11. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/PKG-INFO +43 -9
  12. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/SOURCES.txt +5 -0
  13. {graphifyy-0.2.2 → graphifyy-0.3.0}/pyproject.toml +5 -5
  14. graphifyy-0.3.0/tests/test_install.py +85 -0
  15. graphifyy-0.2.2/graphify/extract.py +0 -2526
  16. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/__init__.py +0 -0
  17. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/analyze.py +0 -0
  18. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/benchmark.py +0 -0
  19. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/cache.py +0 -0
  20. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/cluster.py +0 -0
  21. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/detect.py +0 -0
  22. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/export.py +0 -0
  23. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/hooks.py +0 -0
  24. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/ingest.py +0 -0
  25. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/manifest.py +0 -0
  26. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/report.py +0 -0
  27. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/security.py +0 -0
  28. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/serve.py +0 -0
  29. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/validate.py +0 -0
  30. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/watch.py +0 -0
  31. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphify/wiki.py +0 -0
  32. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/dependency_links.txt +0 -0
  33. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/entry_points.txt +0 -0
  34. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/requires.txt +0 -0
  35. {graphifyy-0.2.2 → graphifyy-0.3.0}/graphifyy.egg-info/top_level.txt +0 -0
  36. {graphifyy-0.2.2 → graphifyy-0.3.0}/setup.cfg +0 -0
  37. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_analyze.py +0 -0
  38. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_benchmark.py +0 -0
  39. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_build.py +0 -0
  40. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_cache.py +0 -0
  41. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_claude_md.py +0 -0
  42. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_cluster.py +0 -0
  43. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_confidence.py +0 -0
  44. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_detect.py +0 -0
  45. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_export.py +0 -0
  46. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_extract.py +0 -0
  47. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_hooks.py +0 -0
  48. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_hypergraph.py +0 -0
  49. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_ingest.py +0 -0
  50. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_languages.py +0 -0
  51. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_multilang.py +0 -0
  52. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_pipeline.py +0 -0
  53. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_rationale.py +0 -0
  54. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_report.py +0 -0
  55. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_security.py +0 -0
  56. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_semantic_similarity.py +0 -0
  57. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_serve.py +0 -0
  58. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_validate.py +0 -0
  59. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_watch.py +0 -0
  60. {graphifyy-0.2.2 → graphifyy-0.3.0}/tests/test_wiki.py +0 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Safi Shamsi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,14 +1,36 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphifyy
3
- Version: 0.2.2
4
- Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
5
- License: MIT
3
+ Version: 0.3.0
4
+ Summary: AI coding assistant skill (Claude Code, Codex, OpenCode, OpenClaw) - turn any folder of code, docs, papers, or images into a queryable knowledge graph
5
+ License: MIT License
6
+
7
+ Copyright (c) 2026 Safi Shamsi
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
6
27
  Project-URL: Homepage, https://github.com/safishamsi/graphify
7
28
  Project-URL: Repository, https://github.com/safishamsi/graphify
8
29
  Project-URL: Issues, https://github.com/safishamsi/graphify/issues
9
- Keywords: claude,claude-code,knowledge-graph,rag,graphrag,obsidian,community-detection,tree-sitter,leiden,llm
30
+ Keywords: claude,claude-code,codex,opencode,knowledge-graph,rag,graphrag,obsidian,community-detection,tree-sitter,leiden,llm
10
31
  Requires-Python: >=3.10
11
32
  Description-Content-Type: text/markdown
33
+ License-File: LICENSE
12
34
  Requires-Dist: networkx
13
35
  Requires-Dist: tree-sitter
14
36
  Requires-Dist: tree-sitter-python
@@ -42,13 +64,14 @@ Requires-Dist: pypdf; extra == "all"
42
64
  Requires-Dist: html2text; extra == "all"
43
65
  Requires-Dist: watchdog; extra == "all"
44
66
  Requires-Dist: graspologic; extra == "all"
67
+ Dynamic: license-file
45
68
 
46
69
  # graphify
47
70
 
48
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
71
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v3)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
49
72
  [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
50
73
 
51
- **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
74
+ **An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, or OpenClaw - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
52
75
 
53
76
  Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph.
54
77
 
@@ -82,7 +105,18 @@ pip install graphifyy && graphify install
82
105
 
83
106
  > The PyPI package is temporarily named `graphifyy` while the `graphify` name is being reclaimed. The CLI and skill command are still `graphify`.
84
107
 
85
- Then open Claude Code in any directory and type:
108
+ ### Platform support
109
+
110
+ | Platform | Install command |
111
+ |----------|----------------|
112
+ | Claude Code | `graphify install` |
113
+ | Codex | `graphify install --platform codex` |
114
+ | OpenCode | `graphify install --platform opencode` |
115
+ | OpenClaw | `graphify install --platform claw` |
116
+
117
+ Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
118
+
119
+ Then open your AI coding assistant and type:
86
120
 
87
121
  ```
88
122
  /graphify .
@@ -111,7 +145,7 @@ Uninstall with `graphify claude uninstall`.
111
145
 
112
146
  ```bash
113
147
  mkdir -p ~/.claude/skills/graphify
114
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
148
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/skill.md \
115
149
  > ~/.claude/skills/graphify/SKILL.md
116
150
  ```
117
151
 
@@ -176,7 +210,7 @@ Works with any mix of file types:
176
210
 
177
211
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
178
212
 
179
- **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files.
213
+ **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files. The first run extracts and builds the graph (this costs tokens). Every subsequent query reads the compact graph instead of raw files — that's where the savings compound. The SHA256 cache means re-runs only re-process changed files.
180
214
 
181
215
  **Auto-sync** (`--watch`) - run in a background terminal and the graph updates itself as your codebase changes. Code file saves trigger an instant rebuild (AST only, no LLM). Doc/image changes notify you to run `--update` for the LLM re-pass.
182
216
 
@@ -1,9 +1,9 @@
1
1
  # graphify
2
2
 
3
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
3
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v3)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
4
4
  [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
5
5
 
6
- **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
6
+ **An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, or OpenClaw - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
7
7
 
8
8
  Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph.
9
9
 
@@ -37,7 +37,18 @@ pip install graphifyy && graphify install
37
37
 
38
38
  > The PyPI package is temporarily named `graphifyy` while the `graphify` name is being reclaimed. The CLI and skill command are still `graphify`.
39
39
 
40
- Then open Claude Code in any directory and type:
40
+ ### Platform support
41
+
42
+ | Platform | Install command |
43
+ |----------|----------------|
44
+ | Claude Code | `graphify install` |
45
+ | Codex | `graphify install --platform codex` |
46
+ | OpenCode | `graphify install --platform opencode` |
47
+ | OpenClaw | `graphify install --platform claw` |
48
+
49
+ Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
50
+
51
+ Then open your AI coding assistant and type:
41
52
 
42
53
  ```
43
54
  /graphify .
@@ -66,7 +77,7 @@ Uninstall with `graphify claude uninstall`.
66
77
 
67
78
  ```bash
68
79
  mkdir -p ~/.claude/skills/graphify
69
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
80
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/skill.md \
70
81
  > ~/.claude/skills/graphify/SKILL.md
71
82
  ```
72
83
 
@@ -131,7 +142,7 @@ Works with any mix of file types:
131
142
 
132
143
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
133
144
 
134
- **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files.
145
+ **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files. The first run extracts and builds the graph (this costs tokens). Every subsequent query reads the compact graph instead of raw files — that's where the savings compound. The SHA256 cache means re-runs only re-process changed files.
135
146
 
136
147
  **Auto-sync** (`--watch`) - run in a background terminal and the graph updates itself as your codebase changes. Code file saves trigger an instant rebuild (AST only, no LLM). Doc/image changes notify you to run `--update` for the LLM re-pass.
137
148
 
@@ -29,39 +29,66 @@ _SKILL_REGISTRATION = (
29
29
  )
30
30
 
31
31
 
32
- def _bundled_skill() -> Path:
33
- """Path to the skill.md bundled with this package."""
34
- return Path(__file__).parent / "skill.md"
32
+ _PLATFORM_CONFIG: dict[str, dict] = {
33
+ "claude": {
34
+ "skill_file": "skill.md",
35
+ "skill_dst": Path(".claude") / "skills" / "graphify" / "SKILL.md",
36
+ "claude_md": True,
37
+ },
38
+ "codex": {
39
+ "skill_file": "skill-codex.md",
40
+ "skill_dst": Path(".agents") / "skills" / "graphify" / "SKILL.md",
41
+ "claude_md": False,
42
+ },
43
+ "opencode": {
44
+ "skill_file": "skill-opencode.md",
45
+ "skill_dst": Path(".config") / "opencode" / "skills" / "graphify" / "SKILL.md",
46
+ "claude_md": False,
47
+ },
48
+ "claw": {
49
+ "skill_file": "skill-claw.md",
50
+ "skill_dst": Path(".claw") / "skills" / "graphify" / "SKILL.md",
51
+ "claude_md": False,
52
+ },
53
+ }
54
+
35
55
 
56
+ def install(platform: str = "claude") -> None:
57
+ if platform not in _PLATFORM_CONFIG:
58
+ print(
59
+ f"error: unknown platform '{platform}'. Choose from: {', '.join(_PLATFORM_CONFIG)}",
60
+ file=sys.stderr,
61
+ )
62
+ sys.exit(1)
36
63
 
37
- def install() -> None:
38
- skill_src = _bundled_skill()
64
+ cfg = _PLATFORM_CONFIG[platform]
65
+ skill_src = Path(__file__).parent / cfg["skill_file"]
39
66
  if not skill_src.exists():
40
- print("error: skill.md not found in package - reinstall graphify", file=sys.stderr)
67
+ print(f"error: {cfg['skill_file']} not found in package - reinstall graphify", file=sys.stderr)
41
68
  sys.exit(1)
42
69
 
43
- # Copy skill to ~/.claude/skills/graphify/SKILL.md
44
- skill_dst = Path.home() / ".claude" / "skills" / "graphify" / "SKILL.md"
70
+ skill_dst = Path.home() / cfg["skill_dst"]
45
71
  skill_dst.parent.mkdir(parents=True, exist_ok=True)
46
72
  shutil.copy(skill_src, skill_dst)
47
73
  print(f" skill installed → {skill_dst}")
48
74
 
49
- # Register in ~/.claude/CLAUDE.md
50
- claude_md = Path.home() / ".claude" / "CLAUDE.md"
51
- if claude_md.exists():
52
- content = claude_md.read_text()
53
- if "graphify" in content:
54
- print(f" CLAUDE.md → already registered (no change)")
75
+ if cfg["claude_md"]:
76
+ # Register in ~/.claude/CLAUDE.md (Claude Code only)
77
+ claude_md = Path.home() / ".claude" / "CLAUDE.md"
78
+ if claude_md.exists():
79
+ content = claude_md.read_text()
80
+ if "graphify" in content:
81
+ print(f" CLAUDE.md → already registered (no change)")
82
+ else:
83
+ claude_md.write_text(content.rstrip() + _SKILL_REGISTRATION)
84
+ print(f" CLAUDE.md → skill registered in {claude_md}")
55
85
  else:
56
- claude_md.write_text(content.rstrip() + _SKILL_REGISTRATION)
57
- print(f" CLAUDE.md → skill registered in {claude_md}")
58
- else:
59
- claude_md.parent.mkdir(parents=True, exist_ok=True)
60
- claude_md.write_text(_SKILL_REGISTRATION.lstrip())
61
- print(f" CLAUDE.md → created at {claude_md}")
86
+ claude_md.parent.mkdir(parents=True, exist_ok=True)
87
+ claude_md.write_text(_SKILL_REGISTRATION.lstrip())
88
+ print(f" CLAUDE.md → created at {claude_md}")
62
89
 
63
90
  print()
64
- print("Done. Open Claude Code in any directory and type:")
91
+ print("Done. Open your AI coding assistant and type:")
65
92
  print()
66
93
  print(" /graphify .")
67
94
  print()
@@ -184,7 +211,7 @@ def main() -> None:
184
211
  print("Usage: graphify <command>")
185
212
  print()
186
213
  print("Commands:")
187
- print(" install copy skill to ~/.claude/skills/ and register in CLAUDE.md")
214
+ print(" install [--platform P] copy skill to platform config dir (claude|codex|opencode|claw)")
188
215
  print(" benchmark [graph.json] measure token reduction vs naive full-corpus approach")
189
216
  print(" hook install install post-commit git hook (auto-rebuilds graph on commit)")
190
217
  print(" hook uninstall remove post-commit git hook")
@@ -196,7 +223,19 @@ def main() -> None:
196
223
 
197
224
  cmd = sys.argv[1]
198
225
  if cmd == "install":
199
- install()
226
+ platform = "claude"
227
+ args = sys.argv[2:]
228
+ i = 0
229
+ while i < len(args):
230
+ if args[i].startswith("--platform="):
231
+ platform = args[i].split("=", 1)[1]
232
+ i += 1
233
+ elif args[i] == "--platform" and i + 1 < len(args):
234
+ platform = args[i + 1]
235
+ i += 2
236
+ else:
237
+ i += 1
238
+ install(platform=platform)
200
239
  elif cmd == "claude":
201
240
  subcmd = sys.argv[2] if len(sys.argv) > 2 else ""
202
241
  if subcmd == "install":
@@ -1,4 +1,25 @@
1
1
  # assemble node+edge dicts into a NetworkX graph, preserving edge direction
2
+ #
3
+ # Node deduplication — three layers:
4
+ #
5
+ # 1. Within a file (AST): each extractor tracks a `seen_ids` set. A node ID is
6
+ # emitted at most once per file, so duplicate class/function definitions in
7
+ # the same source file are collapsed to the first occurrence.
8
+ #
9
+ # 2. Between files (build): NetworkX G.add_node() is idempotent — calling it
10
+ # twice with the same ID overwrites the attributes with the second call's
11
+ # values. Nodes are added in extraction order (AST first, then semantic),
12
+ # so if the same entity is extracted by both passes the semantic node
13
+ # silently overwrites the AST node. This is intentional: semantic nodes
14
+ # carry richer labels and cross-file context, while AST nodes have precise
15
+ # source_location. If you need to change the priority, reorder extractions
16
+ # passed to build().
17
+ #
18
+ # 3. Semantic merge (skill): before calling build(), the skill merges cached
19
+ # and new semantic results using an explicit `seen` set keyed on node["id"],
20
+ # so duplicates across cache hits and new extractions are resolved there
21
+ # before any graph construction happens.
22
+ #
2
23
  from __future__ import annotations
3
24
  import sys
4
25
  import networkx as nx
@@ -32,11 +53,18 @@ def build_from_json(extraction: dict) -> nx.Graph:
32
53
 
33
54
 
34
55
  def build(extractions: list[dict]) -> nx.Graph:
35
- """Merge multiple extraction results into one graph."""
36
- combined: dict = {"nodes": [], "edges": [], "input_tokens": 0, "output_tokens": 0}
56
+ """Merge multiple extraction results into one graph.
57
+
58
+ Extractions are merged in order. For nodes with the same ID, the last
59
+ extraction's attributes win (NetworkX add_node overwrites). Pass AST
60
+ results before semantic results so semantic labels take precedence, or
61
+ reverse the order if you prefer AST source_location precision to win.
62
+ """
63
+ combined: dict = {"nodes": [], "edges": [], "hyperedges": [], "input_tokens": 0, "output_tokens": 0}
37
64
  for ext in extractions:
38
65
  combined["nodes"].extend(ext.get("nodes", []))
39
66
  combined["edges"].extend(ext.get("edges", []))
67
+ combined["hyperedges"].extend(ext.get("hyperedges", []))
40
68
  combined["input_tokens"] += ext.get("input_tokens", 0)
41
69
  combined["output_tokens"] += ext.get("output_tokens", 0)
42
70
  return build_from_json(combined)