codegraph-tools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codegraph_tools-0.1.0/LICENSE +21 -0
  2. codegraph_tools-0.1.0/PKG-INFO +394 -0
  3. codegraph_tools-0.1.0/README.md +314 -0
  4. codegraph_tools-0.1.0/codegraph/__init__.py +28 -0
  5. codegraph_tools-0.1.0/codegraph/__main__.py +869 -0
  6. codegraph_tools-0.1.0/codegraph/analyze.py +525 -0
  7. codegraph_tools-0.1.0/codegraph/benchmark.py +129 -0
  8. codegraph_tools-0.1.0/codegraph/build.py +78 -0
  9. codegraph_tools-0.1.0/codegraph/cache.py +154 -0
  10. codegraph_tools-0.1.0/codegraph/cluster.py +137 -0
  11. codegraph_tools-0.1.0/codegraph/detect.py +484 -0
  12. codegraph_tools-0.1.0/codegraph/export.py +992 -0
  13. codegraph_tools-0.1.0/codegraph/extract.py +2719 -0
  14. codegraph_tools-0.1.0/codegraph/hooks.py +196 -0
  15. codegraph_tools-0.1.0/codegraph/ingest.py +291 -0
  16. codegraph_tools-0.1.0/codegraph/manifest.py +4 -0
  17. codegraph_tools-0.1.0/codegraph/report.py +155 -0
  18. codegraph_tools-0.1.0/codegraph/security.py +197 -0
  19. codegraph_tools-0.1.0/codegraph/serve.py +333 -0
  20. codegraph_tools-0.1.0/codegraph/skill-aider.md +1137 -0
  21. codegraph_tools-0.1.0/codegraph/skill-claw.md +1137 -0
  22. codegraph_tools-0.1.0/codegraph/skill-codex.md +1193 -0
  23. codegraph_tools-0.1.0/codegraph/skill-copilot.md +1219 -0
  24. codegraph_tools-0.1.0/codegraph/skill-droid.md +1190 -0
  25. codegraph_tools-0.1.0/codegraph/skill-opencode.md +1189 -0
  26. codegraph_tools-0.1.0/codegraph/skill-trae.md +1159 -0
  27. codegraph_tools-0.1.0/codegraph/skill-windows.md +1196 -0
  28. codegraph_tools-0.1.0/codegraph/skill.md +1222 -0
  29. codegraph_tools-0.1.0/codegraph/validate.py +71 -0
  30. codegraph_tools-0.1.0/codegraph/watch.py +162 -0
  31. codegraph_tools-0.1.0/codegraph/wiki.py +214 -0
  32. codegraph_tools-0.1.0/codegraph_tools.egg-info/PKG-INFO +394 -0
  33. codegraph_tools-0.1.0/codegraph_tools.egg-info/SOURCES.txt +62 -0
  34. codegraph_tools-0.1.0/codegraph_tools.egg-info/dependency_links.txt +1 -0
  35. codegraph_tools-0.1.0/codegraph_tools.egg-info/entry_points.txt +2 -0
  36. codegraph_tools-0.1.0/codegraph_tools.egg-info/requires.txt +52 -0
  37. codegraph_tools-0.1.0/codegraph_tools.egg-info/top_level.txt +1 -0
  38. codegraph_tools-0.1.0/pyproject.toml +60 -0
  39. codegraph_tools-0.1.0/setup.cfg +4 -0
  40. codegraph_tools-0.1.0/tests/test_analyze.py +232 -0
  41. codegraph_tools-0.1.0/tests/test_benchmark.py +119 -0
  42. codegraph_tools-0.1.0/tests/test_build.py +41 -0
  43. codegraph_tools-0.1.0/tests/test_cache.py +126 -0
  44. codegraph_tools-0.1.0/tests/test_claude_md.py +136 -0
  45. codegraph_tools-0.1.0/tests/test_cluster.py +76 -0
  46. codegraph_tools-0.1.0/tests/test_confidence.py +192 -0
  47. codegraph_tools-0.1.0/tests/test_detect.py +201 -0
  48. codegraph_tools-0.1.0/tests/test_export.py +127 -0
  49. codegraph_tools-0.1.0/tests/test_extract.py +170 -0
  50. codegraph_tools-0.1.0/tests/test_hooks.py +112 -0
  51. codegraph_tools-0.1.0/tests/test_hypergraph.py +205 -0
  52. codegraph_tools-0.1.0/tests/test_ingest.py +68 -0
  53. codegraph_tools-0.1.0/tests/test_install.py +320 -0
  54. codegraph_tools-0.1.0/tests/test_languages.py +510 -0
  55. codegraph_tools-0.1.0/tests/test_multilang.py +173 -0
  56. codegraph_tools-0.1.0/tests/test_pipeline.py +158 -0
  57. codegraph_tools-0.1.0/tests/test_rationale.py +89 -0
  58. codegraph_tools-0.1.0/tests/test_report.py +63 -0
  59. codegraph_tools-0.1.0/tests/test_security.py +189 -0
  60. codegraph_tools-0.1.0/tests/test_semantic_similarity.py +194 -0
  61. codegraph_tools-0.1.0/tests/test_serve.py +153 -0
  62. codegraph_tools-0.1.0/tests/test_validate.py +87 -0
  63. codegraph_tools-0.1.0/tests/test_watch.py +68 -0
  64. codegraph_tools-0.1.0/tests/test_wiki.py +139 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Safi Shamsi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,394 @@
1
+ Metadata-Version: 2.4
2
+ Name: codegraph-tools
3
+ Version: 0.1.0
4
+ Summary: AI coding assistant skill (Claude Code, Codex, OpenCode, Cursor, OpenClaw, Factory Droid, Trae) - turn any folder of code, docs, papers, or images into a queryable knowledge graph
5
+ License: MIT License
6
+
7
+ Copyright (c) 2026 Safi Shamsi
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+ Project-URL: Homepage, https://github.com/safishamsi/graphify
28
+ Project-URL: Repository, https://github.com/safishamsi/graphify
29
+ Project-URL: Issues, https://github.com/safishamsi/graphify/issues
30
+ Keywords: claude,claude-code,codex,opencode,cursor,knowledge-graph,rag,graphrag,obsidian,community-detection,tree-sitter,leiden,llm
31
+ Requires-Python: >=3.10
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: networkx
35
+ Requires-Dist: tree-sitter>=0.23.0
36
+ Requires-Dist: tree-sitter-python
37
+ Requires-Dist: tree-sitter-javascript
38
+ Requires-Dist: tree-sitter-typescript
39
+ Requires-Dist: tree-sitter-go
40
+ Requires-Dist: tree-sitter-rust
41
+ Requires-Dist: tree-sitter-java
42
+ Requires-Dist: tree-sitter-c
43
+ Requires-Dist: tree-sitter-cpp
44
+ Requires-Dist: tree-sitter-ruby
45
+ Requires-Dist: tree-sitter-c-sharp
46
+ Requires-Dist: tree-sitter-kotlin
47
+ Requires-Dist: tree-sitter-scala
48
+ Requires-Dist: tree-sitter-php
49
+ Requires-Dist: tree-sitter-swift
50
+ Requires-Dist: tree-sitter-lua
51
+ Requires-Dist: tree-sitter-zig
52
+ Requires-Dist: tree-sitter-powershell
53
+ Requires-Dist: tree-sitter-elixir
54
+ Requires-Dist: tree-sitter-objc
55
+ Requires-Dist: tree-sitter-julia
56
+ Provides-Extra: mcp
57
+ Requires-Dist: mcp; extra == "mcp"
58
+ Provides-Extra: neo4j
59
+ Requires-Dist: neo4j; extra == "neo4j"
60
+ Provides-Extra: pdf
61
+ Requires-Dist: pypdf; extra == "pdf"
62
+ Requires-Dist: html2text; extra == "pdf"
63
+ Provides-Extra: watch
64
+ Requires-Dist: watchdog; extra == "watch"
65
+ Provides-Extra: leiden
66
+ Requires-Dist: graspologic; extra == "leiden"
67
+ Provides-Extra: office
68
+ Requires-Dist: python-docx; extra == "office"
69
+ Requires-Dist: openpyxl; extra == "office"
70
+ Provides-Extra: all
71
+ Requires-Dist: mcp; extra == "all"
72
+ Requires-Dist: neo4j; extra == "all"
73
+ Requires-Dist: pypdf; extra == "all"
74
+ Requires-Dist: html2text; extra == "all"
75
+ Requires-Dist: watchdog; extra == "all"
76
+ Requires-Dist: graspologic; extra == "all"
77
+ Requires-Dist: python-docx; extra == "all"
78
+ Requires-Dist: openpyxl; extra == "all"
79
+ Dynamic: license-file
80
+
81
+ # graphify
82
+
83
+ [English](README.md) | [简体中文](README.zh-CN.md) | [日本語](README.ja-JP.md) | [한국어](README.ko-KR.md)
84
+
85
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v3)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
86
+ [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
87
+ [![Downloads](https://static.pepy.tech/badge/graphifyy/month)](https://pepy.tech/project/graphifyy)
88
+ [![Sponsor](https://img.shields.io/badge/sponsor-safishamsi-ea4aaa?logo=github-sponsors)](https://github.com/sponsors/safishamsi)
89
+
90
+ **An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, Cursor, Gemini CLI, GitHub Copilot CLI, Aider, OpenClaw, Factory Droid, or Trae - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
91
+
92
+ Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph. 20 languages supported via tree-sitter AST (Python, JS, TS, Go, Rust, Java, C, C++, Ruby, C#, Kotlin, Scala, PHP, Swift, Lua, Zig, PowerShell, Elixir, Objective-C, Julia).
93
+
94
+ > Andrej Karpathy keeps a `/raw` folder where he drops papers, tweets, screenshots, and notes. graphify is built for exactly that kind of unstructured pile - 71.5x fewer tokens per query vs reading the raw files, persistent across sessions, and honest about what it found vs guessed.
95
+
96
+ ```
97
+ /graphify . # works on any folder - your codebase, notes, papers, anything
98
+ ```
99
+
100
+ ```
101
+ graphify-out/
102
+ ├── graph.html interactive graph - click nodes, search, filter by community
103
+ ├── GRAPH_REPORT.md god nodes, surprising connections, suggested questions
104
+ ├── graph.json persistent graph - query weeks later without re-reading
105
+ └── cache/ SHA256 cache - re-runs only process changed files
106
+ ```
107
+
108
+ Add a `.graphifyignore` file to exclude folders you don't want in the graph:
109
+
110
+ ```
111
+ # .graphifyignore
112
+ vendor/
113
+ node_modules/
114
+ dist/
115
+ *.generated.py
116
+ ```
117
+
118
+ Same syntax as `.gitignore`. Patterns match against file paths relative to the folder you run graphify on.
119
+
120
+ ## How it works
121
+
122
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
123
+
124
+ **Clustering is graph-topology-based — no embeddings.** Leiden finds communities by edge density. The semantic similarity edges that Claude extracts (`semantically_similar_to`, marked INFERRED) are already in the graph, so they influence community detection directly. The graph structure is the similarity signal — no separate embedding step or vector database needed.
125
+
126
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
127
+
128
+ ## Install
129
+
130
+ **Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), [Cursor](https://cursor.com), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli), [Aider](https://aider.chat), [OpenClaw](https://openclaw.ai), [Factory Droid](https://factory.ai), or [Trae](https://trae.ai)
131
+
132
+ ```bash
133
+ pip install graphifyy && graphify install
134
+ ```
135
+
136
+ > **Official package:** The PyPI package is named `graphifyy` (install with `pip install graphifyy`). Other packages named `graphify*` on PyPI are not affiliated with this project. The only official repository is [safishamsi/graphify](https://github.com/safishamsi/graphify). The CLI and skill command are still `graphify`.
137
+
138
+ ### Platform support
139
+
140
+ | Platform | Install command |
141
+ |----------|----------------|
142
+ | Claude Code (Linux/Mac) | `graphify install` |
143
+ | Claude Code (Windows) | `graphify install` (auto-detected) or `graphify install --platform windows` |
144
+ | Codex | `graphify install --platform codex` |
145
+ | OpenCode | `graphify install --platform opencode` |
146
+ | GitHub Copilot CLI | `graphify install --platform copilot` |
147
+ | Aider | `graphify install --platform aider` |
148
+ | OpenClaw | `graphify install --platform claw` |
149
+ | Factory Droid | `graphify install --platform droid` |
150
+ | Trae | `graphify install --platform trae` |
151
+ | Trae CN | `graphify install --platform trae-cn` |
152
+ | Gemini CLI | `graphify install --platform gemini` |
153
+ | Cursor | `graphify cursor install` |
154
+
155
+ Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. Factory Droid uses the `Task` tool for parallel subagent dispatch. OpenClaw and Aider use sequential extraction (parallel agent support is still early on those platforms). Trae uses the Agent tool for parallel subagent dispatch and does **not** support PreToolUse hooks — AGENTS.md is the always-on mechanism.
156
+
157
+ Then open your AI coding assistant and type:
158
+
159
+ ```
160
+ /graphify .
161
+ ```
162
+
163
+ Note: Codex uses `$` instead of `/` for skill calling, so type `$graphify .` instead.
164
+
165
+ ### Make your assistant always use the graph (recommended)
166
+
167
+ After building a graph, run this once in your project:
168
+
169
+ | Platform | Command |
170
+ |----------|---------|
171
+ | Claude Code | `graphify claude install` |
172
+ | Codex | `graphify codex install` |
173
+ | OpenCode | `graphify opencode install` |
174
+ | GitHub Copilot CLI | `graphify copilot install` |
175
+ | Aider | `graphify aider install` |
176
+ | OpenClaw | `graphify claw install` |
177
+ | Factory Droid | `graphify droid install` |
178
+ | Trae | `graphify trae install` |
179
+ | Trae CN | `graphify trae-cn install` |
180
+ | Cursor | `graphify cursor install` |
181
+ | Gemini CLI | `graphify gemini install` |
182
+
183
+ **Claude Code** does two things: writes a `CLAUDE.md` section telling Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and installs a **PreToolUse hook** (`settings.json`) that fires before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ — so Claude navigates via the graph instead of grepping through every file.
184
+
185
+ **Codex** writes to `AGENTS.md` and also installs a **PreToolUse hook** in `.codex/hooks.json` that fires before every Bash tool call — same always-on mechanism as Claude Code.
186
+
187
+ **OpenCode** writes to `AGENTS.md` and also installs a **`tool.execute.before` plugin** (`.opencode/plugins/graphify.js` + `opencode.json` registration) that fires before bash tool calls and injects the graph reminder into tool output when the graph exists.
188
+
189
+ **Cursor** writes `.cursor/rules/graphify.mdc` with `alwaysApply: true` — Cursor includes it in every conversation automatically, no hook needed.
190
+
191
+ **Gemini CLI** copies the skill to `~/.gemini/skills/graphify/SKILL.md`, writes a `GEMINI.md` section, and installs a `BeforeTool` hook in `.gemini/settings.json` that fires before file-read tool calls — same always-on mechanism as Claude Code.
192
+
193
+ **Aider, OpenClaw, Factory Droid, and Trae** write the same rules to `AGENTS.md` in your project root. These platforms don't support tool hooks, so AGENTS.md is the always-on mechanism.
194
+
195
+ **GitHub Copilot CLI** copies the skill to `~/.copilot/skills/graphify/SKILL.md`. Run `graphify copilot install` to set it up.
196
+
197
+ Uninstall with the matching uninstall command (e.g. `graphify claude uninstall`).
198
+
199
+ **Always-on vs explicit trigger — what's the difference?**
200
+
201
+ The always-on hook surfaces `GRAPH_REPORT.md` — a one-page summary of god nodes, communities, and surprising connections. Your assistant reads this before searching files, so it navigates by structure instead of keyword matching. That covers most everyday questions.
202
+
203
+ `/graphify query`, `/graphify path`, and `/graphify explain` go deeper: they traverse the raw `graph.json` hop by hop, trace exact paths between nodes, and surface edge-level detail (relation type, confidence score, source location). Use them when you want a specific question answered from the graph rather than a general orientation.
204
+
205
+ Think of it this way: the always-on hook gives your assistant a map. The `/graphify` commands let it navigate the map precisely.
206
+
207
+ ## Using `graph.json` with an LLM
208
+
209
+ `graph.json` is not meant to be pasted into a prompt all at once. The useful
210
+ workflow is:
211
+
212
+ 1. Start with `graphify-out/GRAPH_REPORT.md` for the high-level overview.
213
+ 2. Use `graphify query` to pull a smaller subgraph for the specific question
214
+ you want to answer.
215
+ 3. Give that focused output to your assistant instead of dumping the full raw
216
+ corpus.
217
+
218
+ For example, after running graphify on a project:
219
+
220
+ ```bash
221
+ graphify query "show the auth flow" --graph graphify-out/graph.json
222
+ graphify query "what connects DigestAuth to Response?" --graph graphify-out/graph.json
223
+ ```
224
+
225
+ The output includes node labels, edge types, confidence tags, source files, and
226
+ source locations. That makes it a good intermediate context block for an LLM:
227
+
228
+ ```text
229
+ Use this graph query output to answer the question. Prefer the graph structure
230
+ over guessing, and cite the source files when possible.
231
+ ```
232
+
233
+ If your assistant supports tool calling or MCP, use the graph directly instead
234
+ of pasting text. graphify can expose `graph.json` as an MCP server:
235
+
236
+ ```bash
237
+ python -m graphify.serve graphify-out/graph.json
238
+ ```
239
+
240
+ That gives the assistant structured graph access for repeated queries such as
241
+ `query_graph`, `get_node`, `get_neighbors`, and `shortest_path`.
242
+
243
+ <details>
244
+ <summary>Manual install (curl)</summary>
245
+
246
+ ```bash
247
+ mkdir -p ~/.claude/skills/graphify
248
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v3/graphify/skill.md \
249
+ > ~/.claude/skills/graphify/SKILL.md
250
+ ```
251
+
252
+ Add to `~/.claude/CLAUDE.md`:
253
+
254
+ ```
255
+ - **graphify** (`~/.claude/skills/graphify/SKILL.md`) - any input to knowledge graph. Trigger: `/graphify`
256
+ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"` before doing anything else.
257
+ ```
258
+
259
+ </details>
260
+
261
+ ## Usage
262
+
263
+ ```
264
+ /graphify # run on current directory
265
+ /graphify ./raw # run on a specific folder
266
+ /graphify ./raw --mode deep # more aggressive INFERRED edge extraction
267
+ /graphify ./raw --update # re-extract only changed files, merge into existing graph
268
+ /graphify ./raw --directed # build directed graph (preserves edge direction: source→target)
269
+ /graphify ./raw --cluster-only # rerun clustering on existing graph, no re-extraction
270
+ /graphify ./raw --no-viz # skip HTML, just produce report + JSON
271
+ /graphify ./raw --obsidian # also generate Obsidian vault (opt-in)
272
+ /graphify ./raw --obsidian --obsidian-dir ~/vaults/myproject # write vault to a specific directory
273
+
274
+ /graphify add https://arxiv.org/abs/1706.03762 # fetch a paper, save, update graph
275
+ /graphify add https://x.com/karpathy/status/... # fetch a tweet
276
+ /graphify add https://... --author "Name" # tag the original author
277
+ /graphify add https://... --contributor "Name" # tag who added it to the corpus
278
+
279
+ /graphify query "what connects attention to the optimizer?"
280
+ /graphify query "what connects attention to the optimizer?" --dfs # trace a specific path
281
+ /graphify query "what connects attention to the optimizer?" --budget 1500 # cap at N tokens
282
+ /graphify path "DigestAuth" "Response"
283
+ /graphify explain "SwinTransformer"
284
+
285
+ /graphify ./raw --watch # auto-sync graph as files change (code: instant, docs: notifies you)
286
+ /graphify ./raw --wiki # build agent-crawlable wiki (index.md + article per community)
287
+ /graphify ./raw --svg # export graph.svg
288
+ /graphify ./raw --graphml # export graph.graphml (Gephi, yEd)
289
+ /graphify ./raw --neo4j # generate cypher.txt for Neo4j
290
+ /graphify ./raw --neo4j-push bolt://localhost:7687 # push directly to a running Neo4j instance
291
+ /graphify ./raw --mcp # start MCP stdio server
292
+
293
+ # git hooks - platform-agnostic, rebuild graph on commit and branch switch
294
+ graphify hook install
295
+ graphify hook uninstall
296
+ graphify hook status
297
+
298
+ # always-on assistant instructions - platform-specific
299
+ graphify claude install # CLAUDE.md + PreToolUse hook (Claude Code)
300
+ graphify claude uninstall
301
+ graphify codex install # AGENTS.md + PreToolUse hook (Codex)
302
+ graphify opencode install # AGENTS.md + tool.execute.before plugin (OpenCode)
303
+ graphify cursor install # .cursor/rules/graphify.mdc (Cursor)
304
+ graphify cursor uninstall
305
+ graphify gemini install # GEMINI.md + BeforeTool hook (Gemini CLI)
306
+ graphify gemini uninstall
307
+ graphify copilot install # skill file (GitHub Copilot CLI)
308
+ graphify copilot uninstall
309
+ graphify aider install # AGENTS.md (Aider)
310
+ graphify aider uninstall
311
+ graphify claw install # AGENTS.md (OpenClaw)
312
+ graphify droid install # AGENTS.md (Factory Droid)
313
+ graphify trae install # AGENTS.md (Trae)
314
+ graphify trae uninstall
315
+ graphify trae-cn install # AGENTS.md (Trae CN)
316
+ graphify trae-cn uninstall
317
+
318
+ # query the graph directly from the terminal (no AI assistant needed)
319
+ graphify query "what connects attention to the optimizer?"
320
+ graphify query "show the auth flow" --dfs
321
+ graphify query "what is CfgNode?" --budget 500
322
+ graphify query "..." --graph path/to/graph.json
323
+ ```
324
+
325
+ Works with any mix of file types:
326
+
327
+ | Type | Extensions | Extraction |
328
+ |------|-----------|------------|
329
+ | Code | `.py .ts .js .jsx .tsx .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua .zig .ps1 .ex .exs .m .mm .jl` | AST via tree-sitter + call-graph + docstring/comment rationale |
330
+ | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
331
+ | Office | `.docx .xlsx` | Converted to markdown then extracted via Claude (requires `pip install graphifyy[office]`) |
332
+ | Papers | `.pdf` | Citation mining + concept extraction |
333
+ | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
334
+
335
+ ## What you get
336
+
337
+ **God nodes** - highest-degree concepts (what everything connects through)
338
+
339
+ **Surprising connections** - ranked by composite score. Code-paper edges rank higher than code-code. Each result includes a plain-English why.
340
+
341
+ **Suggested questions** - 4-5 questions the graph is uniquely positioned to answer
342
+
343
+ **The "why"** - docstrings, inline comments (`# NOTE:`, `# IMPORTANT:`, `# HACK:`, `# WHY:`), and design rationale from docs are extracted as `rationale_for` nodes. Not just what the code does - why it was written that way.
344
+
345
+ **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
346
+
347
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. For example, two functions solving the same problem without calling each other, or a class in code and a concept in a paper describing the same algorithm.
348
+
349
+ **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. For example, all classes implementing a shared protocol, all functions in an auth flow, or all concepts from a paper section forming one idea.
350
+
351
+ **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files. The first run extracts and builds the graph (this costs tokens). Every subsequent query reads the compact graph instead of raw files — that's where the savings compound. The SHA256 cache means re-runs only re-process changed files.
352
+
353
+ **Auto-sync** (`--watch`) - run in a background terminal and the graph updates itself as your codebase changes. Code file saves trigger an instant rebuild (AST only, no LLM). Doc/image changes notify you to run `--update` for the LLM re-pass.
354
+
355
+ **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. If a rebuild fails, the hook exits with a non-zero code so git surfaces the error instead of silently continuing. No background process needed.
356
+
357
+ **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
358
+
359
+ ## Worked examples
360
+
361
+ | Corpus | Files | Reduction | Output |
362
+ |--------|-------|-----------|--------|
363
+ | Karpathy repos + 5 papers + 4 images | 52 | **71.5x** | [`worked/karpathy-repos/`](worked/karpathy-repos/) |
364
+ | graphify source + Transformer paper | 4 | **5.4x** | [`worked/mixed-corpus/`](worked/mixed-corpus/) |
365
+ | httpx (synthetic Python library) | 6 | ~1x | [`worked/httpx/`](worked/httpx/) |
366
+
367
+ Token reduction scales with corpus size. Six files fit in a context window anyway, so the graph's value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
368
+
369
+ ## Privacy
370
+
371
+ graphify sends file contents to your AI coding assistant's underlying model API for semantic extraction of docs, papers, and images — Anthropic (Claude Code), OpenAI (Codex), or whichever provider your platform uses. Code files are processed locally via tree-sitter AST — no file contents leave your machine for code. No telemetry, usage tracking, or analytics of any kind. The only network calls are to your platform's model API during extraction, using your own API key.
372
+
373
+ ## Tech stack
374
+
375
+ NetworkX + Leiden (graspologic) + tree-sitter + vis.js. Semantic extraction via Claude (Claude Code), GPT-4 (Codex), or whichever model your platform runs. No Neo4j required, no server, runs entirely locally.
376
+
377
+ ## What we are building next
378
+
379
+ graphify is the graph layer. We are building [Penpax](https://safishamsi.github.io/penpax.ai) on top of it — an on-device digital twin that connects your meetings, browser history, files, emails, and code into one continuously updating knowledge graph. No cloud, no training on your data. [Join the waitlist.](https://safishamsi.github.io/penpax.ai)
380
+
381
+ ## Star history
382
+
383
+ [![Star History Chart](https://api.star-history.com/svg?repos=safishamsi/graphify&type=Date)](https://star-history.com/#safishamsi/graphify&Date)
384
+
385
+ <details>
386
+ <summary>Contributing</summary>
387
+
388
+ **Worked examples** are the most trust-building contribution. Run `/graphify` on a real corpus, save the output to `worked/{slug}/`, write an honest `review.md` evaluating what the graph got right and wrong, and submit a PR.
389
+
390
+ **Extraction bugs** - open an issue with the input file, the cache entry (`graphify-out/cache/`), and what was missed or invented.
391
+
392
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for module responsibilities and how to add a language.
393
+
394
+ </details>