graphifyy 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {graphifyy-0.2.0 → graphifyy-0.2.2}/PKG-INFO +38 -11
  2. {graphifyy-0.2.0 → graphifyy-0.2.2}/README.md +34 -9
  3. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/__main__.py +1 -2
  4. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/cluster.py +20 -7
  5. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/skill.md +11 -17
  6. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/PKG-INFO +38 -11
  7. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/SOURCES.txt +1 -0
  8. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/requires.txt +4 -1
  9. {graphifyy-0.2.0 → graphifyy-0.2.2}/pyproject.toml +3 -3
  10. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_claude_md.py +39 -0
  11. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_hooks.py +33 -1
  12. graphifyy-0.2.2/tests/test_rationale.py +89 -0
  13. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/__init__.py +0 -0
  14. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/analyze.py +0 -0
  15. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/benchmark.py +0 -0
  16. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/build.py +0 -0
  17. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/cache.py +0 -0
  18. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/detect.py +0 -0
  19. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/export.py +0 -0
  20. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/extract.py +0 -0
  21. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/hooks.py +0 -0
  22. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/ingest.py +0 -0
  23. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/manifest.py +0 -0
  24. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/report.py +0 -0
  25. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/security.py +0 -0
  26. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/serve.py +0 -0
  27. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/validate.py +0 -0
  28. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/watch.py +0 -0
  29. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/wiki.py +0 -0
  30. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/dependency_links.txt +0 -0
  31. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/entry_points.txt +0 -0
  32. {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/top_level.txt +0 -0
  33. {graphifyy-0.2.0 → graphifyy-0.2.2}/setup.cfg +0 -0
  34. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_analyze.py +0 -0
  35. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_benchmark.py +0 -0
  36. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_build.py +0 -0
  37. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_cache.py +0 -0
  38. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_cluster.py +0 -0
  39. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_confidence.py +0 -0
  40. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_detect.py +0 -0
  41. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_export.py +0 -0
  42. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_extract.py +0 -0
  43. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_hypergraph.py +0 -0
  44. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_ingest.py +0 -0
  45. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_languages.py +0 -0
  46. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_multilang.py +0 -0
  47. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_pipeline.py +0 -0
  48. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_report.py +0 -0
  49. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_security.py +0 -0
  50. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_semantic_similarity.py +0 -0
  51. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_serve.py +0 -0
  52. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_validate.py +0 -0
  53. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_watch.py +0 -0
  54. {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_wiki.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphifyy
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/safishamsi/graphify
@@ -10,7 +10,6 @@ Keywords: claude,claude-code,knowledge-graph,rag,graphrag,obsidian,community-det
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  Requires-Dist: networkx
13
- Requires-Dist: graspologic
14
13
  Requires-Dist: tree-sitter
15
14
  Requires-Dist: tree-sitter-python
16
15
  Requires-Dist: tree-sitter-javascript
@@ -34,16 +33,20 @@ Requires-Dist: pypdf; extra == "pdf"
34
33
  Requires-Dist: html2text; extra == "pdf"
35
34
  Provides-Extra: watch
36
35
  Requires-Dist: watchdog; extra == "watch"
36
+ Provides-Extra: leiden
37
+ Requires-Dist: graspologic; extra == "leiden"
37
38
  Provides-Extra: all
38
39
  Requires-Dist: mcp; extra == "all"
39
40
  Requires-Dist: neo4j; extra == "all"
40
41
  Requires-Dist: pypdf; extra == "all"
41
42
  Requires-Dist: html2text; extra == "all"
42
43
  Requires-Dist: watchdog; extra == "all"
44
+ Requires-Dist: graspologic; extra == "all"
43
45
 
44
46
  # graphify
45
47
 
46
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
48
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
49
+ [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
47
50
 
48
51
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
49
52
 
@@ -63,6 +66,12 @@ graphify-out/
63
66
  └── cache/ SHA256 cache - re-runs only process changed files
64
67
  ```
65
68
 
69
+ ## How it works
70
+
71
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection (or NetworkX's built-in Louvain when graspologic is not installed), and exported as interactive HTML, queryable JSON, and a plain-language audit report.
72
+
73
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
74
+
66
75
  ## Install
67
76
 
68
77
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -79,12 +88,30 @@ Then open Claude Code in any directory and type:
79
88
  /graphify .
80
89
  ```
81
90
 
91
+ ### Make Claude always use the graph (recommended)
92
+
93
+ After building a graph, run this once in your project:
94
+
95
+ ```bash
96
+ graphify claude install
97
+ ```
98
+
99
+ This does two things:
100
+
101
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
102
+
103
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
104
+
105
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
106
+
107
+ Uninstall with `graphify claude uninstall`.
108
+
82
109
  <details>
83
110
  <summary>Manual install (curl)</summary>
84
111
 
85
112
  ```bash
86
113
  mkdir -p ~/.claude/skills/graphify
87
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
114
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
88
115
  > ~/.claude/skills/graphify/SKILL.md
89
116
  ```
90
117
 
@@ -121,14 +148,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
121
148
  /graphify ./raw --mcp # start MCP stdio server
122
149
 
123
150
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
124
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
151
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
125
152
  ```
126
153
 
127
154
  Works with any mix of file types:
128
155
 
129
156
  | Type | Extensions | Extraction |
130
157
  |------|-----------|------------|
131
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
158
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
132
159
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
133
160
  | Papers | `.pdf` | Citation mining + concept extraction |
134
161
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -145,7 +172,7 @@ Works with any mix of file types:
145
172
 
146
173
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
147
174
 
148
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
175
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
149
176
 
150
177
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
151
178
 
@@ -155,12 +182,8 @@ Works with any mix of file types:
155
182
 
156
183
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
157
184
 
158
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
159
-
160
185
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
161
186
 
162
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
163
-
164
187
  ## Worked examples
165
188
 
166
189
  | Corpus | Files | Reduction | Output |
@@ -171,6 +194,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
171
194
 
172
195
  Token reduction scales with corpus size. 6 files fits in a context window anyway, so graph value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
173
196
 
197
+ ## Privacy
198
+
199
+ graphify sends the contents of docs, papers, and images to the Claude API (Anthropic) for semantic extraction. Code files are processed locally via tree-sitter AST, so their contents never leave your machine. There is no telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
200
+
174
201
  ## Tech stack
175
202
 
176
203
  NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required and no server: the graph is built and stored locally, and the only network calls are to the Claude API for semantic extraction.
@@ -1,6 +1,7 @@
1
1
  # graphify
2
2
 
3
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
3
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
4
5
 
5
6
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
6
7
 
@@ -20,6 +21,12 @@ graphify-out/
20
21
  └── cache/ SHA256 cache - re-runs only process changed files
21
22
  ```
22
23
 
24
+ ## How it works
25
+
26
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection (or NetworkX's built-in Louvain when graspologic is not installed), and exported as interactive HTML, queryable JSON, and a plain-language audit report.
27
+
28
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
29
+
23
30
  ## Install
24
31
 
25
32
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -36,12 +43,30 @@ Then open Claude Code in any directory and type:
36
43
  /graphify .
37
44
  ```
38
45
 
46
+ ### Make Claude always use the graph (recommended)
47
+
48
+ After building a graph, run this once in your project:
49
+
50
+ ```bash
51
+ graphify claude install
52
+ ```
53
+
54
+ This does two things:
55
+
56
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
57
+
58
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
59
+
60
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
61
+
62
+ Uninstall with `graphify claude uninstall`.
63
+
39
64
  <details>
40
65
  <summary>Manual install (curl)</summary>
41
66
 
42
67
  ```bash
43
68
  mkdir -p ~/.claude/skills/graphify
44
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
69
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
45
70
  > ~/.claude/skills/graphify/SKILL.md
46
71
  ```
47
72
 
@@ -78,14 +103,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
78
103
  /graphify ./raw --mcp # start MCP stdio server
79
104
 
80
105
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
81
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
106
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
82
107
  ```
83
108
 
84
109
  Works with any mix of file types:
85
110
 
86
111
  | Type | Extensions | Extraction |
87
112
  |------|-----------|------------|
88
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
113
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
89
114
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
90
115
  | Papers | `.pdf` | Citation mining + concept extraction |
91
116
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -102,7 +127,7 @@ Works with any mix of file types:
102
127
 
103
128
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
104
129
 
105
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
130
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
106
131
 
107
132
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
108
133
 
@@ -112,12 +137,8 @@ Works with any mix of file types:
112
137
 
113
138
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
114
139
 
115
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
116
-
117
140
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
118
141
 
119
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
120
-
121
142
  ## Worked examples
122
143
 
123
144
  | Corpus | Files | Reduction | Output |
@@ -128,6 +149,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
128
149
 
129
150
  Token reduction scales with corpus size. 6 files fits in a context window anyway, so graph value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
130
151
 
152
+ ## Privacy
153
+
154
+ graphify sends the contents of docs, papers, and images to the Claude API (Anthropic) for semantic extraction. Code files are processed locally via tree-sitter AST, so their contents never leave your machine. There is no telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
155
+
131
156
  ## Tech stack
132
157
 
133
158
  NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required and no server: the graph is built and stored locally, and the only network calls are to the Claude API for semantic extraction.
@@ -171,12 +171,11 @@ def claude_uninstall(project_dir: Path | None = None) -> None:
171
171
  ).rstrip()
172
172
  if cleaned:
173
173
  target.write_text(cleaned + "\n")
174
+ print(f"graphify section removed from {target.resolve()}")
174
175
  else:
175
176
  target.unlink()
176
177
  print(f"CLAUDE.md was empty after removal - deleted {target.resolve()}")
177
- return
178
178
 
179
- print(f"graphify section removed from {target.resolve()}")
180
179
  _uninstall_claude_hook(project_dir or Path("."))
181
180
 
182
181
 
@@ -1,8 +1,25 @@
1
- """Leiden community detection on NetworkX graphs. Splits oversized communities. Returns cohesion scores."""
1
+ """Community detection on NetworkX graphs. Uses Leiden (graspologic) if available, falls back to Louvain (networkx). Splits oversized communities. Returns cohesion scores."""
2
2
  from __future__ import annotations
3
3
  import networkx as nx
4
4
 
5
5
 
6
+ def _partition(G: nx.Graph) -> dict[str, int]:
7
+ """Run community detection. Returns {node_id: community_id}.
8
+
9
+ Tries Leiden (graspologic) first — best quality.
10
+ Falls back to Louvain (built into networkx) if graspologic is not installed.
11
+ """
12
+ try:
13
+ from graspologic.partition import leiden
14
+ return leiden(G)
15
+ except ImportError:
16
+ pass
17
+
18
+ # Fallback: networkx louvain (available since networkx 2.7)
19
+ communities = nx.community.louvain_communities(G, seed=42)
20
+ return {node: cid for cid, nodes in enumerate(communities) for node in nodes}
21
+
22
+
6
23
  def build_graph(nodes: list[dict], edges: list[dict]) -> nx.Graph:
7
24
  """Build a NetworkX graph from graphify node/edge dicts.
8
25
 
@@ -36,8 +53,6 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
36
53
  if G.number_of_edges() == 0:
37
54
  return {i: [n] for i, n in enumerate(sorted(G.nodes))}
38
55
 
39
- from graspologic.partition import leiden # lazy - avoids 15s numba JIT on import
40
-
41
56
  # Leiden warns and drops isolates - handle them separately
42
57
  isolates = [n for n in G.nodes() if G.degree(n) == 0]
43
58
  connected_nodes = [n for n in G.nodes() if G.degree(n) > 0]
@@ -45,7 +60,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
45
60
 
46
61
  raw: dict[int, list[str]] = {}
47
62
  if connected.number_of_nodes() > 0:
48
- partition: dict[str, int] = leiden(connected)
63
+ partition = _partition(connected)
49
64
  for node, cid in partition.items():
50
65
  raw.setdefault(cid, []).append(node)
51
66
 
@@ -76,13 +91,11 @@ def _split_community(G: nx.Graph, nodes: list[str]) -> list[list[str]]:
76
91
  # No edges - split into individual nodes
77
92
  return [[n] for n in sorted(nodes)]
78
93
  try:
79
- from graspologic.partition import leiden
80
- sub_partition: dict[str, int] = leiden(subgraph)
94
+ sub_partition = _partition(subgraph)
81
95
  sub_communities: dict[int, list[str]] = {}
82
96
  for node, cid in sub_partition.items():
83
97
  sub_communities.setdefault(cid, []).append(node)
84
98
  if len(sub_communities) <= 1:
85
- # Leiden couldn't split it - return as-is
86
99
  return [sorted(nodes)]
87
100
  return [sorted(v) for v in sub_communities.values()]
88
101
  except Exception:
@@ -411,9 +411,11 @@ print('Report updated with community labels')
411
411
  Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`).
412
412
  Replace INPUT_PATH with the actual path.
413
413
 
414
- ### Step 6 - Generate Obsidian vault (default) + optional HTML
414
+ ### Step 6 - Generate Obsidian vault (opt-in) + HTML
415
415
 
416
- **Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was given** — it generates one file per node which creates thousands of files in large repos. Skip it by default.
416
+ **Generate HTML always** (unless `--no-viz`). **Generate the Obsidian vault only if `--obsidian` was explicitly given** — otherwise skip it, because it generates one file per node (thousands of files in large repos).
417
+
418
+ If `--obsidian` was given:
417
419
 
418
420
  ```bash
419
421
  python3 -c "
@@ -444,7 +446,7 @@ print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries
444
446
  "
445
447
  ```
446
448
 
447
- Also generate the HTML graph (always, unless `--no-viz`):
449
+ Generate the HTML graph (always, unless `--no-viz`):
448
450
 
449
451
  ```bash
450
452
  python3 -c "
@@ -631,22 +633,14 @@ rm -f .graphify_detect.json .graphify_extract.json .graphify_ast.json .graphify_
631
633
  rm -f graphify-out/.needs_update 2>/dev/null || true
632
634
  ```
633
635
 
634
- Tell the user:
636
+ Tell the user (omit the obsidian line unless --obsidian was given):
635
637
  ```
636
- Graph complete. Outputs are in a hidden folder called graphify-out/ inside the directory you ran this on.
637
-
638
- The folder is hidden (dot prefix) so it won't show in Finder or a normal ls.
639
- To see it:
640
- Mac/Linux: ls -la graphify-out/
641
- VS Code: the Explorer panel shows hidden files by default
642
- Finder: Cmd+Shift+. to toggle hidden files
643
-
644
- What's inside:
645
- graphify-out/obsidian/ - open this folder as a vault in Obsidian (File > Open Vault)
646
- graphify-out/GRAPH_REPORT.md - full audit report, also readable here in Claude
647
- graphify-out/graph.json - persistent graph, query it later with /graphify query "..."
638
+ Graph complete. Outputs in PATH_TO_DIR/graphify-out/
648
639
 
649
- Full path: PATH_TO_DIR/graphify-out/
640
+ graph.html - interactive graph, open in browser
641
+ GRAPH_REPORT.md - audit report
642
+ graph.json - raw graph data
643
+ obsidian/ - Obsidian vault (only if --obsidian was given)
650
644
  ```
651
645
 
652
646
  Replace PATH_TO_DIR with the actual absolute path of the directory that was processed.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphifyy
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/safishamsi/graphify
@@ -10,7 +10,6 @@ Keywords: claude,claude-code,knowledge-graph,rag,graphrag,obsidian,community-det
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  Requires-Dist: networkx
13
- Requires-Dist: graspologic
14
13
  Requires-Dist: tree-sitter
15
14
  Requires-Dist: tree-sitter-python
16
15
  Requires-Dist: tree-sitter-javascript
@@ -34,16 +33,20 @@ Requires-Dist: pypdf; extra == "pdf"
34
33
  Requires-Dist: html2text; extra == "pdf"
35
34
  Provides-Extra: watch
36
35
  Requires-Dist: watchdog; extra == "watch"
36
+ Provides-Extra: leiden
37
+ Requires-Dist: graspologic; extra == "leiden"
37
38
  Provides-Extra: all
38
39
  Requires-Dist: mcp; extra == "all"
39
40
  Requires-Dist: neo4j; extra == "all"
40
41
  Requires-Dist: pypdf; extra == "all"
41
42
  Requires-Dist: html2text; extra == "all"
42
43
  Requires-Dist: watchdog; extra == "all"
44
+ Requires-Dist: graspologic; extra == "all"
43
45
 
44
46
  # graphify
45
47
 
46
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
48
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
49
+ [![PyPI](https://img.shields.io/pypi/v/graphifyy)](https://pypi.org/project/graphifyy/)
47
50
 
48
51
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
49
52
 
@@ -63,6 +66,12 @@ graphify-out/
63
66
  └── cache/ SHA256 cache - re-runs only process changed files
64
67
  ```
65
68
 
69
+ ## How it works
70
+
71
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection (or NetworkX's built-in Louvain when graspologic is not installed), and exported as interactive HTML, queryable JSON, and a plain-language audit report.
72
+
73
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
74
+
66
75
  ## Install
67
76
 
68
77
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -79,12 +88,30 @@ Then open Claude Code in any directory and type:
79
88
  /graphify .
80
89
  ```
81
90
 
91
+ ### Make Claude always use the graph (recommended)
92
+
93
+ After building a graph, run this once in your project:
94
+
95
+ ```bash
96
+ graphify claude install
97
+ ```
98
+
99
+ This does two things:
100
+
101
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
102
+
103
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
104
+
105
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
106
+
107
+ Uninstall with `graphify claude uninstall`.
108
+
82
109
  <details>
83
110
  <summary>Manual install (curl)</summary>
84
111
 
85
112
  ```bash
86
113
  mkdir -p ~/.claude/skills/graphify
87
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
114
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
88
115
  > ~/.claude/skills/graphify/SKILL.md
89
116
  ```
90
117
 
@@ -121,14 +148,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
121
148
  /graphify ./raw --mcp # start MCP stdio server
122
149
 
123
150
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
124
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
151
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
125
152
  ```
126
153
 
127
154
  Works with any mix of file types:
128
155
 
129
156
  | Type | Extensions | Extraction |
130
157
  |------|-----------|------------|
131
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
158
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
132
159
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
133
160
  | Papers | `.pdf` | Citation mining + concept extraction |
134
161
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -145,7 +172,7 @@ Works with any mix of file types:
145
172
 
146
173
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
147
174
 
148
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
175
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. For example: two functions solving the same problem without calling each other, or a class in code and a concept in a paper describing the same algorithm.
149
176
 
150
177
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
151
178
 
@@ -155,12 +182,8 @@ Works with any mix of file types:
155
182
 
156
183
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
157
184
 
158
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
159
-
160
185
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
161
186
 
162
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
163
-
164
187
  ## Worked examples
165
188
 
166
189
  | Corpus | Files | Reduction | Output |
@@ -171,6 +194,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
171
194
 
172
195
  Token reduction scales with corpus size. Six files fit in a context window anyway, so the graph's value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
173
196
 
197
+ ## Privacy
198
+
199
+ graphify sends file contents to the Claude API (Anthropic) for semantic extraction of docs, papers, and images. Code files are processed locally via tree-sitter AST, so the contents of code files never leave your machine. There is no telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
200
+
174
201
  ## Tech stack
175
202
 
176
203
  NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required, no server, runs entirely locally.
@@ -42,6 +42,7 @@ tests/test_ingest.py
42
42
  tests/test_languages.py
43
43
  tests/test_multilang.py
44
44
  tests/test_pipeline.py
45
+ tests/test_rationale.py
45
46
  tests/test_report.py
46
47
  tests/test_security.py
47
48
  tests/test_semantic_similarity.py
@@ -1,5 +1,4 @@
1
1
  networkx
2
- graspologic
3
2
  tree-sitter
4
3
  tree-sitter-python
5
4
  tree-sitter-javascript
@@ -21,6 +20,10 @@ neo4j
21
20
  pypdf
22
21
  html2text
23
22
  watchdog
23
+ graspologic
24
+
25
+ [leiden]
26
+ graspologic
24
27
 
25
28
  [mcp]
26
29
  mcp
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "graphifyy"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -12,7 +12,6 @@ keywords = ["claude", "claude-code", "knowledge-graph", "rag", "graphrag", "obsi
12
12
  requires-python = ">=3.10"
13
13
  dependencies = [
14
14
  "networkx",
15
- "graspologic",
16
15
  "tree-sitter",
17
16
  "tree-sitter-python",
18
17
  "tree-sitter-javascript",
@@ -39,7 +38,8 @@ mcp = ["mcp"]
39
38
  neo4j = ["neo4j"]
40
39
  pdf = ["pypdf", "html2text"]
41
40
  watch = ["watchdog"]
42
- all = ["mcp", "neo4j", "pypdf", "html2text", "watchdog"]
41
+ leiden = ["graspologic"]
42
+ all = ["mcp", "neo4j", "pypdf", "html2text", "watchdog", "graspologic"]
43
43
 
44
44
  [project.scripts]
45
45
  graphify = "graphify.__main__:main"
@@ -95,3 +95,42 @@ def test_uninstall_no_op_when_no_file(tmp_path, capsys):
95
95
  claude_uninstall(tmp_path)
96
96
  out = capsys.readouterr().out
97
97
  assert "No CLAUDE.md" in out or "nothing to do" in out
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # settings.json PreToolUse hook
102
+ # ---------------------------------------------------------------------------
103
+
104
+ def test_install_creates_settings_json(tmp_path):
105
+ """claude_install also writes .claude/settings.json with PreToolUse hook."""
106
+ import json
107
+ claude_install(tmp_path)
108
+ settings_path = tmp_path / ".claude" / "settings.json"
109
+ assert settings_path.exists()
110
+ settings = json.loads(settings_path.read_text())
111
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
112
+ assert any("Glob|Grep" in h.get("matcher", "") for h in hooks)
113
+
114
+
115
+ def test_install_settings_json_idempotent(tmp_path):
116
+ """Running claude_install twice does not duplicate the PreToolUse hook."""
117
+ import json
118
+ claude_install(tmp_path)
119
+ claude_install(tmp_path)
120
+ settings_path = tmp_path / ".claude" / "settings.json"
121
+ settings = json.loads(settings_path.read_text())
122
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
123
+ glob_grep_hooks = [h for h in hooks if "Glob|Grep" in h.get("matcher", "")]
124
+ assert len(glob_grep_hooks) == 1
125
+
126
+
127
+ def test_uninstall_removes_settings_hook(tmp_path):
128
+ """claude_uninstall removes the PreToolUse hook from settings.json."""
129
+ import json
130
+ claude_install(tmp_path)
131
+ claude_uninstall(tmp_path)
132
+ settings_path = tmp_path / ".claude" / "settings.json"
133
+ if settings_path.exists():
134
+ settings = json.loads(settings_path.read_text())
135
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
136
+ assert not any("Glob|Grep" in h.get("matcher", "") for h in hooks)
@@ -2,7 +2,7 @@
2
2
  import subprocess
3
3
  from pathlib import Path
4
4
  import pytest
5
- from graphify.hooks import install, uninstall, status, _HOOK_MARKER
5
+ from graphify.hooks import install, uninstall, status, _HOOK_MARKER, _CHECKOUT_MARKER
6
6
 
7
7
 
8
8
  def _make_git_repo(tmp_path: Path) -> Path:
@@ -78,3 +78,35 @@ def test_status_not_installed(tmp_path):
78
78
  def test_no_git_repo_raises(tmp_path):
79
79
  with pytest.raises(RuntimeError, match="No git repository"):
80
80
  install(tmp_path / "not_a_repo")
81
+
82
+
83
+ def test_install_creates_post_checkout_hook(tmp_path):
84
+ repo = _make_git_repo(tmp_path)
85
+ install(repo)
86
+ hook = repo / ".git" / "hooks" / "post-checkout"
87
+ assert hook.exists()
88
+ assert _CHECKOUT_MARKER in hook.read_text()
89
+
90
+
91
+ def test_install_post_checkout_is_executable(tmp_path):
92
+ repo = _make_git_repo(tmp_path)
93
+ install(repo)
94
+ hook = repo / ".git" / "hooks" / "post-checkout"
95
+ assert hook.stat().st_mode & 0o111
96
+
97
+
98
+ def test_uninstall_removes_post_checkout_hook(tmp_path):
99
+ repo = _make_git_repo(tmp_path)
100
+ install(repo)
101
+ uninstall(repo)
102
+ hook = repo / ".git" / "hooks" / "post-checkout"
103
+ assert not hook.exists()
104
+
105
+
106
+ def test_status_shows_both_hooks(tmp_path):
107
+ repo = _make_git_repo(tmp_path)
108
+ install(repo)
109
+ result = status(repo)
110
+ assert "post-commit" in result
111
+ assert "post-checkout" in result
112
+ assert result.count("installed") >= 2
@@ -0,0 +1,89 @@
1
+ """Tests for rationale/docstring extraction in extract.py."""
2
+ import textwrap
3
+ from pathlib import Path
4
+ import pytest
5
+ from graphify.extract import extract_python
6
+
7
+
8
+ def _write_py(tmp_path: Path, code: str) -> Path:
9
+ p = tmp_path / "sample.py"
10
+ p.write_text(textwrap.dedent(code))
11
+ return p
12
+
13
+
14
+ def test_module_docstring_extracted(tmp_path):
15
+ path = _write_py(tmp_path, '''
16
+ """This module handles authentication because legacy sessions were insecure."""
17
+ def login(): pass
18
+ ''')
19
+ result = extract_python(path)
20
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
21
+ assert len(rationale) >= 1
22
+ assert any("authentication" in n["label"] for n in rationale)
23
+
24
+
25
+ def test_function_docstring_extracted(tmp_path):
26
+ path = _write_py(tmp_path, '''
27
+ def process():
28
+ """We use chunked processing here because the full dataset exceeds RAM."""
29
+ pass
30
+ ''')
31
+ result = extract_python(path)
32
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
33
+ assert any("chunked" in n["label"] for n in rationale)
34
+
35
+
36
+ def test_class_docstring_extracted(tmp_path):
37
+ path = _write_py(tmp_path, '''
38
+ class Cache:
39
+ """Chosen over Redis because we need zero external dependencies in the test env."""
40
+ pass
41
+ ''')
42
+ result = extract_python(path)
43
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
44
+ assert any("Redis" in n["label"] for n in rationale)
45
+
46
+
47
+ def test_rationale_comment_extracted(tmp_path):
48
+ path = _write_py(tmp_path, '''
49
+ def build():
50
+ # NOTE: must run before compile() or linker will fail
51
+ pass
52
+ ''')
53
+ result = extract_python(path)
54
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
55
+ assert any("NOTE" in n["label"] for n in rationale)
56
+
57
+
58
+ def test_rationale_for_edges_present(tmp_path):
59
+ path = _write_py(tmp_path, '''
60
+ """Module docstring explaining the why."""
61
+ def foo():
62
+ """Function docstring with rationale."""
63
+ pass
64
+ ''')
65
+ result = extract_python(path)
66
+ rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
67
+ assert len(rationale_edges) >= 1
68
+
69
+
70
+ def test_short_docstring_ignored(tmp_path):
71
+ """Trivial docstrings under 20 chars should not become rationale nodes."""
72
+ path = _write_py(tmp_path, '''
73
+ def foo():
74
+ """Constructor."""
75
+ pass
76
+ ''')
77
+ result = extract_python(path)
78
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
79
+ assert len(rationale) == 0
80
+
81
+
82
+ def test_rationale_confidence_is_extracted(tmp_path):
83
+ path = _write_py(tmp_path, '''
84
+ """This module exists because we needed a standalone parser."""
85
+ def parse(): pass
86
+ ''')
87
+ result = extract_python(path)
88
+ rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
89
+ assert all(e.get("confidence") == "EXTRACTED" for e in rationale_edges)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes