graphifyy 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {graphifyy-0.2.0 → graphifyy-0.2.1}/PKG-INFO +30 -10
  2. {graphifyy-0.2.0 → graphifyy-0.2.1}/README.md +29 -9
  3. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/__main__.py +1 -2
  4. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/skill.md +11 -17
  5. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/PKG-INFO +30 -10
  6. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/SOURCES.txt +1 -0
  7. {graphifyy-0.2.0 → graphifyy-0.2.1}/pyproject.toml +1 -1
  8. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_claude_md.py +39 -0
  9. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_hooks.py +33 -1
  10. graphifyy-0.2.1/tests/test_rationale.py +89 -0
  11. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/__init__.py +0 -0
  12. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/analyze.py +0 -0
  13. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/benchmark.py +0 -0
  14. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/build.py +0 -0
  15. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/cache.py +0 -0
  16. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/cluster.py +0 -0
  17. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/detect.py +0 -0
  18. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/export.py +0 -0
  19. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/extract.py +0 -0
  20. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/hooks.py +0 -0
  21. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/ingest.py +0 -0
  22. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/manifest.py +0 -0
  23. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/report.py +0 -0
  24. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/security.py +0 -0
  25. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/serve.py +0 -0
  26. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/validate.py +0 -0
  27. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/watch.py +0 -0
  28. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphify/wiki.py +0 -0
  29. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/dependency_links.txt +0 -0
  30. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/entry_points.txt +0 -0
  31. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/requires.txt +0 -0
  32. {graphifyy-0.2.0 → graphifyy-0.2.1}/graphifyy.egg-info/top_level.txt +0 -0
  33. {graphifyy-0.2.0 → graphifyy-0.2.1}/setup.cfg +0 -0
  34. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_analyze.py +0 -0
  35. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_benchmark.py +0 -0
  36. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_build.py +0 -0
  37. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_cache.py +0 -0
  38. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_cluster.py +0 -0
  39. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_confidence.py +0 -0
  40. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_detect.py +0 -0
  41. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_export.py +0 -0
  42. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_extract.py +0 -0
  43. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_hypergraph.py +0 -0
  44. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_ingest.py +0 -0
  45. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_languages.py +0 -0
  46. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_multilang.py +0 -0
  47. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_pipeline.py +0 -0
  48. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_report.py +0 -0
  49. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_security.py +0 -0
  50. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_semantic_similarity.py +0 -0
  51. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_serve.py +0 -0
  52. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_validate.py +0 -0
  53. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_watch.py +0 -0
  54. {graphifyy-0.2.0 → graphifyy-0.2.1}/tests/test_wiki.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphifyy
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/safishamsi/graphify
@@ -43,7 +43,7 @@ Requires-Dist: watchdog; extra == "all"
43
43
 
44
44
  # graphify
45
45
 
46
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
46
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
47
47
 
48
48
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
49
49
 
@@ -63,6 +63,12 @@ graphify-out/
63
63
  └── cache/ SHA256 cache - re-runs only process changed files
64
64
  ```
65
65
 
66
+ ## How it works
67
+
68
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
69
+
70
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
71
+
66
72
  ## Install
67
73
 
68
74
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -79,12 +85,30 @@ Then open Claude Code in any directory and type:
79
85
  /graphify .
80
86
  ```
81
87
 
88
+ ### Make Claude always use the graph (recommended)
89
+
90
+ After building a graph, run this once in your project:
91
+
92
+ ```bash
93
+ graphify claude install
94
+ ```
95
+
96
+ This does two things:
97
+
98
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
99
+
100
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
101
+
102
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
103
+
104
+ Uninstall with `graphify claude uninstall`.
105
+
82
106
  <details>
83
107
  <summary>Manual install (curl)</summary>
84
108
 
85
109
  ```bash
86
110
  mkdir -p ~/.claude/skills/graphify
87
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
111
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
88
112
  > ~/.claude/skills/graphify/SKILL.md
89
113
  ```
90
114
 
@@ -121,14 +145,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
121
145
  /graphify ./raw --mcp # start MCP stdio server
122
146
 
123
147
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
124
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
148
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
125
149
  ```
126
150
 
127
151
  Works with any mix of file types:
128
152
 
129
153
  | Type | Extensions | Extraction |
130
154
  |------|-----------|------------|
131
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
155
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
132
156
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
133
157
  | Papers | `.pdf` | Citation mining + concept extraction |
134
158
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -145,7 +169,7 @@ Works with any mix of file types:
145
169
 
146
170
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
147
171
 
148
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
172
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
149
173
 
150
174
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
151
175
 
@@ -155,12 +179,8 @@ Works with any mix of file types:
155
179
 
156
180
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
157
181
 
158
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
159
-
160
182
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
161
183
 
162
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
163
-
164
184
  ## Worked examples
165
185
 
166
186
  | Corpus | Files | Reduction | Output |
@@ -1,6 +1,6 @@
1
1
  # graphify
2
2
 
3
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
3
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
4
4
 
5
5
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
6
6
 
@@ -20,6 +20,12 @@ graphify-out/
20
20
  └── cache/ SHA256 cache - re-runs only process changed files
21
21
  ```
22
22
 
23
+ ## How it works
24
+
25
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
26
+
27
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
28
+
23
29
  ## Install
24
30
 
25
31
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -36,12 +42,30 @@ Then open Claude Code in any directory and type:
36
42
  /graphify .
37
43
  ```
38
44
 
45
+ ### Make Claude always use the graph (recommended)
46
+
47
+ After building a graph, run this once in your project:
48
+
49
+ ```bash
50
+ graphify claude install
51
+ ```
52
+
53
+ This does two things:
54
+
55
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
56
+
57
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
58
+
59
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
60
+
61
+ Uninstall with `graphify claude uninstall`.
62
+
39
63
  <details>
40
64
  <summary>Manual install (curl)</summary>
41
65
 
42
66
  ```bash
43
67
  mkdir -p ~/.claude/skills/graphify
44
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
68
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
45
69
  > ~/.claude/skills/graphify/SKILL.md
46
70
  ```
47
71
 
@@ -78,14 +102,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
78
102
  /graphify ./raw --mcp # start MCP stdio server
79
103
 
80
104
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
81
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
105
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
82
106
  ```
83
107
 
84
108
  Works with any mix of file types:
85
109
 
86
110
  | Type | Extensions | Extraction |
87
111
  |------|-----------|------------|
88
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
112
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
89
113
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
90
114
  | Papers | `.pdf` | Citation mining + concept extraction |
91
115
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -102,7 +126,7 @@ Works with any mix of file types:
102
126
 
103
127
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
104
128
 
105
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
129
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
106
130
 
107
131
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
108
132
 
@@ -112,12 +136,8 @@ Works with any mix of file types:
112
136
 
113
137
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
114
138
 
115
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
116
-
117
139
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
118
140
 
119
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
120
-
121
141
  ## Worked examples
122
142
 
123
143
  | Corpus | Files | Reduction | Output |
@@ -171,12 +171,11 @@ def claude_uninstall(project_dir: Path | None = None) -> None:
171
171
  ).rstrip()
172
172
  if cleaned:
173
173
  target.write_text(cleaned + "\n")
174
+ print(f"graphify section removed from {target.resolve()}")
174
175
  else:
175
176
  target.unlink()
176
177
  print(f"CLAUDE.md was empty after removal - deleted {target.resolve()}")
177
- return
178
178
 
179
- print(f"graphify section removed from {target.resolve()}")
180
179
  _uninstall_claude_hook(project_dir or Path("."))
181
180
 
182
181
 
@@ -411,9 +411,11 @@ print('Report updated with community labels')
411
411
  Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`).
412
412
  Replace INPUT_PATH with the actual path.
413
413
 
414
- ### Step 6 - Generate Obsidian vault (default) + optional HTML
414
+ ### Step 6 - Generate Obsidian vault (opt-in) + HTML
415
415
 
416
- **Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was given** — it generates one file per node which creates thousands of files in large repos. Skip it by default.
416
+ **Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was explicitly given** — skip it otherwise, it generates one file per node.
417
+
418
+ If `--obsidian` was given:
417
419
 
418
420
  ```bash
419
421
  python3 -c "
@@ -444,7 +446,7 @@ print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries
444
446
  "
445
447
  ```
446
448
 
447
- Also generate the HTML graph (always, unless `--no-viz`):
449
+ Generate the HTML graph (always, unless `--no-viz`):
448
450
 
449
451
  ```bash
450
452
  python3 -c "
@@ -631,22 +633,14 @@ rm -f .graphify_detect.json .graphify_extract.json .graphify_ast.json .graphify_
631
633
  rm -f graphify-out/.needs_update 2>/dev/null || true
632
634
  ```
633
635
 
634
- Tell the user:
636
+ Tell the user (omit the obsidian line unless --obsidian was given):
635
637
  ```
636
- Graph complete. Outputs are in a hidden folder called graphify-out/ inside the directory you ran this on.
637
-
638
- The folder is hidden (dot prefix) so it won't show in Finder or a normal ls.
639
- To see it:
640
- Mac/Linux: ls -la graphify-out/
641
- VS Code: the Explorer panel shows hidden files by default
642
- Finder: Cmd+Shift+. to toggle hidden files
643
-
644
- What's inside:
645
- graphify-out/obsidian/ - open this folder as a vault in Obsidian (File > Open Vault)
646
- graphify-out/GRAPH_REPORT.md - full audit report, also readable here in Claude
647
- graphify-out/graph.json - persistent graph, query it later with /graphify query "..."
638
+ Graph complete. Outputs in PATH_TO_DIR/graphify-out/
648
639
 
649
- Full path: PATH_TO_DIR/graphify-out/
640
+ graph.html - interactive graph, open in browser
641
+ GRAPH_REPORT.md - audit report
642
+ graph.json - raw graph data
643
+ obsidian/ - Obsidian vault (only if --obsidian was given)
650
644
  ```
651
645
 
652
646
  Replace PATH_TO_DIR with the actual absolute path of the directory that was processed.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphifyy
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/safishamsi/graphify
@@ -43,7 +43,7 @@ Requires-Dist: watchdog; extra == "all"
43
43
 
44
44
  # graphify
45
45
 
46
- [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v1)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
46
+ [![CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml/badge.svg?branch=v2)](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
47
47
 
48
48
  **A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
49
49
 
@@ -63,6 +63,12 @@ graphify-out/
63
63
  └── cache/ SHA256 cache - re-runs only process changed files
64
64
  ```
65
65
 
66
+ ## How it works
67
+
68
+ graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
69
+
70
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
71
+
66
72
  ## Install
67
73
 
68
74
  **Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
@@ -79,12 +85,30 @@ Then open Claude Code in any directory and type:
79
85
  /graphify .
80
86
  ```
81
87
 
88
+ ### Make Claude always use the graph (recommended)
89
+
90
+ After building a graph, run this once in your project:
91
+
92
+ ```bash
93
+ graphify claude install
94
+ ```
95
+
96
+ This does two things:
97
+
98
+ 1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
99
+
100
+ 2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
101
+
102
+ Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
103
+
104
+ Uninstall with `graphify claude uninstall`.
105
+
82
106
  <details>
83
107
  <summary>Manual install (curl)</summary>
84
108
 
85
109
  ```bash
86
110
  mkdir -p ~/.claude/skills/graphify
87
- curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v1/skills/graphify/skill.md \
111
+ curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
88
112
  > ~/.claude/skills/graphify/SKILL.md
89
113
  ```
90
114
 
@@ -121,14 +145,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
121
145
  /graphify ./raw --mcp # start MCP stdio server
122
146
 
123
147
  graphify hook install # git hooks - rebuilds graph on commit and branch switch
124
- graphify claude install # write graphify rules to local CLAUDE.md + install PreToolUse hook
148
+ graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
125
149
  ```
126
150
 
127
151
  Works with any mix of file types:
128
152
 
129
153
  | Type | Extensions | Extraction |
130
154
  |------|-----------|------------|
131
- | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph pass + docstring/comment rationale |
155
+ | Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
132
156
  | Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
133
157
  | Papers | `.pdf` | Citation mining + concept extraction |
134
158
  | Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
@@ -145,7 +169,7 @@ Works with any mix of file types:
145
169
 
146
170
  **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
147
171
 
148
- **Semantic similarity edges** - cross-file conceptual links that have no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
172
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
149
173
 
150
174
  **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
151
175
 
@@ -155,12 +179,8 @@ Works with any mix of file types:
155
179
 
156
180
  **Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
157
181
 
158
- **Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
159
-
160
182
  **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
161
183
 
162
- Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
163
-
164
184
  ## Worked examples
165
185
 
166
186
  | Corpus | Files | Reduction | Output |
@@ -42,6 +42,7 @@ tests/test_ingest.py
42
42
  tests/test_languages.py
43
43
  tests/test_multilang.py
44
44
  tests/test_pipeline.py
45
+ tests/test_rationale.py
45
46
  tests/test_report.py
46
47
  tests/test_security.py
47
48
  tests/test_semantic_similarity.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "graphifyy"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -95,3 +95,42 @@ def test_uninstall_no_op_when_no_file(tmp_path, capsys):
95
95
  claude_uninstall(tmp_path)
96
96
  out = capsys.readouterr().out
97
97
  assert "No CLAUDE.md" in out or "nothing to do" in out
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # settings.json PreToolUse hook
102
+ # ---------------------------------------------------------------------------
103
+
104
+ def test_install_creates_settings_json(tmp_path):
105
+ """claude_install also writes .claude/settings.json with PreToolUse hook."""
106
+ import json
107
+ claude_install(tmp_path)
108
+ settings_path = tmp_path / ".claude" / "settings.json"
109
+ assert settings_path.exists()
110
+ settings = json.loads(settings_path.read_text())
111
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
112
+ assert any("Glob|Grep" in h.get("matcher", "") for h in hooks)
113
+
114
+
115
+ def test_install_settings_json_idempotent(tmp_path):
116
+ """Running claude_install twice does not duplicate the PreToolUse hook."""
117
+ import json
118
+ claude_install(tmp_path)
119
+ claude_install(tmp_path)
120
+ settings_path = tmp_path / ".claude" / "settings.json"
121
+ settings = json.loads(settings_path.read_text())
122
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
123
+ glob_grep_hooks = [h for h in hooks if "Glob|Grep" in h.get("matcher", "")]
124
+ assert len(glob_grep_hooks) == 1
125
+
126
+
127
+ def test_uninstall_removes_settings_hook(tmp_path):
128
+ """claude_uninstall removes the PreToolUse hook from settings.json."""
129
+ import json
130
+ claude_install(tmp_path)
131
+ claude_uninstall(tmp_path)
132
+ settings_path = tmp_path / ".claude" / "settings.json"
133
+ if settings_path.exists():
134
+ settings = json.loads(settings_path.read_text())
135
+ hooks = settings.get("hooks", {}).get("PreToolUse", [])
136
+ assert not any("Glob|Grep" in h.get("matcher", "") for h in hooks)
@@ -2,7 +2,7 @@
2
2
  import subprocess
3
3
  from pathlib import Path
4
4
  import pytest
5
- from graphify.hooks import install, uninstall, status, _HOOK_MARKER
5
+ from graphify.hooks import install, uninstall, status, _HOOK_MARKER, _CHECKOUT_MARKER
6
6
 
7
7
 
8
8
  def _make_git_repo(tmp_path: Path) -> Path:
@@ -78,3 +78,35 @@ def test_status_not_installed(tmp_path):
78
78
  def test_no_git_repo_raises(tmp_path):
79
79
  with pytest.raises(RuntimeError, match="No git repository"):
80
80
  install(tmp_path / "not_a_repo")
81
+
82
+
83
+ def test_install_creates_post_checkout_hook(tmp_path):
84
+ repo = _make_git_repo(tmp_path)
85
+ install(repo)
86
+ hook = repo / ".git" / "hooks" / "post-checkout"
87
+ assert hook.exists()
88
+ assert _CHECKOUT_MARKER in hook.read_text()
89
+
90
+
91
+ def test_install_post_checkout_is_executable(tmp_path):
92
+ repo = _make_git_repo(tmp_path)
93
+ install(repo)
94
+ hook = repo / ".git" / "hooks" / "post-checkout"
95
+ assert hook.stat().st_mode & 0o111
96
+
97
+
98
+ def test_uninstall_removes_post_checkout_hook(tmp_path):
99
+ repo = _make_git_repo(tmp_path)
100
+ install(repo)
101
+ uninstall(repo)
102
+ hook = repo / ".git" / "hooks" / "post-checkout"
103
+ assert not hook.exists()
104
+
105
+
106
+ def test_status_shows_both_hooks(tmp_path):
107
+ repo = _make_git_repo(tmp_path)
108
+ install(repo)
109
+ result = status(repo)
110
+ assert "post-commit" in result
111
+ assert "post-checkout" in result
112
+ assert result.count("installed") >= 2
@@ -0,0 +1,89 @@
1
+ """Tests for rationale/docstring extraction in extract.py."""
2
+ import textwrap
3
+ from pathlib import Path
4
+ import pytest
5
+ from graphify.extract import extract_python
6
+
7
+
8
+ def _write_py(tmp_path: Path, code: str) -> Path:
9
+ p = tmp_path / "sample.py"
10
+ p.write_text(textwrap.dedent(code))
11
+ return p
12
+
13
+
14
+ def test_module_docstring_extracted(tmp_path):
15
+ path = _write_py(tmp_path, '''
16
+ """This module handles authentication because legacy sessions were insecure."""
17
+ def login(): pass
18
+ ''')
19
+ result = extract_python(path)
20
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
21
+ assert len(rationale) >= 1
22
+ assert any("authentication" in n["label"] for n in rationale)
23
+
24
+
25
+ def test_function_docstring_extracted(tmp_path):
26
+ path = _write_py(tmp_path, '''
27
+ def process():
28
+ """We use chunked processing here because the full dataset exceeds RAM."""
29
+ pass
30
+ ''')
31
+ result = extract_python(path)
32
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
33
+ assert any("chunked" in n["label"] for n in rationale)
34
+
35
+
36
+ def test_class_docstring_extracted(tmp_path):
37
+ path = _write_py(tmp_path, '''
38
+ class Cache:
39
+ """Chosen over Redis because we need zero external dependencies in the test env."""
40
+ pass
41
+ ''')
42
+ result = extract_python(path)
43
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
44
+ assert any("Redis" in n["label"] for n in rationale)
45
+
46
+
47
+ def test_rationale_comment_extracted(tmp_path):
48
+ path = _write_py(tmp_path, '''
49
+ def build():
50
+ # NOTE: must run before compile() or linker will fail
51
+ pass
52
+ ''')
53
+ result = extract_python(path)
54
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
55
+ assert any("NOTE" in n["label"] for n in rationale)
56
+
57
+
58
+ def test_rationale_for_edges_present(tmp_path):
59
+ path = _write_py(tmp_path, '''
60
+ """Module docstring explaining the why."""
61
+ def foo():
62
+ """Function docstring with rationale."""
63
+ pass
64
+ ''')
65
+ result = extract_python(path)
66
+ rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
67
+ assert len(rationale_edges) >= 1
68
+
69
+
70
+ def test_short_docstring_ignored(tmp_path):
71
+ """Trivial docstrings under 20 chars should not become rationale nodes."""
72
+ path = _write_py(tmp_path, '''
73
+ def foo():
74
+ """Constructor."""
75
+ pass
76
+ ''')
77
+ result = extract_python(path)
78
+ rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
79
+ assert len(rationale) == 0
80
+
81
+
82
+ def test_rationale_confidence_is_extracted(tmp_path):
83
+ path = _write_py(tmp_path, '''
84
+ """This module exists because we needed a standalone parser."""
85
+ def parse(): pass
86
+ ''')
87
+ result = extract_python(path)
88
+ rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
89
+ assert all(e.get("confidence") == "EXTRACTED" for e in rationale_edges)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes