graphifyy 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphifyy-0.2.0 → graphifyy-0.2.2}/PKG-INFO +38 -11
- {graphifyy-0.2.0 → graphifyy-0.2.2}/README.md +34 -9
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/__main__.py +1 -2
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/cluster.py +20 -7
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/skill.md +11 -17
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/PKG-INFO +38 -11
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/SOURCES.txt +1 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/requires.txt +4 -1
- {graphifyy-0.2.0 → graphifyy-0.2.2}/pyproject.toml +3 -3
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_claude_md.py +39 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_hooks.py +33 -1
- graphifyy-0.2.2/tests/test_rationale.py +89 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/__init__.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/analyze.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/benchmark.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/build.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/cache.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/detect.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/export.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/extract.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/hooks.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/ingest.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/manifest.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/report.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/security.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/serve.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/validate.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/watch.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphify/wiki.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/dependency_links.txt +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/entry_points.txt +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/graphifyy.egg-info/top_level.txt +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/setup.cfg +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_analyze.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_benchmark.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_build.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_cache.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_cluster.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_confidence.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_detect.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_export.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_extract.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_hypergraph.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_ingest.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_languages.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_multilang.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_pipeline.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_report.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_security.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_semantic_similarity.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_serve.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_validate.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_watch.py +0 -0
- {graphifyy-0.2.0 → graphifyy-0.2.2}/tests/test_wiki.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphifyy
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/safishamsi/graphify
|
|
@@ -10,7 +10,6 @@ Keywords: claude,claude-code,knowledge-graph,rag,graphrag,obsidian,community-det
|
|
|
10
10
|
Requires-Python: >=3.10
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
Requires-Dist: networkx
|
|
13
|
-
Requires-Dist: graspologic
|
|
14
13
|
Requires-Dist: tree-sitter
|
|
15
14
|
Requires-Dist: tree-sitter-python
|
|
16
15
|
Requires-Dist: tree-sitter-javascript
|
|
@@ -34,16 +33,20 @@ Requires-Dist: pypdf; extra == "pdf"
|
|
|
34
33
|
Requires-Dist: html2text; extra == "pdf"
|
|
35
34
|
Provides-Extra: watch
|
|
36
35
|
Requires-Dist: watchdog; extra == "watch"
|
|
36
|
+
Provides-Extra: leiden
|
|
37
|
+
Requires-Dist: graspologic; extra == "leiden"
|
|
37
38
|
Provides-Extra: all
|
|
38
39
|
Requires-Dist: mcp; extra == "all"
|
|
39
40
|
Requires-Dist: neo4j; extra == "all"
|
|
40
41
|
Requires-Dist: pypdf; extra == "all"
|
|
41
42
|
Requires-Dist: html2text; extra == "all"
|
|
42
43
|
Requires-Dist: watchdog; extra == "all"
|
|
44
|
+
Requires-Dist: graspologic; extra == "all"
|
|
43
45
|
|
|
44
46
|
# graphify
|
|
45
47
|
|
|
46
|
-
[](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
49
|
+
[](https://pypi.org/project/graphifyy/)
|
|
47
50
|
|
|
48
51
|
**A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
49
52
|
|
|
@@ -63,6 +66,12 @@ graphify-out/
|
|
|
63
66
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
64
67
|
```
|
|
65
68
|
|
|
69
|
+
## How it works
|
|
70
|
+
|
|
71
|
+
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
72
|
+
|
|
73
|
+
Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
|
|
74
|
+
|
|
66
75
|
## Install
|
|
67
76
|
|
|
68
77
|
**Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
|
|
@@ -79,12 +88,30 @@ Then open Claude Code in any directory and type:
|
|
|
79
88
|
/graphify .
|
|
80
89
|
```
|
|
81
90
|
|
|
91
|
+
### Make Claude always use the graph (recommended)
|
|
92
|
+
|
|
93
|
+
After building a graph, run this once in your project:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
graphify claude install
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
This does two things:
|
|
100
|
+
|
|
101
|
+
1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
|
|
102
|
+
|
|
103
|
+
2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
|
|
104
|
+
|
|
105
|
+
Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
|
|
106
|
+
|
|
107
|
+
Uninstall with `graphify claude uninstall`.
|
|
108
|
+
|
|
82
109
|
<details>
|
|
83
110
|
<summary>Manual install (curl)</summary>
|
|
84
111
|
|
|
85
112
|
```bash
|
|
86
113
|
mkdir -p ~/.claude/skills/graphify
|
|
87
|
-
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/
|
|
114
|
+
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
|
|
88
115
|
> ~/.claude/skills/graphify/SKILL.md
|
|
89
116
|
```
|
|
90
117
|
|
|
@@ -121,14 +148,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
|
|
|
121
148
|
/graphify ./raw --mcp # start MCP stdio server
|
|
122
149
|
|
|
123
150
|
graphify hook install # git hooks - rebuilds graph on commit and branch switch
|
|
124
|
-
graphify claude install #
|
|
151
|
+
graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
|
|
125
152
|
```
|
|
126
153
|
|
|
127
154
|
Works with any mix of file types:
|
|
128
155
|
|
|
129
156
|
| Type | Extensions | Extraction |
|
|
130
157
|
|------|-----------|------------|
|
|
131
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph
|
|
158
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
132
159
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
133
160
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
134
161
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -145,7 +172,7 @@ Works with any mix of file types:
|
|
|
145
172
|
|
|
146
173
|
**Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
|
|
147
174
|
|
|
148
|
-
**Semantic similarity edges** - cross-file conceptual links
|
|
175
|
+
**Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
|
|
149
176
|
|
|
150
177
|
**Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
|
|
151
178
|
|
|
@@ -155,12 +182,8 @@ Works with any mix of file types:
|
|
|
155
182
|
|
|
156
183
|
**Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
|
|
157
184
|
|
|
158
|
-
**Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
|
|
159
|
-
|
|
160
185
|
**Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
|
|
161
186
|
|
|
162
|
-
Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
|
|
163
|
-
|
|
164
187
|
## Worked examples
|
|
165
188
|
|
|
166
189
|
| Corpus | Files | Reduction | Output |
|
|
@@ -171,6 +194,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
|
|
|
171
194
|
|
|
172
195
|
Token reduction scales with corpus size. 6 files fits in a context window anyway, so graph value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
|
|
173
196
|
|
|
197
|
+
## Privacy
|
|
198
|
+
|
|
199
|
+
graphify sends file contents to the Claude API (Anthropic) for semantic extraction of docs, papers, and images. Code files are processed locally via tree-sitter AST — no file contents leave your machine for code. No telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
|
|
200
|
+
|
|
174
201
|
## Tech stack
|
|
175
202
|
|
|
176
203
|
NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required, no server, runs entirely locally.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# graphify
|
|
2
2
|
|
|
3
|
-
[](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
4
|
+
[](https://pypi.org/project/graphifyy/)
|
|
4
5
|
|
|
5
6
|
**A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
6
7
|
|
|
@@ -20,6 +21,12 @@ graphify-out/
|
|
|
20
21
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
21
22
|
```
|
|
22
23
|
|
|
24
|
+
## How it works
|
|
25
|
+
|
|
26
|
+
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
27
|
+
|
|
28
|
+
Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
|
|
29
|
+
|
|
23
30
|
## Install
|
|
24
31
|
|
|
25
32
|
**Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
|
|
@@ -36,12 +43,30 @@ Then open Claude Code in any directory and type:
|
|
|
36
43
|
/graphify .
|
|
37
44
|
```
|
|
38
45
|
|
|
46
|
+
### Make Claude always use the graph (recommended)
|
|
47
|
+
|
|
48
|
+
After building a graph, run this once in your project:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
graphify claude install
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
This does two things:
|
|
55
|
+
|
|
56
|
+
1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
|
|
57
|
+
|
|
58
|
+
2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
|
|
59
|
+
|
|
60
|
+
Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
|
|
61
|
+
|
|
62
|
+
Uninstall with `graphify claude uninstall`.
|
|
63
|
+
|
|
39
64
|
<details>
|
|
40
65
|
<summary>Manual install (curl)</summary>
|
|
41
66
|
|
|
42
67
|
```bash
|
|
43
68
|
mkdir -p ~/.claude/skills/graphify
|
|
44
|
-
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/
|
|
69
|
+
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
|
|
45
70
|
> ~/.claude/skills/graphify/SKILL.md
|
|
46
71
|
```
|
|
47
72
|
|
|
@@ -78,14 +103,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
|
|
|
78
103
|
/graphify ./raw --mcp # start MCP stdio server
|
|
79
104
|
|
|
80
105
|
graphify hook install # git hooks - rebuilds graph on commit and branch switch
|
|
81
|
-
graphify claude install #
|
|
106
|
+
graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
|
|
82
107
|
```
|
|
83
108
|
|
|
84
109
|
Works with any mix of file types:
|
|
85
110
|
|
|
86
111
|
| Type | Extensions | Extraction |
|
|
87
112
|
|------|-----------|------------|
|
|
88
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph
|
|
113
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
89
114
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
90
115
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
91
116
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -102,7 +127,7 @@ Works with any mix of file types:
|
|
|
102
127
|
|
|
103
128
|
**Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
|
|
104
129
|
|
|
105
|
-
**Semantic similarity edges** - cross-file conceptual links
|
|
130
|
+
**Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
|
|
106
131
|
|
|
107
132
|
**Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
|
|
108
133
|
|
|
@@ -112,12 +137,8 @@ Works with any mix of file types:
|
|
|
112
137
|
|
|
113
138
|
**Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
|
|
114
139
|
|
|
115
|
-
**Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
|
|
116
|
-
|
|
117
140
|
**Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
|
|
118
141
|
|
|
119
|
-
Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
|
|
120
|
-
|
|
121
142
|
## Worked examples
|
|
122
143
|
|
|
123
144
|
| Corpus | Files | Reduction | Output |
|
|
@@ -128,6 +149,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
|
|
|
128
149
|
|
|
129
150
|
Token reduction scales with corpus size. 6 files fits in a context window anyway, so graph value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
|
|
130
151
|
|
|
152
|
+
## Privacy
|
|
153
|
+
|
|
154
|
+
graphify sends file contents to the Claude API (Anthropic) for semantic extraction of docs, papers, and images. Code files are processed locally via tree-sitter AST — no file contents leave your machine for code. No telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
|
|
155
|
+
|
|
131
156
|
## Tech stack
|
|
132
157
|
|
|
133
158
|
NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required, no server, runs entirely locally.
|
|
@@ -171,12 +171,11 @@ def claude_uninstall(project_dir: Path | None = None) -> None:
|
|
|
171
171
|
).rstrip()
|
|
172
172
|
if cleaned:
|
|
173
173
|
target.write_text(cleaned + "\n")
|
|
174
|
+
print(f"graphify section removed from {target.resolve()}")
|
|
174
175
|
else:
|
|
175
176
|
target.unlink()
|
|
176
177
|
print(f"CLAUDE.md was empty after removal - deleted {target.resolve()}")
|
|
177
|
-
return
|
|
178
178
|
|
|
179
|
-
print(f"graphify section removed from {target.resolve()}")
|
|
180
179
|
_uninstall_claude_hook(project_dir or Path("."))
|
|
181
180
|
|
|
182
181
|
|
|
@@ -1,8 +1,25 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Community detection on NetworkX graphs. Uses Leiden (graspologic) if available, falls back to Louvain (networkx). Splits oversized communities. Returns cohesion scores."""
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import networkx as nx
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
def _partition(G: nx.Graph) -> dict[str, int]:
|
|
7
|
+
"""Run community detection. Returns {node_id: community_id}.
|
|
8
|
+
|
|
9
|
+
Tries Leiden (graspologic) first — best quality.
|
|
10
|
+
Falls back to Louvain (built into networkx) if graspologic is not installed.
|
|
11
|
+
"""
|
|
12
|
+
try:
|
|
13
|
+
from graspologic.partition import leiden
|
|
14
|
+
return leiden(G)
|
|
15
|
+
except ImportError:
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
# Fallback: networkx louvain (available since networkx 2.7)
|
|
19
|
+
communities = nx.community.louvain_communities(G, seed=42)
|
|
20
|
+
return {node: cid for cid, nodes in enumerate(communities) for node in nodes}
|
|
21
|
+
|
|
22
|
+
|
|
6
23
|
def build_graph(nodes: list[dict], edges: list[dict]) -> nx.Graph:
|
|
7
24
|
"""Build a NetworkX graph from graphify node/edge dicts.
|
|
8
25
|
|
|
@@ -36,8 +53,6 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
|
|
|
36
53
|
if G.number_of_edges() == 0:
|
|
37
54
|
return {i: [n] for i, n in enumerate(sorted(G.nodes))}
|
|
38
55
|
|
|
39
|
-
from graspologic.partition import leiden # lazy - avoids 15s numba JIT on import
|
|
40
|
-
|
|
41
56
|
# Leiden warns and drops isolates - handle them separately
|
|
42
57
|
isolates = [n for n in G.nodes() if G.degree(n) == 0]
|
|
43
58
|
connected_nodes = [n for n in G.nodes() if G.degree(n) > 0]
|
|
@@ -45,7 +60,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
|
|
|
45
60
|
|
|
46
61
|
raw: dict[int, list[str]] = {}
|
|
47
62
|
if connected.number_of_nodes() > 0:
|
|
48
|
-
partition
|
|
63
|
+
partition = _partition(connected)
|
|
49
64
|
for node, cid in partition.items():
|
|
50
65
|
raw.setdefault(cid, []).append(node)
|
|
51
66
|
|
|
@@ -76,13 +91,11 @@ def _split_community(G: nx.Graph, nodes: list[str]) -> list[list[str]]:
|
|
|
76
91
|
# No edges - split into individual nodes
|
|
77
92
|
return [[n] for n in sorted(nodes)]
|
|
78
93
|
try:
|
|
79
|
-
|
|
80
|
-
sub_partition: dict[str, int] = leiden(subgraph)
|
|
94
|
+
sub_partition = _partition(subgraph)
|
|
81
95
|
sub_communities: dict[int, list[str]] = {}
|
|
82
96
|
for node, cid in sub_partition.items():
|
|
83
97
|
sub_communities.setdefault(cid, []).append(node)
|
|
84
98
|
if len(sub_communities) <= 1:
|
|
85
|
-
# Leiden couldn't split it - return as-is
|
|
86
99
|
return [sorted(nodes)]
|
|
87
100
|
return [sorted(v) for v in sub_communities.values()]
|
|
88
101
|
except Exception:
|
|
@@ -411,9 +411,11 @@ print('Report updated with community labels')
|
|
|
411
411
|
Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`).
|
|
412
412
|
Replace INPUT_PATH with the actual path.
|
|
413
413
|
|
|
414
|
-
### Step 6 - Generate Obsidian vault (
|
|
414
|
+
### Step 6 - Generate Obsidian vault (opt-in) + HTML
|
|
415
415
|
|
|
416
|
-
**Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was given** — it generates one file per node
|
|
416
|
+
**Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was explicitly given** — skip it otherwise, it generates one file per node.
|
|
417
|
+
|
|
418
|
+
If `--obsidian` was given:
|
|
417
419
|
|
|
418
420
|
```bash
|
|
419
421
|
python3 -c "
|
|
@@ -444,7 +446,7 @@ print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries
|
|
|
444
446
|
"
|
|
445
447
|
```
|
|
446
448
|
|
|
447
|
-
|
|
449
|
+
Generate the HTML graph (always, unless `--no-viz`):
|
|
448
450
|
|
|
449
451
|
```bash
|
|
450
452
|
python3 -c "
|
|
@@ -631,22 +633,14 @@ rm -f .graphify_detect.json .graphify_extract.json .graphify_ast.json .graphify_
|
|
|
631
633
|
rm -f graphify-out/.needs_update 2>/dev/null || true
|
|
632
634
|
```
|
|
633
635
|
|
|
634
|
-
Tell the user:
|
|
636
|
+
Tell the user (omit the obsidian line unless --obsidian was given):
|
|
635
637
|
```
|
|
636
|
-
Graph complete. Outputs
|
|
637
|
-
|
|
638
|
-
The folder is hidden (dot prefix) so it won't show in Finder or a normal ls.
|
|
639
|
-
To see it:
|
|
640
|
-
Mac/Linux: ls -la graphify-out/
|
|
641
|
-
VS Code: the Explorer panel shows hidden files by default
|
|
642
|
-
Finder: Cmd+Shift+. to toggle hidden files
|
|
643
|
-
|
|
644
|
-
What's inside:
|
|
645
|
-
graphify-out/obsidian/ - open this folder as a vault in Obsidian (File > Open Vault)
|
|
646
|
-
graphify-out/GRAPH_REPORT.md - full audit report, also readable here in Claude
|
|
647
|
-
graphify-out/graph.json - persistent graph, query it later with /graphify query "..."
|
|
638
|
+
Graph complete. Outputs in PATH_TO_DIR/graphify-out/
|
|
648
639
|
|
|
649
|
-
|
|
640
|
+
graph.html - interactive graph, open in browser
|
|
641
|
+
GRAPH_REPORT.md - audit report
|
|
642
|
+
graph.json - raw graph data
|
|
643
|
+
obsidian/ - Obsidian vault (only if --obsidian was given)
|
|
650
644
|
```
|
|
651
645
|
|
|
652
646
|
Replace PATH_TO_DIR with the actual absolute path of the directory that was processed.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphifyy
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/safishamsi/graphify
|
|
@@ -10,7 +10,6 @@ Keywords: claude,claude-code,knowledge-graph,rag,graphrag,obsidian,community-det
|
|
|
10
10
|
Requires-Python: >=3.10
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
Requires-Dist: networkx
|
|
13
|
-
Requires-Dist: graspologic
|
|
14
13
|
Requires-Dist: tree-sitter
|
|
15
14
|
Requires-Dist: tree-sitter-python
|
|
16
15
|
Requires-Dist: tree-sitter-javascript
|
|
@@ -34,16 +33,20 @@ Requires-Dist: pypdf; extra == "pdf"
|
|
|
34
33
|
Requires-Dist: html2text; extra == "pdf"
|
|
35
34
|
Provides-Extra: watch
|
|
36
35
|
Requires-Dist: watchdog; extra == "watch"
|
|
36
|
+
Provides-Extra: leiden
|
|
37
|
+
Requires-Dist: graspologic; extra == "leiden"
|
|
37
38
|
Provides-Extra: all
|
|
38
39
|
Requires-Dist: mcp; extra == "all"
|
|
39
40
|
Requires-Dist: neo4j; extra == "all"
|
|
40
41
|
Requires-Dist: pypdf; extra == "all"
|
|
41
42
|
Requires-Dist: html2text; extra == "all"
|
|
42
43
|
Requires-Dist: watchdog; extra == "all"
|
|
44
|
+
Requires-Dist: graspologic; extra == "all"
|
|
43
45
|
|
|
44
46
|
# graphify
|
|
45
47
|
|
|
46
|
-
[](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
49
|
+
[](https://pypi.org/project/graphifyy/)
|
|
47
50
|
|
|
48
51
|
**A Claude Code skill.** Type `/graphify` in Claude Code - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
49
52
|
|
|
@@ -63,6 +66,12 @@ graphify-out/
|
|
|
63
66
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
64
67
|
```
|
|
65
68
|
|
|
69
|
+
## How it works
|
|
70
|
+
|
|
71
|
+
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
72
|
+
|
|
73
|
+
Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
|
|
74
|
+
|
|
66
75
|
## Install
|
|
67
76
|
|
|
68
77
|
**Requires:** [Claude Code](https://claude.ai/code) and Python 3.10+
|
|
@@ -79,12 +88,30 @@ Then open Claude Code in any directory and type:
|
|
|
79
88
|
/graphify .
|
|
80
89
|
```
|
|
81
90
|
|
|
91
|
+
### Make Claude always use the graph (recommended)
|
|
92
|
+
|
|
93
|
+
After building a graph, run this once in your project:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
graphify claude install
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
This does two things:
|
|
100
|
+
|
|
101
|
+
1. **CLAUDE.md rules** - tells Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and to rebuild the graph after editing code files.
|
|
102
|
+
|
|
103
|
+
2. **PreToolUse hook** (`settings.json`) - fires automatically before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ This means Claude navigates via the graph instead of grepping through every file - faster answers, fewer wasted tool calls, and responses grounded in the actual structure of your codebase rather than keyword matches.
|
|
104
|
+
|
|
105
|
+
Without this, Claude will grep raw files by default even when a graph exists. With it, the graph becomes the first thing Claude reaches for.
|
|
106
|
+
|
|
107
|
+
Uninstall with `graphify claude uninstall`.
|
|
108
|
+
|
|
82
109
|
<details>
|
|
83
110
|
<summary>Manual install (curl)</summary>
|
|
84
111
|
|
|
85
112
|
```bash
|
|
86
113
|
mkdir -p ~/.claude/skills/graphify
|
|
87
|
-
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/
|
|
114
|
+
curl -fsSL https://raw.githubusercontent.com/safishamsi/graphify/v2/graphify/skill.md \
|
|
88
115
|
> ~/.claude/skills/graphify/SKILL.md
|
|
89
116
|
```
|
|
90
117
|
|
|
@@ -121,14 +148,14 @@ When the user types `/graphify`, invoke the Skill tool with `skill: "graphify"`
|
|
|
121
148
|
/graphify ./raw --mcp # start MCP stdio server
|
|
122
149
|
|
|
123
150
|
graphify hook install # git hooks - rebuilds graph on commit and branch switch
|
|
124
|
-
graphify claude install #
|
|
151
|
+
graphify claude install # always-on: CLAUDE.md + PreToolUse hook for this project
|
|
125
152
|
```
|
|
126
153
|
|
|
127
154
|
Works with any mix of file types:
|
|
128
155
|
|
|
129
156
|
| Type | Extensions | Extraction |
|
|
130
157
|
|------|-----------|------------|
|
|
131
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph
|
|
158
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
132
159
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
133
160
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
134
161
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -145,7 +172,7 @@ Works with any mix of file types:
|
|
|
145
172
|
|
|
146
173
|
**Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
|
|
147
174
|
|
|
148
|
-
**Semantic similarity edges** - cross-file conceptual links
|
|
175
|
+
**Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
|
|
149
176
|
|
|
150
177
|
**Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
|
|
151
178
|
|
|
@@ -155,12 +182,8 @@ Works with any mix of file types:
|
|
|
155
182
|
|
|
156
183
|
**Git hooks** (`graphify hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. No background process needed.
|
|
157
184
|
|
|
158
|
-
**Always-on for Claude** (`graphify claude install`) - writes a `CLAUDE.md` section so Claude checks the graph before answering architecture questions, plus a `.claude/settings.json` PreToolUse hook that fires before every Glob/Grep - Claude is reminded to check the graph before searching raw files.
|
|
159
|
-
|
|
160
185
|
**Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
|
|
161
186
|
|
|
162
|
-
Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know what was found vs guessed.
|
|
163
|
-
|
|
164
187
|
## Worked examples
|
|
165
188
|
|
|
166
189
|
| Corpus | Files | Reduction | Output |
|
|
@@ -171,6 +194,10 @@ Every edge is tagged `EXTRACTED`, `INFERRED`, or `AMBIGUOUS` - you always know w
|
|
|
171
194
|
|
|
172
195
|
Token reduction scales with corpus size. 6 files fits in a context window anyway, so graph value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
|
|
173
196
|
|
|
197
|
+
## Privacy
|
|
198
|
+
|
|
199
|
+
graphify sends file contents to the Claude API (Anthropic) for semantic extraction of docs, papers, and images. Code files are processed locally via tree-sitter AST — no file contents leave your machine for code. No telemetry, usage tracking, or analytics of any kind. The only network calls are to Anthropic's API during extraction, using your own API key via Claude Code.
|
|
200
|
+
|
|
174
201
|
## Tech stack
|
|
175
202
|
|
|
176
203
|
NetworkX + Leiden (graspologic) + tree-sitter + Claude + vis.js. No Neo4j required, no server, runs entirely locally.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "graphifyy"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.2.2"
|
|
8
8
|
description = "Claude Code skill - turn any folder of code, docs, papers, images, or tweets into a queryable knowledge graph"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -12,7 +12,6 @@ keywords = ["claude", "claude-code", "knowledge-graph", "rag", "graphrag", "obsi
|
|
|
12
12
|
requires-python = ">=3.10"
|
|
13
13
|
dependencies = [
|
|
14
14
|
"networkx",
|
|
15
|
-
"graspologic",
|
|
16
15
|
"tree-sitter",
|
|
17
16
|
"tree-sitter-python",
|
|
18
17
|
"tree-sitter-javascript",
|
|
@@ -39,7 +38,8 @@ mcp = ["mcp"]
|
|
|
39
38
|
neo4j = ["neo4j"]
|
|
40
39
|
pdf = ["pypdf", "html2text"]
|
|
41
40
|
watch = ["watchdog"]
|
|
42
|
-
|
|
41
|
+
leiden = ["graspologic"]
|
|
42
|
+
all = ["mcp", "neo4j", "pypdf", "html2text", "watchdog", "graspologic"]
|
|
43
43
|
|
|
44
44
|
[project.scripts]
|
|
45
45
|
graphify = "graphify.__main__:main"
|
|
@@ -95,3 +95,42 @@ def test_uninstall_no_op_when_no_file(tmp_path, capsys):
|
|
|
95
95
|
claude_uninstall(tmp_path)
|
|
96
96
|
out = capsys.readouterr().out
|
|
97
97
|
assert "No CLAUDE.md" in out or "nothing to do" in out
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# settings.json PreToolUse hook
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
def test_install_creates_settings_json(tmp_path):
|
|
105
|
+
"""claude_install also writes .claude/settings.json with PreToolUse hook."""
|
|
106
|
+
import json
|
|
107
|
+
claude_install(tmp_path)
|
|
108
|
+
settings_path = tmp_path / ".claude" / "settings.json"
|
|
109
|
+
assert settings_path.exists()
|
|
110
|
+
settings = json.loads(settings_path.read_text())
|
|
111
|
+
hooks = settings.get("hooks", {}).get("PreToolUse", [])
|
|
112
|
+
assert any("Glob|Grep" in h.get("matcher", "") for h in hooks)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_install_settings_json_idempotent(tmp_path):
|
|
116
|
+
"""Running claude_install twice does not duplicate the PreToolUse hook."""
|
|
117
|
+
import json
|
|
118
|
+
claude_install(tmp_path)
|
|
119
|
+
claude_install(tmp_path)
|
|
120
|
+
settings_path = tmp_path / ".claude" / "settings.json"
|
|
121
|
+
settings = json.loads(settings_path.read_text())
|
|
122
|
+
hooks = settings.get("hooks", {}).get("PreToolUse", [])
|
|
123
|
+
glob_grep_hooks = [h for h in hooks if "Glob|Grep" in h.get("matcher", "")]
|
|
124
|
+
assert len(glob_grep_hooks) == 1
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_uninstall_removes_settings_hook(tmp_path):
|
|
128
|
+
"""claude_uninstall removes the PreToolUse hook from settings.json."""
|
|
129
|
+
import json
|
|
130
|
+
claude_install(tmp_path)
|
|
131
|
+
claude_uninstall(tmp_path)
|
|
132
|
+
settings_path = tmp_path / ".claude" / "settings.json"
|
|
133
|
+
if settings_path.exists():
|
|
134
|
+
settings = json.loads(settings_path.read_text())
|
|
135
|
+
hooks = settings.get("hooks", {}).get("PreToolUse", [])
|
|
136
|
+
assert not any("Glob|Grep" in h.get("matcher", "") for h in hooks)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import subprocess
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
import pytest
|
|
5
|
-
from graphify.hooks import install, uninstall, status, _HOOK_MARKER
|
|
5
|
+
from graphify.hooks import install, uninstall, status, _HOOK_MARKER, _CHECKOUT_MARKER
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def _make_git_repo(tmp_path: Path) -> Path:
|
|
@@ -78,3 +78,35 @@ def test_status_not_installed(tmp_path):
|
|
|
78
78
|
def test_no_git_repo_raises(tmp_path):
|
|
79
79
|
with pytest.raises(RuntimeError, match="No git repository"):
|
|
80
80
|
install(tmp_path / "not_a_repo")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_install_creates_post_checkout_hook(tmp_path):
|
|
84
|
+
repo = _make_git_repo(tmp_path)
|
|
85
|
+
install(repo)
|
|
86
|
+
hook = repo / ".git" / "hooks" / "post-checkout"
|
|
87
|
+
assert hook.exists()
|
|
88
|
+
assert _CHECKOUT_MARKER in hook.read_text()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_install_post_checkout_is_executable(tmp_path):
|
|
92
|
+
repo = _make_git_repo(tmp_path)
|
|
93
|
+
install(repo)
|
|
94
|
+
hook = repo / ".git" / "hooks" / "post-checkout"
|
|
95
|
+
assert hook.stat().st_mode & 0o111
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_uninstall_removes_post_checkout_hook(tmp_path):
|
|
99
|
+
repo = _make_git_repo(tmp_path)
|
|
100
|
+
install(repo)
|
|
101
|
+
uninstall(repo)
|
|
102
|
+
hook = repo / ".git" / "hooks" / "post-checkout"
|
|
103
|
+
assert not hook.exists()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_status_shows_both_hooks(tmp_path):
|
|
107
|
+
repo = _make_git_repo(tmp_path)
|
|
108
|
+
install(repo)
|
|
109
|
+
result = status(repo)
|
|
110
|
+
assert "post-commit" in result
|
|
111
|
+
assert "post-checkout" in result
|
|
112
|
+
assert result.count("installed") >= 2
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Tests for rationale/docstring extraction in extract.py."""
|
|
2
|
+
import textwrap
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import pytest
|
|
5
|
+
from graphify.extract import extract_python
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _write_py(tmp_path: Path, code: str) -> Path:
|
|
9
|
+
p = tmp_path / "sample.py"
|
|
10
|
+
p.write_text(textwrap.dedent(code))
|
|
11
|
+
return p
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_module_docstring_extracted(tmp_path):
|
|
15
|
+
path = _write_py(tmp_path, '''
|
|
16
|
+
"""This module handles authentication because legacy sessions were insecure."""
|
|
17
|
+
def login(): pass
|
|
18
|
+
''')
|
|
19
|
+
result = extract_python(path)
|
|
20
|
+
rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
|
|
21
|
+
assert len(rationale) >= 1
|
|
22
|
+
assert any("authentication" in n["label"] for n in rationale)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_function_docstring_extracted(tmp_path):
|
|
26
|
+
path = _write_py(tmp_path, '''
|
|
27
|
+
def process():
|
|
28
|
+
"""We use chunked processing here because the full dataset exceeds RAM."""
|
|
29
|
+
pass
|
|
30
|
+
''')
|
|
31
|
+
result = extract_python(path)
|
|
32
|
+
rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
|
|
33
|
+
assert any("chunked" in n["label"] for n in rationale)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_class_docstring_extracted(tmp_path):
|
|
37
|
+
path = _write_py(tmp_path, '''
|
|
38
|
+
class Cache:
|
|
39
|
+
"""Chosen over Redis because we need zero external dependencies in the test env."""
|
|
40
|
+
pass
|
|
41
|
+
''')
|
|
42
|
+
result = extract_python(path)
|
|
43
|
+
rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
|
|
44
|
+
assert any("Redis" in n["label"] for n in rationale)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_rationale_comment_extracted(tmp_path):
|
|
48
|
+
path = _write_py(tmp_path, '''
|
|
49
|
+
def build():
|
|
50
|
+
# NOTE: must run before compile() or linker will fail
|
|
51
|
+
pass
|
|
52
|
+
''')
|
|
53
|
+
result = extract_python(path)
|
|
54
|
+
rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
|
|
55
|
+
assert any("NOTE" in n["label"] for n in rationale)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_rationale_for_edges_present(tmp_path):
|
|
59
|
+
path = _write_py(tmp_path, '''
|
|
60
|
+
"""Module docstring explaining the why."""
|
|
61
|
+
def foo():
|
|
62
|
+
"""Function docstring with rationale."""
|
|
63
|
+
pass
|
|
64
|
+
''')
|
|
65
|
+
result = extract_python(path)
|
|
66
|
+
rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
|
|
67
|
+
assert len(rationale_edges) >= 1
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_short_docstring_ignored(tmp_path):
|
|
71
|
+
"""Trivial docstrings under 20 chars should not become rationale nodes."""
|
|
72
|
+
path = _write_py(tmp_path, '''
|
|
73
|
+
def foo():
|
|
74
|
+
"""Constructor."""
|
|
75
|
+
pass
|
|
76
|
+
''')
|
|
77
|
+
result = extract_python(path)
|
|
78
|
+
rationale = [n for n in result["nodes"] if n.get("file_type") == "rationale"]
|
|
79
|
+
assert len(rationale) == 0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_rationale_confidence_is_extracted(tmp_path):
|
|
83
|
+
path = _write_py(tmp_path, '''
|
|
84
|
+
"""This module exists because we needed a standalone parser."""
|
|
85
|
+
def parse(): pass
|
|
86
|
+
''')
|
|
87
|
+
result = extract_python(path)
|
|
88
|
+
rationale_edges = [e for e in result["edges"] if e.get("relation") == "rationale_for"]
|
|
89
|
+
assert all(e.get("confidence") == "EXTRACTED" for e in rationale_edges)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|