graphifyy 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphifyy-0.3.1 → graphifyy-0.3.2}/PKG-INFO +23 -6
- {graphifyy-0.3.1 → graphifyy-0.3.2}/README.md +20 -5
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/__main__.py +9 -2
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/detect.py +59 -2
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/extract.py +314 -1
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/ingest.py +7 -2
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/PKG-INFO +23 -6
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/requires.txt +2 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/pyproject.toml +3 -1
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_detect.py +35 -1
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_extract.py +1 -1
- {graphifyy-0.3.1 → graphifyy-0.3.2}/LICENSE +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/__init__.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/analyze.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/benchmark.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/build.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/cache.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/cluster.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/export.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/hooks.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/manifest.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/report.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/security.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/serve.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/skill-claw.md +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/skill-codex.md +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/skill-opencode.md +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/skill.md +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/validate.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/watch.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphify/wiki.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/SOURCES.txt +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/dependency_links.txt +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/entry_points.txt +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/graphifyy.egg-info/top_level.txt +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/setup.cfg +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_analyze.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_benchmark.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_build.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_cache.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_claude_md.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_cluster.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_confidence.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_export.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_hooks.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_hypergraph.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_ingest.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_install.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_languages.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_multilang.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_pipeline.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_rationale.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_report.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_security.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_semantic_similarity.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_serve.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_validate.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_watch.py +0 -0
- {graphifyy-0.3.1 → graphifyy-0.3.2}/tests/test_wiki.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphifyy
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: AI coding assistant skill (Claude Code, Codex, OpenCode, OpenClaw) - turn any folder of code, docs, papers, or images into a queryable knowledge graph
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -48,6 +48,8 @@ Requires-Dist: tree-sitter-scala
|
|
|
48
48
|
Requires-Dist: tree-sitter-php
|
|
49
49
|
Requires-Dist: tree-sitter-swift
|
|
50
50
|
Requires-Dist: tree-sitter-lua
|
|
51
|
+
Requires-Dist: tree-sitter-zig
|
|
52
|
+
Requires-Dist: tree-sitter-powershell
|
|
51
53
|
Provides-Extra: mcp
|
|
52
54
|
Requires-Dist: mcp; extra == "mcp"
|
|
53
55
|
Provides-Extra: neo4j
|
|
@@ -75,7 +77,7 @@ Dynamic: license-file
|
|
|
75
77
|
[CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
76
78
|
[PyPI](https://pypi.org/project/graphifyy/)
|
|
77
79
|
|
|
78
|
-
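**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, or OpenClaw - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.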
|
|
80
|
+
**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, OpenClaw, or Factory Droid - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
79
81
|
|
|
80
82
|
Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph.
|
|
81
83
|
|
|
@@ -93,6 +95,18 @@ graphify-out/
|
|
|
93
95
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
Add a `.graphifyignore` file to exclude folders you don't want in the graph:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
# .graphifyignore
|
|
102
|
+
vendor/
|
|
103
|
+
node_modules/
|
|
104
|
+
dist/
|
|
105
|
+
*.generated.py
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Same syntax as `.gitignore`. Patterns match against file paths relative to the folder you run graphify on.
|
|
109
|
+
|
|
96
110
|
## How it works
|
|
97
111
|
|
|
98
112
|
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
@@ -103,7 +117,7 @@ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED`
|
|
|
103
117
|
|
|
104
118
|
## Install
|
|
105
119
|
|
|
106
|
-
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), or [OpenClaw](https://openclaw.ai)
|
|
120
|
+
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), [OpenClaw](https://openclaw.ai), or [Factory Droid](https://factory.ai)
|
|
107
121
|
|
|
108
122
|
```bash
|
|
109
123
|
pip install graphifyy && graphify install
|
|
@@ -119,8 +133,9 @@ pip install graphifyy && graphify install
|
|
|
119
133
|
| Codex | `graphify install --platform codex` |
|
|
120
134
|
| OpenCode | `graphify install --platform opencode` |
|
|
121
135
|
| OpenClaw | `graphify install --platform claw` |
|
|
136
|
+
| Factory Droid | `graphify install --platform droid` |
|
|
122
137
|
|
|
123
|
-
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
138
|
+
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. Factory Droid uses the `Task` tool for parallel subagent dispatch. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
124
139
|
|
|
125
140
|
Then open your AI coding assistant and type:
|
|
126
141
|
|
|
@@ -138,10 +153,11 @@ After building a graph, run this once in your project:
|
|
|
138
153
|
| Codex | `graphify codex install` |
|
|
139
154
|
| OpenCode | `graphify opencode install` |
|
|
140
155
|
| OpenClaw | `graphify claw install` |
|
|
156
|
+
| Factory Droid | `graphify droid install` |
|
|
141
157
|
|
|
142
158
|
**Claude Code** does two things: writes a `CLAUDE.md` section telling Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and installs a **PreToolUse hook** (`settings.json`) that fires before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ — so Claude navigates via the graph instead of grepping through every file.
|
|
143
159
|
|
|
144
|
-
**Codex, OpenCode, OpenClaw** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
160
|
+
**Codex, OpenCode, OpenClaw, Factory Droid** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
145
161
|
|
|
146
162
|
Uninstall with the matching uninstall command (e.g. `graphify claude uninstall`).
|
|
147
163
|
|
|
@@ -212,13 +228,14 @@ graphify claude uninstall
|
|
|
212
228
|
graphify codex install # AGENTS.md (Codex)
|
|
213
229
|
graphify opencode install # AGENTS.md (OpenCode)
|
|
214
230
|
graphify claw install # AGENTS.md (OpenClaw)
|
|
231
|
+
graphify droid install # AGENTS.md (Factory Droid)
|
|
215
232
|
```
|
|
216
233
|
|
|
217
234
|
Works with any mix of file types:
|
|
218
235
|
|
|
219
236
|
| Type | Extensions | Extraction |
|
|
220
237
|
|------|-----------|------------|
|
|
221
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
238
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua .zig .ps1` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
222
239
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
223
240
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
224
241
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[CI](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
6
6
|
[PyPI](https://pypi.org/project/graphifyy/)
|
|
7
7
|
|
|
8
|
-
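**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, or OpenClaw - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.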
|
|
8
|
+
**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, OpenClaw, or Factory Droid - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
9
9
|
|
|
10
10
|
Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph.
|
|
11
11
|
|
|
@@ -23,6 +23,18 @@ graphify-out/
|
|
|
23
23
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
+
Add a `.graphifyignore` file to exclude folders you don't want in the graph:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
# .graphifyignore
|
|
30
|
+
vendor/
|
|
31
|
+
node_modules/
|
|
32
|
+
dist/
|
|
33
|
+
*.generated.py
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Same syntax as `.gitignore`. Patterns match against file paths relative to the folder you run graphify on.
|
|
37
|
+
|
|
26
38
|
## How it works
|
|
27
39
|
|
|
28
40
|
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
@@ -33,7 +45,7 @@ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED`
|
|
|
33
45
|
|
|
34
46
|
## Install
|
|
35
47
|
|
|
36
|
-
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), or [OpenClaw](https://openclaw.ai)
|
|
48
|
+
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), [OpenClaw](https://openclaw.ai), or [Factory Droid](https://factory.ai)
|
|
37
49
|
|
|
38
50
|
```bash
|
|
39
51
|
pip install graphifyy && graphify install
|
|
@@ -49,8 +61,9 @@ pip install graphifyy && graphify install
|
|
|
49
61
|
| Codex | `graphify install --platform codex` |
|
|
50
62
|
| OpenCode | `graphify install --platform opencode` |
|
|
51
63
|
| OpenClaw | `graphify install --platform claw` |
|
|
64
|
+
| Factory Droid | `graphify install --platform droid` |
|
|
52
65
|
|
|
53
|
-
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
66
|
+
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. Factory Droid uses the `Task` tool for parallel subagent dispatch. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
54
67
|
|
|
55
68
|
Then open your AI coding assistant and type:
|
|
56
69
|
|
|
@@ -68,10 +81,11 @@ After building a graph, run this once in your project:
|
|
|
68
81
|
| Codex | `graphify codex install` |
|
|
69
82
|
| OpenCode | `graphify opencode install` |
|
|
70
83
|
| OpenClaw | `graphify claw install` |
|
|
84
|
+
| Factory Droid | `graphify droid install` |
|
|
71
85
|
|
|
72
86
|
**Claude Code** does two things: writes a `CLAUDE.md` section telling Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and installs a **PreToolUse hook** (`settings.json`) that fires before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ — so Claude navigates via the graph instead of grepping through every file.
|
|
73
87
|
|
|
74
|
-
**Codex, OpenCode, OpenClaw** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
88
|
+
**Codex, OpenCode, OpenClaw, Factory Droid** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
75
89
|
|
|
76
90
|
Uninstall with the matching uninstall command (e.g. `graphify claude uninstall`).
|
|
77
91
|
|
|
@@ -142,13 +156,14 @@ graphify claude uninstall
|
|
|
142
156
|
graphify codex install # AGENTS.md (Codex)
|
|
143
157
|
graphify opencode install # AGENTS.md (OpenCode)
|
|
144
158
|
graphify claw install # AGENTS.md (OpenClaw)
|
|
159
|
+
graphify droid install # AGENTS.md (Factory Droid)
|
|
145
160
|
```
|
|
146
161
|
|
|
147
162
|
Works with any mix of file types:
|
|
148
163
|
|
|
149
164
|
| Type | Extensions | Extraction |
|
|
150
165
|
|------|-----------|------------|
|
|
151
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
166
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua .zig .ps1` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
152
167
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
153
168
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
154
169
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -50,6 +50,11 @@ _PLATFORM_CONFIG: dict[str, dict] = {
|
|
|
50
50
|
"skill_dst": Path(".claw") / "skills" / "graphify" / "SKILL.md",
|
|
51
51
|
"claude_md": False,
|
|
52
52
|
},
|
|
53
|
+
"droid": {
|
|
54
|
+
"skill_file": "skill-droid.md",
|
|
55
|
+
"skill_dst": Path(".factory") / "skills" / "graphify" / "SKILL.md",
|
|
56
|
+
"claude_md": False,
|
|
57
|
+
},
|
|
53
58
|
}
|
|
54
59
|
|
|
55
60
|
|
|
@@ -276,7 +281,7 @@ def main() -> None:
|
|
|
276
281
|
print("Usage: graphify <command>")
|
|
277
282
|
print()
|
|
278
283
|
print("Commands:")
|
|
279
|
-
print(" install [--platform P] copy skill to platform config dir (claude|codex|opencode|claw)")
|
|
284
|
+
print(" install [--platform P] copy skill to platform config dir (claude|codex|opencode|claw|droid)")
|
|
280
285
|
print(" benchmark [graph.json] measure token reduction vs naive full-corpus approach")
|
|
281
286
|
print(" hook install install post-commit/post-checkout git hooks (all platforms)")
|
|
282
287
|
print(" hook uninstall remove git hooks")
|
|
@@ -289,6 +294,8 @@ def main() -> None:
|
|
|
289
294
|
print(" opencode uninstall remove graphify section from AGENTS.md")
|
|
290
295
|
print(" claw install write graphify section to AGENTS.md (OpenClaw)")
|
|
291
296
|
print(" claw uninstall remove graphify section from AGENTS.md")
|
|
297
|
+
print(" droid install write graphify section to AGENTS.md (Factory Droid)")
|
|
298
|
+
print(" droid uninstall remove graphify section from AGENTS.md")
|
|
292
299
|
print()
|
|
293
300
|
return
|
|
294
301
|
|
|
@@ -316,7 +323,7 @@ def main() -> None:
|
|
|
316
323
|
else:
|
|
317
324
|
print("Usage: graphify claude [install|uninstall]", file=sys.stderr)
|
|
318
325
|
sys.exit(1)
|
|
319
|
-
elif cmd in ("codex", "opencode", "claw"):
|
|
326
|
+
elif cmd in ("codex", "opencode", "claw", "droid"):
|
|
320
327
|
subcmd = sys.argv[2] if len(sys.argv) > 2 else ""
|
|
321
328
|
if subcmd == "install":
|
|
322
329
|
_agents_install(Path("."), cmd)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# file discovery, type classification, and corpus health checks
|
|
2
2
|
from __future__ import annotations
|
|
3
|
+
import fnmatch
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import re
|
|
@@ -16,7 +17,7 @@ class FileType(str, Enum):
|
|
|
16
17
|
|
|
17
18
|
_MANIFEST_PATH = "graphify-out/manifest.json"
|
|
18
19
|
|
|
19
|
-
CODE_EXTENSIONS = {'.py', '.ts', '.js', '.tsx', '.go', '.rs', '.java', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp', '.rb', '.swift', '.kt', '.kts', '.cs', '.scala', '.php', '.lua', '.toc'}
|
|
20
|
+
CODE_EXTENSIONS = {'.py', '.ts', '.js', '.tsx', '.go', '.rs', '.java', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp', '.rb', '.swift', '.kt', '.kts', '.cs', '.scala', '.php', '.lua', '.toc', '.zig', '.ps1'}
|
|
20
21
|
DOC_EXTENSIONS = {'.md', '.txt', '.rst'}
|
|
21
22
|
PAPER_EXTENSIONS = {'.pdf'}
|
|
22
23
|
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'}
|
|
@@ -134,6 +135,56 @@ def _is_noise_dir(part: str) -> bool:
|
|
|
134
135
|
return False
|
|
135
136
|
|
|
136
137
|
|
|
138
|
+
def _load_graphifyignore(root: Path) -> list[str]:
|
|
139
|
+
"""Read .graphifyignore from root and return a list of patterns.
|
|
140
|
+
|
|
141
|
+
Lines starting with # are comments. Blank lines are ignored.
|
|
142
|
+
Patterns follow gitignore semantics: glob matched against the path
|
|
143
|
+
relative to root. A leading slash anchors to root. A trailing slash
|
|
144
|
+
matches directories only (we match both dir and file for simplicity).
|
|
145
|
+
"""
|
|
146
|
+
ignore_file = root / ".graphifyignore"
|
|
147
|
+
if not ignore_file.exists():
|
|
148
|
+
return []
|
|
149
|
+
patterns = []
|
|
150
|
+
for line in ignore_file.read_text(errors="ignore").splitlines():
|
|
151
|
+
line = line.strip()
|
|
152
|
+
if line and not line.startswith("#"):
|
|
153
|
+
patterns.append(line)
|
|
154
|
+
return patterns
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _is_ignored(path: Path, root: Path, patterns: list[str]) -> bool:
|
|
158
|
+
"""Return True if path matches any .graphifyignore pattern."""
|
|
159
|
+
if not patterns:
|
|
160
|
+
return False
|
|
161
|
+
try:
|
|
162
|
+
rel = str(path.relative_to(root))
|
|
163
|
+
except ValueError:
|
|
164
|
+
return False
|
|
165
|
+
rel = rel.replace(os.sep, "/")
|
|
166
|
+
parts = rel.split("/")
|
|
167
|
+
for pattern in patterns:
|
|
168
|
+
# Normalize: strip leading/trailing slashes for matching purposes
|
|
169
|
+
p = pattern.strip("/")
|
|
170
|
+
if not p:
|
|
171
|
+
continue
|
|
172
|
+
# Match against full relative path
|
|
173
|
+
if fnmatch.fnmatch(rel, p):
|
|
174
|
+
return True
|
|
175
|
+
# Match against filename alone
|
|
176
|
+
if fnmatch.fnmatch(path.name, p):
|
|
177
|
+
return True
|
|
178
|
+
# Match against any path segment or prefix
|
|
179
|
+
# e.g. "vendor" or "vendor/" should match "vendor/lib.py"
|
|
180
|
+
for i, part in enumerate(parts):
|
|
181
|
+
if fnmatch.fnmatch(part, p):
|
|
182
|
+
return True
|
|
183
|
+
if fnmatch.fnmatch("/".join(parts[:i + 1]), p):
|
|
184
|
+
return True
|
|
185
|
+
return False
|
|
186
|
+
|
|
187
|
+
|
|
137
188
|
def detect(root: Path) -> dict:
|
|
138
189
|
files: dict[FileType, list[str]] = {
|
|
139
190
|
FileType.CODE: [],
|
|
@@ -144,6 +195,7 @@ def detect(root: Path) -> dict:
|
|
|
144
195
|
total_words = 0
|
|
145
196
|
|
|
146
197
|
skipped_sensitive: list[str] = []
|
|
198
|
+
ignore_patterns = _load_graphifyignore(root)
|
|
147
199
|
|
|
148
200
|
# Always include graphify-out/memory/ - query results filed back into the graph
|
|
149
201
|
memory_dir = root / "graphify-out" / "memory"
|
|
@@ -162,7 +214,9 @@ def detect(root: Path) -> dict:
|
|
|
162
214
|
# Prune noise dirs in-place so os.walk never descends into them
|
|
163
215
|
dirnames[:] = [
|
|
164
216
|
d for d in dirnames
|
|
165
|
-
if not d.startswith(".") and not _is_noise_dir(d)
|
|
217
|
+
if not d.startswith(".")
|
|
218
|
+
and not _is_noise_dir(d)
|
|
219
|
+
and not _is_ignored(dp / d, root, ignore_patterns)
|
|
166
220
|
]
|
|
167
221
|
for fname in filenames:
|
|
168
222
|
p = dp / fname
|
|
@@ -178,6 +232,8 @@ def detect(root: Path) -> dict:
|
|
|
178
232
|
# but catch hidden files at the root level
|
|
179
233
|
if p.name.startswith("."):
|
|
180
234
|
continue
|
|
235
|
+
if _is_ignored(p, root, ignore_patterns):
|
|
236
|
+
continue
|
|
181
237
|
if _is_sensitive(p):
|
|
182
238
|
skipped_sensitive.append(str(p))
|
|
183
239
|
continue
|
|
@@ -210,6 +266,7 @@ def detect(root: Path) -> dict:
|
|
|
210
266
|
"needs_graph": needs_graph,
|
|
211
267
|
"warning": warning,
|
|
212
268
|
"skipped_sensitive": skipped_sensitive,
|
|
269
|
+
"graphifyignore_patterns": len(ignore_patterns),
|
|
213
270
|
}
|
|
214
271
|
|
|
215
272
|
|
|
@@ -1482,6 +1482,317 @@ def extract_rust(path: Path) -> dict:
|
|
|
1482
1482
|
return {"nodes": nodes, "edges": clean_edges}
|
|
1483
1483
|
|
|
1484
1484
|
|
|
1485
|
+
# ── Zig ───────────────────────────────────────────────────────────────────────
|
|
1486
|
+
|
|
1487
|
+
def extract_zig(path: Path) -> dict:
|
|
1488
|
+
"""Extract functions, structs, enums, unions, and imports from a .zig file."""
|
|
1489
|
+
try:
|
|
1490
|
+
import tree_sitter_zig as tszig
|
|
1491
|
+
from tree_sitter import Language, Parser
|
|
1492
|
+
except ImportError:
|
|
1493
|
+
return {"nodes": [], "edges": [], "error": "tree_sitter_zig not installed"}
|
|
1494
|
+
|
|
1495
|
+
try:
|
|
1496
|
+
language = Language(tszig.language())
|
|
1497
|
+
parser = Parser(language)
|
|
1498
|
+
source = path.read_bytes()
|
|
1499
|
+
tree = parser.parse(source)
|
|
1500
|
+
root = tree.root_node
|
|
1501
|
+
except Exception as e:
|
|
1502
|
+
return {"nodes": [], "edges": [], "error": str(e)}
|
|
1503
|
+
|
|
1504
|
+
stem = path.stem
|
|
1505
|
+
str_path = str(path)
|
|
1506
|
+
nodes: list[dict] = []
|
|
1507
|
+
edges: list[dict] = []
|
|
1508
|
+
seen_ids: set[str] = set()
|
|
1509
|
+
function_bodies: list[tuple[str, Any]] = []
|
|
1510
|
+
|
|
1511
|
+
def add_node(nid: str, label: str, line: int) -> None:
|
|
1512
|
+
if nid not in seen_ids:
|
|
1513
|
+
seen_ids.add(nid)
|
|
1514
|
+
nodes.append({"id": nid, "label": label, "file_type": "code",
|
|
1515
|
+
"source_file": str_path, "source_location": f"L{line}"})
|
|
1516
|
+
|
|
1517
|
+
def add_edge(src: str, tgt: str, relation: str, line: int,
|
|
1518
|
+
confidence: str = "EXTRACTED", weight: float = 1.0) -> None:
|
|
1519
|
+
edges.append({"source": src, "target": tgt, "relation": relation,
|
|
1520
|
+
"confidence": confidence, "source_file": str_path,
|
|
1521
|
+
"source_location": f"L{line}", "weight": weight})
|
|
1522
|
+
|
|
1523
|
+
file_nid = _make_id(stem)
|
|
1524
|
+
add_node(file_nid, path.name, 1)
|
|
1525
|
+
|
|
1526
|
+
def _extract_import(node) -> None:
|
|
1527
|
+
for child in node.children:
|
|
1528
|
+
if child.type == "builtin_function":
|
|
1529
|
+
bi = None
|
|
1530
|
+
args = None
|
|
1531
|
+
for c in child.children:
|
|
1532
|
+
if c.type == "builtin_identifier":
|
|
1533
|
+
bi = _read_text(c, source)
|
|
1534
|
+
elif c.type == "arguments":
|
|
1535
|
+
args = c
|
|
1536
|
+
if bi in ("@import", "@cImport") and args:
|
|
1537
|
+
for arg in args.children:
|
|
1538
|
+
if arg.type in ("string_literal", "string"):
|
|
1539
|
+
raw = _read_text(arg, source).strip('"')
|
|
1540
|
+
module_name = raw.split("/")[-1].split(".")[0]
|
|
1541
|
+
if module_name:
|
|
1542
|
+
tgt_nid = _make_id(module_name)
|
|
1543
|
+
add_edge(file_nid, tgt_nid, "imports_from",
|
|
1544
|
+
node.start_point[0] + 1)
|
|
1545
|
+
return
|
|
1546
|
+
elif child.type == "field_expression":
|
|
1547
|
+
_extract_import(child)
|
|
1548
|
+
return
|
|
1549
|
+
|
|
1550
|
+
def walk(node, parent_struct_nid: str | None = None) -> None:
|
|
1551
|
+
t = node.type
|
|
1552
|
+
|
|
1553
|
+
if t == "function_declaration":
|
|
1554
|
+
name_node = node.child_by_field_name("name")
|
|
1555
|
+
if name_node:
|
|
1556
|
+
func_name = _read_text(name_node, source)
|
|
1557
|
+
line = node.start_point[0] + 1
|
|
1558
|
+
if parent_struct_nid:
|
|
1559
|
+
func_nid = _make_id(parent_struct_nid, func_name)
|
|
1560
|
+
add_node(func_nid, f".{func_name}()", line)
|
|
1561
|
+
add_edge(parent_struct_nid, func_nid, "method", line)
|
|
1562
|
+
else:
|
|
1563
|
+
func_nid = _make_id(stem, func_name)
|
|
1564
|
+
add_node(func_nid, f"{func_name}()", line)
|
|
1565
|
+
add_edge(file_nid, func_nid, "contains", line)
|
|
1566
|
+
body = node.child_by_field_name("body")
|
|
1567
|
+
if body:
|
|
1568
|
+
function_bodies.append((func_nid, body))
|
|
1569
|
+
return
|
|
1570
|
+
|
|
1571
|
+
if t == "variable_declaration":
|
|
1572
|
+
name_node = None
|
|
1573
|
+
value_node = None
|
|
1574
|
+
for child in node.children:
|
|
1575
|
+
if child.type == "identifier":
|
|
1576
|
+
name_node = child
|
|
1577
|
+
elif child.type in ("struct_declaration", "enum_declaration",
|
|
1578
|
+
"union_declaration", "builtin_function",
|
|
1579
|
+
"field_expression"):
|
|
1580
|
+
value_node = child
|
|
1581
|
+
|
|
1582
|
+
if value_node and value_node.type == "struct_declaration":
|
|
1583
|
+
if name_node:
|
|
1584
|
+
struct_name = _read_text(name_node, source)
|
|
1585
|
+
line = node.start_point[0] + 1
|
|
1586
|
+
struct_nid = _make_id(stem, struct_name)
|
|
1587
|
+
add_node(struct_nid, struct_name, line)
|
|
1588
|
+
add_edge(file_nid, struct_nid, "contains", line)
|
|
1589
|
+
for child in value_node.children:
|
|
1590
|
+
walk(child, parent_struct_nid=struct_nid)
|
|
1591
|
+
return
|
|
1592
|
+
|
|
1593
|
+
if value_node and value_node.type in ("enum_declaration", "union_declaration"):
|
|
1594
|
+
if name_node:
|
|
1595
|
+
type_name = _read_text(name_node, source)
|
|
1596
|
+
line = node.start_point[0] + 1
|
|
1597
|
+
type_nid = _make_id(stem, type_name)
|
|
1598
|
+
add_node(type_nid, type_name, line)
|
|
1599
|
+
add_edge(file_nid, type_nid, "contains", line)
|
|
1600
|
+
return
|
|
1601
|
+
|
|
1602
|
+
if value_node and value_node.type in ("builtin_function", "field_expression"):
|
|
1603
|
+
_extract_import(node)
|
|
1604
|
+
return
|
|
1605
|
+
|
|
1606
|
+
for child in node.children:
|
|
1607
|
+
walk(child, parent_struct_nid)
|
|
1608
|
+
|
|
1609
|
+
walk(root)
|
|
1610
|
+
|
|
1611
|
+
seen_call_pairs: set[tuple[str, str]] = set()
|
|
1612
|
+
|
|
1613
|
+
def walk_calls(node, caller_nid: str) -> None:
|
|
1614
|
+
if node.type == "function_declaration":
|
|
1615
|
+
return
|
|
1616
|
+
if node.type == "call_expression":
|
|
1617
|
+
fn = node.child_by_field_name("function")
|
|
1618
|
+
if fn:
|
|
1619
|
+
callee = _read_text(fn, source).split(".")[-1]
|
|
1620
|
+
tgt_nid = next((n["id"] for n in nodes if n["label"] in
|
|
1621
|
+
(f"{callee}()", f".{callee}()")), None)
|
|
1622
|
+
if tgt_nid and tgt_nid != caller_nid:
|
|
1623
|
+
pair = (caller_nid, tgt_nid)
|
|
1624
|
+
if pair not in seen_call_pairs:
|
|
1625
|
+
seen_call_pairs.add(pair)
|
|
1626
|
+
add_edge(caller_nid, tgt_nid, "calls",
|
|
1627
|
+
node.start_point[0] + 1,
|
|
1628
|
+
confidence="INFERRED", weight=0.8)
|
|
1629
|
+
for child in node.children:
|
|
1630
|
+
walk_calls(child, caller_nid)
|
|
1631
|
+
|
|
1632
|
+
for caller_nid, body_node in function_bodies:
|
|
1633
|
+
walk_calls(body_node, caller_nid)
|
|
1634
|
+
|
|
1635
|
+
clean_edges = [e for e in edges if e["source"] in seen_ids and
|
|
1636
|
+
(e["target"] in seen_ids or e["relation"] == "imports_from")]
|
|
1637
|
+
return {"nodes": nodes, "edges": clean_edges}
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
# ── PowerShell ────────────────────────────────────────────────────────────────
|
|
1641
|
+
|
|
1642
|
+
def extract_powershell(path: Path) -> dict:
    """Build a node/edge graph for one PowerShell (.ps1) script.

    The script is parsed with tree-sitter. Functions, classes, class methods
    and ``using`` statements become nodes/edges in a first pass; a second pass
    scans the recorded function/method bodies for commands whose name matches
    a label found in this file and records them as inferred ``calls`` edges.

    Args:
        path: Location of the script to parse.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. If the grammar cannot be
        imported or parsing raises, both lists are empty and an ``"error"``
        key carries the message.
    """
    try:
        import tree_sitter_powershell as tsps
        from tree_sitter import Language, Parser
    except ImportError:
        return {"nodes": [], "edges": [], "error": "tree_sitter_powershell not installed"}

    try:
        parser = Parser(Language(tsps.language()))
        source = path.read_bytes()
        root = parser.parse(source).root_node
    except Exception as e:
        return {"nodes": [], "edges": [], "error": str(e)}

    stem = path.stem
    str_path = str(path)
    nodes: list[dict] = []
    edges: list[dict] = []
    seen_ids: set[str] = set()
    # (caller node id, syntax node of its body) pairs, consumed by the call pass.
    function_bodies: list[tuple[str, Any]] = []

    def add_node(nid: str, label: str, line: int) -> None:
        # Each id is emitted at most once.
        if nid in seen_ids:
            return
        seen_ids.add(nid)
        nodes.append({
            "id": nid,
            "label": label,
            "file_type": "code",
            "source_file": str_path,
            "source_location": f"L{line}",
        })

    def add_edge(src: str, tgt: str, relation: str, line: int,
                 confidence: str = "EXTRACTED", weight: float = 1.0) -> None:
        record = {
            "source": src,
            "target": tgt,
            "relation": relation,
            "confidence": confidence,
            "source_file": str_path,
            "source_location": f"L{line}",
            "weight": weight,
        }
        edges.append(record)

    def first_child(node, kind: str):
        # First direct child of the given grammar type, or None.
        for candidate in node.children:
            if candidate.type == kind:
                return candidate
        return None

    def line_of(node) -> int:
        # start_point rows are zero-based; source locations are one-based.
        return node.start_point[0] + 1

    file_nid = _make_id(stem)
    add_node(file_nid, path.name, 1)

    # Command names that are never treated as calls.
    _PS_SKIP = frozenset(
        "using return if else elseif foreach for "
        "while do switch try catch finally throw "
        "break continue exit param begin process end".split()
    )

    def _find_script_block_body(node):
        # Prefer the script_block_body inside the first script_block child;
        # fall back to the script_block itself when it has no such child.
        block = first_child(node, "script_block")
        if block is None:
            return None
        inner = first_child(block, "script_block_body")
        return block if inner is None else inner

    def walk(node, parent_class_nid: str | None = None) -> None:
        kind = node.type

        if kind == "function_statement":
            ident = first_child(node, "function_name")
            if ident:
                name = _read_text(ident, source)
                lineno = line_of(node)
                nid = _make_id(stem, name)
                add_node(nid, f"{name}()", lineno)
                add_edge(file_nid, nid, "contains", lineno)
                body = _find_script_block_body(node)
                if body:
                    function_bodies.append((nid, body))
            return

        if kind == "class_statement":
            ident = first_child(node, "simple_name")
            if ident:
                name = _read_text(ident, source)
                lineno = line_of(node)
                class_nid = _make_id(stem, name)
                add_node(class_nid, name, lineno)
                add_edge(file_nid, class_nid, "contains", lineno)
                # Members are visited with this class as their owner.
                for member in node.children:
                    walk(member, parent_class_nid=class_nid)
            return

        if kind == "class_method_definition":
            ident = first_child(node, "simple_name")
            if ident:
                name = _read_text(ident, source)
                lineno = line_of(node)
                if parent_class_nid:
                    owner, relation = parent_class_nid, "method"
                    nid = _make_id(parent_class_nid, name)
                    label = f".{name}()"
                else:
                    # No enclosing class recorded: attach to the file node.
                    owner, relation = file_nid, "contains"
                    nid = _make_id(stem, name)
                    label = f"{name}()"
                add_node(nid, label, lineno)
                add_edge(owner, nid, relation, lineno)
                body = _find_script_block_body(node)
                if body:
                    function_bodies.append((nid, body))
            return

        if kind == "command":
            ident = first_child(node, "command_name")
            if ident and _read_text(ident, source).lower() == "using":
                tokens = [
                    _read_text(el, source)
                    for group in node.children
                    if group.type == "command_elements"
                    for el in group.children
                    if el.type == "generic_token"
                ]
                # Drop the using-kind keyword; the last remaining token names the target.
                targets = [tok for tok in tokens
                           if tok.lower() not in ("namespace", "module", "assembly")]
                if targets:
                    module_name = targets[-1].split(".")[-1]
                    add_edge(file_nid, _make_id(module_name), "imports_from",
                             line_of(node))
            return

        for child in node.children:
            walk(child, parent_class_nid)

    walk(root)

    # Lower-cased bare names ("()" and leading "." stripped) -> node id.
    label_to_nid = {}
    for entry in nodes:
        label_to_nid[entry["label"].strip("()").lstrip(".").lower()] = entry["id"]
    seen_call_pairs: set[tuple[str, str]] = set()

    def walk_calls(node, caller_nid: str) -> None:
        kind = node.type
        if kind in ("function_statement", "class_statement"):
            return
        if kind == "command":
            ident = first_child(node, "command_name")
            if ident:
                key = _read_text(ident, source).lower()
                target = None if key in _PS_SKIP else label_to_nid.get(key)
                if target and target != caller_nid:
                    pair = (caller_nid, target)
                    # One edge per (caller, callee) pair, at the first call site met.
                    if pair not in seen_call_pairs:
                        seen_call_pairs.add(pair)
                        add_edge(caller_nid, target, "calls", line_of(node),
                                 confidence="INFERRED", weight=0.8)
        for child in node.children:
            walk_calls(child, caller_nid)

    for caller_nid, body_node in function_bodies:
        walk_calls(body_node, caller_nid)

    # Keep edges whose source is a known node and whose target is known too,
    # except imports_from edges, which may point outside this file.
    clean_edges = []
    for edge in edges:
        if edge["source"] not in seen_ids:
            continue
        if edge["target"] in seen_ids or edge["relation"] == "imports_from":
            clean_edges.append(edge)
    return {"nodes": nodes, "edges": clean_edges}
|
|
1794
|
+
|
|
1795
|
+
|
|
1485
1796
|
# ── Cross-file import resolution ──────────────────────────────────────────────
|
|
1486
1797
|
|
|
1487
1798
|
def _resolve_cross_file_imports(
|
|
@@ -1667,6 +1978,8 @@ def extract(paths: list[Path]) -> dict:
|
|
|
1667
1978
|
".swift": extract_swift,
|
|
1668
1979
|
".lua": extract_lua,
|
|
1669
1980
|
".toc": extract_lua,
|
|
1981
|
+
".zig": extract_zig,
|
|
1982
|
+
".ps1": extract_powershell,
|
|
1670
1983
|
}
|
|
1671
1984
|
|
|
1672
1985
|
for path in paths:
|
|
@@ -1709,7 +2022,7 @@ def collect_files(target: Path) -> list[Path]:
|
|
|
1709
2022
|
"*.py", "*.js", "*.ts", "*.tsx", "*.go", "*.rs",
|
|
1710
2023
|
"*.java", "*.c", "*.h", "*.cpp", "*.cc", "*.cxx", "*.hpp",
|
|
1711
2024
|
"*.rb", "*.cs", "*.kt", "*.kts", "*.scala", "*.php", "*.swift",
|
|
1712
|
-
"*.lua", "*.toc",
|
|
2025
|
+
"*.lua", "*.toc", "*.zig", "*.ps1",
|
|
1713
2026
|
)
|
|
1714
2027
|
results: list[Path] = []
|
|
1715
2028
|
for pattern in _EXTENSIONS:
|
|
@@ -10,6 +10,11 @@ from pathlib import Path
|
|
|
10
10
|
from graphify.security import safe_fetch, safe_fetch_text, validate_url
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def _yaml_str(s: str) -> str:
|
|
14
|
+
"""Escape a string for embedding in a YAML double-quoted scalar."""
|
|
15
|
+
return s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", " ").replace("\r", " ")
|
|
16
|
+
|
|
17
|
+
|
|
13
18
|
def _safe_filename(url: str, suffix: str) -> str:
|
|
14
19
|
"""Turn a URL into a safe filename."""
|
|
15
20
|
parsed = urllib.parse.urlparse(url)
|
|
@@ -106,7 +111,7 @@ def _fetch_webpage(url: str, author: str | None, contributor: str | None) -> tup
|
|
|
106
111
|
content = f"""---
|
|
107
112
|
source_url: {url}
|
|
108
113
|
type: webpage
|
|
109
|
-
title: "{title}"
|
|
114
|
+
title: "{_yaml_str(title)}"
|
|
110
115
|
captured_at: {now}
|
|
111
116
|
contributor: {contributor or author or 'unknown'}
|
|
112
117
|
---
|
|
@@ -248,7 +253,7 @@ def save_query_result(
|
|
|
248
253
|
"---",
|
|
249
254
|
f'type: "{query_type}"',
|
|
250
255
|
f'date: "{now.isoformat()}"',
|
|
251
|
-
f'question: "{question}"',
|
|
256
|
+
f'question: "{_yaml_str(question)}"',
|
|
252
257
|
'contributor: "graphify"',
|
|
253
258
|
]
|
|
254
259
|
if source_nodes:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphifyy
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: AI coding assistant skill (Claude Code, Codex, OpenCode, OpenClaw) - turn any folder of code, docs, papers, or images into a queryable knowledge graph
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -48,6 +48,8 @@ Requires-Dist: tree-sitter-scala
|
|
|
48
48
|
Requires-Dist: tree-sitter-php
|
|
49
49
|
Requires-Dist: tree-sitter-swift
|
|
50
50
|
Requires-Dist: tree-sitter-lua
|
|
51
|
+
Requires-Dist: tree-sitter-zig
|
|
52
|
+
Requires-Dist: tree-sitter-powershell
|
|
51
53
|
Provides-Extra: mcp
|
|
52
54
|
Requires-Dist: mcp; extra == "mcp"
|
|
53
55
|
Provides-Extra: neo4j
|
|
@@ -75,7 +77,7 @@ Dynamic: license-file
|
|
|
75
77
|
[](https://github.com/safishamsi/graphify/actions/workflows/ci.yml)
|
|
76
78
|
[](https://pypi.org/project/graphifyy/)
|
|
77
79
|
|
|
78
|
-
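**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, or OpenClaw - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.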
|
|
80
|
+
**An AI coding assistant skill.** Type `/graphify` in Claude Code, Codex, OpenCode, OpenClaw, or Factory Droid - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
|
|
79
81
|
|
|
80
82
|
Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, even images in other languages - graphify uses Claude vision to extract concepts and relationships from all of it and connects them into one graph.
|
|
81
83
|
|
|
@@ -93,6 +95,18 @@ graphify-out/
|
|
|
93
95
|
└── cache/ SHA256 cache - re-runs only process changed files
|
|
94
96
|
```
|
|
95
97
|
|
|
98
|
+
Add a `.graphifyignore` file to exclude folders you don't want in the graph:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
# .graphifyignore
|
|
102
|
+
vendor/
|
|
103
|
+
node_modules/
|
|
104
|
+
dist/
|
|
105
|
+
*.generated.py
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Same syntax as `.gitignore`. Patterns match against file paths relative to the folder you run graphify on.
|
|
109
|
+
|
|
96
110
|
## How it works
|
|
97
111
|
|
|
98
112
|
graphify runs in two passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, Claude subagents run in parallel over docs, papers, and images to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
|
|
@@ -103,7 +117,7 @@ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED`
|
|
|
103
117
|
|
|
104
118
|
## Install
|
|
105
119
|
|
|
106
|
-
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), or [OpenClaw](https://openclaw.ai)
|
|
120
|
+
**Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), [OpenClaw](https://openclaw.ai), or [Factory Droid](https://factory.ai)
|
|
107
121
|
|
|
108
122
|
```bash
|
|
109
123
|
pip install graphifyy && graphify install
|
|
@@ -119,8 +133,9 @@ pip install graphifyy && graphify install
|
|
|
119
133
|
| Codex | `graphify install --platform codex` |
|
|
120
134
|
| OpenCode | `graphify install --platform opencode` |
|
|
121
135
|
| OpenClaw | `graphify install --platform claw` |
|
|
136
|
+
| Factory Droid | `graphify install --platform droid` |
|
|
122
137
|
|
|
123
|
-
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
138
|
+
Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. Factory Droid uses the `Task` tool for parallel subagent dispatch. OpenClaw uses sequential extraction (parallel agent support is still early on that platform).
|
|
124
139
|
|
|
125
140
|
Then open your AI coding assistant and type:
|
|
126
141
|
|
|
@@ -138,10 +153,11 @@ After building a graph, run this once in your project:
|
|
|
138
153
|
| Codex | `graphify codex install` |
|
|
139
154
|
| OpenCode | `graphify opencode install` |
|
|
140
155
|
| OpenClaw | `graphify claw install` |
|
|
156
|
+
| Factory Droid | `graphify droid install` |
|
|
141
157
|
|
|
142
158
|
**Claude Code** does two things: writes a `CLAUDE.md` section telling Claude to read `graphify-out/GRAPH_REPORT.md` before answering architecture questions, and installs a **PreToolUse hook** (`settings.json`) that fires before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"graphify: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ — so Claude navigates via the graph instead of grepping through every file.
|
|
143
159
|
|
|
144
|
-
**Codex, OpenCode, OpenClaw** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
160
|
+
**Codex, OpenCode, OpenClaw, Factory Droid** write the same rules to `AGENTS.md` in your project root. These platforms don't support PreToolUse hooks, so AGENTS.md is the always-on mechanism.
|
|
145
161
|
|
|
146
162
|
Uninstall with the matching uninstall command (e.g. `graphify claude uninstall`).
|
|
147
163
|
|
|
@@ -212,13 +228,14 @@ graphify claude uninstall
|
|
|
212
228
|
graphify codex install # AGENTS.md (Codex)
|
|
213
229
|
graphify opencode install # AGENTS.md (OpenCode)
|
|
214
230
|
graphify claw install # AGENTS.md (OpenClaw)
|
|
231
|
+
graphify droid install # AGENTS.md (Factory Droid)
|
|
215
232
|
```
|
|
216
233
|
|
|
217
234
|
Works with any mix of file types:
|
|
218
235
|
|
|
219
236
|
| Type | Extensions | Extraction |
|
|
220
237
|
|------|-----------|------------|
|
|
221
|
-
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
238
|
+
| Code | `.py .ts .js .go .rs .java .c .cpp .rb .cs .kt .scala .php .swift .lua .zig .ps1` | AST via tree-sitter + call-graph + docstring/comment rationale |
|
|
222
239
|
| Docs | `.md .txt .rst` | Concepts + relationships + design rationale via Claude |
|
|
223
240
|
| Papers | `.pdf` | Citation mining + concept extraction |
|
|
224
241
|
| Images | `.png .jpg .webp .gif` | Claude vision - screenshots, diagrams, any language |
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "graphifyy"
|
|
7
|
-
version = "0.3.1"
|
|
7
|
+
version = "0.3.2"
|
|
8
8
|
description = "AI coding assistant skill (Claude Code, Codex, OpenCode, OpenClaw) - turn any folder of code, docs, papers, or images into a queryable knowledge graph"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -28,6 +28,8 @@ dependencies = [
|
|
|
28
28
|
"tree-sitter-php",
|
|
29
29
|
"tree-sitter-swift",
|
|
30
30
|
"tree-sitter-lua",
|
|
31
|
+
"tree-sitter-zig",
|
|
32
|
+
"tree-sitter-powershell",
|
|
31
33
|
]
|
|
32
34
|
|
|
33
35
|
[project.urls]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from graphify.detect import classify_file, count_words, detect, FileType, _looks_like_paper
|
|
2
|
+
from graphify.detect import classify_file, count_words, detect, FileType, _looks_like_paper, _is_ignored, _load_graphifyignore
|
|
3
3
|
|
|
4
4
|
FIXTURES = Path(__file__).parent / "fixtures"
|
|
5
5
|
|
|
@@ -69,3 +69,37 @@ def test_classify_attention_paper():
|
|
|
69
69
|
if paper_path.exists():
|
|
70
70
|
result = classify_file(paper_path)
|
|
71
71
|
assert result == FileType.PAPER
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_graphifyignore_excludes_file(tmp_path):
    """detect() leaves out files matched by .graphifyignore patterns."""
    ignore_file = tmp_path / ".graphifyignore"
    ignore_file.write_text("vendor/\n*.generated.py\n")

    vendor_dir = tmp_path / "vendor"
    vendor_dir.mkdir()
    # One file under the ignored directory, one kept file, one ignored by glob.
    fixtures = (
        (vendor_dir / "lib.py", "x = 1"),
        (tmp_path / "main.py", "print('hi')"),
        (tmp_path / "schema.generated.py", "x = 1"),
    )
    for target, text in fixtures:
        target.write_text(text)

    outcome = detect(tmp_path)
    code_files = outcome["files"]["code"]

    assert any("main.py" in entry for entry in code_files)
    assert all("vendor" not in entry for entry in code_files)
    assert all("generated" not in entry for entry in code_files)
    assert outcome["graphifyignore_patterns"] == 2
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_graphifyignore_missing_is_fine(tmp_path):
    """detect() reports zero ignore patterns when no .graphifyignore exists."""
    source_file = tmp_path / "main.py"
    source_file.write_text("x = 1")

    outcome = detect(tmp_path)

    assert outcome["graphifyignore_patterns"] == 0
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_graphifyignore_comments_ignored(tmp_path):
    """A comment and a blank line in .graphifyignore leave the real pattern working."""
    (tmp_path / ".graphifyignore").write_text("# this is a comment\n\nmain.py\n")
    for filename, text in (("main.py", "x = 1"), ("other.py", "x = 2")):
        (tmp_path / filename).write_text(text)

    code_files = detect(tmp_path)["files"]["code"]

    # main.py is the only real pattern; other.py must still be detected.
    assert all("main.py" not in entry for entry in code_files)
    assert any("other.py" in entry for entry in code_files)
|
|
@@ -61,7 +61,7 @@ def test_collect_files_from_dir():
|
|
|
61
61
|
supported = {".py", ".js", ".ts", ".tsx", ".go", ".rs",
|
|
62
62
|
".java", ".c", ".cpp", ".cc", ".cxx", ".rb",
|
|
63
63
|
".cs", ".kt", ".kts", ".scala", ".php", ".h", ".hpp",
|
|
64
|
-
".swift"}
|
|
64
|
+
".swift", ".lua", ".toc", ".zig", ".ps1"}
|
|
65
65
|
assert all(f.suffix in supported for f in files)
|
|
66
66
|
assert len(files) > 0
|
|
67
67
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|