dummyindex 0.4.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. dummyindex-0.4.31/LICENSE +21 -0
  2. dummyindex-0.4.31/PKG-INFO +516 -0
  3. dummyindex-0.4.31/README.md +425 -0
  4. dummyindex-0.4.31/dummyindex/__init__.py +60 -0
  5. dummyindex-0.4.31/dummyindex/__main__.py +1416 -0
  6. dummyindex-0.4.31/dummyindex/analysis/__init__.py +1 -0
  7. dummyindex-0.4.31/dummyindex/analysis/analyze.py +540 -0
  8. dummyindex-0.4.31/dummyindex/analysis/benchmark.py +129 -0
  9. dummyindex-0.4.31/dummyindex/analysis/cluster.py +137 -0
  10. dummyindex-0.4.31/dummyindex/analysis/feature_naming.py +460 -0
  11. dummyindex-0.4.31/dummyindex/analysis/features.py +647 -0
  12. dummyindex-0.4.31/dummyindex/analysis/flow_naming.py +370 -0
  13. dummyindex-0.4.31/dummyindex/analysis/flows.py +571 -0
  14. dummyindex-0.4.31/dummyindex/analysis/report.py +234 -0
  15. dummyindex-0.4.31/dummyindex/analysis/wiki.py +227 -0
  16. dummyindex-0.4.31/dummyindex/pipeline/__init__.py +1 -0
  17. dummyindex-0.4.31/dummyindex/pipeline/build.py +113 -0
  18. dummyindex-0.4.31/dummyindex/pipeline/cache.py +175 -0
  19. dummyindex-0.4.31/dummyindex/pipeline/detect.py +510 -0
  20. dummyindex-0.4.31/dummyindex/pipeline/export.py +2787 -0
  21. dummyindex-0.4.31/dummyindex/pipeline/extract.py +3440 -0
  22. dummyindex-0.4.31/dummyindex/pipeline/structure.py +643 -0
  23. dummyindex-0.4.31/dummyindex/pipeline/validate.py +158 -0
  24. dummyindex-0.4.31/dummyindex/runtime/__init__.py +1 -0
  25. dummyindex-0.4.31/dummyindex/runtime/hooks.py +237 -0
  26. dummyindex-0.4.31/dummyindex/runtime/ingest.py +297 -0
  27. dummyindex-0.4.31/dummyindex/runtime/manifest.py +4 -0
  28. dummyindex-0.4.31/dummyindex/runtime/run_log.py +252 -0
  29. dummyindex-0.4.31/dummyindex/runtime/security.py +205 -0
  30. dummyindex-0.4.31/dummyindex/runtime/serve.py +373 -0
  31. dummyindex-0.4.31/dummyindex/runtime/transcribe.py +182 -0
  32. dummyindex-0.4.31/dummyindex/runtime/watch.py +429 -0
  33. dummyindex-0.4.31/dummyindex/skills/__init__.py +1 -0
  34. dummyindex-0.4.31/dummyindex/skills/skill-aider.md +1727 -0
  35. dummyindex-0.4.31/dummyindex/skills/skill-claw.md +1727 -0
  36. dummyindex-0.4.31/dummyindex/skills/skill-codex.md +1785 -0
  37. dummyindex-0.4.31/dummyindex/skills/skill-copilot.md +1811 -0
  38. dummyindex-0.4.31/dummyindex/skills/skill-droid.md +1782 -0
  39. dummyindex-0.4.31/dummyindex/skills/skill-kiro.md +1726 -0
  40. dummyindex-0.4.31/dummyindex/skills/skill-opencode.md +1835 -0
  41. dummyindex-0.4.31/dummyindex/skills/skill-trae.md +1752 -0
  42. dummyindex-0.4.31/dummyindex/skills/skill-vscode.md +579 -0
  43. dummyindex-0.4.31/dummyindex/skills/skill-windows.md +1570 -0
  44. dummyindex-0.4.31/dummyindex/skills/skill.md +1897 -0
  45. dummyindex-0.4.31/dummyindex.egg-info/PKG-INFO +516 -0
  46. dummyindex-0.4.31/dummyindex.egg-info/SOURCES.txt +82 -0
  47. dummyindex-0.4.31/dummyindex.egg-info/dependency_links.txt +1 -0
  48. dummyindex-0.4.31/dummyindex.egg-info/entry_points.txt +2 -0
  49. dummyindex-0.4.31/dummyindex.egg-info/requires.txt +67 -0
  50. dummyindex-0.4.31/dummyindex.egg-info/top_level.txt +2 -0
  51. dummyindex-0.4.31/pyproject.toml +70 -0
  52. dummyindex-0.4.31/setup.cfg +4 -0
  53. dummyindex-0.4.31/tests/test_analyze.py +232 -0
  54. dummyindex-0.4.31/tests/test_benchmark.py +119 -0
  55. dummyindex-0.4.31/tests/test_build.py +62 -0
  56. dummyindex-0.4.31/tests/test_cache.py +184 -0
  57. dummyindex-0.4.31/tests/test_claude_md.py +136 -0
  58. dummyindex-0.4.31/tests/test_cluster.py +76 -0
  59. dummyindex-0.4.31/tests/test_confidence.py +192 -0
  60. dummyindex-0.4.31/tests/test_detect.py +238 -0
  61. dummyindex-0.4.31/tests/test_export.py +153 -0
  62. dummyindex-0.4.31/tests/test_extract.py +170 -0
  63. dummyindex-0.4.31/tests/test_feature_naming.py +212 -0
  64. dummyindex-0.4.31/tests/test_features.py +227 -0
  65. dummyindex-0.4.31/tests/test_flow_naming.py +184 -0
  66. dummyindex-0.4.31/tests/test_flows.py +244 -0
  67. dummyindex-0.4.31/tests/test_hooks.py +125 -0
  68. dummyindex-0.4.31/tests/test_hypergraph.py +205 -0
  69. dummyindex-0.4.31/tests/test_ingest.py +68 -0
  70. dummyindex-0.4.31/tests/test_install.py +321 -0
  71. dummyindex-0.4.31/tests/test_languages.py +562 -0
  72. dummyindex-0.4.31/tests/test_multilang.py +173 -0
  73. dummyindex-0.4.31/tests/test_pipeline.py +177 -0
  74. dummyindex-0.4.31/tests/test_rationale.py +89 -0
  75. dummyindex-0.4.31/tests/test_report.py +63 -0
  76. dummyindex-0.4.31/tests/test_run_log.py +155 -0
  77. dummyindex-0.4.31/tests/test_security.py +189 -0
  78. dummyindex-0.4.31/tests/test_semantic_similarity.py +194 -0
  79. dummyindex-0.4.31/tests/test_serve.py +153 -0
  80. dummyindex-0.4.31/tests/test_structure.py +108 -0
  81. dummyindex-0.4.31/tests/test_transcribe.py +147 -0
  82. dummyindex-0.4.31/tests/test_validate.py +87 -0
  83. dummyindex-0.4.31/tests/test_watch.py +96 -0
  84. dummyindex-0.4.31/tests/test_wiki.py +139 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ahmed Mulla
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,516 @@
1
+ Metadata-Version: 2.4
2
+ Name: dummyindex
3
+ Version: 0.4.31
4
+ Summary: AI coding assistant skill (Claude Code, Codex, OpenCode, Cursor, Gemini CLI, Aider, OpenClaw, Factory Droid, Trae, Hermes, Kiro, Google Antigravity) - turn any folder of code, docs, papers, images, or videos into a queryable knowledge graph
5
+ Author: Ahmed Mulla
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Ahmed Mulla
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/MullaAhmed/dummyindex
29
+ Project-URL: Repository, https://github.com/MullaAhmed/dummyindex
30
+ Project-URL: Issues, https://github.com/MullaAhmed/dummyindex/issues
31
+ Project-URL: LinkedIn, https://www.linkedin.com/in/ahmed-mulla/
32
+ Keywords: claude,claude-code,codex,opencode,cursor,gemini,aider,kiro,knowledge-graph,rag,graphrag,obsidian,community-detection,tree-sitter,leiden,llm
33
+ Requires-Python: <3.14,>=3.10
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: networkx
37
+ Requires-Dist: tree-sitter>=0.23.0
38
+ Requires-Dist: tree-sitter-python
39
+ Requires-Dist: tree-sitter-javascript
40
+ Requires-Dist: tree-sitter-typescript
41
+ Requires-Dist: tree-sitter-go
42
+ Requires-Dist: tree-sitter-rust
43
+ Requires-Dist: tree-sitter-java
44
+ Requires-Dist: tree-sitter-c
45
+ Requires-Dist: tree-sitter-cpp
46
+ Requires-Dist: tree-sitter-ruby
47
+ Requires-Dist: tree-sitter-c-sharp
48
+ Requires-Dist: tree-sitter-kotlin
49
+ Requires-Dist: tree-sitter-scala
50
+ Requires-Dist: tree-sitter-php
51
+ Requires-Dist: tree-sitter-swift
52
+ Requires-Dist: tree-sitter-lua
53
+ Requires-Dist: tree-sitter-zig
54
+ Requires-Dist: tree-sitter-powershell
55
+ Requires-Dist: tree-sitter-elixir
56
+ Requires-Dist: tree-sitter-objc
57
+ Requires-Dist: tree-sitter-julia
58
+ Requires-Dist: tree-sitter-verilog
59
+ Provides-Extra: mcp
60
+ Requires-Dist: mcp; extra == "mcp"
61
+ Provides-Extra: neo4j
62
+ Requires-Dist: neo4j; extra == "neo4j"
63
+ Provides-Extra: pdf
64
+ Requires-Dist: pypdf; extra == "pdf"
65
+ Requires-Dist: html2text; extra == "pdf"
66
+ Provides-Extra: watch
67
+ Requires-Dist: watchdog; extra == "watch"
68
+ Provides-Extra: svg
69
+ Requires-Dist: matplotlib; extra == "svg"
70
+ Provides-Extra: leiden
71
+ Requires-Dist: graspologic; python_version < "3.13" and extra == "leiden"
72
+ Provides-Extra: office
73
+ Requires-Dist: python-docx; extra == "office"
74
+ Requires-Dist: openpyxl; extra == "office"
75
+ Provides-Extra: video
76
+ Requires-Dist: faster-whisper; extra == "video"
77
+ Requires-Dist: yt-dlp; extra == "video"
78
+ Provides-Extra: all
79
+ Requires-Dist: mcp; extra == "all"
80
+ Requires-Dist: neo4j; extra == "all"
81
+ Requires-Dist: pypdf; extra == "all"
82
+ Requires-Dist: html2text; extra == "all"
83
+ Requires-Dist: watchdog; extra == "all"
84
+ Requires-Dist: graspologic; python_version < "3.13" and extra == "all"
85
+ Requires-Dist: python-docx; extra == "all"
86
+ Requires-Dist: openpyxl; extra == "all"
87
+ Requires-Dist: faster-whisper; extra == "all"
88
+ Requires-Dist: yt-dlp; extra == "all"
89
+ Requires-Dist: matplotlib; extra == "all"
90
+ Dynamic: license-file
91
+
92
+ <p align="center">
93
+ <a href="https://github.com/MullaAhmed/dummyindex"><img src="docs/logo-text.svg" width="260" alt="dummyIndex"/></a>
94
+ </p>
95
+
96
+ <p align="center">
97
+ <a href="https://github.com/MullaAhmed"><img src="https://img.shields.io/badge/GitHub-Ahmed%20Mulla-181717?logo=github" alt="GitHub: Ahmed Mulla"/></a>
98
+ <a href="https://www.linkedin.com/in/ahmed-mulla/"><img src="https://img.shields.io/badge/LinkedIn-Ahmed%20Mulla-0077B5?logo=linkedin" alt="LinkedIn: Ahmed Mulla"/></a>
99
+ </p>
100
+
101
+ **An AI coding assistant skill.** Type `/dummyindex` in Claude Code, Codex, OpenCode, Cursor, Gemini CLI, GitHub Copilot CLI, VS Code Copilot Chat, Aider, OpenClaw, Factory Droid, Trae, Hermes, Kiro, or Google Antigravity - it reads your files, builds a knowledge graph, and gives you back structure you didn't know was there. Understand a codebase faster. Find the "why" behind architectural decisions.
102
+
103
+ Fully multimodal. Drop in code, PDFs, markdown, screenshots, diagrams, whiteboard photos, images in other languages, or video and audio files - dummyindex extracts concepts and relationships from all of it and connects them into one graph. Videos are transcribed with Whisper using a domain-aware prompt derived from your corpus. 25 languages supported via tree-sitter AST (Python, JS, TS, Go, Rust, Java, C, C++, Ruby, C#, Kotlin, Scala, PHP, Swift, Lua, Zig, PowerShell, Elixir, Objective-C, Julia, Verilog, SystemVerilog, Vue, Svelte, Dart).
104
+
105
+ > Andrej Karpathy keeps a `/raw` folder where he drops papers, tweets, screenshots, and notes. dummyindex is built for exactly that kind of folder: it turns the pile into a graph that needs 71.5x fewer tokens per query than reading the raw files, persists across sessions, and is honest about what it found vs. what it guessed.
106
+
107
+ ```
108
+ /dummyindex . # works on any folder - your codebase, notes, papers, anything
109
+ ```
110
+
111
+ ```
112
+ dummyindex-out/
113
+ ├── graph.html interactive graph - open in any browser, click nodes, search, filter by community
114
+ ├── GRAPH_REPORT.md god nodes, surprising connections, suggested questions
115
+ ├── graph.json persistent graph - query weeks later without re-reading
116
+ ├── structure_graph.html top-down code structure viewer
117
+ ├── structure_graph.json folder → file → class/function tree with cross-edges
118
+ └── cache/ SHA256 cache - re-runs only process changed files
119
+ ```
120
+
121
+ Add a `.dummyindexignore` file to exclude folders you don't want in the graph:
122
+
123
+ ```
124
+ # .dummyindexignore
125
+ vendor/
126
+ node_modules/
127
+ dist/
128
+ *.generated.py
129
+ ```
130
+
131
+ Same syntax as `.gitignore`. You can keep a single `.dummyindexignore` at your repo root — patterns work correctly even when dummyindex is run on a subfolder.
132
+
133
+ ## How it works
134
+
135
+ dummyindex runs in three passes. First, a deterministic AST pass extracts structure from code files (classes, functions, imports, call graphs, docstrings, rationale comments) with no LLM needed. Second, video and audio files are transcribed locally with faster-whisper using a domain-aware prompt derived from corpus god nodes — transcripts are cached so re-runs are instant. Third, Claude subagents run in parallel over docs, papers, images, and transcripts to extract concepts, relationships, and design rationale. The results are merged into a NetworkX graph, clustered with Leiden community detection, and exported as interactive HTML, queryable JSON, and a plain-language audit report.
136
+
137
+ **Clustering is graph-topology-based — no embeddings.** Leiden finds communities by edge density. The semantic similarity edges that Claude extracts (`semantically_similar_to`, marked INFERRED) are already in the graph, so they influence community detection directly. The graph structure is the similarity signal — no separate embedding step or vector database needed.
138
+
139
+ Every relationship is tagged `EXTRACTED` (found directly in source), `INFERRED` (reasonable inference, with a confidence score), or `AMBIGUOUS` (flagged for review). You always know what was found vs guessed.
140
+
141
+ ## Install
142
+
143
+ **Requires:** Python 3.10+ and one of: [Claude Code](https://claude.ai/code), [Codex](https://openai.com/codex), [OpenCode](https://opencode.ai), [Cursor](https://cursor.com), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [GitHub Copilot CLI](https://docs.github.com/en/copilot/how-tos/copilot-cli), [VS Code Copilot Chat](https://code.visualstudio.com/docs/copilot/overview), [Aider](https://aider.chat), [OpenClaw](https://openclaw.ai), [Factory Droid](https://factory.ai), [Trae](https://trae.ai), [Kiro](https://kiro.dev), Hermes, or [Google Antigravity](https://antigravity.google)
144
+
145
+ ```bash
146
+ # Recommended — works on Mac and Linux with no PATH setup needed
147
+ uv tool install dummyindex && dummyindex install
148
+ # or with pipx
149
+ pipx install dummyindex && dummyindex install
150
+ # or plain pip
151
+ pip install dummyindex && dummyindex install
152
+ ```
153
+
154
+ > **Official package:** The PyPI package is named `dummyindex` (install with `pip install dummyindex`). Other packages named `dummyindex*` on PyPI are not affiliated with this project. The only official repository is [MullaAhmed/dummyindex](https://github.com/MullaAhmed/dummyindex). The CLI and the skill command use the same name, `dummyindex`.
155
+
156
+ > **`dummyindex: command not found`?** Use `uv tool install dummyindex` (recommended) or `pipx install dummyindex` — both put the CLI in a managed location that's automatically on PATH. With plain `pip`, you may need to add `~/.local/bin` (Linux) or `~/Library/Python/3.x/bin` (Mac) to your PATH, or run `python -m dummyindex` instead. On Windows, pip scripts land in `%APPDATA%\Python\PythonXY\Scripts`.
157
+
158
+ ### Platform support
159
+
160
+ | Platform | Install command |
161
+ |----------|----------------|
162
+ | Claude Code (Linux/Mac) | `dummyindex install` |
163
+ | Claude Code (Windows) | `dummyindex install` (auto-detected) or `dummyindex install --platform windows` |
164
+ | Codex | `dummyindex install --platform codex` |
165
+ | OpenCode | `dummyindex install --platform opencode` |
166
+ | GitHub Copilot CLI | `dummyindex install --platform copilot` |
167
+ | VS Code Copilot Chat | `dummyindex vscode install` |
168
+ | Aider | `dummyindex install --platform aider` |
169
+ | OpenClaw | `dummyindex install --platform claw` |
170
+ | Factory Droid | `dummyindex install --platform droid` |
171
+ | Trae | `dummyindex install --platform trae` |
172
+ | Trae CN | `dummyindex install --platform trae-cn` |
173
+ | Gemini CLI | `dummyindex install --platform gemini` |
174
+ | Hermes | `dummyindex install --platform hermes` |
175
+ | Kiro IDE/CLI | `dummyindex kiro install` |
176
+ | Cursor | `dummyindex cursor install` |
177
+ | Google Antigravity | `dummyindex antigravity install` |
178
+
179
+ Codex users also need `multi_agent = true` under `[features]` in `~/.codex/config.toml` for parallel extraction. Factory Droid uses the `Task` tool for parallel subagent dispatch. OpenClaw and Aider use sequential extraction (parallel agent support is still early on those platforms). Trae uses the Agent tool for parallel subagent dispatch and does **not** support PreToolUse hooks — AGENTS.md is the always-on mechanism. Codex supports PreToolUse hooks — `dummyindex codex install` installs one in `.codex/hooks.json` in addition to writing AGENTS.md.
180
+
181
+ Then open your AI coding assistant and type:
182
+
183
+ ```
184
+ /dummyindex .
185
+ ```
186
+
187
+ Note: Codex uses `$` instead of `/` for skill calling, so type `$dummyindex .` instead.
188
+
189
+ ### Make your assistant always use the graph (recommended)
190
+
191
+ After building a graph, run this once in your project:
192
+
193
+ | Platform | Command |
194
+ |----------|---------|
195
+ | Claude Code | `dummyindex claude install` |
196
+ | Codex | `dummyindex codex install` |
197
+ | OpenCode | `dummyindex opencode install` |
198
+ | GitHub Copilot CLI | `dummyindex copilot install` |
199
+ | VS Code Copilot Chat | `dummyindex vscode install` |
200
+ | Aider | `dummyindex aider install` |
201
+ | OpenClaw | `dummyindex claw install` |
202
+ | Factory Droid | `dummyindex droid install` |
203
+ | Trae | `dummyindex trae install` |
204
+ | Trae CN | `dummyindex trae-cn install` |
205
+ | Cursor | `dummyindex cursor install` |
206
+ | Gemini CLI | `dummyindex gemini install` |
207
+ | Hermes | `dummyindex hermes install` |
208
+ | Kiro IDE/CLI | `dummyindex kiro install` |
209
+ | Google Antigravity | `dummyindex antigravity install` |
210
+
211
+ **Claude Code** does two things: writes a `CLAUDE.md` section telling Claude to read `dummyindex-out/GRAPH_REPORT.md` before answering architecture questions, and installs a **PreToolUse hook** (`settings.json`) that fires before every Glob and Grep call. If a knowledge graph exists, Claude sees: _"dummyindex: Knowledge graph exists. Read GRAPH_REPORT.md for god nodes and community structure before searching raw files."_ — so Claude navigates via the graph instead of grepping through every file.
212
+
213
+ **Codex** writes to `AGENTS.md` and also installs a **PreToolUse hook** in `.codex/hooks.json` that fires before every Bash tool call — same always-on mechanism as Claude Code.
214
+
215
+ **OpenCode** writes to `AGENTS.md` and also installs a **`tool.execute.before` plugin** (`.opencode/plugins/dummyindex.js` + `opencode.json` registration) that fires before bash tool calls and injects the graph reminder into tool output when the graph exists.
216
+
217
+ **Cursor** writes `.cursor/rules/dummyindex.mdc` with `alwaysApply: true` — Cursor includes it in every conversation automatically, no hook needed.
218
+
219
+ **Gemini CLI** copies the skill to `~/.gemini/skills/dummyindex/SKILL.md`, writes a `GEMINI.md` section, and installs a `BeforeTool` hook in `.gemini/settings.json` that fires before file-read tool calls — same always-on mechanism as Claude Code.
220
+
221
+ **Aider, OpenClaw, Factory Droid, Trae, and Hermes** write the same rules to `AGENTS.md` in your project root and copy the skill to the platform's global skill directory. These platforms don't support tool hooks, so AGENTS.md is the always-on mechanism.
222
+
223
+ **Kiro IDE/CLI** writes the skill to `.kiro/skills/dummyindex/SKILL.md` (invoked via `/dummyindex`) and a steering file to `.kiro/steering/dummyindex.md` with `inclusion: always` — Kiro injects this into every conversation automatically, no hook needed.
224
+
225
+ **Google Antigravity** writes `.agents/rules/dummyindex.md` (always-on rules) and `.agents/workflows/dummyindex.md` (registers `/dummyindex` as a slash command). No hook equivalent exists in Antigravity — rules are the always-on mechanism.
226
+
227
+ **GitHub Copilot CLI** copies the skill to `~/.copilot/skills/dummyindex/SKILL.md`. Run `dummyindex copilot install` to set it up.
228
+
229
+ **VS Code Copilot Chat** installs a Python-only skill (works on Windows PowerShell and macOS/Linux alike) and writes `.github/copilot-instructions.md` in your project root — VS Code reads this automatically every session, making graph context always-on without any hook mechanism. Run `dummyindex vscode install`. Note: this configures the chat panel in VS Code, not the Copilot CLI terminal tool.
230
+
231
+ Uninstall with the matching uninstall command (e.g. `dummyindex claude uninstall`).
232
+
233
+ **Always-on vs explicit trigger — what's the difference?**
234
+
235
+ The always-on hook surfaces `GRAPH_REPORT.md` — a one-page summary of god nodes, communities, and surprising connections. Your assistant reads this before searching files, so it navigates by structure instead of keyword matching. That covers most everyday questions.
236
+
237
+ `/dummyindex query`, `/dummyindex path`, and `/dummyindex explain` go deeper: they traverse the raw `graph.json` hop by hop, trace exact paths between nodes, and surface edge-level detail (relation type, confidence score, source location). Use them when you want a specific question answered from the graph rather than a general orientation.
238
+
239
+ Think of it this way: the always-on hook gives your assistant a map. The `/dummyindex` commands let it navigate the map precisely.
240
+
241
+ ### Team workflows
242
+
243
+ `dummyindex-out/` is designed to be committed to git so every teammate starts with a fresh map.
244
+
245
+ **Recommended `.gitignore` additions:**
246
+ ```
247
+ # keep graph outputs, skip heavy/local-only files
248
+ dummyindex-out/cache/ # optional: commit for shared extraction speed, skip to keep repo small
249
+ dummyindex-out/manifest.json # mtime-based, invalid after git clone — always gitignore this
250
+ dummyindex-out/cost.json # local token tracking, not useful to share
251
+ ```
252
+
253
+ **Shared setup:**
254
+ 1. One person runs `/dummyindex .` to build the initial graph and commits `dummyindex-out/`.
255
+ 2. Everyone else pulls — their assistant reads `GRAPH_REPORT.md` immediately with no extra steps.
256
+ 3. Install the post-commit hook (`dummyindex hook install`) so the graph rebuilds automatically after code changes — no LLM calls needed for code-only updates.
257
+ 4. For doc/paper changes, whoever edits the files runs `/dummyindex --update` to refresh semantic nodes.
258
+
259
+ **Excluding paths** — create `.dummyindexignore` in your project root (same syntax as `.gitignore`). Files matching those patterns are skipped during detection and extraction.
260
+
261
+ ```
262
+ # .dummyindexignore example
263
+ AGENTS.md # dummyindex install files — don't extract your own instructions as knowledge
264
+ CLAUDE.md
265
+ GEMINI.md
266
+ .gemini/
267
+ .opencode/
268
+ docs/translations/ # generated content you don't want in the graph
269
+ ```
270
+
271
+ ## Using `graph.json` with an LLM
272
+
273
+ `graph.json` is not meant to be pasted into a prompt all at once. The useful
274
+ workflow is:
275
+
276
+ 1. Start with `dummyindex-out/GRAPH_REPORT.md` for the high-level overview.
277
+ 2. Use `dummyindex query` to pull a smaller subgraph for the specific question
278
+ you want to answer.
279
+ 3. Give that focused output to your assistant instead of dumping the full raw
280
+ corpus.
281
+
282
+ For example, after running dummyindex on a project:
283
+
284
+ ```bash
285
+ dummyindex query "show the auth flow" --graph dummyindex-out/graph.json
286
+ dummyindex query "what connects DigestAuth to Response?" --graph dummyindex-out/graph.json
287
+ ```
288
+
289
+ The output includes node labels, edge types, confidence tags, source files, and
290
+ source locations. That makes it a good intermediate context block for an LLM:
291
+
292
+ ```text
293
+ Use this graph query output to answer the question. Prefer the graph structure
294
+ over guessing, and cite the source files when possible.
295
+ ```
296
+
297
+ If your assistant supports tool calling or MCP, use the graph directly instead
298
+ of pasting text. dummyindex can expose `graph.json` as an MCP server:
299
+
300
+ ```bash
301
+ python -m dummyindex.runtime.serve dummyindex-out/graph.json
302
+ ```
303
+
304
+ That gives the assistant structured graph access for repeated queries such as
305
+ `query_graph`, `get_node`, `get_neighbors`, and `shortest_path`.
306
+
307
+ > **WSL / Linux note:** Ubuntu ships `python3`, not `python`. Install into a project venv to avoid PEP 668 conflicts, and use the full venv path in your `.mcp.json`:
308
+ > ```bash
309
+ > python3 -m venv .venv && .venv/bin/pip install "dummyindex[mcp]"
310
+ > ```
311
+ > ```json
312
+ > { "mcpServers": { "dummyindex": { "type": "stdio", "command": ".venv/bin/python3", "args": ["-m", "dummyindex.runtime.serve", "dummyindex-out/graph.json"] } } }
313
+ > ```
314
+ > Also note: the official PyPI package is `dummyindex` — install it with `pip install dummyindex`. Similarly named `dummyindex*` packages on PyPI are unrelated to this project.
315
+
316
+ ## Usage
317
+
318
+ ### AI assistant command
319
+
320
+ Use these from Claude Code, Codex, OpenCode, Cursor, Gemini CLI, Copilot Chat, or another supported assistant after installing the skill:
321
+
322
+ ```
323
+ /dummyindex # run on current directory
324
+ /dummyindex ./raw # run on a specific folder
325
+ /dummyindex ./raw --mode deep # more aggressive INFERRED edge extraction
326
+ /dummyindex ./raw --update # re-extract only changed files, merge into existing graph
327
+ /dummyindex ./raw --directed # build directed graph (preserves edge direction: source→target)
328
+ /dummyindex ./raw --cluster-only # rerun clustering on existing graph, no re-extraction
329
+ /dummyindex ./raw --no-viz # skip HTML, just produce report + JSON
330
+ /dummyindex ./raw --wiki # build agent-crawlable wiki
331
+ /dummyindex ./raw --obsidian # also generate Obsidian vault (opt-in)
332
+ /dummyindex ./raw --obsidian --obsidian-dir ~/vaults/myproject # write vault to a specific directory
333
+ /dummyindex ./raw --whisper-model medium # use a larger local Whisper model
334
+
335
+ /dummyindex add https://arxiv.org/abs/1706.03762 # fetch a paper, save, update graph through the assistant
336
+ /dummyindex add https://x.com/karpathy/status/... # fetch a tweet
337
+ /dummyindex add <video-url> # download audio, transcribe, add to graph
338
+ /dummyindex add https://... --author "Name" # tag the original author
339
+ /dummyindex add https://... --contributor "Name" # tag who added it to the corpus
340
+
341
+ /dummyindex query "what connects attention to the optimizer?"
342
+ /dummyindex query "what connects attention to the optimizer?" --dfs # trace a specific path
343
+ /dummyindex query "what connects attention to the optimizer?" --budget 1500 # cap at N tokens
344
+ /dummyindex path "DigestAuth" "Response"
345
+ /dummyindex explain "SwinTransformer"
346
+
347
+ /dummyindex ./raw --watch # auto-sync graph as files change (code: instant, docs: notifies you)
348
+ /dummyindex ./raw --wiki # build agent-crawlable wiki (index.md + article per community)
349
+ /dummyindex ./raw --svg # export graph.svg
350
+ /dummyindex ./raw --graphml # export graph.graphml (Gephi, yEd)
351
+ /dummyindex ./raw --neo4j # generate cypher.txt for Neo4j
352
+ /dummyindex ./raw --neo4j-push bolt://localhost:7687 # push directly to a running Neo4j instance
353
+ /dummyindex ./raw --mcp # start MCP stdio server
354
+ ```
355
+
356
+ ### Terminal CLI
357
+
358
+ Use these directly from a shell after installing the Python package:
359
+
360
+ ```bash
361
+ # git hooks - platform-agnostic, rebuild graph on commit and branch switch
362
+ dummyindex hook install
363
+ dummyindex hook uninstall
364
+ dummyindex hook status
365
+
366
+ # always-on assistant instructions - platform-specific
367
+ dummyindex claude install # CLAUDE.md + PreToolUse hook (Claude Code)
368
+ dummyindex claude uninstall
369
+ dummyindex codex install # AGENTS.md + PreToolUse hook in .codex/hooks.json (Codex)
370
+ dummyindex opencode install # AGENTS.md + tool.execute.before plugin (OpenCode)
371
+ dummyindex cursor install # .cursor/rules/dummyindex.mdc (Cursor)
372
+ dummyindex cursor uninstall
373
+ dummyindex gemini install # GEMINI.md + BeforeTool hook (Gemini CLI)
374
+ dummyindex gemini uninstall
375
+ dummyindex copilot install # skill file (GitHub Copilot CLI)
376
+ dummyindex copilot uninstall
377
+ dummyindex aider install # AGENTS.md (Aider)
378
+ dummyindex aider uninstall
379
+ dummyindex claw install # AGENTS.md (OpenClaw)
380
+ dummyindex droid install # AGENTS.md (Factory Droid)
381
+ dummyindex trae install # AGENTS.md (Trae)
382
+ dummyindex trae uninstall
383
+ dummyindex trae-cn install # AGENTS.md (Trae CN)
384
+ dummyindex trae-cn uninstall
385
+ dummyindex hermes install # AGENTS.md + ~/.hermes/skills/ (Hermes)
386
+ dummyindex hermes uninstall
387
+ dummyindex kiro install # .kiro/skills/ + .kiro/steering/dummyindex.md (Kiro IDE/CLI)
388
+ dummyindex kiro uninstall
389
+ dummyindex antigravity install # .agents/rules + .agents/workflows (Google Antigravity)
390
+ dummyindex antigravity uninstall
391
+
392
+ # query and navigate the graph directly from the terminal (no AI assistant needed)
393
+ dummyindex query "what connects attention to the optimizer?"
394
+ dummyindex query "show the auth flow" --dfs
395
+ dummyindex query "what is CfgNode?" --budget 500
396
+ dummyindex query "..." --graph path/to/graph.json
397
+ dummyindex path "DigestAuth" "Response" # shortest path between two nodes
398
+ dummyindex explain "SwinTransformer" # plain-language explanation of a node
399
+
400
+ # add content from the terminal
401
+ dummyindex add https://arxiv.org/abs/1706.03762 # fetch paper, save to ./raw
402
+ dummyindex add https://... --author "Name" --contributor "Name"
403
+ dummyindex update . # merge code changes into an existing graph
404
+
405
+ # save useful Q&A back into dummyindex-out/memory/
406
+ dummyindex save-result --question "..." --answer "..." --nodes NodeA NodeB
407
+
408
+ # incremental update and maintenance
409
+ dummyindex watch ./src # auto-rebuild on code changes
410
+ dummyindex check-update ./src # check if semantic re-extraction is pending (cron-safe)
411
+ dummyindex update ./src # re-extract code files, no LLM needed
412
+ dummyindex cluster-only ./my-project # rerun clustering on existing graph.json
413
+ ```
414
+
415
+ Works with any mix of file types:
416
+
417
+ | Type | Extensions | Extraction |
418
+ |------|-----------|------------|
419
+ | Code | `.py .js .ts .jsx .tsx .mjs .ejs .go .rs .java .c .h .cpp .hpp .cc .cxx .rb .cs .kt .kts .scala .php .blade.php .swift .lua .toc .zig .ps1 .ex .exs .m .mm .jl .vue .svelte .dart .v .sv` | AST via tree-sitter + call-graph (cross-file for all languages) + Java extends/implements + docstring/comment rationale |
420
+ | Docs | `.md .mdx .html .txt .rst` | Concepts + relationships + design rationale via Claude |
421
+ | Office | `.docx .xlsx` | Converted to markdown then extracted via Claude (requires `pip install dummyindex[office]`) |
422
+ | Papers | `.pdf` | Citation mining + concept extraction |
423
+ | Images | `.png .jpg .jpeg .webp .gif .svg` | Claude vision - screenshots, diagrams, any language |
424
+ | Video / Audio | `.mp4 .mov .mkv .webm .avi .m4v .mp3 .wav .m4a .ogg` | Transcribed locally with faster-whisper, transcript fed into Claude extraction (requires `pip install dummyindex[video]`) |
425
+ | YouTube / URLs | any video URL | Audio downloaded via yt-dlp, then same Whisper pipeline (requires `pip install dummyindex[video]`) |
426
+
427
+ ## Video and audio corpus
428
+
429
+ Drop video or audio files into your corpus folder alongside your code and docs — dummyindex picks them up automatically:
430
+
431
+ ```bash
432
+ pip install 'dummyindex[video]' # one-time setup
433
+ /dummyindex ./my-corpus # transcribes any video/audio files it finds
434
+ ```
435
+
436
+ Add a YouTube video (or any public video URL) directly:
437
+
438
+ ```bash
439
+ /dummyindex add <video-url>
440
+ ```
441
+
442
+ yt-dlp downloads audio-only (fast, small), Whisper transcribes it locally, and the transcript is fed into the same extraction pipeline as your other docs. Transcripts are cached in `dummyindex-out/transcripts/` so re-runs skip already-transcribed files.
443
+
444
+ For better accuracy on technical content, use a larger model:
445
+
446
+ ```bash
447
+ /dummyindex ./my-corpus --whisper-model medium
448
+ ```
449
+
450
+ Audio never leaves your machine. All transcription runs locally.
451
+
452
+ ## What you get
453
+
454
+ **God nodes** - highest-degree concepts (what everything connects through)
455
+
456
+ **Surprising connections** - ranked by composite score. Code-paper edges rank higher than code-code. Each result includes a plain-English why.
457
+
458
+ **Suggested questions** - 4-5 questions the graph is uniquely positioned to answer
459
+
460
+ **The "why"** - docstrings, inline comments (`# NOTE:`, `# IMPORTANT:`, `# HACK:`, `# WHY:`), and design rationale from docs are extracted as `rationale_for` nodes. Not just what the code does - why it was written that way.
461
+
462
+ **Confidence scores** - every INFERRED edge has a `confidence_score` (0.0-1.0). You know not just what was guessed but how confident the model was. EXTRACTED edges are always 1.0.
463
+
464
+ **Semantic similarity edges** - cross-file conceptual links with no structural connection. Two functions solving the same problem without calling each other, a class in code and a concept in a paper describing the same algorithm.
465
+
466
+ **Hyperedges** - group relationships connecting 3+ nodes that pairwise edges can't express. All classes implementing a shared protocol, all functions in an auth flow, all concepts from a paper section forming one idea.
467
+
468
+ **Token benchmark** - printed automatically after every run. On a mixed corpus (Karpathy repos + papers + images): **71.5x** fewer tokens per query vs reading raw files. The first run extracts and builds the graph (this costs tokens). Every subsequent query reads the compact graph instead of raw files — that's where the savings compound. The SHA256 cache means re-runs only re-process changed files.
469
+
470
+ **Auto-sync** (`--watch`) - run in a background terminal and the graph updates itself as your codebase changes. Code file saves trigger an instant rebuild (AST only, no LLM). When docs or images change, you are notified to run `--update` for the LLM re-pass.
471
+
472
+ **Git hooks** (`dummyindex hook install`) - installs post-commit and post-checkout hooks. Graph rebuilds automatically after every commit and every branch switch. If a rebuild fails, the hook exits with a non-zero code so git surfaces the error instead of silently continuing. No background process needed.
473
+
474
+ **Wiki** (`--wiki`) - Wikipedia-style markdown articles per community and god node, with an `index.md` entry point. Point any agent at `index.md` and it can navigate the knowledge base by reading files instead of parsing JSON.
475
+
476
+ **Structure graph** - `structure_graph.html` and `structure_graph.json` are generated for code corpora. They show the deterministic folder → file → class/function hierarchy plus cross-file relationships, which is usually the fastest entry point for codebase navigation.
477
+
478
+ ## Worked examples
479
+
480
+ | Corpus | Files | Reduction | Output |
481
+ |--------|-------|-----------|--------|
482
+ | Karpathy repos + 5 papers + 4 images | 52 | **71.5x** | [`worked/karpathy-repos/`](worked/karpathy-repos/) |
483
+ | dummyindex source + Transformer paper | 4 | **5.4x** | [`worked/mixed-corpus/`](worked/mixed-corpus/) |
484
+ | httpx (synthetic Python library) | 6 | ~1x | [`worked/httpx/`](worked/httpx/) |
485
+
486
+ Token reduction scales with corpus size. A 6-file corpus fits in a context window anyway, so the graph's value there is structural clarity, not compression. At 52 files (code + papers + images) you get 71x+. Each `worked/` folder has the raw input files and the actual output (`GRAPH_REPORT.md`, `graph.json`) so you can run it yourself and verify the numbers.
487
+
488
+ ## Privacy
489
+
490
+ dummyindex sends file contents to your AI coding assistant's underlying model API for semantic extraction of docs, papers, and images — Anthropic (Claude Code), OpenAI (Codex), or whichever provider your platform uses. Code files are processed locally via tree-sitter AST — the contents of code files never leave your machine. Video and audio files are transcribed locally with faster-whisper — audio never leaves your machine. No telemetry, usage tracking, or analytics of any kind. The only network calls are to your platform's model API during extraction, using your own API key.
491
+
492
+ ## Tech stack
493
+
494
+ NetworkX + Leiden (graspologic) + tree-sitter + vis.js. Semantic extraction via Claude (Claude Code), GPT-4 (Codex), or whichever model your platform runs. Video transcription via faster-whisper + yt-dlp (optional, `pip install dummyindex[video]`). No Neo4j required, no server, runs entirely locally.
495
+
496
+ ## Package layout
497
+
498
+ The `dummyindex/` package is organized by responsibility:
499
+
500
+ - `dummyindex/pipeline/` — detection, extraction, validation, build, and export
501
+ - `dummyindex/analysis/` — clustering, reporting, benchmarking, and wiki generation
502
+ - `dummyindex/runtime/` — watch mode, MCP serving, security, hooks, ingest, and transcription
503
+ - `dummyindex/skills/` — packaged platform skill markdown installed by `dummyindex install`
504
+
505
+ Public imports such as `dummyindex.pipeline.build`, `dummyindex.pipeline.detect`, and `dummyindex.runtime.watch` remain valid for backward compatibility.
506
+
507
+ <details>
508
+ <summary>Contributing</summary>
509
+
510
+ **Worked examples** are the most trust-building contribution. Run `/dummyindex` on a real corpus, save output to `worked/{slug}/`, write an honest `review.md` evaluating what the graph got right and wrong, and submit a PR.
511
+
512
+ **Extraction bugs** - open an issue with the input file, the cache entry (`dummyindex-out/cache/`), and what was missed or invented.
513
+
514
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for module responsibilities and how to add a language.
515
+
516
+ </details>