ultimate-pi 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (524) hide show
  1. package/.agents/skills/graphify/.graphify_version +1 -0
  2. package/.agents/skills/graphify/SKILL.md +1204 -0
  3. package/.agents/skills/wiki-autoresearch/SKILL.md +225 -97
  4. package/.agents/skills/wiki-autoresearch/references/program.md +28 -62
  5. package/.agents/skills/wiki-autoresearch/references/quality-sites.md +32 -0
  6. package/.env.example +5 -1
  7. package/.gitattributes +1 -0
  8. package/.github/workflows/publish-github-packages.yml +1 -1
  9. package/.pi/SYSTEM.md +72 -18
  10. package/.pi/agents/harness/adversary.md +32 -0
  11. package/.pi/agents/harness/evaluator.md +32 -0
  12. package/.pi/agents/harness/executor.md +34 -0
  13. package/.pi/agents/harness/meta-optimizer.md +33 -0
  14. package/.pi/agents/harness/planner.md +33 -0
  15. package/.pi/agents/harness/tie-breaker.md +35 -0
  16. package/.pi/agents/harness/trace-librarian.md +32 -0
  17. package/.pi/extensions/banner.png +0 -0
  18. package/.pi/extensions/budget-guard.ts +265 -0
  19. package/.pi/extensions/custom-footer.ts +194 -22
  20. package/.pi/extensions/custom-header.ts +47 -9
  21. package/.pi/extensions/debate-orchestrator.ts +479 -0
  22. package/.pi/extensions/harness-live-widget.ts +438 -0
  23. package/.pi/extensions/policy-gate.ts +349 -0
  24. package/.pi/extensions/review-integrity.ts +198 -0
  25. package/.pi/extensions/test-diff-integrity.ts +240 -0
  26. package/.pi/extensions/trace-recorder.ts +315 -0
  27. package/.pi/harness/README.md +23 -0
  28. package/.pi/harness/router/README.md +35 -0
  29. package/.pi/harness/router/apply-router-proposal.mjs +153 -0
  30. package/.pi/harness/router/propose-router-tuning.mjs +149 -0
  31. package/.pi/harness/specs/README.md +37 -0
  32. package/.pi/harness/specs/adversary-report.schema.json +53 -0
  33. package/.pi/harness/specs/budget-exhausted-event.schema.json +93 -0
  34. package/.pi/harness/specs/consensus-packet.schema.json +175 -0
  35. package/.pi/harness/specs/eval-verdict.schema.json +59 -0
  36. package/.pi/harness/specs/incident-record.schema.json +84 -0
  37. package/.pi/harness/specs/plan-packet.schema.json +90 -0
  38. package/.pi/harness/specs/round-result.schema.json +126 -0
  39. package/.pi/harness/specs/router-tuning-proposal.schema.json +114 -0
  40. package/.pi/harness/specs/run-trace.schema.json +107 -0
  41. package/.pi/lib/harness-ui-state.ts +311 -0
  42. package/.pi/mcp.json +4 -0
  43. package/.pi/model-router.json +93 -93
  44. package/.pi/prompts/graphify.md +23 -0
  45. package/.pi/prompts/harness-abort.md +41 -0
  46. package/.pi/prompts/harness-auto.md +83 -0
  47. package/.pi/prompts/harness-critic.md +52 -0
  48. package/.pi/prompts/harness-eval.md +51 -0
  49. package/.pi/prompts/harness-incident.md +51 -0
  50. package/.pi/prompts/harness-plan.md +64 -0
  51. package/.pi/prompts/harness-review.md +52 -0
  52. package/.pi/prompts/harness-router-tune.md +74 -0
  53. package/.pi/prompts/harness-run.md +59 -0
  54. package/.pi/prompts/harness-setup.md +316 -216
  55. package/.pi/prompts/harness-trace.md +51 -0
  56. package/.pi/prompts/wiki-autoresearch.md +9 -7
  57. package/.pi/prompts/wiki-save.md +20 -0
  58. package/.pi/skills/agent-router/SKILL.md +2 -4
  59. package/.pi/skills/ast-grep/SKILL.md +354 -0
  60. package/.pi/sounds/project-sounds.json +18 -24
  61. package/AGENTS.md +30 -0
  62. package/CHANGELOG.md +89 -0
  63. package/CONTRIBUTING.md +51 -1
  64. package/README.md +264 -20
  65. package/biome.json +8 -2
  66. package/lefthook.yml +3 -2
  67. package/node_modules/@sting8k/pi-vcc/README.md +200 -0
  68. package/node_modules/@sting8k/pi-vcc/index.ts +14 -0
  69. package/node_modules/@sting8k/pi-vcc/package.json +26 -0
  70. package/node_modules/@sting8k/pi-vcc/scripts/audit-sessions.ts +88 -0
  71. package/node_modules/@sting8k/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
  72. package/node_modules/@sting8k/pi-vcc/scripts/compare-before-after.ts +36 -0
  73. package/node_modules/@sting8k/pi-vcc/scripts/dump-branch-output.ts +20 -0
  74. package/node_modules/@sting8k/pi-vcc/src/commands/pi-vcc.ts +36 -0
  75. package/node_modules/@sting8k/pi-vcc/src/commands/vcc-recall.ts +65 -0
  76. package/node_modules/@sting8k/pi-vcc/src/core/brief.ts +381 -0
  77. package/node_modules/@sting8k/pi-vcc/src/core/build-sections.ts +79 -0
  78. package/node_modules/@sting8k/pi-vcc/src/core/content.ts +60 -0
  79. package/node_modules/@sting8k/pi-vcc/src/core/filter-noise.ts +42 -0
  80. package/node_modules/@sting8k/pi-vcc/src/core/format-recall.ts +27 -0
  81. package/node_modules/@sting8k/pi-vcc/src/core/format.ts +49 -0
  82. package/node_modules/@sting8k/pi-vcc/src/core/lineage.ts +26 -0
  83. package/node_modules/@sting8k/pi-vcc/src/core/load-messages.ts +41 -0
  84. package/node_modules/@sting8k/pi-vcc/src/core/normalize.ts +66 -0
  85. package/node_modules/@sting8k/pi-vcc/src/core/recall-scope.ts +14 -0
  86. package/node_modules/@sting8k/pi-vcc/src/core/render-entries.ts +55 -0
  87. package/node_modules/@sting8k/pi-vcc/src/core/report.ts +237 -0
  88. package/node_modules/@sting8k/pi-vcc/src/core/sanitize.ts +5 -0
  89. package/node_modules/@sting8k/pi-vcc/src/core/search-entries.ts +221 -0
  90. package/node_modules/@sting8k/pi-vcc/src/core/settings.ts +77 -0
  91. package/node_modules/@sting8k/pi-vcc/src/core/skill-collapse.ts +35 -0
  92. package/node_modules/@sting8k/pi-vcc/src/core/summarize.ts +157 -0
  93. package/node_modules/@sting8k/pi-vcc/src/core/tool-args.ts +14 -0
  94. package/node_modules/@sting8k/pi-vcc/src/details.ts +7 -0
  95. package/node_modules/@sting8k/pi-vcc/src/extract/commits.ts +69 -0
  96. package/node_modules/@sting8k/pi-vcc/src/extract/files.ts +80 -0
  97. package/node_modules/@sting8k/pi-vcc/src/extract/goals.ts +79 -0
  98. package/node_modules/@sting8k/pi-vcc/src/extract/preferences.ts +55 -0
  99. package/node_modules/@sting8k/pi-vcc/src/hooks/before-compact.ts +322 -0
  100. package/node_modules/@sting8k/pi-vcc/src/sections.ts +12 -0
  101. package/node_modules/@sting8k/pi-vcc/src/tools/recall.ts +109 -0
  102. package/node_modules/@sting8k/pi-vcc/src/types.ts +14 -0
  103. package/node_modules/@sting8k/pi-vcc/tests/before-compact-hook.test.ts +181 -0
  104. package/node_modules/@sting8k/pi-vcc/tests/before-compact.test.ts +140 -0
  105. package/node_modules/@sting8k/pi-vcc/tests/brief.test.ts +206 -0
  106. package/node_modules/@sting8k/pi-vcc/tests/build-sections.test.ts +59 -0
  107. package/node_modules/@sting8k/pi-vcc/tests/compile.test.ts +80 -0
  108. package/node_modules/@sting8k/pi-vcc/tests/content.test.ts +31 -0
  109. package/node_modules/@sting8k/pi-vcc/tests/extract-goals.test.ts +86 -0
  110. package/node_modules/@sting8k/pi-vcc/tests/extract-preferences.test.ts +30 -0
  111. package/node_modules/@sting8k/pi-vcc/tests/filter-noise.test.ts +61 -0
  112. package/node_modules/@sting8k/pi-vcc/tests/fixtures.ts +61 -0
  113. package/node_modules/@sting8k/pi-vcc/tests/format-recall.test.ts +30 -0
  114. package/node_modules/@sting8k/pi-vcc/tests/format.test.ts +62 -0
  115. package/node_modules/@sting8k/pi-vcc/tests/lineage.test.ts +33 -0
  116. package/node_modules/@sting8k/pi-vcc/tests/load-messages.test.ts +51 -0
  117. package/node_modules/@sting8k/pi-vcc/tests/normalize.test.ts +97 -0
  118. package/node_modules/@sting8k/pi-vcc/tests/real-sessions.test.ts +38 -0
  119. package/node_modules/@sting8k/pi-vcc/tests/recall-expand.test.ts +15 -0
  120. package/node_modules/@sting8k/pi-vcc/tests/recall-scope.test.ts +32 -0
  121. package/node_modules/@sting8k/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
  122. package/node_modules/@sting8k/pi-vcc/tests/render-entries.test.ts +62 -0
  123. package/node_modules/@sting8k/pi-vcc/tests/report.test.ts +44 -0
  124. package/node_modules/@sting8k/pi-vcc/tests/sanitize.test.ts +24 -0
  125. package/node_modules/@sting8k/pi-vcc/tests/search-entries.test.ts +144 -0
  126. package/node_modules/@sting8k/pi-vcc/tests/support/load-session.ts +23 -0
  127. package/node_modules/@sting8k/pi-vcc/tests/support/real-sessions.ts +51 -0
  128. package/package.json +15 -4
  129. package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
  130. package/scripts/index_youtube_urls.py +376 -0
  131. package/scripts/merge_graphify_corpora.py +398 -0
  132. package/scripts/regen_graphify_html.py +46 -0
  133. package/.agents/skills/defuddle/SKILL.md +0 -90
  134. package/.agents/skills/wiki/SKILL.md +0 -215
  135. package/.agents/skills/wiki/references/css-snippets.md +0 -122
  136. package/.agents/skills/wiki/references/frontmatter.md +0 -107
  137. package/.agents/skills/wiki/references/git-setup.md +0 -58
  138. package/.agents/skills/wiki/references/mcp-setup.md +0 -149
  139. package/.agents/skills/wiki/references/modes.md +0 -259
  140. package/.agents/skills/wiki/references/plugins.md +0 -96
  141. package/.agents/skills/wiki/references/rest-api.md +0 -124
  142. package/.agents/skills/wiki-fold/SKILL.md +0 -204
  143. package/.agents/skills/wiki-fold/references/fold-template.md +0 -133
  144. package/.agents/skills/wiki-ingest/SKILL.md +0 -288
  145. package/.agents/skills/wiki-lint/SKILL.md +0 -183
  146. package/.agents/skills/wiki-query/SKILL.md +0 -176
  147. package/.pi/agents/rethink.md +0 -140
  148. package/.pi/agents/wiki-ingest.md +0 -67
  149. package/.pi/agents/wiki-lint.md +0 -75
  150. package/.pi/internal/cursor-sdk-transcript-parser.ts +0 -59
  151. package/.pi/prompts/save.md +0 -16
  152. package/.pi/prompts/wiki.md +0 -23
  153. package/.pi/providers/cursor-sdk-provider.test.mjs +0 -476
  154. package/.pi/providers/cursor-sdk-provider.ts +0 -1085
  155. package/vault/AGENTS.md +0 -37
  156. package/vault/wiki/_templates/comparison.md +0 -39
  157. package/vault/wiki/_templates/concept.md +0 -40
  158. package/vault/wiki/_templates/decision.md +0 -21
  159. package/vault/wiki/_templates/entity.md +0 -32
  160. package/vault/wiki/_templates/flow.md +0 -14
  161. package/vault/wiki/_templates/module.md +0 -18
  162. package/vault/wiki/_templates/question.md +0 -31
  163. package/vault/wiki/_templates/source.md +0 -39
  164. package/vault/wiki/concepts/AST-Aware Code Chunking.md +0 -44
  165. package/vault/wiki/concepts/Build-Time Prompt Compilation.md +0 -107
  166. package/vault/wiki/concepts/Context Engine (AI Coding).md +0 -47
  167. package/vault/wiki/concepts/Context-Aware System Reminders.md +0 -61
  168. package/vault/wiki/concepts/Contextualized Text Embedding.md +0 -42
  169. package/vault/wiki/concepts/Contractor vs Employee AI Model.md +0 -55
  170. package/vault/wiki/concepts/Dual-Model Agent Architecture.md +0 -65
  171. package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +0 -43
  172. package/vault/wiki/concepts/Majority Vote Ensembling.md +0 -68
  173. package/vault/wiki/concepts/Meta-Harness.md +0 -16
  174. package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +0 -75
  175. package/vault/wiki/concepts/Prompt Enhancement.md +0 -90
  176. package/vault/wiki/concepts/Prompt Renderer.md +0 -89
  177. package/vault/wiki/concepts/Semantic Codebase Indexing.md +0 -67
  178. package/vault/wiki/concepts/additive-config-hierarchy.md +0 -16
  179. package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +0 -71
  180. package/vault/wiki/concepts/agent-browser-browser-automation.md +0 -99
  181. package/vault/wiki/concepts/agent-codebase-interface.md +0 -43
  182. package/vault/wiki/concepts/agent-harness-architecture.md +0 -67
  183. package/vault/wiki/concepts/agent-loop-detection-patterns.md +0 -133
  184. package/vault/wiki/concepts/agent-search-enforcement.md +0 -126
  185. package/vault/wiki/concepts/agent-skills-ecosystem.md +0 -74
  186. package/vault/wiki/concepts/agent-skills-pattern.md +0 -68
  187. package/vault/wiki/concepts/agentic-harness-context-enforcement.md +0 -91
  188. package/vault/wiki/concepts/agentic-harness.md +0 -34
  189. package/vault/wiki/concepts/agentic-orchestration-pipeline.md +0 -56
  190. package/vault/wiki/concepts/agentic-search-no-embeddings.md +0 -18
  191. package/vault/wiki/concepts/anthropic-context-engineering.md +0 -13
  192. package/vault/wiki/concepts/antigravity-agent-first-architecture.md +0 -61
  193. package/vault/wiki/concepts/ast-compression.md +0 -19
  194. package/vault/wiki/concepts/ast-truncation.md +0 -66
  195. package/vault/wiki/concepts/barrel-files.md +0 -37
  196. package/vault/wiki/concepts/browser-harness-agent.md +0 -41
  197. package/vault/wiki/concepts/browser-subagent-visual-verification.md +0 -82
  198. package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +0 -192
  199. package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +0 -161
  200. package/vault/wiki/concepts/codebase-to-context-ingestion.md +0 -46
  201. package/vault/wiki/concepts/codex-harness-innovations.md +0 -147
  202. package/vault/wiki/concepts/consensus-debate-flow.md +0 -17
  203. package/vault/wiki/concepts/consensus-debate.md +0 -206
  204. package/vault/wiki/concepts/content-addressed-spec-identity.md +0 -166
  205. package/vault/wiki/concepts/context-anxiety.md +0 -57
  206. package/vault/wiki/concepts/context-compression-techniques.md +0 -19
  207. package/vault/wiki/concepts/context-continuity.md +0 -22
  208. package/vault/wiki/concepts/context-drift-in-agents.md +0 -106
  209. package/vault/wiki/concepts/context-engineering.md +0 -62
  210. package/vault/wiki/concepts/context-folding.md +0 -67
  211. package/vault/wiki/concepts/context-mode.md +0 -38
  212. package/vault/wiki/concepts/cursor-harness-innovations.md +0 -107
  213. package/vault/wiki/concepts/deterministic-session-compaction.md +0 -79
  214. package/vault/wiki/concepts/drift-detection-unified.md +0 -296
  215. package/vault/wiki/concepts/execution-feedback-loop.md +0 -46
  216. package/vault/wiki/concepts/feedforward-feedback-harness.md +0 -60
  217. package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +0 -40
  218. package/vault/wiki/concepts/fork-safe-spec-storage.md +0 -89
  219. package/vault/wiki/concepts/fts5-sandbox.md +0 -19
  220. package/vault/wiki/concepts/fuzzy-edit-matching.md +0 -71
  221. package/vault/wiki/concepts/gemini-cli-architecture.md +0 -104
  222. package/vault/wiki/concepts/generator-evaluator-architecture.md +0 -64
  223. package/vault/wiki/concepts/guardian-agent-pattern.md +0 -67
  224. package/vault/wiki/concepts/harness-configuration-layers.md +0 -89
  225. package/vault/wiki/concepts/harness-control-frameworks.md +0 -155
  226. package/vault/wiki/concepts/harness-engineering-first-principles.md +0 -90
  227. package/vault/wiki/concepts/harness-h-formalism.md +0 -53
  228. package/vault/wiki/concepts/hybrid-code-search.md +0 -61
  229. package/vault/wiki/concepts/inline-post-edit-validation.md +0 -112
  230. package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +0 -110
  231. package/vault/wiki/concepts/lifecycle-hooks.md +0 -94
  232. package/vault/wiki/concepts/mcp-tool-routing.md +0 -102
  233. package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +0 -47
  234. package/vault/wiki/concepts/meta-agent-context-pruning.md +0 -151
  235. package/vault/wiki/concepts/model-adaptive-harness.md +0 -122
  236. package/vault/wiki/concepts/model-routing-agents.md +0 -101
  237. package/vault/wiki/concepts/monorepo-architecture.md +0 -45
  238. package/vault/wiki/concepts/multi-agent-specialization.md +0 -61
  239. package/vault/wiki/concepts/permission-subsystem.md +0 -16
  240. package/vault/wiki/concepts/pi-messenger-analysis.md +0 -243
  241. package/vault/wiki/concepts/pi-vscode-extension-landscape.md +0 -37
  242. package/vault/wiki/concepts/policy-engine-pattern.md +0 -78
  243. package/vault/wiki/concepts/progressive-disclosure-agents.md +0 -53
  244. package/vault/wiki/concepts/progressive-skill-disclosure.md +0 -17
  245. package/vault/wiki/concepts/provider-native-prompting.md +0 -203
  246. package/vault/wiki/concepts/quality-signal-sentrux.md +0 -37
  247. package/vault/wiki/concepts/repo-map-ranking.md +0 -42
  248. package/vault/wiki/concepts/result-monad-error-handling.md +0 -47
  249. package/vault/wiki/concepts/safety-defense-in-depth.md +0 -83
  250. package/vault/wiki/concepts/sandbox-os-enforcement.md +0 -18
  251. package/vault/wiki/concepts/selective-debate-routing.md +0 -70
  252. package/vault/wiki/concepts/self-evolving-harness.md +0 -60
  253. package/vault/wiki/concepts/sentrux-mcp-integration.md +0 -36
  254. package/vault/wiki/concepts/sentrux-rules-engine.md +0 -49
  255. package/vault/wiki/concepts/shell-pattern-compression.md +0 -24
  256. package/vault/wiki/concepts/skill-first-architecture.md +0 -166
  257. package/vault/wiki/concepts/structured-compaction.md +0 -78
  258. package/vault/wiki/concepts/subagent-orchestration.md +0 -17
  259. package/vault/wiki/concepts/subagent-worktree-isolation.md +0 -68
  260. package/vault/wiki/concepts/superpowers-methodology.md +0 -78
  261. package/vault/wiki/concepts/think-in-code.md +0 -73
  262. package/vault/wiki/concepts/ts-execution-layer.md +0 -100
  263. package/vault/wiki/concepts/typescript-strict-mode.md +0 -37
  264. package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +0 -53
  265. package/vault/wiki/concepts/verification-drift-detection.md +0 -19
  266. package/vault/wiki/consensus/consensus-records.md +0 -58
  267. package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +0 -122
  268. package/vault/wiki/decisions/2026-05-07-replace-lean-ctx-with-context-mode.md +0 -59
  269. package/vault/wiki/decisions/adr-008.md +0 -40
  270. package/vault/wiki/decisions/adr-009.md +0 -46
  271. package/vault/wiki/decisions/adr-010.md +0 -55
  272. package/vault/wiki/decisions/adr-011.md +0 -165
  273. package/vault/wiki/decisions/adr-012.md +0 -102
  274. package/vault/wiki/decisions/adr-013.md +0 -59
  275. package/vault/wiki/decisions/adr-014.md +0 -73
  276. package/vault/wiki/decisions/adr-015.md +0 -81
  277. package/vault/wiki/decisions/adr-016.md +0 -91
  278. package/vault/wiki/decisions/adr-017.md +0 -79
  279. package/vault/wiki/decisions/adr-018.md +0 -100
  280. package/vault/wiki/decisions/adr-019.md +0 -75
  281. package/vault/wiki/decisions/adr-020.md +0 -106
  282. package/vault/wiki/decisions/adr-021.md +0 -86
  283. package/vault/wiki/decisions/adr-022.md +0 -113
  284. package/vault/wiki/decisions/adr-023.md +0 -113
  285. package/vault/wiki/decisions/adr-024.md +0 -73
  286. package/vault/wiki/decisions/adr-025.md +0 -130
  287. package/vault/wiki/decisions/adr-026.md +0 -56
  288. package/vault/wiki/decisions/adr-027.md +0 -94
  289. package/vault/wiki/decisions/colocate-wiki.md +0 -34
  290. package/vault/wiki/entities/Anders Hejlsberg.md +0 -29
  291. package/vault/wiki/entities/Anthropic.md +0 -17
  292. package/vault/wiki/entities/Augment Code.md +0 -49
  293. package/vault/wiki/entities/Bjarne Stroustrup.md +0 -26
  294. package/vault/wiki/entities/Bolt.new (StackBlitz).md +0 -39
  295. package/vault/wiki/entities/Boris Cherny.md +0 -11
  296. package/vault/wiki/entities/Claude Code.md +0 -19
  297. package/vault/wiki/entities/Dennis Ritchie.md +0 -26
  298. package/vault/wiki/entities/Emergent Labs.md +0 -32
  299. package/vault/wiki/entities/Google Cloud.md +0 -16
  300. package/vault/wiki/entities/Guido van Rossum.md +0 -28
  301. package/vault/wiki/entities/Ken Thompson.md +0 -28
  302. package/vault/wiki/entities/Lee et al.md +0 -16
  303. package/vault/wiki/entities/Linus Torvalds.md +0 -28
  304. package/vault/wiki/entities/Lovable (company).md +0 -40
  305. package/vault/wiki/entities/Martin Fowler.md +0 -16
  306. package/vault/wiki/entities/Meng et al.md +0 -16
  307. package/vault/wiki/entities/OpenAI.md +0 -16
  308. package/vault/wiki/entities/Rocket.new.md +0 -38
  309. package/vault/wiki/entities/VILA-Lab.md +0 -15
  310. package/vault/wiki/entities/autodev-codebase.md +0 -18
  311. package/vault/wiki/entities/ck-tool.md +0 -59
  312. package/vault/wiki/entities/codesearch.md +0 -18
  313. package/vault/wiki/entities/disler-indydevdan.md +0 -33
  314. package/vault/wiki/entities/gsd-get-shit-done.md +0 -56
  315. package/vault/wiki/entities/javascript-runtimes.md +0 -48
  316. package/vault/wiki/entities/jesse-vincent.md +0 -38
  317. package/vault/wiki/entities/lean-ctx.md +0 -32
  318. package/vault/wiki/entities/opendev.md +0 -41
  319. package/vault/wiki/entities/ops-codegraph-tool.md +0 -18
  320. package/vault/wiki/entities/pi-coding-agent.md +0 -53
  321. package/vault/wiki/entities/sentrux.md +0 -54
  322. package/vault/wiki/entities/vgrep-tool.md +0 -57
  323. package/vault/wiki/entities/vitest.md +0 -41
  324. package/vault/wiki/flows/harness-wiki-pipeline.md +0 -204
  325. package/vault/wiki/hot.md +0 -932
  326. package/vault/wiki/index.md +0 -437
  327. package/vault/wiki/log.md +0 -422
  328. package/vault/wiki/meta/dashboard.md +0 -30
  329. package/vault/wiki/meta/lint-report-2026-04-30.md +0 -86
  330. package/vault/wiki/meta/lint-report-2026-05-02.md +0 -251
  331. package/vault/wiki/meta/overview.canvas +0 -43
  332. package/vault/wiki/modules/adversarial-verification.md +0 -57
  333. package/vault/wiki/modules/automated-observability.md +0 -54
  334. package/vault/wiki/modules/bench.md +0 -20
  335. package/vault/wiki/modules/extensions.md +0 -23
  336. package/vault/wiki/modules/grounding-checkpoints.md +0 -62
  337. package/vault/wiki/modules/harness-implementation-plan.md +0 -345
  338. package/vault/wiki/modules/harness-wiki-skill-mapping.md +0 -135
  339. package/vault/wiki/modules/harness.md +0 -86
  340. package/vault/wiki/modules/persistent-memory.md +0 -85
  341. package/vault/wiki/modules/schema-orchestration.md +0 -68
  342. package/vault/wiki/modules/skills.md +0 -27
  343. package/vault/wiki/modules/spec-hardening.md +0 -58
  344. package/vault/wiki/modules/structured-planning.md +0 -53
  345. package/vault/wiki/modules/think-in-code-enforcement.md +0 -153
  346. package/vault/wiki/modules/wiki-query-interface.md +0 -64
  347. package/vault/wiki/overview.md +0 -51
  348. package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +0 -87
  349. package/vault/wiki/questions/Research-sentrux-dev.md +0 -123
  350. package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +0 -164
  351. package/vault/wiki/questions/Research: Augment Code Context Engine.md +0 -244
  352. package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +0 -112
  353. package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +0 -209
  354. package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +0 -99
  355. package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +0 -107
  356. package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +0 -72
  357. package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +0 -166
  358. package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +0 -188
  359. package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +0 -120
  360. package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +0 -236
  361. package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +0 -95
  362. package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +0 -165
  363. package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +0 -216
  364. package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +0 -91
  365. package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +0 -88
  366. package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +0 -81
  367. package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +0 -71
  368. package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +0 -80
  369. package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +0 -72
  370. package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +0 -92
  371. package/vault/wiki/questions/Research: executor.sh Harness Integration.md +0 -170
  372. package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +0 -97
  373. package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +0 -80
  374. package/vault/wiki/questions/Research: pi-vcc.md +0 -113
  375. package/vault/wiki/questions/Research: semantic code search tools.md +0 -69
  376. package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +0 -73
  377. package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +0 -111
  378. package/vault/wiki/questions/mvp-implementation-blueprint.md +0 -552
  379. package/vault/wiki/questions/research-agent-first-codebase-exploration.md +0 -199
  380. package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +0 -142
  381. package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +0 -100
  382. package/vault/wiki/questions/research-wozcode-token-reduction.md +0 -67
  383. package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +0 -95
  384. package/vault/wiki/questions/resolved-context-window-economics.md +0 -167
  385. package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +0 -126
  386. package/vault/wiki/questions/resolved-mcp-tool-preference.md +0 -112
  387. package/vault/wiki/questions/resolved-small-model-meta-agents.md +0 -107
  388. package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +0 -95
  389. package/vault/wiki/sources/Auggie Context MCP Server.md +0 -63
  390. package/vault/wiki/sources/Augment Code Codacy AI Giants.md +0 -61
  391. package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +0 -49
  392. package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +0 -55
  393. package/vault/wiki/sources/Augment Context Engine Official.md +0 -71
  394. package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +0 -74
  395. package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +0 -58
  396. package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +0 -75
  397. package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +0 -85
  398. package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +0 -58
  399. package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +0 -100
  400. package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +0 -75
  401. package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +0 -107
  402. package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +0 -70
  403. package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +0 -88
  404. package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +0 -57
  405. package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +0 -53
  406. package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +0 -65
  407. package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +0 -83
  408. package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +0 -70
  409. package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +0 -58
  410. package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +0 -101
  411. package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +0 -100
  412. package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +0 -53
  413. package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +0 -70
  414. package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +0 -71
  415. package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +0 -89
  416. package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +0 -155
  417. package/vault/wiki/sources/Source: browser-harness CDP Harness.md +0 -126
  418. package/vault/wiki/sources/agent-drift-academic-paper.md +0 -79
  419. package/vault/wiki/sources/aider-repomap-tree-sitter.md +0 -42
  420. package/vault/wiki/sources/anthropic-compaction-api.md +0 -58
  421. package/vault/wiki/sources/anthropic-effective-harnesses.md +0 -42
  422. package/vault/wiki/sources/anthropic-prompt-best-practices.md +0 -100
  423. package/vault/wiki/sources/anthropic2026-harness-design.md +0 -63
  424. package/vault/wiki/sources/barrel-files-tkdodo.md +0 -38
  425. package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +0 -57
  426. package/vault/wiki/sources/bockeler2026-harness-engineering.md +0 -69
  427. package/vault/wiki/sources/cast-code-chunking-paper.md +0 -50
  428. package/vault/wiki/sources/ck-semantic-search.md +0 -78
  429. package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +0 -71
  430. package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +0 -50
  431. package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +0 -64
  432. package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +0 -70
  433. package/vault/wiki/sources/claude-context-editing-docs.md +0 -13
  434. package/vault/wiki/sources/cloudflare-codemode.md +0 -63
  435. package/vault/wiki/sources/code-chunk-library-supermemory.md +0 -63
  436. package/vault/wiki/sources/codeact-apple-2024.md +0 -62
  437. package/vault/wiki/sources/codex-dsc-rfc-8573.md +0 -41
  438. package/vault/wiki/sources/codex-open-source-agent-2026.md +0 -110
  439. package/vault/wiki/sources/coir-code-retrieval-benchmark.md +0 -51
  440. package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +0 -48
  441. package/vault/wiki/sources/context-folding-paper.md +0 -61
  442. package/vault/wiki/sources/context-mode-website.md +0 -63
  443. package/vault/wiki/sources/cursor-agent-best-practices-2026.md +0 -62
  444. package/vault/wiki/sources/cursor-fork-29b-2025.md +0 -50
  445. package/vault/wiki/sources/cursor-harness-april-2026.md +0 -76
  446. package/vault/wiki/sources/cursor-instant-apply-2024.md +0 -45
  447. package/vault/wiki/sources/cursor-shadow-workspace-2024.md +0 -52
  448. package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +0 -53
  449. package/vault/wiki/sources/cursor-vs-antigravity-2026.md +0 -51
  450. package/vault/wiki/sources/disler-pi-vs-claude-code.md +0 -69
  451. package/vault/wiki/sources/distill-deterministic-context-compression.md +0 -53
  452. package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +0 -48
  453. package/vault/wiki/sources/executor-rhyssullivan.md +0 -122
  454. package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +0 -125
  455. package/vault/wiki/sources/fan2025-imad.md +0 -60
  456. package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +0 -63
  457. package/vault/wiki/sources/gemini-3-prompting-guide.md +0 -78
  458. package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +0 -50
  459. package/vault/wiki/sources/gh-sub-issue-extension.md +0 -72
  460. package/vault/wiki/sources/github-fork-issues-discussion.md +0 -44
  461. package/vault/wiki/sources/github-issue-dependencies-docs.md +0 -49
  462. package/vault/wiki/sources/github-sub-issues-docs.md +0 -51
  463. package/vault/wiki/sources/gitingest.md +0 -91
  464. package/vault/wiki/sources/gitreverse.md +0 -63
  465. package/vault/wiki/sources/google-antigravity-official-blog.md +0 -47
  466. package/vault/wiki/sources/google-antigravity-wikipedia.md +0 -53
  467. package/vault/wiki/sources/gsd-codecentric-deep-dive.md +0 -57
  468. package/vault/wiki/sources/gsd-github-repo.md +0 -51
  469. package/vault/wiki/sources/gsd-hn-discussion.md +0 -59
  470. package/vault/wiki/sources/guido-python-design-philosophy.md +0 -56
  471. package/vault/wiki/sources/hejlsberg-7-learnings.md +0 -48
  472. package/vault/wiki/sources/ironclaw-drift-monitor.md +0 -80
  473. package/vault/wiki/sources/langsight-loop-detection.md +0 -80
  474. package/vault/wiki/sources/leanctx-website.md +0 -69
  475. package/vault/wiki/sources/lee2026-meta-harness.md +0 -59
  476. package/vault/wiki/sources/linux-kernel-coding-workflow.md +0 -50
  477. package/vault/wiki/sources/lou2026-autoharness.md +0 -53
  478. package/vault/wiki/sources/martin-fowler-harness-engineering.md +0 -73
  479. package/vault/wiki/sources/mcp-architecture-docs.md +0 -13
  480. package/vault/wiki/sources/meng2026-agent-harness-survey.md +0 -79
  481. package/vault/wiki/sources/mindstudio-four-agent-types.md +0 -68
  482. package/vault/wiki/sources/ms-chat-history-management.md +0 -13
  483. package/vault/wiki/sources/openai-prompt-guidance.md +0 -104
  484. package/vault/wiki/sources/openclaw-session-pruning.md +0 -13
  485. package/vault/wiki/sources/opencode-dcp.md +0 -13
  486. package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +0 -79
  487. package/vault/wiki/sources/openhands-platform.md +0 -39
  488. package/vault/wiki/sources/oss-guide-codebase-exploration.md +0 -53
  489. package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +0 -102
  490. package/vault/wiki/sources/pi-context-prune-github-repo.md +0 -38
  491. package/vault/wiki/sources/pi-mono-compaction-docs.md +0 -38
  492. package/vault/wiki/sources/pi-omni-compact-github-repo.md +0 -50
  493. package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +0 -45
  494. package/vault/wiki/sources/pi-vcc-github-repo.md +0 -69
  495. package/vault/wiki/sources/pi-vscode-marketplace.md +0 -41
  496. package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +0 -39
  497. package/vault/wiki/sources/py-tree-sitter.md +0 -13
  498. package/vault/wiki/sources/sentrux-dev-landing.md +0 -40
  499. package/vault/wiki/sources/sentrux-docs-pro-architecture.md +0 -75
  500. package/vault/wiki/sources/sentrux-docs-quality-signal.md +0 -46
  501. package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +0 -57
  502. package/vault/wiki/sources/sentrux-docs-rules-engine.md +0 -58
  503. package/vault/wiki/sources/sentrux-github-repo.md +0 -56
  504. package/vault/wiki/sources/superpowers-github-repo.md +0 -56
  505. package/vault/wiki/sources/superpowers-release-blog.md +0 -54
  506. package/vault/wiki/sources/superpowers-termdock-analysis.md +0 -45
  507. package/vault/wiki/sources/swe-agent-aci.md +0 -42
  508. package/vault/wiki/sources/swe-bench.md +0 -45
  509. package/vault/wiki/sources/swe-pruner-context-pruning.md +0 -13
  510. package/vault/wiki/sources/think-in-code-blog.md +0 -48
  511. package/vault/wiki/sources/tree-sitter-docs.md +0 -13
  512. package/vault/wiki/sources/ts-best-practices-2025-devto.md +0 -42
  513. package/vault/wiki/sources/ts-folder-structure-mingyang.md +0 -58
  514. package/vault/wiki/sources/ts-monorepo-koerselman.md +0 -44
  515. package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +0 -52
  516. package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +0 -42
  517. package/vault/wiki/sources/ts-strict-mode-rishikc.md +0 -43
  518. package/vault/wiki/sources/unix-philosophy.md +0 -48
  519. package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +0 -39
  520. package/vault/wiki/sources/vectara-guardian-agents.md +0 -79
  521. package/vault/wiki/sources/vgrep-semantic-search.md +0 -76
  522. package/vault/wiki/sources/vitest-official.md +0 -41
  523. package/vault/wiki/sources/vscode-pi-community-extension.md +0 -40
  524. package/vault/wiki/sources/wozcode.md +0 -79
@@ -0,0 +1,1204 @@
1
+ ---
2
+ name: graphify
3
+ description: any input (code, docs, papers, images, video) → knowledge graph → clustered communities → HTML + JSON + GRAPH_REPORT.md
4
+ ---
5
+
6
+ # /graphify
7
+
8
+ Deprecated mirror: canonical source is `.pi/skills/graphify/SKILL.md`.
9
+ Keep this copy aligned with the canonical version until consumers migrate.
10
+
11
+ Turn any folder of files into a navigable knowledge graph with community detection, an honest audit trail, and three outputs: interactive HTML, GraphRAG-ready JSON, and a plain-language GRAPH_REPORT.md.
12
+
13
+ ## Usage
14
+
15
+ ```
16
+ /graphify # full pipeline on current directory → Obsidian vault
17
+ /graphify <path> # full pipeline on specific path
18
+ /graphify <path> --mode deep # thorough extraction, richer INFERRED edges
19
+ /graphify <path> --update # incremental - re-extract only new/changed files
20
+ /graphify <path> --cluster-only # rerun clustering on existing graph
21
+ /graphify <path> --no-viz # skip visualization, just report + JSON
22
+ /graphify <path> --html # (HTML is generated by default - this flag is a no-op)
23
+ /graphify <path> --svg # also export graph.svg (embeds in Notion, GitHub)
24
+ /graphify <path> --graphml # export graph.graphml (Gephi, yEd)
25
+ /graphify <path> --neo4j # generate graphify-out/cypher.txt for Neo4j
26
+ /graphify <path> --neo4j-push bolt://localhost:7687 # push directly to Neo4j
27
+ /graphify <path> --mcp # start MCP stdio server for agent access
28
+ /graphify <path> --watch # watch folder, auto-rebuild on code changes (no LLM needed)
29
+ /graphify add <url> # fetch URL, save to ./raw, update graph
30
+ /graphify add <url> --author "Name" # tag who wrote it
31
+ /graphify add <url> --contributor "Name" # tag who added it to the corpus
32
+ /graphify query "<question>" # BFS traversal - broad context
33
+ /graphify query "<question>" --dfs # DFS - trace a specific path
34
+ /graphify query "<question>" --budget 1500 # cap answer at N tokens
35
+ /graphify path "AuthModule" "Database" # shortest path between two concepts
36
+ /graphify explain "SwinTransformer" # plain-language explanation of a node
37
+ ```
38
+
39
+ ## What graphify is for
40
+
41
+ graphify is built around Andrej Karpathy's /raw folder workflow: drop anything into a folder - papers, tweets, screenshots, code, notes - and get a structured knowledge graph that shows you what you didn't know was connected.
42
+
43
+ Three things it does that your AI assistant alone cannot:
44
+ 1. **Persistent graph** - relationships are stored in `graphify-out/graph.json` and survive across sessions. Ask questions weeks later without re-reading everything.
45
+ 2. **Honest audit trail** - every edge is tagged EXTRACTED, INFERRED, or AMBIGUOUS. You know what was found vs invented.
46
+ 3. **Cross-document surprise** - community detection finds connections between concepts in different files that you would never think to ask about directly.
47
+
48
+ Use it for:
49
+ - A codebase you're new to (understand architecture before touching anything)
50
+ - A reading list (papers + tweets + notes → one navigable graph)
51
+ - A research corpus (citation graph + concept graph in one)
52
+ - Your personal /raw folder (drop everything in, let it grow, query it)
53
+
54
+ ## What You Must Do When Invoked
55
+
56
+ If no path was given, use `.` (current directory). Do not ask the user for a path.
57
+
58
+ Follow these steps in order. Do not skip steps.
59
+
60
+ ### Step 1 - Ensure graphify is installed
61
+
62
+ ```bash
63
+ # Install via uv tool if not already available
64
+ command -v graphify >/dev/null 2>&1 || uv tool install graphify
65
+ mkdir -p graphify-out
66
+ # Get the Python interpreter uv uses for the tool
67
+ PYTHON=$(uv tool run graphify -- -c "import sys; print(sys.executable)" 2>/dev/null || echo "python3")
68
+ "$PYTHON" -c "import graphify" 2>/dev/null || { uv tool install graphify && "$PYTHON" -c "import graphify"; }
69
+ # Write interpreter path for all subsequent steps
70
+ "$PYTHON" -c "import sys; open('.graphify_python', 'w').write(sys.executable); open('graphify-out/.graphify_python', 'w').write(sys.executable)"
71
+ ```
72
+
73
+ If the import succeeds, print nothing and move straight to Step 2.
74
+
75
+ **Every subsequent bash block invokes the interpreter via `$(cat .graphify_python)` — the path written in Step 1. Do not substitute a bare `python3`.**
76
+
77
+ ### Step 2 - Detect files
78
+
79
+ ```bash
80
+ $(cat .graphify_python) -c "
81
+ import json
82
+ from graphify.detect import detect
83
+ from pathlib import Path
84
+ result = detect(Path('INPUT_PATH'))
85
+ print(json.dumps(result))
86
+ " > .graphify_detect.json
87
+ ```
88
+
89
+ Replace INPUT_PATH with the actual path the user provided. Do NOT cat or print the JSON - read it silently and present a clean summary instead:
90
+
91
+ ```
92
+ Corpus: X files · ~Y words
93
+ code: N files (.py .ts .go ...)
94
+ docs: N files (.md .txt ...)
95
+ papers: N files (.pdf ...)
96
+ images: N files
97
+ video: N files (.mp4 .mp3 ...)
98
+ ```
99
+
100
+ Omit any category with 0 files from the summary.
101
+
102
+ Then act on it:
103
+ - If `total_files` is 0: stop with "No supported files found in [path]."
104
+ - If `skipped_sensitive` is non-empty: mention file count skipped, not the file names.
105
+ - If `total_words` > 2,000,000 OR `total_files` > 200: show the warning and the top 5 subdirectories by file count, then ask which subfolder to run on. Wait for the user's answer before proceeding.
106
+ - Otherwise: proceed directly to Step 2.5 if video files were detected, or Step 3 if not.
107
+
108
+ ### Step 2.5 - Transcribe video / audio files (only if video files detected)
109
+
110
+ Skip this step entirely if `detect` returned zero `video` files.
111
+
112
+ Video and audio files cannot be read directly. Transcribe them to text first, then treat the transcripts as doc files in Step 3.
113
+
114
+ **Strategy:** Read the god nodes from the detect output or analysis file. You are already a language model - write a one-sentence domain hint yourself from those labels. Then pass it to Whisper as the initial prompt. No separate API call needed.
115
+
116
+ **However**, if the corpus has *only* video files and no other docs/code, use the generic fallback prompt: `"Use proper punctuation and paragraph breaks."`
117
+
118
+ **Step 1 - Write the Whisper prompt yourself.**
119
+
120
+ Read the top god node labels from detect output or analysis, then compose a short domain hint sentence, for example:
121
+
122
+ - Labels: `transformer, attention, encoder, decoder` -> `"Machine learning research on transformer architectures and attention mechanisms. Use proper punctuation and paragraph breaks."`
123
+ - Labels: `kubernetes, deployment, pod, helm` -> `"DevOps discussion about Kubernetes deployments and Helm charts. Use proper punctuation and paragraph breaks."`
124
+
125
+ Set it as `GRAPHIFY_WHISPER_PROMPT` in the environment before running the transcription command.
126
+
127
+ **Step 2 - Transcribe:**
128
+
129
+ ```bash
130
+ $(cat graphify-out/.graphify_python) -c "
131
+ import json, os
132
+ from pathlib import Path
133
+ from graphify.transcribe import transcribe_all
134
+
135
+ detect = json.loads(Path('.graphify_detect.json').read_text())
136
+ video_files = detect.get('files', {}).get('video', [])
137
+ prompt = os.environ.get('GRAPHIFY_WHISPER_PROMPT', 'Use proper punctuation and paragraph breaks.')
138
+
139
+ transcript_paths = transcribe_all(video_files, initial_prompt=prompt)
140
+ print(json.dumps(transcript_paths))
141
+ " > graphify-out/.graphify_transcripts.json
142
+ ```
143
+
144
+ After transcription:
145
+ - Read the transcript paths from `graphify-out/.graphify_transcripts.json`
146
+ - Add them to the docs list before dispatching semantic subagents in Step 3B
147
+ - Print how many transcripts were created: `Transcribed N video file(s) -> treating as docs`
148
+ - If transcription fails for a file, print a warning and continue with the rest
149
+
150
+ **Whisper model:** Default is `base`. If the user passed `--whisper-model <name>`, set `GRAPHIFY_WHISPER_MODEL=<name>` in the environment before running the command above.
151
+
152
+ ### Step 3 - Extract entities and relationships
153
+
154
+ **Before starting:** note whether `--mode deep` was given. You must pass `DEEP_MODE=true` to every subagent in Step B2 if it was. Track this from the original invocation - do not lose it.
155
+
156
+ This step has two parts: **structural extraction** (deterministic, free) and **semantic extraction** (your AI model, costs tokens).
157
+
158
+ **Run Part A (AST) and Part B (semantic) in parallel. Dispatch all semantic subagents AND start AST extraction in the same message. Both can run simultaneously since they operate on different file types. Merge results in Part C as before.**
159
+
160
+ Note: Parallelizing AST + semantic saves 5-15s on large corpora. AST is deterministic and fast; start it while subagents are processing docs/papers.
161
+
162
+ #### Part A - Structural extraction for code files
163
+
164
+ For any code files detected, run AST extraction in parallel with Part B subagents:
165
+
166
+ ```bash
167
+ $(cat .graphify_python) -c "
168
+ import sys, json
169
+ from graphify.extract import collect_files, extract
170
+ from pathlib import Path
171
+ import json
172
+
173
+ code_files = []
174
+ detect = json.loads(Path('.graphify_detect.json').read_text())
175
+ for f in detect.get('files', {}).get('code', []):
176
+ code_files.extend(collect_files(Path(f)) if Path(f).is_dir() else [Path(f)])
177
+
178
+ if code_files:
179
+ result = extract(code_files)
180
+ Path('.graphify_ast.json').write_text(json.dumps(result, indent=2))
181
+ print(f'AST: {len(result[\"nodes\"])} nodes, {len(result[\"edges\"])} edges')
182
+ else:
183
+ Path('.graphify_ast.json').write_text(json.dumps({'nodes':[],'edges':[],'input_tokens':0,'output_tokens':0}))
184
+ print('No code files - skipping AST extraction')
185
+ "
186
+ ```
187
+
188
+ #### Part B - Semantic extraction (parallel subagents)
189
+
190
+ **Fast path:** If detection found zero docs, papers, and images (code-only corpus), skip Part B entirely and go straight to Part C. AST handles code - there is nothing for semantic subagents to do.
191
+
192
+ > **OpenClaw platform:** Multi-agent support is still early on OpenClaw. Extraction runs sequentially — you read and extract each file yourself. This is slower than parallel platforms but fully reliable.
193
+
194
+ Print: `"Semantic extraction: N files (sequential — OpenClaw)"`
195
+
196
+ **Step B0 - Check extraction cache first**
197
+
198
+ Before dispatching any subagents, check which files already have cached extraction results:
199
+
200
+ ```bash
201
+ $(cat .graphify_python) -c "
202
+ import json
203
+ from graphify.cache import check_semantic_cache
204
+ from pathlib import Path
205
+
206
+ detect = json.loads(Path('.graphify_detect.json').read_text())
207
+ all_files = [f for files in detect['files'].values() for f in files]
208
+
209
+ cached_nodes, cached_edges, cached_hyperedges, uncached = check_semantic_cache(all_files)
210
+
211
+ if cached_nodes or cached_edges or cached_hyperedges:
212
+ Path('.graphify_cached.json').write_text(json.dumps({'nodes': cached_nodes, 'edges': cached_edges, 'hyperedges': cached_hyperedges}))
213
+ Path('.graphify_uncached.txt').write_text('\n'.join(uncached))
214
+ print(f'Cache: {len(all_files)-len(uncached)} files hit, {len(uncached)} files need extraction')
215
+ "
216
+ ```
217
+
218
+ Only dispatch subagents for files listed in `.graphify_uncached.txt`. If all files are cached, skip to Part C directly.
219
+
220
+ **Step B1 - Split into chunks**
221
+
222
+ Load files from `.graphify_uncached.txt`. Split into chunks of 20-25 files each. Each image gets its own chunk (vision needs separate context). When splitting, group files from the same directory together so related artifacts land in the same chunk and cross-file relationships are more likely to be extracted.
223
+
224
+ **Step B2 - Sequential extraction (OpenClaw)**
225
+
226
+ Process each file one at a time. For each file:
227
+
228
+ 1. Read the file contents
229
+ 2. Extract nodes, edges, and hyperedges applying the same rules:
230
+ - EXTRACTED: relationship explicit in source (import, call, citation)
231
+ - INFERRED: reasonable inference (shared structure, implied dependency)
232
+ - AMBIGUOUS: uncertain — flag it, do not omit
233
+ - Code files: semantic edges AST cannot find. Do not re-extract imports.
234
+ - Doc/paper files: named concepts, entities, citations. Store rationale (WHY decisions were made) as a `rationale` attribute on the relevant node, not as a separate node. Use `file_type:"rationale"` for concept-like nodes (ideas, principles, mechanisms). Do NOT invent file_types like `concept`. When adding `calls` edges: source is caller, target is callee.
235
+ - Image files: use vision — understand what the image IS, not just OCR
236
+ - DEEP_MODE (if --mode deep): be aggressive with INFERRED edges
237
+ - Semantic similarity: if two concepts solve the same problem without a structural link, add `semantically_similar_to` INFERRED edge (confidence 0.6-0.9). Non-obvious cross-file links only.
238
+ - Hyperedges: if 3+ nodes share a concept/flow not captured by pairwise edges, add a hyperedge. Max 3 per file.
239
+ - confidence_score REQUIRED on every edge: EXTRACTED=1.0, INFERRED=0.6-0.9 (reason individually), AMBIGUOUS=0.1-0.3
240
+ 3. Accumulate results across all files
241
+
242
+ Schema for each file's output:
243
+ {"nodes":[{"id":"filestem_entityname","label":"Human Readable Name","file_type":"code|document|paper|image|rationale","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0}
244
+
245
+ After processing all files, write the accumulated result to `.graphify_semantic_new.json`.
246
+
247
+ **Step B3 - Cache and merge**
248
+
249
+ For the accumulated result:
250
+
251
+ If more than half the chunks failed, stop and tell the user.
252
+
253
+ Merge all chunk files into `.graphify_semantic_new.json`. **After each Agent call completes, read the real token counts from the Agent tool result's `usage` field and write them back into the chunk JSON before merging** — the chunk JSON itself always has placeholder zeros. Then run:
254
+ ```bash
255
+ $(cat graphify-out/.graphify_python) -c "
256
+ import json, glob
257
+ from pathlib import Path
258
+
259
+ chunks = sorted(glob.glob('graphify-out/.graphify_chunk_*.json'))
260
+ all_nodes, all_edges, all_hyperedges = [], [], []
261
+ total_in, total_out = 0, 0
262
+ for c in chunks:
263
+ d = json.loads(Path(c).read_text())
264
+ all_nodes += d.get('nodes', [])
265
+ all_edges += d.get('edges', [])
266
+ all_hyperedges += d.get('hyperedges', [])
267
+ total_in += d.get('input_tokens', 0)
268
+ total_out += d.get('output_tokens', 0)
269
+ Path('.graphify_semantic_new.json').write_text(json.dumps({
270
+ 'nodes': all_nodes, 'edges': all_edges, 'hyperedges': all_hyperedges,
271
+ 'input_tokens': total_in, 'output_tokens': total_out,
272
+ }, indent=2))
273
+ print(f'Merged {len(chunks)} chunks: {total_in:,} in / {total_out:,} out tokens')
274
+ "
275
+ ```
276
+
277
+ Save new results to cache:
278
+ ```bash
279
+ $(cat .graphify_python) -c "
280
+ import json
281
+ from graphify.cache import save_semantic_cache
282
+ from pathlib import Path
283
+
284
+ new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
285
+ saved = save_semantic_cache(new.get('nodes', []), new.get('edges', []), new.get('hyperedges', []))
286
+ print(f'Cached {saved} files')
287
+ "
288
+ ```
289
+
290
+ Merge cached + new results into `.graphify_semantic.json`:
291
+ ```bash
292
+ $(cat .graphify_python) -c "
293
+ import json
294
+ from pathlib import Path
295
+
296
+ cached = json.loads(Path('.graphify_cached.json').read_text()) if Path('.graphify_cached.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
297
+ new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
298
+
299
+ all_nodes = cached['nodes'] + new.get('nodes', [])
300
+ all_edges = cached['edges'] + new.get('edges', [])
301
+ all_hyperedges = cached.get('hyperedges', []) + new.get('hyperedges', [])
302
+ seen = set()
303
+ deduped = []
304
+ for n in all_nodes:
305
+ if n['id'] not in seen:
306
+ seen.add(n['id'])
307
+ deduped.append(n)
308
+
309
+ merged = {
310
+ 'nodes': deduped,
311
+ 'edges': all_edges,
312
+ 'hyperedges': all_hyperedges,
313
+ 'input_tokens': new.get('input_tokens', 0),
314
+ 'output_tokens': new.get('output_tokens', 0),
315
+ }
316
+ Path('.graphify_semantic.json').write_text(json.dumps(merged, indent=2))
317
+ print(f'Extraction complete - {len(deduped)} nodes, {len(all_edges)} edges ({len(cached[\"nodes\"])} from cache, {len(new.get(\"nodes\",[]))} new)')
318
+ "
319
+ ```
320
+ Clean up temp files: `rm -f .graphify_cached.json .graphify_uncached.txt .graphify_semantic_new.json`
321
+
322
+ #### Part C - Merge AST + semantic into final extraction
323
+
324
+ ```bash
325
+ $(cat .graphify_python) -c "
326
+ import sys, json
327
+ from pathlib import Path
328
+
329
+ ast = json.loads(Path('.graphify_ast.json').read_text())
330
+ sem = json.loads(Path('.graphify_semantic.json').read_text())
331
+
332
+ # Merge: AST nodes first, semantic nodes deduplicated by id
333
+ seen = {n['id'] for n in ast['nodes']}
334
+ merged_nodes = list(ast['nodes'])
335
+ for n in sem['nodes']:
336
+ if n['id'] not in seen:
337
+ merged_nodes.append(n)
338
+ seen.add(n['id'])
339
+
340
+ merged_edges = ast['edges'] + sem['edges']
341
+ merged_hyperedges = sem.get('hyperedges', [])
342
+ merged = {
343
+ 'nodes': merged_nodes,
344
+ 'edges': merged_edges,
345
+ 'hyperedges': merged_hyperedges,
346
+ 'input_tokens': sem.get('input_tokens', 0),
347
+ 'output_tokens': sem.get('output_tokens', 0),
348
+ }
349
+ Path('.graphify_extract.json').write_text(json.dumps(merged, indent=2))
350
+ total = len(merged_nodes)
351
+ edges = len(merged_edges)
352
+ print(f'Merged: {total} nodes, {edges} edges ({len(ast[\"nodes\"])} AST + {len(sem[\"nodes\"])} semantic)')
353
+ "
354
+ ```
355
+
356
+ ### Step 4 - Build graph, cluster, analyze, generate outputs
357
+
358
+ ```bash
359
+ mkdir -p graphify-out
360
+ $(cat .graphify_python) -c "
361
+ import sys, json
362
+ from graphify.build import build_from_json
363
+ from graphify.cluster import cluster, score_all
364
+ from graphify.analyze import god_nodes, surprising_connections, suggest_questions
365
+ from graphify.report import generate
366
+ from graphify.export import to_json
367
+ from pathlib import Path
368
+
369
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
370
+ detection = json.loads(Path('.graphify_detect.json').read_text())
371
+
372
+ G = build_from_json(extraction)
373
+ communities = cluster(G)
374
+ cohesion = score_all(G, communities)
375
+ tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)}
376
+ gods = god_nodes(G)
377
+ surprises = surprising_connections(G, communities)
378
+ labels = {cid: 'Community ' + str(cid) for cid in communities}
379
+ # Placeholder questions - regenerated with real labels in Step 5
380
+ questions = suggest_questions(G, communities, labels)
381
+
382
+ report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, 'INPUT_PATH', suggested_questions=questions)
383
+ Path('graphify-out/GRAPH_REPORT.md').write_text(report)
384
+ to_json(G, communities, 'graphify-out/graph.json')
385
+
386
+ analysis = {
387
+ 'communities': {str(k): v for k, v in communities.items()},
388
+ 'cohesion': {str(k): v for k, v in cohesion.items()},
389
+ 'gods': gods,
390
+ 'surprises': surprises,
391
+ 'questions': questions,
392
+ }
393
+ Path('graphify-out/.graphify_analysis.json').write_text(json.dumps(analysis, indent=2))
394
+ if G.number_of_nodes() == 0:
395
+ print('ERROR: Graph is empty - extraction produced no nodes.')
396
+ print('Possible causes: all files were skipped, binary-only corpus, or extraction failed.')
397
+ raise SystemExit(1)
398
+ print(f'Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {len(communities)} communities')
399
+ "
400
+ ```
401
+
402
+ If this step prints `ERROR: Graph is empty`, stop and tell the user what happened - do not proceed to labeling or visualization.
403
+
404
+ Replace INPUT_PATH with the actual path.
405
+
406
+ ### Step 5 - Label communities
407
+
408
+ Read `graphify-out/.graphify_analysis.json`. For each community key, look at its node labels and write a 2-5 word plain-language name (e.g. "Attention Mechanism", "Training Pipeline", "Data Loading").
409
+
410
+ Then regenerate the report and save the labels for the visualizer:
411
+
412
+ ```bash
413
+ $(cat .graphify_python) -c "
414
+ import sys, json
415
+ from graphify.build import build_from_json
416
+ from graphify.cluster import score_all
417
+ from graphify.analyze import god_nodes, surprising_connections, suggest_questions
418
+ from graphify.report import generate
419
+ from pathlib import Path
420
+
421
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
422
+ detection = json.loads(Path('.graphify_detect.json').read_text())
423
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
424
+
425
+ G = build_from_json(extraction)
426
+ communities = {int(k): v for k, v in analysis['communities'].items()}
427
+ cohesion = {int(k): v for k, v in analysis['cohesion'].items()}
428
+ tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)}
429
+
430
+ # LABELS - replace these with the names you chose above
431
+ labels = LABELS_DICT
432
+
433
+ # Regenerate questions with real community labels (labels affect question phrasing)
434
+ questions = suggest_questions(G, communities, labels)
435
+
436
+ report = generate(G, communities, cohesion, labels, analysis['gods'], analysis['surprises'], detection, tokens, 'INPUT_PATH', suggested_questions=questions)
437
+ Path('graphify-out/GRAPH_REPORT.md').write_text(report)
438
+ Path('graphify-out/.graphify_labels.json').write_text(json.dumps({str(k): v for k, v in labels.items()}))
439
+ print('Report updated with community labels')
440
+ "
441
+ ```
442
+
443
+ Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`).
444
+ Replace INPUT_PATH with the actual path.
445
+
446
+ ### Step 6 - Generate Obsidian vault (opt-in) + HTML
447
+
448
+ **Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was explicitly given** — skip it otherwise, it generates one file per node.
449
+
450
+ If `--obsidian` was given:
451
+
452
+ ```bash
453
+ $(cat .graphify_python) -c "
454
+ import sys, json
455
+ from graphify.build import build_from_json
456
+ from graphify.export import to_obsidian, to_canvas
457
+ from pathlib import Path
458
+
459
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
460
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
461
+ labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
462
+
463
+ G = build_from_json(extraction)
464
+ communities = {int(k): v for k, v in analysis['communities'].items()}
465
+ cohesion = {int(k): v for k, v in analysis['cohesion'].items()}
466
+ labels = {int(k): v for k, v in labels_raw.items()}
467
+
468
+ n = to_obsidian(G, communities, 'graphify-out/obsidian', community_labels=labels or None, cohesion=cohesion)
469
+ print(f'Obsidian vault: {n} notes in graphify-out/obsidian/')
470
+
471
+ to_canvas(G, communities, 'graphify-out/obsidian/graph.canvas', community_labels=labels or None)
472
+ print('Canvas: graphify-out/obsidian/graph.canvas - open in Obsidian for structured community layout')
473
+ print()
474
+ print('Open graphify-out/obsidian/ as a vault in Obsidian.')
475
+ print(' Graph view - nodes colored by community (set automatically)')
476
+ print(' graph.canvas - structured layout with communities as groups')
477
+ print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries')
478
+ "
479
+ ```
480
+
481
+ Generate the HTML graph (always, unless `--no-viz`):
482
+
483
+ ```bash
484
+ $(cat .graphify_python) -c "
485
+ import sys, json
486
+ from graphify.build import build_from_json
487
+ from graphify.export import to_html
488
+ from pathlib import Path
489
+
490
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
491
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
492
+ labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
493
+
494
+ G = build_from_json(extraction)
495
+ communities = {int(k): v for k, v in analysis['communities'].items()}
496
+ labels = {int(k): v for k, v in labels_raw.items()}
497
+
498
+ if G.number_of_nodes() > 5000:
499
+ print(f'Graph has {G.number_of_nodes()} nodes - too large for HTML viz. Use Obsidian vault instead.')
500
+ else:
501
+ to_html(G, communities, 'graphify-out/graph.html', community_labels=labels or None)
502
+ print('graph.html written - open in any browser, no server needed')
503
+ "
504
+ ```
505
+
506
+ ### Step 7 - Neo4j export (only if --neo4j or --neo4j-push flag)
507
+
508
+ **If `--neo4j`** - generate a Cypher file for manual import:
509
+
510
+ ```bash
511
+ $(cat .graphify_python) -c "
512
+ import sys, json
513
+ from graphify.build import build_from_json
514
+ from graphify.export import to_cypher
515
+ from pathlib import Path
516
+
517
+ G = build_from_json(json.loads(Path('.graphify_extract.json').read_text()))
518
+ to_cypher(G, 'graphify-out/cypher.txt')
519
+ print('cypher.txt written - import with: cypher-shell < graphify-out/cypher.txt')
520
+ "
521
+ ```
522
+
523
+ **If `--neo4j-push <uri>`** - push directly to a running Neo4j instance. Ask the user for credentials if not provided:
524
+
525
+ ```bash
526
+ $(cat .graphify_python) -c "
527
+ import sys, json
528
+ from graphify.build import build_from_json
529
+ from graphify.cluster import cluster
530
+ from graphify.export import push_to_neo4j
531
+ from pathlib import Path
532
+
533
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
534
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
535
+ G = build_from_json(extraction)
536
+ communities = {int(k): v for k, v in analysis['communities'].items()}
537
+
538
+ result = push_to_neo4j(G, uri='NEO4J_URI', user='NEO4J_USER', password='NEO4J_PASSWORD', communities=communities)
539
+ print(f'Pushed to Neo4j: {result[\"nodes\"]} nodes, {result[\"edges\"]} edges')
540
+ "
541
+ ```
542
+
543
+ Replace `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` with actual values. Default URI is `bolt://localhost:7687`, default user is `neo4j`. Uses MERGE - safe to re-run without creating duplicates.
544
+
545
+ ### Step 7b - SVG export (only if --svg flag)
546
+
547
+ ```bash
548
+ $(cat .graphify_python) -c "
549
+ import sys, json
550
+ from graphify.build import build_from_json
551
+ from graphify.export import to_svg
552
+ from pathlib import Path
553
+
554
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
555
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
556
+ labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
557
+
558
+ G = build_from_json(extraction)
559
+ communities = {int(k): v for k, v in analysis['communities'].items()}
560
+ labels = {int(k): v for k, v in labels_raw.items()}
561
+
562
+ to_svg(G, communities, 'graphify-out/graph.svg', community_labels=labels or None)
563
+ print('graph.svg written - embeds in Obsidian, Notion, GitHub READMEs')
564
+ "
565
+ ```
566
+
567
+ ### Step 7c - GraphML export (only if --graphml flag)
568
+
569
+ ```bash
570
+ $(cat .graphify_python) -c "
571
+ import json
572
+ from graphify.build import build_from_json
573
+ from graphify.export import to_graphml
574
+ from pathlib import Path
575
+
576
+ extraction = json.loads(Path('.graphify_extract.json').read_text())
577
+ analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
578
+
579
+ G = build_from_json(extraction)
580
+ communities = {int(k): v for k, v in analysis['communities'].items()}
581
+
582
+ to_graphml(G, communities, 'graphify-out/graph.graphml')
583
+ print('graph.graphml written - open in Gephi, yEd, or any GraphML tool')
584
+ "
585
+ ```
586
+
587
+ ### Step 7d - MCP server (only if --mcp flag)
588
+
589
+ ```bash
590
+ $(cat .graphify_python) -m graphify.serve graphify-out/graph.json
591
+ ```
592
+
593
+ This starts a stdio MCP server that exposes tools: `query_graph`, `get_node`, `get_neighbors`, `get_community`, `god_nodes`, `graph_stats`, `shortest_path`. Add to Claude Desktop or any MCP-compatible agent orchestrator so other agents can query the graph live.
594
+
595
+ To configure in Claude Desktop, add to `claude_desktop_config.json`:
596
+ ```json
597
+ {
598
+ "mcpServers": {
599
+ "graphify": {
600
+ "command": "python3",
601
+ "args": ["-m", "graphify.serve", "/absolute/path/to/graphify-out/graph.json"]
602
+ }
603
+ }
604
+ }
605
+ ```
606
+
607
+ ### Step 8 - Token reduction benchmark (only if total_words > 5000)
608
+
609
+ If `total_words` from `.graphify_detect.json` is greater than 5,000, run:
610
+
611
+ ```bash
612
+ $(cat .graphify_python) -c "
613
+ import json
614
+ from graphify.benchmark import run_benchmark, print_benchmark
615
+ from pathlib import Path
616
+
617
+ detection = json.loads(Path('.graphify_detect.json').read_text())
618
+ result = run_benchmark('graphify-out/graph.json', corpus_words=detection['total_words'])
619
+ print_benchmark(result)
620
+ "
621
+ ```
622
+
623
+ Print the output directly in chat. If `total_words <= 5000`, skip silently - the graph value is structural clarity, not token compression, for small corpora.
624
+
625
+ ---
626
+
627
+ ### Step 9 - Save manifest, update cost tracker, clean up, and report
628
+
629
+ ```bash
630
+ $(cat .graphify_python) -c "
631
+ import json
632
+ from pathlib import Path
633
+ from datetime import datetime, timezone
634
+ from graphify.detect import save_manifest
635
+
636
+ # Save manifest for --update
637
+ detect = json.loads(Path('.graphify_detect.json').read_text())
638
+ save_manifest(detect['files'])
639
+
640
+ # Update cumulative cost tracker
641
+ extract = json.loads(Path('.graphify_extract.json').read_text())
642
+ input_tok = extract.get('input_tokens', 0)
643
+ output_tok = extract.get('output_tokens', 0)
644
+
645
+ cost_path = Path('graphify-out/cost.json')
646
+ if cost_path.exists():
647
+ cost = json.loads(cost_path.read_text())
648
+ else:
649
+ cost = {'runs': [], 'total_input_tokens': 0, 'total_output_tokens': 0}
650
+
651
+ cost['runs'].append({
652
+ 'date': datetime.now(timezone.utc).isoformat(),
653
+ 'input_tokens': input_tok,
654
+ 'output_tokens': output_tok,
655
+ 'files': detect.get('total_files', 0),
656
+ })
657
+ cost['total_input_tokens'] += input_tok
658
+ cost['total_output_tokens'] += output_tok
659
+ cost_path.write_text(json.dumps(cost, indent=2))
660
+
661
+ print(f'This run: {input_tok:,} input tokens, {output_tok:,} output tokens')
662
+ print(f'All time: {cost[\"total_input_tokens\"]:,} input, {cost[\"total_output_tokens\"]:,} output ({len(cost[\"runs\"])} runs)')
663
+ "
664
+ rm -f .graphify_detect.json .graphify_extract.json .graphify_ast.json .graphify_semantic.json .graphify_chunk_*.json
665
+ rm -f graphify-out/.needs_update 2>/dev/null || true
666
+ ```
667
+
668
+ Tell the user (omit the obsidian line unless --obsidian was given):
669
+ ```
670
+ Graph complete. Outputs in PATH_TO_DIR/graphify-out/
671
+
672
+ graph.html - interactive graph, open in browser
673
+ GRAPH_REPORT.md - audit report
674
+ graph.json - raw graph data
675
+ obsidian/ - Obsidian vault (only if --obsidian was given)
676
+ ```
677
+
678
+ If graphify saved you time, consider supporting it: https://github.com/sponsors/safishamsi
679
+
680
+ Replace PATH_TO_DIR with the actual absolute path of the directory that was processed.
681
+
682
+ Then paste these sections from GRAPH_REPORT.md directly into the chat:
683
+ - God Nodes
684
+ - Surprising Connections
685
+ - Suggested Questions
686
+
687
+ Do NOT paste the full report - just those three sections. Keep it concise.
688
+
689
+ Then immediately offer to explore. Pick the single most interesting suggested question from the report - the one that crosses the most community boundaries or has the most surprising bridge node - and ask:
690
+
691
+ > "The most interesting question this graph can answer: **[question]**. Want me to trace it?"
692
+
693
+ If the user says yes, run `/graphify query "[question]"` on the graph and walk them through the answer using the graph structure - which nodes connect, which community boundaries get crossed, what the path reveals. Keep going as long as they want to explore. Each answer should end with a natural follow-up ("this connects to X - want to go deeper?") so the session feels like navigation, not a one-shot report.
694
+
695
+ The graph is the map. Your job after the pipeline is to be the guide.
696
+
697
+ ---
698
+
699
+ ## For --update (incremental re-extraction)
700
+
701
+ Use when you've added or modified files since the last run. Only re-extracts changed files - saves tokens and time.
702
+
703
+ ```bash
704
+ $(cat .graphify_python) -c "
705
+ import sys, json
706
+ from graphify.detect import detect_incremental, save_manifest
707
+ from pathlib import Path
708
+
709
+ result = detect_incremental(Path('INPUT_PATH'))
710
+ new_total = result.get('new_total', 0)
711
+ print(json.dumps(result, indent=2))
712
+ Path('.graphify_incremental.json').write_text(json.dumps(result))
713
+ if new_total == 0:
714
+ print('No files changed since last run. Nothing to update.')
715
+ raise SystemExit(0)
716
+ print(f'{new_total} new/changed file(s) to re-extract.')
717
+ "
718
+ ```
719
+
720
+ If new files exist, first check whether all changed files are code files:
721
+
722
+ ```bash
723
+ $(cat .graphify_python) -c "
724
+ import json
725
+ from pathlib import Path
726
+
727
+ result = json.loads(open('.graphify_incremental.json').read()) if Path('.graphify_incremental.json').exists() else {}
728
+ code_exts = {'.py','.ts','.js','.go','.rs','.java','.cpp','.c','.rb','.swift','.kt','.cs','.scala','.php','.cc','.cxx','.hpp','.h','.kts'}
729
+ new_files = result.get('new_files', {})
730
+ all_changed = [f for files in new_files.values() for f in files]
731
+ code_only = all(Path(f).suffix.lower() in code_exts for f in all_changed)
732
+ print('code_only:', code_only)
733
+ "
734
+ ```
735
+
736
+ If `code_only` is True: print `[graphify update] Code-only changes detected - skipping semantic extraction (no LLM needed)`, run only Step 3A (AST) on the changed files, skip Step 3B entirely (no subagents), then go straight to merge and Steps 4–8.
737
+
738
+ If `code_only` is False (any changed file is a doc/paper/image): run the full Steps 3A–3C pipeline as normal.
739
+
740
+ Then:
741
+
742
+ ```bash
743
+ $(cat .graphify_python) -c "
744
+ import sys, json
745
+ from graphify.build import build_from_json
746
+ from graphify.export import to_json
747
+ from networkx.readwrite import json_graph
748
+ import networkx as nx
749
+ from pathlib import Path
750
+
751
+ # Load existing graph
752
+ existing_data = json.loads(Path('graphify-out/graph.json').read_text())
753
+ G_existing = json_graph.node_link_graph(existing_data, edges='links')
754
+
755
+ # Load new extraction
756
+ new_extraction = json.loads(Path('.graphify_extract.json').read_text())
757
+ G_new = build_from_json(new_extraction)
758
+
759
+ # Merge: new nodes/edges into existing graph
760
+ G_existing.update(G_new)
761
+ print(f'Merged: {G_existing.number_of_nodes()} nodes, {G_existing.number_of_edges()} edges')
762
+ "
763
+ ```
764
+
765
+ Then run Steps 4–9 on the merged graph as normal (Step 9 re-saves the manifest so the next `--update` only picks up files changed after this run).
766
+
767
+ After Step 4, show the graph diff:
768
+
769
+ ```bash
770
+ $(cat .graphify_python) -c "
771
+ import json
772
+ from graphify.analyze import graph_diff
773
+ from graphify.build import build_from_json
774
+ from networkx.readwrite import json_graph
775
+ import networkx as nx
776
+ from pathlib import Path
777
+
778
+ # Load old graph (before update) from backup written before merge
779
+ old_data = json.loads(Path('.graphify_old.json').read_text()) if Path('.graphify_old.json').exists() else None
780
+ new_extract = json.loads(Path('.graphify_extract.json').read_text())
781
+ G_new = build_from_json(new_extract)
782
+
783
+ if old_data:
784
+ G_old = json_graph.node_link_graph(old_data, edges='links')
785
+ diff = graph_diff(G_old, G_new)
786
+ print(diff['summary'])
787
+ if diff['new_nodes']:
788
+ print('New nodes:', ', '.join(n['label'] for n in diff['new_nodes'][:5]))
789
+ if diff['new_edges']:
790
+ print('New edges:', len(diff['new_edges']))
791
+ "
792
+ ```
793
+
794
+ Before the merge step, save the old graph: `cp graphify-out/graph.json .graphify_old.json`
795
+ Clean up after: `rm -f .graphify_old.json`
796
+
797
+ ---
798
+
799
+ ## For --cluster-only
800
+
801
+ Skip Steps 1–3. Load the existing graph from `graphify-out/graph.json` and re-run clustering:
802
+
803
+ ```bash
804
+ $(cat .graphify_python) -c "
805
+ import sys, json
806
+ from graphify.cluster import cluster, score_all
807
+ from graphify.analyze import god_nodes, surprising_connections
808
+ from graphify.report import generate
809
+ from graphify.export import to_json
810
+ from networkx.readwrite import json_graph
811
+ import networkx as nx
812
+ from pathlib import Path
813
+
814
+ data = json.loads(Path('graphify-out/graph.json').read_text())
815
+ G = json_graph.node_link_graph(data, edges='links')
816
+
817
+ detection = {'total_files': 0, 'total_words': 99999, 'needs_graph': True, 'warning': None,
818
+ 'files': {'code': [], 'document': [], 'paper': []}}
819
+ tokens = {'input': 0, 'output': 0}
820
+
821
+ communities = cluster(G)
822
+ cohesion = score_all(G, communities)
823
+ gods = god_nodes(G)
824
+ surprises = surprising_connections(G, communities)
825
+ labels = {cid: 'Community ' + str(cid) for cid in communities}
826
+
827
+ report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, '.')
828
+ Path('graphify-out/GRAPH_REPORT.md').write_text(report)
829
+ to_json(G, communities, 'graphify-out/graph.json')
830
+
831
+ analysis = {
832
+ 'communities': {str(k): v for k, v in communities.items()},
833
+ 'cohesion': {str(k): v for k, v in cohesion.items()},
834
+ 'gods': gods,
835
+ 'surprises': surprises,
836
+ }
837
+ Path('graphify-out/.graphify_analysis.json').write_text(json.dumps(analysis, indent=2))
838
+ print(f'Re-clustered: {len(communities)} communities')
839
+ "
840
+ ```
841
+
842
+ Then run Steps 5–9 as normal (label communities, generate viz, benchmark, clean up, report).
843
+
844
+ ---
845
+
846
+ ## For /graphify query
847
+
848
+ Two traversal modes - choose based on the question:
849
+
850
+ | Mode | Flag | Best for |
851
+ |------|------|----------|
852
+ | BFS (default) | _(none)_ | "What is X connected to?" - broad context, nearest neighbors first |
853
+ | DFS | `--dfs` | "How does X reach Y?" - trace a specific chain or dependency path |
854
+
855
+ First check the graph exists:
856
+ ```bash
857
+ $(cat .graphify_python) -c "
858
+ from pathlib import Path
859
+ if not Path('graphify-out/graph.json').exists():
860
+ print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
861
+ raise SystemExit(1)
862
+ "
863
+ ```
864
+ If it fails, stop and tell the user to run `/graphify <path>` first.
865
+
866
+ Load `graphify-out/graph.json`, then:
867
+
868
+ 1. Find the 1-3 nodes whose label best matches key terms in the question.
869
+ 2. Run the appropriate traversal from each starting node.
870
+ 3. Read the subgraph - node labels, edge relations, confidence tags, source locations.
871
+ 4. Answer using **only** what the graph contains. Quote `source_location` when citing a specific fact.
872
+ 5. If the graph lacks enough information, say so - do not hallucinate edges.
873
+
874
+ ```bash
875
+ $(cat .graphify_python) -c "
876
+ import sys, json
877
+ from networkx.readwrite import json_graph
878
+ import networkx as nx
879
+ from pathlib import Path
880
+
881
+ data = json.loads(Path('graphify-out/graph.json').read_text())
882
+ G = json_graph.node_link_graph(data, edges='links')
883
+
884
+ question = 'QUESTION'
885
+ mode = 'MODE' # 'bfs' or 'dfs'
886
+ terms = [t.lower() for t in question.split() if len(t) > 3]
887
+
888
+ # Find best-matching start nodes
889
+ scored = []
890
+ for nid, ndata in G.nodes(data=True):
891
+ label = ndata.get('label', '').lower()
892
+ score = sum(1 for t in terms if t in label)
893
+ if score > 0:
894
+ scored.append((score, nid))
895
+ scored.sort(reverse=True)
896
+ start_nodes = [nid for _, nid in scored[:3]]
897
+
898
+ if not start_nodes:
899
+ print('No matching nodes found for query terms:', terms)
900
+ sys.exit(0)
901
+
902
+ subgraph_nodes = set()
903
+ subgraph_edges = []
904
+
905
+ if mode == 'dfs':
906
+ # DFS: follow one path as deep as possible before backtracking.
907
+ # Depth-limited to 6 to avoid traversing the whole graph.
908
+ visited = set()
909
+ stack = [(n, 0) for n in reversed(start_nodes)]
910
+ while stack:
911
+ node, depth = stack.pop()
912
+ if node in visited or depth > 6:
913
+ continue
914
+ visited.add(node)
915
+ subgraph_nodes.add(node)
916
+ for neighbor in G.neighbors(node):
917
+ if neighbor not in visited:
918
+ stack.append((neighbor, depth + 1))
919
+ subgraph_edges.append((node, neighbor))
920
+ else:
921
+ # BFS: explore all neighbors layer by layer up to depth 3.
922
+ frontier = set(start_nodes)
923
+ subgraph_nodes = set(start_nodes)
924
+ for _ in range(3):
925
+ next_frontier = set()
926
+ for n in frontier:
927
+ for neighbor in G.neighbors(n):
928
+ if neighbor not in subgraph_nodes:
929
+ next_frontier.add(neighbor)
930
+ subgraph_edges.append((n, neighbor))
931
+ subgraph_nodes.update(next_frontier)
932
+ frontier = next_frontier
933
+
934
+ # Token-budget aware output: rank by relevance, cut at budget (~4 chars/token)
935
+ token_budget = BUDGET # default 2000
936
+ char_budget = token_budget * 4
937
+
938
+ # Score each node by term overlap for ranked output
939
+ def relevance(nid):
940
+ label = G.nodes[nid].get('label', '').lower()
941
+ return sum(1 for t in terms if t in label)
942
+
943
+ ranked_nodes = sorted(subgraph_nodes, key=relevance, reverse=True)
944
+
945
+ lines = [f'Traversal: {mode.upper()} | Start: {[G.nodes[n].get(\"label\",n) for n in start_nodes]} | {len(subgraph_nodes)} nodes']
946
+ for nid in ranked_nodes:
947
+ d = G.nodes[nid]
948
+ lines.append(f' NODE {d.get(\"label\", nid)} [src={d.get(\"source_file\",\"\")} loc={d.get(\"source_location\",\"\")}]')
949
+ for u, v in subgraph_edges:
950
+ if u in subgraph_nodes and v in subgraph_nodes:
951
+ d = G.edges[u, v]
952
+ lines.append(f' EDGE {G.nodes[u].get(\"label\",u)} --{d.get(\"relation\",\"\")} [{d.get(\"confidence\",\"\")}]--> {G.nodes[v].get(\"label\",v)}')
953
+
954
+ output = '\n'.join(lines)
955
+ if len(output) > char_budget:
956
+ output = output[:char_budget] + f'\n... (truncated at ~{token_budget} token budget - use --budget N for more)'
957
+ print(output)
958
+ "
959
+ ```
960
+
961
+ Replace `QUESTION` with the user's actual question, `MODE` with `bfs` or `dfs`, and `BUDGET` with the token budget (default `2000`, or whatever `--budget N` specifies). Then answer based on the subgraph output above.
962
+
963
+ After writing the answer, save it back into the graph so it improves future queries:
964
+
965
+ ```bash
966
+ $(cat .graphify_python) -m graphify save-result --question "QUESTION" --answer "ANSWER" --type query --nodes NODE1 NODE2
967
+ ```
968
+
969
+ Replace `QUESTION` with the question, `ANSWER` with your full answer text, and `NODE1 NODE2` with the list of node labels you cited. This closes the feedback loop: the next `--update` will extract this Q&A as a node in the graph.
970
+
971
+ ---
972
+
973
+ ## For /graphify path
974
+
975
+ Find the shortest path between two named concepts in the graph.
976
+
977
+ First check the graph exists:
978
+ ```bash
979
+ $(cat .graphify_python) -c "
980
+ from pathlib import Path
981
+ if not Path('graphify-out/graph.json').exists():
982
+ print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
983
+ raise SystemExit(1)
984
+ "
985
+ ```
986
+ If it fails, stop and tell the user to run `/graphify <path>` first.
987
+
988
+ ```bash
989
+ $(cat .graphify_python) -c "
990
+ import json, sys
991
+ import networkx as nx
992
+ from networkx.readwrite import json_graph
993
+ from pathlib import Path
994
+
995
+ data = json.loads(Path('graphify-out/graph.json').read_text())
996
+ G = json_graph.node_link_graph(data, edges='links')
997
+
998
+ a_term = 'NODE_A'
999
+ b_term = 'NODE_B'
1000
+
1001
+ def find_node(term):
1002
+ term = term.lower()
1003
+ scored = sorted(
1004
+ [(sum(1 for w in term.split() if w in G.nodes[n].get('label','').lower()), n)
1005
+ for n in G.nodes()],
1006
+ reverse=True
1007
+ )
1008
+ return scored[0][1] if scored and scored[0][0] > 0 else None
1009
+
1010
+ src = find_node(a_term)
1011
+ tgt = find_node(b_term)
1012
+
1013
+ if not src or not tgt:
1014
+ print(f'Could not find nodes matching: {a_term!r} or {b_term!r}')
1015
+ sys.exit(0)
1016
+
1017
+ try:
1018
+ path = nx.shortest_path(G, src, tgt)
1019
+ print(f'Shortest path ({len(path)-1} hops):')
1020
+ for i, nid in enumerate(path):
1021
+ label = G.nodes[nid].get('label', nid)
1022
+ if i < len(path) - 1:
1023
+ edge = G.edges[nid, path[i+1]]
1024
+ rel = edge.get('relation', '')
1025
+ conf = edge.get('confidence', '')
1026
+ print(f' {label} --{rel}--> [{conf}]')
1027
+ else:
1028
+ print(f' {label}')
1029
+ except nx.NetworkXNoPath:
1030
+ print(f'No path found between {a_term!r} and {b_term!r}')
1031
+ except nx.NodeNotFound as e:
1032
+ print(f'Node not found: {e}')
1033
+ "
1034
+ ```
1035
+
1036
+ Replace `NODE_A` and `NODE_B` with the actual concept names from the user. Then explain the path in plain language - what each hop means, why it's significant.
1037
+
1038
+ After writing the explanation, save it back:
1039
+
1040
+ ```bash
1041
+ $(cat .graphify_python) -m graphify save-result --question "Path from NODE_A to NODE_B" --answer "ANSWER" --type path_query --nodes NODE_A NODE_B
1042
+ ```
1043
+
1044
+ ---
1045
+
1046
+ ## For /graphify explain
1047
+
1048
+ Give a plain-language explanation of a single node - everything connected to it.
1049
+
1050
+ First check the graph exists:
1051
+ ```bash
1052
+ $(cat .graphify_python) -c "
1053
+ from pathlib import Path
1054
+ if not Path('graphify-out/graph.json').exists():
1055
+ print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
1056
+ raise SystemExit(1)
1057
+ "
1058
+ ```
1059
+ If it fails, stop and tell the user to run `/graphify <path>` first.
1060
+
1061
+ ```bash
1062
+ $(cat .graphify_python) -c "
1063
+ import json, sys
1064
+ import networkx as nx
1065
+ from networkx.readwrite import json_graph
1066
+ from pathlib import Path
1067
+
1068
+ data = json.loads(Path('graphify-out/graph.json').read_text())
1069
+ G = json_graph.node_link_graph(data, edges='links')
1070
+
1071
+ term = 'NODE_NAME'
1072
+ term_lower = term.lower()
1073
+
1074
+ # Find best matching node
1075
+ scored = sorted(
1076
+ [(sum(1 for w in term_lower.split() if w in G.nodes[n].get('label','').lower()), n)
1077
+ for n in G.nodes()],
1078
+ reverse=True
1079
+ )
1080
+ if not scored or scored[0][0] == 0:
1081
+ print(f'No node matching {term!r}')
1082
+ sys.exit(0)
1083
+
1084
+ nid = scored[0][1]
1085
+ data_n = G.nodes[nid]
1086
+ print(f'NODE: {data_n.get(\"label\", nid)}')
1087
+ print(f' source: {data_n.get(\"source_file\",\"unknown\")}')
1088
+ print(f' type: {data_n.get(\"file_type\",\"unknown\")}')
1089
+ print(f' degree: {G.degree(nid)}')
1090
+ print()
1091
+ print('CONNECTIONS:')
1092
+ for neighbor in G.neighbors(nid):
1093
+ edge = G.edges[nid, neighbor]
1094
+ nlabel = G.nodes[neighbor].get('label', neighbor)
1095
+ rel = edge.get('relation', '')
1096
+ conf = edge.get('confidence', '')
1097
+ src_file = G.nodes[neighbor].get('source_file', '')
1098
+ print(f' --{rel}--> {nlabel} [{conf}] ({src_file})')
1099
+ "
1100
+ ```
1101
+
1102
+ Replace `NODE_NAME` with the concept the user asked about. Then write a 3-5 sentence explanation of what this node is, what it connects to, and why those connections are significant. Use the source locations as citations.
1103
+
1104
+ After writing the explanation, save it back:
1105
+
1106
+ ```bash
1107
+ $(cat .graphify_python) -m graphify save-result --question "Explain NODE_NAME" --answer "ANSWER" --type explain --nodes NODE_NAME
1108
+ ```
1109
+
1110
+ ---
1111
+
1112
+ ## For /graphify add
1113
+
1114
+ Fetch a URL and add it to the corpus, then update the graph.
1115
+
1116
+ ```bash
1117
+ $(cat .graphify_python) -c "
1118
+ import sys
1119
+ from graphify.ingest import ingest
1120
+ from pathlib import Path
1121
+
1122
+ try:
1123
+ out = ingest('URL', Path('./raw'), author='AUTHOR', contributor='CONTRIBUTOR')
1124
+ print(f'Saved to {out}')
1125
+ except ValueError as e:
1126
+ print(f'error: {e}', file=sys.stderr)
1127
+ sys.exit(1)
1128
+ except RuntimeError as e:
1129
+ print(f'error: {e}', file=sys.stderr)
1130
+ sys.exit(1)
1131
+ "
1132
+ ```
1133
+
1134
+ Replace `URL` with the actual URL, `AUTHOR` with the user's name if provided, `CONTRIBUTOR` likewise. If the command exits with an error, tell the user what went wrong - do not silently continue. After a successful save, automatically run the `--update` pipeline on `./raw` to merge the new file into the existing graph.
1135
+
1136
+ Supported URL types (auto-detected):
1137
+ - Twitter/X → fetched via oEmbed, saved as `.md` with tweet text and author
1138
+ - arXiv → abstract + metadata saved as `.md`
1139
+ - PDF → downloaded as `.pdf`
1140
+ - Images (.png/.jpg/.webp) → downloaded, vision extraction runs on next build
1141
+ - Any webpage → converted to markdown via html2text
1142
+
1143
+ ---
1144
+
1145
+ ## For --watch
1146
+
1147
+ Start a background watcher that monitors a folder and auto-updates the graph when files change.
1148
+
1149
+ ```bash
1150
+ $(cat .graphify_python) -m graphify.watch INPUT_PATH --debounce 3
1151
+ ```
1152
+
1153
+ Replace INPUT_PATH with the folder to watch. Behavior depends on what changed:
1154
+
1155
+ - **Code files only (.py, .ts, .go, etc.):** re-runs AST extraction + rebuild + cluster immediately, no LLM needed. `graph.json` and `GRAPH_REPORT.md` are updated automatically.
1156
+ - **Docs, papers, or images:** writes a `graphify-out/.needs_update` flag and prints a notification to run `/graphify --update` (LLM semantic re-extraction required).
1157
+
1158
+ Debounce (default 3s): waits until file activity stops before triggering, so a wave of parallel agent writes doesn't trigger a rebuild per file.
1159
+
1160
+ Press Ctrl+C to stop.
1161
+
1162
+ For agentic workflows: run `--watch` in a background terminal. Code changes from agent waves are picked up automatically between waves. If agents are also writing docs or notes, you'll need a manual `/graphify --update` after those waves.
1163
+
1164
+ ---
1165
+
1166
+ ## For git commit hook
1167
+
1168
+ Install a post-commit hook that auto-rebuilds the graph after every commit. No background process needed - triggers once per commit, works with any editor.
1169
+
1170
+ ```bash
1171
+ graphify hook install # install
1172
+ graphify hook uninstall # remove
1173
+ graphify hook status # check
1174
+ ```
1175
+
1176
+ After every `git commit`, the hook detects which code files changed (via `git diff HEAD~1`), re-runs AST extraction on those files, and rebuilds `graph.json` and `GRAPH_REPORT.md`. Doc/image changes are ignored by the hook - run `/graphify --update` manually for those.
1177
+
1178
+ If a post-commit hook already exists, graphify appends to it rather than replacing it.
1179
+
1180
+ ---
1181
+
1182
+ ## For native CLAUDE.md integration
1183
+
1184
+ Run once per project to make graphify always-on in Claude Code sessions:
1185
+
1186
+ ```bash
1187
+ graphify claude install
1188
+ ```
1189
+
1190
+ This writes a `## graphify` section to the local `CLAUDE.md` that instructs Claude to check the graph before answering codebase questions and rebuild it after code changes. No manual `/graphify` needed in future sessions.
1191
+
1192
+ ```bash
1193
+ graphify claude uninstall # remove the section
1194
+ ```
1195
+
1196
+ ---
1197
+
1198
+ ## Honesty Rules
1199
+
1200
+ - Never invent an edge. If unsure, use AMBIGUOUS.
1201
+ - Never skip the corpus check warning.
1202
+ - Always show token cost in the report.
1203
+ - Never hide cohesion scores behind symbols - show the raw number.
1204
+ - Never run HTML viz on a graph with more than 5,000 nodes without warning the user.