ultimate-pi 0.1.7 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/graphify/.graphify_version +1 -0
- package/.agents/skills/graphify/SKILL.md +1204 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +225 -97
- package/.agents/skills/wiki-autoresearch/references/program.md +28 -62
- package/.agents/skills/wiki-autoresearch/references/quality-sites.md +32 -0
- package/.env.example +5 -1
- package/.gitattributes +1 -0
- package/.github/workflows/publish-github-packages.yml +1 -1
- package/.pi/SYSTEM.md +72 -18
- package/.pi/agents/harness/adversary.md +32 -0
- package/.pi/agents/harness/evaluator.md +32 -0
- package/.pi/agents/harness/executor.md +34 -0
- package/.pi/agents/harness/meta-optimizer.md +33 -0
- package/.pi/agents/harness/planner.md +33 -0
- package/.pi/agents/harness/tie-breaker.md +35 -0
- package/.pi/agents/harness/trace-librarian.md +32 -0
- package/.pi/extensions/banner.png +0 -0
- package/.pi/extensions/budget-guard.ts +265 -0
- package/.pi/extensions/custom-footer.ts +194 -22
- package/.pi/extensions/custom-header.ts +47 -9
- package/.pi/extensions/debate-orchestrator.ts +479 -0
- package/.pi/extensions/harness-live-widget.ts +438 -0
- package/.pi/extensions/policy-gate.ts +349 -0
- package/.pi/extensions/review-integrity.ts +198 -0
- package/.pi/extensions/test-diff-integrity.ts +240 -0
- package/.pi/extensions/trace-recorder.ts +315 -0
- package/.pi/harness/README.md +23 -0
- package/.pi/harness/router/README.md +35 -0
- package/.pi/harness/router/apply-router-proposal.mjs +153 -0
- package/.pi/harness/router/propose-router-tuning.mjs +149 -0
- package/.pi/harness/specs/README.md +37 -0
- package/.pi/harness/specs/adversary-report.schema.json +53 -0
- package/.pi/harness/specs/budget-exhausted-event.schema.json +93 -0
- package/.pi/harness/specs/consensus-packet.schema.json +175 -0
- package/.pi/harness/specs/eval-verdict.schema.json +59 -0
- package/.pi/harness/specs/incident-record.schema.json +84 -0
- package/.pi/harness/specs/plan-packet.schema.json +90 -0
- package/.pi/harness/specs/round-result.schema.json +126 -0
- package/.pi/harness/specs/router-tuning-proposal.schema.json +114 -0
- package/.pi/harness/specs/run-trace.schema.json +107 -0
- package/.pi/lib/harness-ui-state.ts +311 -0
- package/.pi/mcp.json +4 -0
- package/.pi/model-router.json +93 -93
- package/.pi/prompts/graphify.md +23 -0
- package/.pi/prompts/harness-abort.md +41 -0
- package/.pi/prompts/harness-auto.md +83 -0
- package/.pi/prompts/harness-critic.md +52 -0
- package/.pi/prompts/harness-eval.md +51 -0
- package/.pi/prompts/harness-incident.md +51 -0
- package/.pi/prompts/harness-plan.md +64 -0
- package/.pi/prompts/harness-review.md +52 -0
- package/.pi/prompts/harness-router-tune.md +74 -0
- package/.pi/prompts/harness-run.md +59 -0
- package/.pi/prompts/harness-setup.md +316 -216
- package/.pi/prompts/harness-trace.md +51 -0
- package/.pi/prompts/wiki-autoresearch.md +9 -7
- package/.pi/prompts/wiki-save.md +20 -0
- package/.pi/skills/agent-router/SKILL.md +2 -4
- package/.pi/skills/ast-grep/SKILL.md +354 -0
- package/.pi/sounds/project-sounds.json +18 -24
- package/AGENTS.md +30 -0
- package/CHANGELOG.md +89 -0
- package/CONTRIBUTING.md +51 -1
- package/README.md +264 -20
- package/biome.json +8 -2
- package/lefthook.yml +3 -2
- package/node_modules/@sting8k/pi-vcc/README.md +200 -0
- package/node_modules/@sting8k/pi-vcc/index.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/package.json +26 -0
- package/node_modules/@sting8k/pi-vcc/scripts/audit-sessions.ts +88 -0
- package/node_modules/@sting8k/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
- package/node_modules/@sting8k/pi-vcc/scripts/compare-before-after.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/scripts/dump-branch-output.ts +20 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/pi-vcc.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/vcc-recall.ts +65 -0
- package/node_modules/@sting8k/pi-vcc/src/core/brief.ts +381 -0
- package/node_modules/@sting8k/pi-vcc/src/core/build-sections.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/core/content.ts +60 -0
- package/node_modules/@sting8k/pi-vcc/src/core/filter-noise.ts +42 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format-recall.ts +27 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format.ts +49 -0
- package/node_modules/@sting8k/pi-vcc/src/core/lineage.ts +26 -0
- package/node_modules/@sting8k/pi-vcc/src/core/load-messages.ts +41 -0
- package/node_modules/@sting8k/pi-vcc/src/core/normalize.ts +66 -0
- package/node_modules/@sting8k/pi-vcc/src/core/recall-scope.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/core/render-entries.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/core/report.ts +237 -0
- package/node_modules/@sting8k/pi-vcc/src/core/sanitize.ts +5 -0
- package/node_modules/@sting8k/pi-vcc/src/core/search-entries.ts +221 -0
- package/node_modules/@sting8k/pi-vcc/src/core/settings.ts +77 -0
- package/node_modules/@sting8k/pi-vcc/src/core/skill-collapse.ts +35 -0
- package/node_modules/@sting8k/pi-vcc/src/core/summarize.ts +157 -0
- package/node_modules/@sting8k/pi-vcc/src/core/tool-args.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/details.ts +7 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/commits.ts +69 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/files.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/goals.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/preferences.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/hooks/before-compact.ts +322 -0
- package/node_modules/@sting8k/pi-vcc/src/sections.ts +12 -0
- package/node_modules/@sting8k/pi-vcc/src/tools/recall.ts +109 -0
- package/node_modules/@sting8k/pi-vcc/src/types.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact-hook.test.ts +181 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact.test.ts +140 -0
- package/node_modules/@sting8k/pi-vcc/tests/brief.test.ts +206 -0
- package/node_modules/@sting8k/pi-vcc/tests/build-sections.test.ts +59 -0
- package/node_modules/@sting8k/pi-vcc/tests/compile.test.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/tests/content.test.ts +31 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-goals.test.ts +86 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-preferences.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/filter-noise.test.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/fixtures.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/format-recall.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/format.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/lineage.test.ts +33 -0
- package/node_modules/@sting8k/pi-vcc/tests/load-messages.test.ts +51 -0
- package/node_modules/@sting8k/pi-vcc/tests/normalize.test.ts +97 -0
- package/node_modules/@sting8k/pi-vcc/tests/real-sessions.test.ts +38 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-expand.test.ts +15 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-scope.test.ts +32 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
- package/node_modules/@sting8k/pi-vcc/tests/render-entries.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/report.test.ts +44 -0
- package/node_modules/@sting8k/pi-vcc/tests/sanitize.test.ts +24 -0
- package/node_modules/@sting8k/pi-vcc/tests/search-entries.test.ts +144 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/load-session.ts +23 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/real-sessions.ts +51 -0
- package/package.json +15 -4
- package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
- package/scripts/index_youtube_urls.py +376 -0
- package/scripts/merge_graphify_corpora.py +398 -0
- package/scripts/regen_graphify_html.py +46 -0
- package/.agents/skills/defuddle/SKILL.md +0 -90
- package/.agents/skills/wiki/SKILL.md +0 -215
- package/.agents/skills/wiki/references/css-snippets.md +0 -122
- package/.agents/skills/wiki/references/frontmatter.md +0 -107
- package/.agents/skills/wiki/references/git-setup.md +0 -58
- package/.agents/skills/wiki/references/mcp-setup.md +0 -149
- package/.agents/skills/wiki/references/modes.md +0 -259
- package/.agents/skills/wiki/references/plugins.md +0 -96
- package/.agents/skills/wiki/references/rest-api.md +0 -124
- package/.agents/skills/wiki-fold/SKILL.md +0 -204
- package/.agents/skills/wiki-fold/references/fold-template.md +0 -133
- package/.agents/skills/wiki-ingest/SKILL.md +0 -288
- package/.agents/skills/wiki-lint/SKILL.md +0 -183
- package/.agents/skills/wiki-query/SKILL.md +0 -176
- package/.pi/agents/rethink.md +0 -140
- package/.pi/agents/wiki-ingest.md +0 -67
- package/.pi/agents/wiki-lint.md +0 -75
- package/.pi/internal/cursor-sdk-transcript-parser.ts +0 -59
- package/.pi/prompts/save.md +0 -16
- package/.pi/prompts/wiki.md +0 -23
- package/.pi/providers/cursor-sdk-provider.test.mjs +0 -476
- package/.pi/providers/cursor-sdk-provider.ts +0 -1085
- package/vault/AGENTS.md +0 -37
- package/vault/wiki/_templates/comparison.md +0 -39
- package/vault/wiki/_templates/concept.md +0 -40
- package/vault/wiki/_templates/decision.md +0 -21
- package/vault/wiki/_templates/entity.md +0 -32
- package/vault/wiki/_templates/flow.md +0 -14
- package/vault/wiki/_templates/module.md +0 -18
- package/vault/wiki/_templates/question.md +0 -31
- package/vault/wiki/_templates/source.md +0 -39
- package/vault/wiki/concepts/AST-Aware Code Chunking.md +0 -44
- package/vault/wiki/concepts/Build-Time Prompt Compilation.md +0 -107
- package/vault/wiki/concepts/Context Engine (AI Coding).md +0 -47
- package/vault/wiki/concepts/Context-Aware System Reminders.md +0 -61
- package/vault/wiki/concepts/Contextualized Text Embedding.md +0 -42
- package/vault/wiki/concepts/Contractor vs Employee AI Model.md +0 -55
- package/vault/wiki/concepts/Dual-Model Agent Architecture.md +0 -65
- package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +0 -43
- package/vault/wiki/concepts/Majority Vote Ensembling.md +0 -68
- package/vault/wiki/concepts/Meta-Harness.md +0 -16
- package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +0 -75
- package/vault/wiki/concepts/Prompt Enhancement.md +0 -90
- package/vault/wiki/concepts/Prompt Renderer.md +0 -89
- package/vault/wiki/concepts/Semantic Codebase Indexing.md +0 -67
- package/vault/wiki/concepts/additive-config-hierarchy.md +0 -16
- package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +0 -71
- package/vault/wiki/concepts/agent-browser-browser-automation.md +0 -99
- package/vault/wiki/concepts/agent-codebase-interface.md +0 -43
- package/vault/wiki/concepts/agent-harness-architecture.md +0 -67
- package/vault/wiki/concepts/agent-loop-detection-patterns.md +0 -133
- package/vault/wiki/concepts/agent-search-enforcement.md +0 -126
- package/vault/wiki/concepts/agent-skills-ecosystem.md +0 -74
- package/vault/wiki/concepts/agent-skills-pattern.md +0 -68
- package/vault/wiki/concepts/agentic-harness-context-enforcement.md +0 -91
- package/vault/wiki/concepts/agentic-harness.md +0 -34
- package/vault/wiki/concepts/agentic-orchestration-pipeline.md +0 -56
- package/vault/wiki/concepts/agentic-search-no-embeddings.md +0 -18
- package/vault/wiki/concepts/anthropic-context-engineering.md +0 -13
- package/vault/wiki/concepts/antigravity-agent-first-architecture.md +0 -61
- package/vault/wiki/concepts/ast-compression.md +0 -19
- package/vault/wiki/concepts/ast-truncation.md +0 -66
- package/vault/wiki/concepts/barrel-files.md +0 -37
- package/vault/wiki/concepts/browser-harness-agent.md +0 -41
- package/vault/wiki/concepts/browser-subagent-visual-verification.md +0 -82
- package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +0 -192
- package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +0 -161
- package/vault/wiki/concepts/codebase-to-context-ingestion.md +0 -46
- package/vault/wiki/concepts/codex-harness-innovations.md +0 -147
- package/vault/wiki/concepts/consensus-debate-flow.md +0 -17
- package/vault/wiki/concepts/consensus-debate.md +0 -206
- package/vault/wiki/concepts/content-addressed-spec-identity.md +0 -166
- package/vault/wiki/concepts/context-anxiety.md +0 -57
- package/vault/wiki/concepts/context-compression-techniques.md +0 -19
- package/vault/wiki/concepts/context-continuity.md +0 -22
- package/vault/wiki/concepts/context-drift-in-agents.md +0 -106
- package/vault/wiki/concepts/context-engineering.md +0 -62
- package/vault/wiki/concepts/context-folding.md +0 -67
- package/vault/wiki/concepts/context-mode.md +0 -38
- package/vault/wiki/concepts/cursor-harness-innovations.md +0 -107
- package/vault/wiki/concepts/deterministic-session-compaction.md +0 -79
- package/vault/wiki/concepts/drift-detection-unified.md +0 -296
- package/vault/wiki/concepts/execution-feedback-loop.md +0 -46
- package/vault/wiki/concepts/feedforward-feedback-harness.md +0 -60
- package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +0 -40
- package/vault/wiki/concepts/fork-safe-spec-storage.md +0 -89
- package/vault/wiki/concepts/fts5-sandbox.md +0 -19
- package/vault/wiki/concepts/fuzzy-edit-matching.md +0 -71
- package/vault/wiki/concepts/gemini-cli-architecture.md +0 -104
- package/vault/wiki/concepts/generator-evaluator-architecture.md +0 -64
- package/vault/wiki/concepts/guardian-agent-pattern.md +0 -67
- package/vault/wiki/concepts/harness-configuration-layers.md +0 -89
- package/vault/wiki/concepts/harness-control-frameworks.md +0 -155
- package/vault/wiki/concepts/harness-engineering-first-principles.md +0 -90
- package/vault/wiki/concepts/harness-h-formalism.md +0 -53
- package/vault/wiki/concepts/hybrid-code-search.md +0 -61
- package/vault/wiki/concepts/inline-post-edit-validation.md +0 -112
- package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +0 -110
- package/vault/wiki/concepts/lifecycle-hooks.md +0 -94
- package/vault/wiki/concepts/mcp-tool-routing.md +0 -102
- package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +0 -47
- package/vault/wiki/concepts/meta-agent-context-pruning.md +0 -151
- package/vault/wiki/concepts/model-adaptive-harness.md +0 -122
- package/vault/wiki/concepts/model-routing-agents.md +0 -101
- package/vault/wiki/concepts/monorepo-architecture.md +0 -45
- package/vault/wiki/concepts/multi-agent-specialization.md +0 -61
- package/vault/wiki/concepts/permission-subsystem.md +0 -16
- package/vault/wiki/concepts/pi-messenger-analysis.md +0 -243
- package/vault/wiki/concepts/pi-vscode-extension-landscape.md +0 -37
- package/vault/wiki/concepts/policy-engine-pattern.md +0 -78
- package/vault/wiki/concepts/progressive-disclosure-agents.md +0 -53
- package/vault/wiki/concepts/progressive-skill-disclosure.md +0 -17
- package/vault/wiki/concepts/provider-native-prompting.md +0 -203
- package/vault/wiki/concepts/quality-signal-sentrux.md +0 -37
- package/vault/wiki/concepts/repo-map-ranking.md +0 -42
- package/vault/wiki/concepts/result-monad-error-handling.md +0 -47
- package/vault/wiki/concepts/safety-defense-in-depth.md +0 -83
- package/vault/wiki/concepts/sandbox-os-enforcement.md +0 -18
- package/vault/wiki/concepts/selective-debate-routing.md +0 -70
- package/vault/wiki/concepts/self-evolving-harness.md +0 -60
- package/vault/wiki/concepts/sentrux-mcp-integration.md +0 -36
- package/vault/wiki/concepts/sentrux-rules-engine.md +0 -49
- package/vault/wiki/concepts/shell-pattern-compression.md +0 -24
- package/vault/wiki/concepts/skill-first-architecture.md +0 -166
- package/vault/wiki/concepts/structured-compaction.md +0 -78
- package/vault/wiki/concepts/subagent-orchestration.md +0 -17
- package/vault/wiki/concepts/subagent-worktree-isolation.md +0 -68
- package/vault/wiki/concepts/superpowers-methodology.md +0 -78
- package/vault/wiki/concepts/think-in-code.md +0 -73
- package/vault/wiki/concepts/ts-execution-layer.md +0 -100
- package/vault/wiki/concepts/typescript-strict-mode.md +0 -37
- package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +0 -53
- package/vault/wiki/concepts/verification-drift-detection.md +0 -19
- package/vault/wiki/consensus/consensus-records.md +0 -58
- package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +0 -122
- package/vault/wiki/decisions/2026-05-07-replace-lean-ctx-with-context-mode.md +0 -59
- package/vault/wiki/decisions/adr-008.md +0 -40
- package/vault/wiki/decisions/adr-009.md +0 -46
- package/vault/wiki/decisions/adr-010.md +0 -55
- package/vault/wiki/decisions/adr-011.md +0 -165
- package/vault/wiki/decisions/adr-012.md +0 -102
- package/vault/wiki/decisions/adr-013.md +0 -59
- package/vault/wiki/decisions/adr-014.md +0 -73
- package/vault/wiki/decisions/adr-015.md +0 -81
- package/vault/wiki/decisions/adr-016.md +0 -91
- package/vault/wiki/decisions/adr-017.md +0 -79
- package/vault/wiki/decisions/adr-018.md +0 -100
- package/vault/wiki/decisions/adr-019.md +0 -75
- package/vault/wiki/decisions/adr-020.md +0 -106
- package/vault/wiki/decisions/adr-021.md +0 -86
- package/vault/wiki/decisions/adr-022.md +0 -113
- package/vault/wiki/decisions/adr-023.md +0 -113
- package/vault/wiki/decisions/adr-024.md +0 -73
- package/vault/wiki/decisions/adr-025.md +0 -130
- package/vault/wiki/decisions/adr-026.md +0 -56
- package/vault/wiki/decisions/adr-027.md +0 -94
- package/vault/wiki/decisions/colocate-wiki.md +0 -34
- package/vault/wiki/entities/Anders Hejlsberg.md +0 -29
- package/vault/wiki/entities/Anthropic.md +0 -17
- package/vault/wiki/entities/Augment Code.md +0 -49
- package/vault/wiki/entities/Bjarne Stroustrup.md +0 -26
- package/vault/wiki/entities/Bolt.new (StackBlitz).md +0 -39
- package/vault/wiki/entities/Boris Cherny.md +0 -11
- package/vault/wiki/entities/Claude Code.md +0 -19
- package/vault/wiki/entities/Dennis Ritchie.md +0 -26
- package/vault/wiki/entities/Emergent Labs.md +0 -32
- package/vault/wiki/entities/Google Cloud.md +0 -16
- package/vault/wiki/entities/Guido van Rossum.md +0 -28
- package/vault/wiki/entities/Ken Thompson.md +0 -28
- package/vault/wiki/entities/Lee et al.md +0 -16
- package/vault/wiki/entities/Linus Torvalds.md +0 -28
- package/vault/wiki/entities/Lovable (company).md +0 -40
- package/vault/wiki/entities/Martin Fowler.md +0 -16
- package/vault/wiki/entities/Meng et al.md +0 -16
- package/vault/wiki/entities/OpenAI.md +0 -16
- package/vault/wiki/entities/Rocket.new.md +0 -38
- package/vault/wiki/entities/VILA-Lab.md +0 -15
- package/vault/wiki/entities/autodev-codebase.md +0 -18
- package/vault/wiki/entities/ck-tool.md +0 -59
- package/vault/wiki/entities/codesearch.md +0 -18
- package/vault/wiki/entities/disler-indydevdan.md +0 -33
- package/vault/wiki/entities/gsd-get-shit-done.md +0 -56
- package/vault/wiki/entities/javascript-runtimes.md +0 -48
- package/vault/wiki/entities/jesse-vincent.md +0 -38
- package/vault/wiki/entities/lean-ctx.md +0 -32
- package/vault/wiki/entities/opendev.md +0 -41
- package/vault/wiki/entities/ops-codegraph-tool.md +0 -18
- package/vault/wiki/entities/pi-coding-agent.md +0 -53
- package/vault/wiki/entities/sentrux.md +0 -54
- package/vault/wiki/entities/vgrep-tool.md +0 -57
- package/vault/wiki/entities/vitest.md +0 -41
- package/vault/wiki/flows/harness-wiki-pipeline.md +0 -204
- package/vault/wiki/hot.md +0 -932
- package/vault/wiki/index.md +0 -437
- package/vault/wiki/log.md +0 -422
- package/vault/wiki/meta/dashboard.md +0 -30
- package/vault/wiki/meta/lint-report-2026-04-30.md +0 -86
- package/vault/wiki/meta/lint-report-2026-05-02.md +0 -251
- package/vault/wiki/meta/overview.canvas +0 -43
- package/vault/wiki/modules/adversarial-verification.md +0 -57
- package/vault/wiki/modules/automated-observability.md +0 -54
- package/vault/wiki/modules/bench.md +0 -20
- package/vault/wiki/modules/extensions.md +0 -23
- package/vault/wiki/modules/grounding-checkpoints.md +0 -62
- package/vault/wiki/modules/harness-implementation-plan.md +0 -345
- package/vault/wiki/modules/harness-wiki-skill-mapping.md +0 -135
- package/vault/wiki/modules/harness.md +0 -86
- package/vault/wiki/modules/persistent-memory.md +0 -85
- package/vault/wiki/modules/schema-orchestration.md +0 -68
- package/vault/wiki/modules/skills.md +0 -27
- package/vault/wiki/modules/spec-hardening.md +0 -58
- package/vault/wiki/modules/structured-planning.md +0 -53
- package/vault/wiki/modules/think-in-code-enforcement.md +0 -153
- package/vault/wiki/modules/wiki-query-interface.md +0 -64
- package/vault/wiki/overview.md +0 -51
- package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +0 -87
- package/vault/wiki/questions/Research-sentrux-dev.md +0 -123
- package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +0 -164
- package/vault/wiki/questions/Research: Augment Code Context Engine.md +0 -244
- package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +0 -112
- package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +0 -209
- package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +0 -99
- package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +0 -107
- package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +0 -72
- package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +0 -166
- package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +0 -188
- package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +0 -120
- package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +0 -236
- package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +0 -95
- package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +0 -165
- package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +0 -216
- package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +0 -91
- package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +0 -88
- package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +0 -81
- package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +0 -71
- package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +0 -80
- package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +0 -72
- package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +0 -92
- package/vault/wiki/questions/Research: executor.sh Harness Integration.md +0 -170
- package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +0 -97
- package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +0 -80
- package/vault/wiki/questions/Research: pi-vcc.md +0 -113
- package/vault/wiki/questions/Research: semantic code search tools.md +0 -69
- package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +0 -73
- package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +0 -111
- package/vault/wiki/questions/mvp-implementation-blueprint.md +0 -552
- package/vault/wiki/questions/research-agent-first-codebase-exploration.md +0 -199
- package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +0 -142
- package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +0 -100
- package/vault/wiki/questions/research-wozcode-token-reduction.md +0 -67
- package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +0 -95
- package/vault/wiki/questions/resolved-context-window-economics.md +0 -167
- package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +0 -126
- package/vault/wiki/questions/resolved-mcp-tool-preference.md +0 -112
- package/vault/wiki/questions/resolved-small-model-meta-agents.md +0 -107
- package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +0 -95
- package/vault/wiki/sources/Auggie Context MCP Server.md +0 -63
- package/vault/wiki/sources/Augment Code Codacy AI Giants.md +0 -61
- package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +0 -49
- package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +0 -55
- package/vault/wiki/sources/Augment Context Engine Official.md +0 -71
- package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +0 -74
- package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +0 -58
- package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +0 -75
- package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +0 -85
- package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +0 -58
- package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +0 -100
- package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +0 -75
- package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +0 -107
- package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +0 -70
- package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +0 -88
- package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +0 -57
- package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +0 -53
- package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +0 -65
- package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +0 -83
- package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +0 -70
- package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +0 -58
- package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +0 -101
- package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +0 -100
- package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +0 -53
- package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +0 -70
- package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +0 -71
- package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +0 -89
- package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +0 -155
- package/vault/wiki/sources/Source: browser-harness CDP Harness.md +0 -126
- package/vault/wiki/sources/agent-drift-academic-paper.md +0 -79
- package/vault/wiki/sources/aider-repomap-tree-sitter.md +0 -42
- package/vault/wiki/sources/anthropic-compaction-api.md +0 -58
- package/vault/wiki/sources/anthropic-effective-harnesses.md +0 -42
- package/vault/wiki/sources/anthropic-prompt-best-practices.md +0 -100
- package/vault/wiki/sources/anthropic2026-harness-design.md +0 -63
- package/vault/wiki/sources/barrel-files-tkdodo.md +0 -38
- package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +0 -57
- package/vault/wiki/sources/bockeler2026-harness-engineering.md +0 -69
- package/vault/wiki/sources/cast-code-chunking-paper.md +0 -50
- package/vault/wiki/sources/ck-semantic-search.md +0 -78
- package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +0 -71
- package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +0 -50
- package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +0 -64
- package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +0 -70
- package/vault/wiki/sources/claude-context-editing-docs.md +0 -13
- package/vault/wiki/sources/cloudflare-codemode.md +0 -63
- package/vault/wiki/sources/code-chunk-library-supermemory.md +0 -63
- package/vault/wiki/sources/codeact-apple-2024.md +0 -62
- package/vault/wiki/sources/codex-dsc-rfc-8573.md +0 -41
- package/vault/wiki/sources/codex-open-source-agent-2026.md +0 -110
- package/vault/wiki/sources/coir-code-retrieval-benchmark.md +0 -51
- package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +0 -48
- package/vault/wiki/sources/context-folding-paper.md +0 -61
- package/vault/wiki/sources/context-mode-website.md +0 -63
- package/vault/wiki/sources/cursor-agent-best-practices-2026.md +0 -62
- package/vault/wiki/sources/cursor-fork-29b-2025.md +0 -50
- package/vault/wiki/sources/cursor-harness-april-2026.md +0 -76
- package/vault/wiki/sources/cursor-instant-apply-2024.md +0 -45
- package/vault/wiki/sources/cursor-shadow-workspace-2024.md +0 -52
- package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +0 -53
- package/vault/wiki/sources/cursor-vs-antigravity-2026.md +0 -51
- package/vault/wiki/sources/disler-pi-vs-claude-code.md +0 -69
- package/vault/wiki/sources/distill-deterministic-context-compression.md +0 -53
- package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +0 -48
- package/vault/wiki/sources/executor-rhyssullivan.md +0 -122
- package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +0 -125
- package/vault/wiki/sources/fan2025-imad.md +0 -60
- package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +0 -63
- package/vault/wiki/sources/gemini-3-prompting-guide.md +0 -78
- package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +0 -50
- package/vault/wiki/sources/gh-sub-issue-extension.md +0 -72
- package/vault/wiki/sources/github-fork-issues-discussion.md +0 -44
- package/vault/wiki/sources/github-issue-dependencies-docs.md +0 -49
- package/vault/wiki/sources/github-sub-issues-docs.md +0 -51
- package/vault/wiki/sources/gitingest.md +0 -91
- package/vault/wiki/sources/gitreverse.md +0 -63
- package/vault/wiki/sources/google-antigravity-official-blog.md +0 -47
- package/vault/wiki/sources/google-antigravity-wikipedia.md +0 -53
- package/vault/wiki/sources/gsd-codecentric-deep-dive.md +0 -57
- package/vault/wiki/sources/gsd-github-repo.md +0 -51
- package/vault/wiki/sources/gsd-hn-discussion.md +0 -59
- package/vault/wiki/sources/guido-python-design-philosophy.md +0 -56
- package/vault/wiki/sources/hejlsberg-7-learnings.md +0 -48
- package/vault/wiki/sources/ironclaw-drift-monitor.md +0 -80
- package/vault/wiki/sources/langsight-loop-detection.md +0 -80
- package/vault/wiki/sources/leanctx-website.md +0 -69
- package/vault/wiki/sources/lee2026-meta-harness.md +0 -59
- package/vault/wiki/sources/linux-kernel-coding-workflow.md +0 -50
- package/vault/wiki/sources/lou2026-autoharness.md +0 -53
- package/vault/wiki/sources/martin-fowler-harness-engineering.md +0 -73
- package/vault/wiki/sources/mcp-architecture-docs.md +0 -13
- package/vault/wiki/sources/meng2026-agent-harness-survey.md +0 -79
- package/vault/wiki/sources/mindstudio-four-agent-types.md +0 -68
- package/vault/wiki/sources/ms-chat-history-management.md +0 -13
- package/vault/wiki/sources/openai-prompt-guidance.md +0 -104
- package/vault/wiki/sources/openclaw-session-pruning.md +0 -13
- package/vault/wiki/sources/opencode-dcp.md +0 -13
- package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +0 -79
- package/vault/wiki/sources/openhands-platform.md +0 -39
- package/vault/wiki/sources/oss-guide-codebase-exploration.md +0 -53
- package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +0 -102
- package/vault/wiki/sources/pi-context-prune-github-repo.md +0 -38
- package/vault/wiki/sources/pi-mono-compaction-docs.md +0 -38
- package/vault/wiki/sources/pi-omni-compact-github-repo.md +0 -50
- package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +0 -45
- package/vault/wiki/sources/pi-vcc-github-repo.md +0 -69
- package/vault/wiki/sources/pi-vscode-marketplace.md +0 -41
- package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +0 -39
- package/vault/wiki/sources/py-tree-sitter.md +0 -13
- package/vault/wiki/sources/sentrux-dev-landing.md +0 -40
- package/vault/wiki/sources/sentrux-docs-pro-architecture.md +0 -75
- package/vault/wiki/sources/sentrux-docs-quality-signal.md +0 -46
- package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +0 -57
- package/vault/wiki/sources/sentrux-docs-rules-engine.md +0 -58
- package/vault/wiki/sources/sentrux-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-release-blog.md +0 -54
- package/vault/wiki/sources/superpowers-termdock-analysis.md +0 -45
- package/vault/wiki/sources/swe-agent-aci.md +0 -42
- package/vault/wiki/sources/swe-bench.md +0 -45
- package/vault/wiki/sources/swe-pruner-context-pruning.md +0 -13
- package/vault/wiki/sources/think-in-code-blog.md +0 -48
- package/vault/wiki/sources/tree-sitter-docs.md +0 -13
- package/vault/wiki/sources/ts-best-practices-2025-devto.md +0 -42
- package/vault/wiki/sources/ts-folder-structure-mingyang.md +0 -58
- package/vault/wiki/sources/ts-monorepo-koerselman.md +0 -44
- package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +0 -52
- package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +0 -42
- package/vault/wiki/sources/ts-strict-mode-rishikc.md +0 -43
- package/vault/wiki/sources/unix-philosophy.md +0 -48
- package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +0 -39
- package/vault/wiki/sources/vectara-guardian-agents.md +0 -79
- package/vault/wiki/sources/vgrep-semantic-search.md +0 -76
- package/vault/wiki/sources/vitest-official.md +0 -41
- package/vault/wiki/sources/vscode-pi-community-extension.md +0 -40
- package/vault/wiki/sources/wozcode.md +0 -79
|
@@ -0,0 +1,1204 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: graphify
|
|
3
|
+
description: any input (code, docs, papers, images, video) → knowledge graph → clustered communities → HTML + JSON + GRAPH_REPORT.md
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# /graphify
|
|
7
|
+
|
|
8
|
+
Deprecated mirror: canonical source is `.pi/skills/graphify/SKILL.md`.
|
|
9
|
+
Keep this copy aligned with the canonical version until consumers migrate.
|
|
10
|
+
|
|
11
|
+
Turn any folder of files into a navigable knowledge graph with community detection, an honest audit trail, and three outputs: interactive HTML, GraphRAG-ready JSON, and a plain-language GRAPH_REPORT.md.
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
/graphify # full pipeline on current directory → Obsidian vault
|
|
17
|
+
/graphify <path> # full pipeline on specific path
|
|
18
|
+
/graphify <path> --mode deep # thorough extraction, richer INFERRED edges
|
|
19
|
+
/graphify <path> --update # incremental - re-extract only new/changed files
|
|
20
|
+
/graphify <path> --cluster-only # rerun clustering on existing graph
|
|
21
|
+
/graphify <path> --no-viz # skip visualization, just report + JSON
|
|
22
|
+
/graphify <path> --html # (HTML is generated by default - this flag is a no-op)
|
|
23
|
+
/graphify <path> --svg # also export graph.svg (embeds in Notion, GitHub)
|
|
24
|
+
/graphify <path> --graphml # export graph.graphml (Gephi, yEd)
|
|
25
|
+
/graphify <path> --neo4j # generate graphify-out/cypher.txt for Neo4j
|
|
26
|
+
/graphify <path> --neo4j-push bolt://localhost:7687 # push directly to Neo4j
|
|
27
|
+
/graphify <path> --mcp # start MCP stdio server for agent access
|
|
28
|
+
/graphify <path> --watch # watch folder, auto-rebuild on code changes (no LLM needed)
|
|
29
|
+
/graphify add <url> # fetch URL, save to ./raw, update graph
|
|
30
|
+
/graphify add <url> --author "Name" # tag who wrote it
|
|
31
|
+
/graphify add <url> --contributor "Name" # tag who added it to the corpus
|
|
32
|
+
/graphify query "<question>" # BFS traversal - broad context
|
|
33
|
+
/graphify query "<question>" --dfs # DFS - trace a specific path
|
|
34
|
+
/graphify query "<question>" --budget 1500 # cap answer at N tokens
|
|
35
|
+
/graphify path "AuthModule" "Database" # shortest path between two concepts
|
|
36
|
+
/graphify explain "SwinTransformer" # plain-language explanation of a node
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## What graphify is for
|
|
40
|
+
|
|
41
|
+
graphify is built around Andrej Karpathy's /raw folder workflow: drop anything into a folder - papers, tweets, screenshots, code, notes - and get a structured knowledge graph that shows you what you didn't know was connected.
|
|
42
|
+
|
|
43
|
+
Three things it does that your AI assistant alone cannot:
|
|
44
|
+
1. **Persistent graph** - relationships are stored in `graphify-out/graph.json` and survive across sessions. Ask questions weeks later without re-reading everything.
|
|
45
|
+
2. **Honest audit trail** - every edge is tagged EXTRACTED, INFERRED, or AMBIGUOUS. You know what was found vs invented.
|
|
46
|
+
3. **Cross-document surprise** - community detection finds connections between concepts in different files that you would never think to ask about directly.
|
|
47
|
+
|
|
48
|
+
Use it for:
|
|
49
|
+
- A codebase you're new to (understand architecture before touching anything)
|
|
50
|
+
- A reading list (papers + tweets + notes → one navigable graph)
|
|
51
|
+
- A research corpus (citation graph + concept graph in one)
|
|
52
|
+
- Your personal /raw folder (drop everything in, let it grow, query it)
|
|
53
|
+
|
|
54
|
+
## What You Must Do When Invoked
|
|
55
|
+
|
|
56
|
+
If no path was given, use `.` (current directory). Do not ask the user for a path.
|
|
57
|
+
|
|
58
|
+
Follow these steps in order. Do not skip steps.
|
|
59
|
+
|
|
60
|
+
### Step 1 - Ensure graphify is installed
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Install via uv tool if not already available
|
|
64
|
+
command -v graphify >/dev/null 2>&1 || uv tool install graphifyy
|
|
65
|
+
mkdir -p graphify-out
|
|
66
|
+
# Get the Python interpreter uv uses for the tool
|
|
67
|
+
PYTHON=$(uv tool run graphify -- -c "import sys; print(sys.executable)" 2>/dev/null || echo "python3")
|
|
68
|
+
"$PYTHON" -c "import graphify" 2>/dev/null || { uv tool install graphifyy && "$PYTHON" -c "import graphify"; }
|
|
69
|
+
# Write interpreter path for all subsequent steps
|
|
70
|
+
"$PYTHON" -c "import sys; open('graphify-out/.graphify_python', 'w').write(sys.executable)"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
If the import succeeds, print nothing and move straight to Step 2.
|
|
74
|
+
|
|
75
|
+
**In every subsequent bash block, replace `python3` with `$(cat .graphify_python)` to use the correct interpreter.**
|
|
76
|
+
|
|
77
|
+
### Step 2 - Detect files
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
$(cat .graphify_python) -c "
|
|
81
|
+
import json
|
|
82
|
+
from graphify.detect import detect
|
|
83
|
+
from pathlib import Path
|
|
84
|
+
result = detect(Path('INPUT_PATH'))
|
|
85
|
+
print(json.dumps(result))
|
|
86
|
+
" > .graphify_detect.json
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Replace INPUT_PATH with the actual path the user provided. Do NOT cat or print the JSON - read it silently and present a clean summary instead:
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
Corpus: X files · ~Y words
|
|
93
|
+
code: N files (.py .ts .go ...)
|
|
94
|
+
docs: N files (.md .txt ...)
|
|
95
|
+
papers: N files (.pdf ...)
|
|
96
|
+
images: N files
|
|
97
|
+
video: N files (.mp4 .mp3 ...)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Omit any category with 0 files from the summary.
|
|
101
|
+
|
|
102
|
+
Then act on it:
|
|
103
|
+
- If `total_files` is 0: stop with "No supported files found in [path]."
|
|
104
|
+
- If `skipped_sensitive` is non-empty: mention file count skipped, not the file names.
|
|
105
|
+
- If `total_words` > 2,000,000 OR `total_files` > 200: show the warning and the top 5 subdirectories by file count, then ask which subfolder to run on. Wait for the user's answer before proceeding.
|
|
106
|
+
- Otherwise: proceed directly to Step 2.5 if video files were detected, or Step 3 if not.
|
|
107
|
+
|
|
108
|
+
### Step 2.5 - Transcribe video / audio files (only if video files detected)
|
|
109
|
+
|
|
110
|
+
Skip this step entirely if `detect` returned zero `video` files.
|
|
111
|
+
|
|
112
|
+
Video and audio files cannot be read directly. Transcribe them to text first, then treat the transcripts as doc files in Step 3.
|
|
113
|
+
|
|
114
|
+
**Strategy:** Read the top entity labels from the detect output (or, if a previous run left a `.graphify_analysis.json`, its god nodes). You are already a language model - write a one-sentence domain hint yourself from those labels. Then pass it to Whisper as the initial prompt. No separate API call needed.
|
|
115
|
+
|
|
116
|
+
**However**, if the corpus has *only* video files and no other docs/code, use the generic fallback prompt: `"Use proper punctuation and paragraph breaks."`
|
|
117
|
+
|
|
118
|
+
**Step 1 - Write the Whisper prompt yourself.**
|
|
119
|
+
|
|
120
|
+
Read the top god node labels from detect output or analysis, then compose a short domain hint sentence, for example:
|
|
121
|
+
|
|
122
|
+
- Labels: `transformer, attention, encoder, decoder` -> `"Machine learning research on transformer architectures and attention mechanisms. Use proper punctuation and paragraph breaks."`
|
|
123
|
+
- Labels: `kubernetes, deployment, pod, helm` -> `"DevOps discussion about Kubernetes deployments and Helm charts. Use proper punctuation and paragraph breaks."`
|
|
124
|
+
|
|
125
|
+
Set it as `GRAPHIFY_WHISPER_PROMPT` in the environment before running the transcription command.
|
|
126
|
+
|
|
127
|
+
**Step 2 - Transcribe:**
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
$(cat .graphify_python) -c "
|
|
131
|
+
import json, os
|
|
132
|
+
from pathlib import Path
|
|
133
|
+
from graphify.transcribe import transcribe_all
|
|
134
|
+
|
|
135
|
+
detect = json.loads(Path('.graphify_detect.json').read_text())
|
|
136
|
+
video_files = detect.get('files', {}).get('video', [])
|
|
137
|
+
prompt = os.environ.get('GRAPHIFY_WHISPER_PROMPT', 'Use proper punctuation and paragraph breaks.')
|
|
138
|
+
|
|
139
|
+
transcript_paths = transcribe_all(video_files, initial_prompt=prompt)
|
|
140
|
+
print(json.dumps(transcript_paths))
|
|
141
|
+
" > graphify-out/.graphify_transcripts.json
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
After transcription:
|
|
145
|
+
- Read the transcript paths from `graphify-out/.graphify_transcripts.json`
|
|
146
|
+
- Add them to the docs list before dispatching semantic subagents in Step 3B
|
|
147
|
+
- Print how many transcripts were created: `Transcribed N video file(s) -> treating as docs`
|
|
148
|
+
- If transcription fails for a file, print a warning and continue with the rest
|
|
149
|
+
|
|
150
|
+
**Whisper model:** Default is `base`. If the user passed `--whisper-model <name>`, set `GRAPHIFY_WHISPER_MODEL=<name>` in the environment before running the command above.
|
|
151
|
+
|
|
152
|
+
### Step 3 - Extract entities and relationships
|
|
153
|
+
|
|
154
|
+
**Before starting:** note whether `--mode deep` was given. You must pass `DEEP_MODE=true` to every subagent in Step B2 if it was. Track this from the original invocation - do not lose it.
|
|
155
|
+
|
|
156
|
+
This step has two parts: **structural extraction** (deterministic, free) and **semantic extraction** (your AI model, costs tokens).
|
|
157
|
+
|
|
158
|
+
**Run Part A (AST) and Part B (semantic) in parallel. Dispatch all semantic subagents AND start AST extraction in the same message. Both can run simultaneously since they operate on different file types. Merge results in Part C as before.**
|
|
159
|
+
|
|
160
|
+
Note: Parallelizing AST + semantic saves 5-15s on large corpora. AST is deterministic and fast; start it while subagents are processing docs/papers.
|
|
161
|
+
|
|
162
|
+
#### Part A - Structural extraction for code files
|
|
163
|
+
|
|
164
|
+
For any code files detected, run AST extraction in parallel with Part B subagents:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
$(cat .graphify_python) -c "
|
|
168
|
+
import sys, json
|
|
169
|
+
from graphify.extract import collect_files, extract
|
|
170
|
+
from pathlib import Path
|
|
171
|
+
import json
|
|
172
|
+
|
|
173
|
+
code_files = []
|
|
174
|
+
detect = json.loads(Path('.graphify_detect.json').read_text())
|
|
175
|
+
for f in detect.get('files', {}).get('code', []):
|
|
176
|
+
code_files.extend(collect_files(Path(f)) if Path(f).is_dir() else [Path(f)])
|
|
177
|
+
|
|
178
|
+
if code_files:
|
|
179
|
+
result = extract(code_files)
|
|
180
|
+
Path('.graphify_ast.json').write_text(json.dumps(result, indent=2))
|
|
181
|
+
print(f'AST: {len(result[\"nodes\"])} nodes, {len(result[\"edges\"])} edges')
|
|
182
|
+
else:
|
|
183
|
+
Path('.graphify_ast.json').write_text(json.dumps({'nodes':[],'edges':[],'input_tokens':0,'output_tokens':0}))
|
|
184
|
+
print('No code files - skipping AST extraction')
|
|
185
|
+
"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
#### Part B - Semantic extraction (parallel subagents)
|
|
189
|
+
|
|
190
|
+
**Fast path:** If detection found zero docs, papers, and images (code-only corpus), skip Part B entirely and go straight to Part C. AST handles code - there is nothing for semantic subagents to do.
|
|
191
|
+
|
|
192
|
+
> **OpenClaw platform:** Multi-agent support is still early on OpenClaw. Extraction runs sequentially — you read and extract each file yourself. This is slower than parallel platforms but fully reliable.
|
|
193
|
+
|
|
194
|
+
Print: `"Semantic extraction: N files (sequential — OpenClaw)"`
|
|
195
|
+
|
|
196
|
+
**Step B0 - Check extraction cache first**
|
|
197
|
+
|
|
198
|
+
Before dispatching any subagents, check which files already have cached extraction results:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
$(cat .graphify_python) -c "
|
|
202
|
+
import json
|
|
203
|
+
from graphify.cache import check_semantic_cache
|
|
204
|
+
from pathlib import Path
|
|
205
|
+
|
|
206
|
+
detect = json.loads(Path('.graphify_detect.json').read_text())
|
|
207
|
+
all_files = [f for files in detect['files'].values() for f in files]
|
|
208
|
+
|
|
209
|
+
cached_nodes, cached_edges, cached_hyperedges, uncached = check_semantic_cache(all_files)
|
|
210
|
+
|
|
211
|
+
if cached_nodes or cached_edges or cached_hyperedges:
|
|
212
|
+
Path('.graphify_cached.json').write_text(json.dumps({'nodes': cached_nodes, 'edges': cached_edges, 'hyperedges': cached_hyperedges}))
|
|
213
|
+
Path('.graphify_uncached.txt').write_text('\n'.join(uncached))
|
|
214
|
+
print(f'Cache: {len(all_files)-len(uncached)} files hit, {len(uncached)} files need extraction')
|
|
215
|
+
"
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Only dispatch subagents for files listed in `.graphify_uncached.txt`. If all files are cached, skip to Part C directly.
|
|
219
|
+
|
|
220
|
+
**Step B1 - Split into chunks**
|
|
221
|
+
|
|
222
|
+
Load files from `.graphify_uncached.txt`. Split into chunks of 20-25 files each. Each image gets its own chunk (vision needs separate context). When splitting, group files from the same directory together so related artifacts land in the same chunk and cross-file relationships are more likely to be extracted.
|
|
223
|
+
|
|
224
|
+
**Step B2 - Sequential extraction (OpenClaw)**
|
|
225
|
+
|
|
226
|
+
Process each file one at a time. For each file:
|
|
227
|
+
|
|
228
|
+
1. Read the file contents
|
|
229
|
+
2. Extract nodes, edges, and hyperedges applying the same rules:
|
|
230
|
+
- EXTRACTED: relationship explicit in source (import, call, citation)
|
|
231
|
+
- INFERRED: reasonable inference (shared structure, implied dependency)
|
|
232
|
+
- AMBIGUOUS: uncertain — flag it, do not omit
|
|
233
|
+
- Code files: semantic edges AST cannot find. Do not re-extract imports.
|
|
234
|
+
- Doc/paper files: named concepts, entities, citations. Store rationale (WHY decisions were made) as a `rationale` attribute on the relevant node, not as a separate node. Use `file_type:"rationale"` for concept-like nodes (ideas, principles, mechanisms). Do NOT invent file_types like `concept`. When adding `calls` edges: source is caller, target is callee.
|
|
235
|
+
- Image files: use vision — understand what the image IS, not just OCR
|
|
236
|
+
- DEEP_MODE (if --mode deep): be aggressive with INFERRED edges
|
|
237
|
+
- Semantic similarity: if two concepts solve the same problem without a structural link, add `semantically_similar_to` INFERRED edge (confidence 0.6-0.95). Non-obvious cross-file links only.
|
|
238
|
+
- Hyperedges: if 3+ nodes share a concept/flow not captured by pairwise edges, add a hyperedge. Max 3 per file.
|
|
239
|
+
- confidence_score REQUIRED on every edge: EXTRACTED=1.0, INFERRED=0.6-0.9 (reason individually), AMBIGUOUS=0.1-0.3
|
|
240
|
+
3. Accumulate results across all files
|
|
241
|
+
|
|
242
|
+
Schema for each file's output:
|
|
243
|
+
{"nodes":[{"id":"filestem_entityname","label":"Human Readable Name","file_type":"code|document|paper|image|rationale","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to|rationale_for","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[{"id":"snake_case_id","label":"Human Readable Label","nodes":["node_id1","node_id2","node_id3"],"relation":"participate_in|implement|form","confidence":"EXTRACTED|INFERRED","confidence_score":0.75,"source_file":"relative/path"}],"input_tokens":0,"output_tokens":0}
|
|
244
|
+
|
|
245
|
+
After processing all files, write the accumulated result to `.graphify_semantic_new.json`.
|
|
246
|
+
|
|
247
|
+
**Step B3 - Cache and merge**
|
|
248
|
+
|
|
249
|
+
For the accumulated result:
|
|
250
|
+
|
|
251
|
+
If more than half the chunks failed, stop and tell the user.
|
|
252
|
+
|
|
253
|
+
Merge all chunk files into `.graphify_semantic_new.json`. **After each Agent call completes, read the real token counts from the Agent tool result's `usage` field and write them back into the chunk JSON before merging** — the chunk JSON itself always has placeholder zeros. Then run:
|
|
254
|
+
```bash
|
|
255
|
+
$(cat .graphify_python) -c "
|
|
256
|
+
import json, glob
|
|
257
|
+
from pathlib import Path
|
|
258
|
+
|
|
259
|
+
chunks = sorted(glob.glob('graphify-out/.graphify_chunk_*.json'))
|
|
260
|
+
all_nodes, all_edges, all_hyperedges = [], [], []
|
|
261
|
+
total_in, total_out = 0, 0
|
|
262
|
+
for c in chunks:
|
|
263
|
+
d = json.loads(Path(c).read_text())
|
|
264
|
+
all_nodes += d.get('nodes', [])
|
|
265
|
+
all_edges += d.get('edges', [])
|
|
266
|
+
all_hyperedges += d.get('hyperedges', [])
|
|
267
|
+
total_in += d.get('input_tokens', 0)
|
|
268
|
+
total_out += d.get('output_tokens', 0)
|
|
269
|
+
Path('.graphify_semantic_new.json').write_text(json.dumps({
|
|
270
|
+
'nodes': all_nodes, 'edges': all_edges, 'hyperedges': all_hyperedges,
|
|
271
|
+
'input_tokens': total_in, 'output_tokens': total_out,
|
|
272
|
+
}, indent=2))
|
|
273
|
+
print(f'Merged {len(chunks)} chunks: {total_in:,} in / {total_out:,} out tokens')
|
|
274
|
+
"
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Save new results to cache:
|
|
278
|
+
```bash
|
|
279
|
+
$(cat .graphify_python) -c "
|
|
280
|
+
import json
|
|
281
|
+
from graphify.cache import save_semantic_cache
|
|
282
|
+
from pathlib import Path
|
|
283
|
+
|
|
284
|
+
new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
|
|
285
|
+
saved = save_semantic_cache(new.get('nodes', []), new.get('edges', []), new.get('hyperedges', []))
|
|
286
|
+
print(f'Cached {saved} files')
|
|
287
|
+
"
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Merge cached + new results into `.graphify_semantic.json`:
|
|
291
|
+
```bash
|
|
292
|
+
$(cat .graphify_python) -c "
|
|
293
|
+
import json
|
|
294
|
+
from pathlib import Path
|
|
295
|
+
|
|
296
|
+
cached = json.loads(Path('.graphify_cached.json').read_text()) if Path('.graphify_cached.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
|
|
297
|
+
new = json.loads(Path('.graphify_semantic_new.json').read_text()) if Path('.graphify_semantic_new.json').exists() else {'nodes':[],'edges':[],'hyperedges':[]}
|
|
298
|
+
|
|
299
|
+
all_nodes = cached['nodes'] + new.get('nodes', [])
|
|
300
|
+
all_edges = cached['edges'] + new.get('edges', [])
|
|
301
|
+
all_hyperedges = cached.get('hyperedges', []) + new.get('hyperedges', [])
|
|
302
|
+
seen = set()
|
|
303
|
+
deduped = []
|
|
304
|
+
for n in all_nodes:
|
|
305
|
+
if n['id'] not in seen:
|
|
306
|
+
seen.add(n['id'])
|
|
307
|
+
deduped.append(n)
|
|
308
|
+
|
|
309
|
+
merged = {
|
|
310
|
+
'nodes': deduped,
|
|
311
|
+
'edges': all_edges,
|
|
312
|
+
'hyperedges': all_hyperedges,
|
|
313
|
+
'input_tokens': new.get('input_tokens', 0),
|
|
314
|
+
'output_tokens': new.get('output_tokens', 0),
|
|
315
|
+
}
|
|
316
|
+
Path('.graphify_semantic.json').write_text(json.dumps(merged, indent=2))
|
|
317
|
+
print(f'Extraction complete - {len(deduped)} nodes, {len(all_edges)} edges ({len(cached[\"nodes\"])} from cache, {len(new.get(\"nodes\",[]))} new)')
|
|
318
|
+
"
|
|
319
|
+
```
|
|
320
|
+
Clean up temp files: `rm -f .graphify_cached.json .graphify_uncached.txt .graphify_semantic_new.json`
|
|
321
|
+
|
|
322
|
+
#### Part C - Merge AST + semantic into final extraction
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
$(cat .graphify_python) -c "
|
|
326
|
+
import sys, json
|
|
327
|
+
from pathlib import Path
|
|
328
|
+
|
|
329
|
+
ast = json.loads(Path('.graphify_ast.json').read_text())
|
|
330
|
+
sem = json.loads(Path('.graphify_semantic.json').read_text())
|
|
331
|
+
|
|
332
|
+
# Merge: AST nodes first, semantic nodes deduplicated by id
|
|
333
|
+
seen = {n['id'] for n in ast['nodes']}
|
|
334
|
+
merged_nodes = list(ast['nodes'])
|
|
335
|
+
for n in sem['nodes']:
|
|
336
|
+
if n['id'] not in seen:
|
|
337
|
+
merged_nodes.append(n)
|
|
338
|
+
seen.add(n['id'])
|
|
339
|
+
|
|
340
|
+
merged_edges = ast['edges'] + sem['edges']
|
|
341
|
+
merged_hyperedges = sem.get('hyperedges', [])
|
|
342
|
+
merged = {
|
|
343
|
+
'nodes': merged_nodes,
|
|
344
|
+
'edges': merged_edges,
|
|
345
|
+
'hyperedges': merged_hyperedges,
|
|
346
|
+
'input_tokens': sem.get('input_tokens', 0),
|
|
347
|
+
'output_tokens': sem.get('output_tokens', 0),
|
|
348
|
+
}
|
|
349
|
+
Path('.graphify_extract.json').write_text(json.dumps(merged, indent=2))
|
|
350
|
+
total = len(merged_nodes)
|
|
351
|
+
edges = len(merged_edges)
|
|
352
|
+
print(f'Merged: {total} nodes, {edges} edges ({len(ast[\"nodes\"])} AST + {len(sem[\"nodes\"])} semantic)')
|
|
353
|
+
"
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### Step 4 - Build graph, cluster, analyze, generate outputs
|
|
357
|
+
|
|
358
|
+
```bash
|
|
359
|
+
mkdir -p graphify-out
|
|
360
|
+
$(cat .graphify_python) -c "
|
|
361
|
+
import sys, json
|
|
362
|
+
from graphify.build import build_from_json
|
|
363
|
+
from graphify.cluster import cluster, score_all
|
|
364
|
+
from graphify.analyze import god_nodes, surprising_connections, suggest_questions
|
|
365
|
+
from graphify.report import generate
|
|
366
|
+
from graphify.export import to_json
|
|
367
|
+
from pathlib import Path
|
|
368
|
+
|
|
369
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
370
|
+
detection = json.loads(Path('.graphify_detect.json').read_text())
|
|
371
|
+
|
|
372
|
+
G = build_from_json(extraction)
|
|
373
|
+
communities = cluster(G)
|
|
374
|
+
cohesion = score_all(G, communities)
|
|
375
|
+
tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)}
|
|
376
|
+
gods = god_nodes(G)
|
|
377
|
+
surprises = surprising_connections(G, communities)
|
|
378
|
+
labels = {cid: 'Community ' + str(cid) for cid in communities}
|
|
379
|
+
# Placeholder questions - regenerated with real labels in Step 5
|
|
380
|
+
questions = suggest_questions(G, communities, labels)
|
|
381
|
+
|
|
382
|
+
report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, 'INPUT_PATH', suggested_questions=questions)
|
|
383
|
+
Path('graphify-out/GRAPH_REPORT.md').write_text(report)
|
|
384
|
+
to_json(G, communities, 'graphify-out/graph.json')
|
|
385
|
+
|
|
386
|
+
analysis = {
|
|
387
|
+
'communities': {str(k): v for k, v in communities.items()},
|
|
388
|
+
'cohesion': {str(k): v for k, v in cohesion.items()},
|
|
389
|
+
'gods': gods,
|
|
390
|
+
'surprises': surprises,
|
|
391
|
+
'questions': questions,
|
|
392
|
+
}
|
|
393
|
+
Path('graphify-out/.graphify_analysis.json').write_text(json.dumps(analysis, indent=2))
|
|
394
|
+
if G.number_of_nodes() == 0:
|
|
395
|
+
print('ERROR: Graph is empty - extraction produced no nodes.')
|
|
396
|
+
print('Possible causes: all files were skipped, binary-only corpus, or extraction failed.')
|
|
397
|
+
raise SystemExit(1)
|
|
398
|
+
print(f'Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, {len(communities)} communities')
|
|
399
|
+
"
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
If this step prints `ERROR: Graph is empty`, stop and tell the user what happened - do not proceed to labeling or visualization.
|
|
403
|
+
|
|
404
|
+
Replace INPUT_PATH with the actual path.
|
|
405
|
+
|
|
406
|
+
### Step 5 - Label communities
|
|
407
|
+
|
|
408
|
+
Read `graphify-out/.graphify_analysis.json`. For each community key, look at its node labels and write a 2-5 word plain-language name (e.g. "Attention Mechanism", "Training Pipeline", "Data Loading").
|
|
409
|
+
|
|
410
|
+
Then regenerate the report and save the labels for the visualizer:
|
|
411
|
+
|
|
412
|
+
```bash
|
|
413
|
+
$(cat .graphify_python) -c "
|
|
414
|
+
import sys, json
|
|
415
|
+
from graphify.build import build_from_json
|
|
416
|
+
from graphify.cluster import score_all
|
|
417
|
+
from graphify.analyze import god_nodes, surprising_connections, suggest_questions
|
|
418
|
+
from graphify.report import generate
|
|
419
|
+
from pathlib import Path
|
|
420
|
+
|
|
421
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
422
|
+
detection = json.loads(Path('.graphify_detect.json').read_text())
|
|
423
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
424
|
+
|
|
425
|
+
G = build_from_json(extraction)
|
|
426
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
427
|
+
cohesion = {int(k): v for k, v in analysis['cohesion'].items()}
|
|
428
|
+
tokens = {'input': extraction.get('input_tokens', 0), 'output': extraction.get('output_tokens', 0)}
|
|
429
|
+
|
|
430
|
+
# LABELS - replace these with the names you chose above
|
|
431
|
+
labels = LABELS_DICT
|
|
432
|
+
|
|
433
|
+
# Regenerate questions with real community labels (labels affect question phrasing)
|
|
434
|
+
questions = suggest_questions(G, communities, labels)
|
|
435
|
+
|
|
436
|
+
report = generate(G, communities, cohesion, labels, analysis['gods'], analysis['surprises'], detection, tokens, 'INPUT_PATH', suggested_questions=questions)
|
|
437
|
+
Path('graphify-out/GRAPH_REPORT.md').write_text(report)
|
|
438
|
+
Path('graphify-out/.graphify_labels.json').write_text(json.dumps({str(k): v for k, v in labels.items()}))
|
|
439
|
+
print('Report updated with community labels')
|
|
440
|
+
"
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
Replace `LABELS_DICT` with the actual dict you constructed (e.g. `{0: "Attention Mechanism", 1: "Training Pipeline"}`).
|
|
444
|
+
Replace INPUT_PATH with the actual path.
|
|
445
|
+
|
|
446
|
+
### Step 6 - Generate Obsidian vault (opt-in) + HTML
|
|
447
|
+
|
|
448
|
+
**Generate HTML always** (unless `--no-viz`). **Obsidian vault only if `--obsidian` was explicitly given** — skip it otherwise, it generates one file per node.
|
|
449
|
+
|
|
450
|
+
If `--obsidian` was given:
|
|
451
|
+
|
|
452
|
+
```bash
|
|
453
|
+
$(cat .graphify_python) -c "
|
|
454
|
+
import sys, json
|
|
455
|
+
from graphify.build import build_from_json
|
|
456
|
+
from graphify.export import to_obsidian, to_canvas
|
|
457
|
+
from pathlib import Path
|
|
458
|
+
|
|
459
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
460
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
461
|
+
labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
|
|
462
|
+
|
|
463
|
+
G = build_from_json(extraction)
|
|
464
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
465
|
+
cohesion = {int(k): v for k, v in analysis['cohesion'].items()}
|
|
466
|
+
labels = {int(k): v for k, v in labels_raw.items()}
|
|
467
|
+
|
|
468
|
+
n = to_obsidian(G, communities, 'graphify-out/obsidian', community_labels=labels or None, cohesion=cohesion)
|
|
469
|
+
print(f'Obsidian vault: {n} notes in graphify-out/obsidian/')
|
|
470
|
+
|
|
471
|
+
to_canvas(G, communities, 'graphify-out/obsidian/graph.canvas', community_labels=labels or None)
|
|
472
|
+
print('Canvas: graphify-out/obsidian/graph.canvas - open in Obsidian for structured community layout')
|
|
473
|
+
print()
|
|
474
|
+
print('Open graphify-out/obsidian/ as a vault in Obsidian.')
|
|
475
|
+
print(' Graph view - nodes colored by community (set automatically)')
|
|
476
|
+
print(' graph.canvas - structured layout with communities as groups')
|
|
477
|
+
print(' _COMMUNITY_* - overview notes with cohesion scores and dataview queries')
|
|
478
|
+
"
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
Generate the HTML graph (always, unless `--no-viz`):
|
|
482
|
+
|
|
483
|
+
```bash
|
|
484
|
+
$(cat .graphify_python) -c "
|
|
485
|
+
import sys, json
|
|
486
|
+
from graphify.build import build_from_json
|
|
487
|
+
from graphify.export import to_html
|
|
488
|
+
from pathlib import Path
|
|
489
|
+
|
|
490
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
491
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
492
|
+
labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
|
|
493
|
+
|
|
494
|
+
G = build_from_json(extraction)
|
|
495
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
496
|
+
labels = {int(k): v for k, v in labels_raw.items()}
|
|
497
|
+
|
|
498
|
+
if G.number_of_nodes() > 5000:
|
|
499
|
+
print(f'Graph has {G.number_of_nodes()} nodes - too large for HTML viz. Use Obsidian vault instead.')
|
|
500
|
+
else:
|
|
501
|
+
to_html(G, communities, 'graphify-out/graph.html', community_labels=labels or None)
|
|
502
|
+
print('graph.html written - open in any browser, no server needed')
|
|
503
|
+
"
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
### Step 7 - Neo4j export (only if --neo4j or --neo4j-push flag)
|
|
507
|
+
|
|
508
|
+
**If `--neo4j`** - generate a Cypher file for manual import:
|
|
509
|
+
|
|
510
|
+
```bash
|
|
511
|
+
$(cat .graphify_python) -c "
|
|
512
|
+
import sys, json
|
|
513
|
+
from graphify.build import build_from_json
|
|
514
|
+
from graphify.export import to_cypher
|
|
515
|
+
from pathlib import Path
|
|
516
|
+
|
|
517
|
+
G = build_from_json(json.loads(Path('.graphify_extract.json').read_text()))
|
|
518
|
+
to_cypher(G, 'graphify-out/cypher.txt')
|
|
519
|
+
print('cypher.txt written - import with: cypher-shell < graphify-out/cypher.txt')
|
|
520
|
+
"
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
**If `--neo4j-push <uri>`** - push directly to a running Neo4j instance. Ask the user for credentials if not provided:
|
|
524
|
+
|
|
525
|
+
```bash
|
|
526
|
+
$(cat .graphify_python) -c "
|
|
527
|
+
import sys, json
|
|
528
|
+
from graphify.build import build_from_json
|
|
529
|
+
from graphify.cluster import cluster
|
|
530
|
+
from graphify.export import push_to_neo4j
|
|
531
|
+
from pathlib import Path
|
|
532
|
+
|
|
533
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
534
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
535
|
+
G = build_from_json(extraction)
|
|
536
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
537
|
+
|
|
538
|
+
result = push_to_neo4j(G, uri='NEO4J_URI', user='NEO4J_USER', password='NEO4J_PASSWORD', communities=communities)
|
|
539
|
+
print(f'Pushed to Neo4j: {result[\"nodes\"]} nodes, {result[\"edges\"]} edges')
|
|
540
|
+
"
|
|
541
|
+
```
|
|
542
|
+
|
|
543
|
+
Replace `NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD` with actual values. Default URI is `bolt://localhost:7687`, default user is `neo4j`. Uses MERGE - safe to re-run without creating duplicates.
|
|
544
|
+
|
|
545
|
+
### Step 7b - SVG export (only if --svg flag)
|
|
546
|
+
|
|
547
|
+
```bash
|
|
548
|
+
$(cat .graphify_python) -c "
|
|
549
|
+
import sys, json
|
|
550
|
+
from graphify.build import build_from_json
|
|
551
|
+
from graphify.export import to_svg
|
|
552
|
+
from pathlib import Path
|
|
553
|
+
|
|
554
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
555
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
556
|
+
labels_raw = json.loads(Path('graphify-out/.graphify_labels.json').read_text()) if Path('graphify-out/.graphify_labels.json').exists() else {}
|
|
557
|
+
|
|
558
|
+
G = build_from_json(extraction)
|
|
559
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
560
|
+
labels = {int(k): v for k, v in labels_raw.items()}
|
|
561
|
+
|
|
562
|
+
to_svg(G, communities, 'graphify-out/graph.svg', community_labels=labels or None)
|
|
563
|
+
print('graph.svg written - embeds in Obsidian, Notion, GitHub READMEs')
|
|
564
|
+
"
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
### Step 7c - GraphML export (only if --graphml flag)
|
|
568
|
+
|
|
569
|
+
```bash
|
|
570
|
+
$(cat .graphify_python) -c "
|
|
571
|
+
import json
|
|
572
|
+
from graphify.build import build_from_json
|
|
573
|
+
from graphify.export import to_graphml
|
|
574
|
+
from pathlib import Path
|
|
575
|
+
|
|
576
|
+
extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
577
|
+
analysis = json.loads(Path('graphify-out/.graphify_analysis.json').read_text())
|
|
578
|
+
|
|
579
|
+
G = build_from_json(extraction)
|
|
580
|
+
communities = {int(k): v for k, v in analysis['communities'].items()}
|
|
581
|
+
|
|
582
|
+
to_graphml(G, communities, 'graphify-out/graph.graphml')
|
|
583
|
+
print('graph.graphml written - open in Gephi, yEd, or any GraphML tool')
|
|
584
|
+
"
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
### Step 7d - MCP server (only if --mcp flag)
|
|
588
|
+
|
|
589
|
+
```bash
|
|
590
|
+
python3 -m graphify.serve graphify-out/graph.json
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
This starts a stdio MCP server that exposes tools: `query_graph`, `get_node`, `get_neighbors`, `get_community`, `god_nodes`, `graph_stats`, `shortest_path`. Add to Claude Desktop or any MCP-compatible agent orchestrator so other agents can query the graph live.
|
|
594
|
+
|
|
595
|
+
To configure in Claude Desktop, add to `claude_desktop_config.json`:
|
|
596
|
+
```json
|
|
597
|
+
{
|
|
598
|
+
"mcpServers": {
|
|
599
|
+
"graphify": {
|
|
600
|
+
"command": "python3",
|
|
601
|
+
"args": ["-m", "graphify.serve", "/absolute/path/to/graphify-out/graph.json"]
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
```
|
|
606
|
+
|
|
607
|
+
### Step 8 - Token reduction benchmark (only if total_words > 5000)
|
|
608
|
+
|
|
609
|
+
If `total_words` from `.graphify_detect.json` is greater than 5,000, run:
|
|
610
|
+
|
|
611
|
+
```bash
|
|
612
|
+
$(cat .graphify_python) -c "
|
|
613
|
+
import json
|
|
614
|
+
from graphify.benchmark import run_benchmark, print_benchmark
|
|
615
|
+
from pathlib import Path
|
|
616
|
+
|
|
617
|
+
detection = json.loads(Path('.graphify_detect.json').read_text())
|
|
618
|
+
result = run_benchmark('graphify-out/graph.json', corpus_words=detection['total_words'])
|
|
619
|
+
print_benchmark(result)
|
|
620
|
+
"
|
|
621
|
+
```
|
|
622
|
+
|
|
623
|
+
Print the output directly in chat. If `total_words <= 5000`, skip silently - the graph value is structural clarity, not token compression, for small corpora.
|
|
624
|
+
|
|
625
|
+
---
|
|
626
|
+
|
|
627
|
+
### Step 9 - Save manifest, update cost tracker, clean up, and report
|
|
628
|
+
|
|
629
|
+
```bash
|
|
630
|
+
$(cat .graphify_python) -c "
|
|
631
|
+
import json
|
|
632
|
+
from pathlib import Path
|
|
633
|
+
from datetime import datetime, timezone
|
|
634
|
+
from graphify.detect import save_manifest
|
|
635
|
+
|
|
636
|
+
# Save manifest for --update
|
|
637
|
+
detect = json.loads(Path('.graphify_detect.json').read_text())
|
|
638
|
+
save_manifest(detect['files'])
|
|
639
|
+
|
|
640
|
+
# Update cumulative cost tracker
|
|
641
|
+
extract = json.loads(Path('.graphify_extract.json').read_text())
|
|
642
|
+
input_tok = extract.get('input_tokens', 0)
|
|
643
|
+
output_tok = extract.get('output_tokens', 0)
|
|
644
|
+
|
|
645
|
+
cost_path = Path('graphify-out/cost.json')
|
|
646
|
+
if cost_path.exists():
|
|
647
|
+
cost = json.loads(cost_path.read_text())
|
|
648
|
+
else:
|
|
649
|
+
cost = {'runs': [], 'total_input_tokens': 0, 'total_output_tokens': 0}
|
|
650
|
+
|
|
651
|
+
cost['runs'].append({
|
|
652
|
+
'date': datetime.now(timezone.utc).isoformat(),
|
|
653
|
+
'input_tokens': input_tok,
|
|
654
|
+
'output_tokens': output_tok,
|
|
655
|
+
'files': detect.get('total_files', 0),
|
|
656
|
+
})
|
|
657
|
+
cost['total_input_tokens'] += input_tok
|
|
658
|
+
cost['total_output_tokens'] += output_tok
|
|
659
|
+
cost_path.write_text(json.dumps(cost, indent=2))
|
|
660
|
+
|
|
661
|
+
print(f'This run: {input_tok:,} input tokens, {output_tok:,} output tokens')
|
|
662
|
+
print(f'All time: {cost[\"total_input_tokens\"]:,} input, {cost[\"total_output_tokens\"]:,} output ({len(cost[\"runs\"])} runs)')
|
|
663
|
+
"
|
|
664
|
+
rm -f .graphify_detect.json .graphify_extract.json .graphify_ast.json .graphify_semantic.json .graphify_chunk_*.json
|
|
665
|
+
rm -f graphify-out/.needs_update 2>/dev/null || true
|
|
666
|
+
```
|
|
667
|
+
|
|
668
|
+
Tell the user (omit the obsidian line unless --obsidian was given):
|
|
669
|
+
```
|
|
670
|
+
Graph complete. Outputs in PATH_TO_DIR/graphify-out/
|
|
671
|
+
|
|
672
|
+
graph.html - interactive graph, open in browser
|
|
673
|
+
GRAPH_REPORT.md - audit report
|
|
674
|
+
graph.json - raw graph data
|
|
675
|
+
obsidian/ - Obsidian vault (only if --obsidian was given)
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
If graphify saved you time, consider supporting it: https://github.com/sponsors/safishamsi
|
|
679
|
+
|
|
680
|
+
Replace PATH_TO_DIR with the actual absolute path of the directory that was processed.
|
|
681
|
+
|
|
682
|
+
Then paste these sections from GRAPH_REPORT.md directly into the chat:
|
|
683
|
+
- God Nodes
|
|
684
|
+
- Surprising Connections
|
|
685
|
+
- Suggested Questions
|
|
686
|
+
|
|
687
|
+
Do NOT paste the full report - just those three sections. Keep it concise.
|
|
688
|
+
|
|
689
|
+
Then immediately offer to explore. Pick the single most interesting suggested question from the report - the one that crosses the most community boundaries or has the most surprising bridge node - and ask:
|
|
690
|
+
|
|
691
|
+
> "The most interesting question this graph can answer: **[question]**. Want me to trace it?"
|
|
692
|
+
|
|
693
|
+
If the user says yes, run `/graphify query "[question]"` on the graph and walk them through the answer using the graph structure - which nodes connect, which community boundaries get crossed, what the path reveals. Keep going as long as they want to explore. Each answer should end with a natural follow-up ("this connects to X - want to go deeper?") so the session feels like navigation, not a one-shot report.
|
|
694
|
+
|
|
695
|
+
The graph is the map. Your job after the pipeline is to be the guide.
|
|
696
|
+
|
|
697
|
+
---
|
|
698
|
+
|
|
699
|
+
## For --update (incremental re-extraction)
|
|
700
|
+
|
|
701
|
+
Use when you've added or modified files since the last run. Only re-extracts changed files - saves tokens and time.
|
|
702
|
+
|
|
703
|
+
```bash
|
|
704
|
+
$(cat .graphify_python) -c "
|
|
705
|
+
import sys, json
|
|
706
|
+
from graphify.detect import detect_incremental, save_manifest
|
|
707
|
+
from pathlib import Path
|
|
708
|
+
|
|
709
|
+
result = detect_incremental(Path('INPUT_PATH'))
|
|
710
|
+
new_total = result.get('new_total', 0)
|
|
711
|
+
print(json.dumps(result, indent=2))
|
|
712
|
+
Path('.graphify_incremental.json').write_text(json.dumps(result))
|
|
713
|
+
if new_total == 0:
|
|
714
|
+
print('No files changed since last run. Nothing to update.')
|
|
715
|
+
raise SystemExit(0)
|
|
716
|
+
print(f'{new_total} new/changed file(s) to re-extract.')
|
|
717
|
+
"
|
|
718
|
+
```
|
|
719
|
+
|
|
720
|
+
If new files exist, first check whether all changed files are code files:
|
|
721
|
+
|
|
722
|
+
```bash
|
|
723
|
+
$(cat .graphify_python) -c "
|
|
724
|
+
import json
|
|
725
|
+
from pathlib import Path
|
|
726
|
+
|
|
727
|
+
result = json.loads(open('.graphify_incremental.json').read()) if Path('.graphify_incremental.json').exists() else {}
|
|
728
|
+
code_exts = {'.py','.ts','.js','.go','.rs','.java','.cpp','.c','.rb','.swift','.kt','.cs','.scala','.php','.cc','.cxx','.hpp','.h','.kts'}
|
|
729
|
+
new_files = result.get('new_files', {})
|
|
730
|
+
all_changed = [f for files in new_files.values() for f in files]
|
|
731
|
+
code_only = all(Path(f).suffix.lower() in code_exts for f in all_changed)
|
|
732
|
+
print('code_only:', code_only)
|
|
733
|
+
"
|
|
734
|
+
```
|
|
735
|
+
|
|
736
|
+
If `code_only` is True: print `[graphify update] Code-only changes detected - skipping semantic extraction (no LLM needed)`, run only Step 3A (AST) on the changed files, skip Step 3B entirely (no subagents), then go straight to merge and Steps 4–9.
|
|
737
|
+
|
|
738
|
+
If `code_only` is False (any changed file is a doc/paper/image): run the full Steps 3A–3C pipeline as normal.
|
|
739
|
+
|
|
740
|
+
Then:
|
|
741
|
+
|
|
742
|
+
```bash
|
|
743
|
+
$(cat .graphify_python) -c "
|
|
744
|
+
import sys, json
|
|
745
|
+
from graphify.build import build_from_json
|
|
746
|
+
from graphify.export import to_json
|
|
747
|
+
from networkx.readwrite import json_graph
|
|
748
|
+
import networkx as nx
|
|
749
|
+
from pathlib import Path
|
|
750
|
+
|
|
751
|
+
# Load existing graph
|
|
752
|
+
existing_data = json.loads(Path('graphify-out/graph.json').read_text())
|
|
753
|
+
G_existing = json_graph.node_link_graph(existing_data, edges='links')
|
|
754
|
+
|
|
755
|
+
# Load new extraction
|
|
756
|
+
new_extraction = json.loads(Path('.graphify_extract.json').read_text())
|
|
757
|
+
G_new = build_from_json(new_extraction)
|
|
758
|
+
|
|
759
|
+
# Merge: new nodes/edges into existing graph
|
|
760
|
+
G_existing.update(G_new)
|
|
761
|
+
print(f'Merged: {G_existing.number_of_nodes()} nodes, {G_existing.number_of_edges()} edges')
|
|
762
|
+
"
|
|
763
|
+
```
|
|
764
|
+
|
|
765
|
+
Then run Steps 4–9 on the merged graph as normal.
|
|
766
|
+
|
|
767
|
+
After Step 4, show the graph diff:
|
|
768
|
+
|
|
769
|
+
```bash
|
|
770
|
+
$(cat .graphify_python) -c "
|
|
771
|
+
import json
|
|
772
|
+
from graphify.analyze import graph_diff
|
|
773
|
+
from graphify.build import build_from_json
|
|
774
|
+
from networkx.readwrite import json_graph
|
|
775
|
+
import networkx as nx
|
|
776
|
+
from pathlib import Path
|
|
777
|
+
|
|
778
|
+
# Load old graph (before update) from backup written before merge
|
|
779
|
+
old_data = json.loads(Path('.graphify_old.json').read_text()) if Path('.graphify_old.json').exists() else None
|
|
780
|
+
new_extract = json.loads(Path('.graphify_extract.json').read_text())
|
|
781
|
+
G_new = build_from_json(new_extract)
|
|
782
|
+
|
|
783
|
+
if old_data:
|
|
784
|
+
G_old = json_graph.node_link_graph(old_data, edges='links')
|
|
785
|
+
diff = graph_diff(G_old, G_new)
|
|
786
|
+
print(diff['summary'])
|
|
787
|
+
if diff['new_nodes']:
|
|
788
|
+
print('New nodes:', ', '.join(n['label'] for n in diff['new_nodes'][:5]))
|
|
789
|
+
if diff['new_edges']:
|
|
790
|
+
print('New edges:', len(diff['new_edges']))
|
|
791
|
+
"
|
|
792
|
+
```
|
|
793
|
+
|
|
794
|
+
Before the merge step, save the old graph: `cp graphify-out/graph.json .graphify_old.json`
|
|
795
|
+
Clean up after: `rm -f .graphify_old.json`
|
|
796
|
+
|
|
797
|
+
---
|
|
798
|
+
|
|
799
|
+
## For --cluster-only
|
|
800
|
+
|
|
801
|
+
Skip Steps 1–3. Load the existing graph from `graphify-out/graph.json` and re-run clustering:
|
|
802
|
+
|
|
803
|
+
```bash
|
|
804
|
+
$(cat .graphify_python) -c "
|
|
805
|
+
import sys, json
|
|
806
|
+
from graphify.cluster import cluster, score_all
|
|
807
|
+
from graphify.analyze import god_nodes, surprising_connections
|
|
808
|
+
from graphify.report import generate
|
|
809
|
+
from graphify.export import to_json
|
|
810
|
+
from networkx.readwrite import json_graph
|
|
811
|
+
import networkx as nx
|
|
812
|
+
from pathlib import Path
|
|
813
|
+
|
|
814
|
+
data = json.loads(Path('graphify-out/graph.json').read_text())
|
|
815
|
+
G = json_graph.node_link_graph(data, edges='links')
|
|
816
|
+
|
|
817
|
+
detection = {'total_files': 0, 'total_words': 99999, 'needs_graph': True, 'warning': None,
|
|
818
|
+
'files': {'code': [], 'document': [], 'paper': []}}
|
|
819
|
+
tokens = {'input': 0, 'output': 0}
|
|
820
|
+
|
|
821
|
+
communities = cluster(G)
|
|
822
|
+
cohesion = score_all(G, communities)
|
|
823
|
+
gods = god_nodes(G)
|
|
824
|
+
surprises = surprising_connections(G, communities)
|
|
825
|
+
labels = {cid: 'Community ' + str(cid) for cid in communities}
|
|
826
|
+
|
|
827
|
+
report = generate(G, communities, cohesion, labels, gods, surprises, detection, tokens, '.')
|
|
828
|
+
Path('graphify-out/GRAPH_REPORT.md').write_text(report)
|
|
829
|
+
to_json(G, communities, 'graphify-out/graph.json')
|
|
830
|
+
|
|
831
|
+
analysis = {
|
|
832
|
+
'communities': {str(k): v for k, v in communities.items()},
|
|
833
|
+
'cohesion': {str(k): v for k, v in cohesion.items()},
|
|
834
|
+
'gods': gods,
|
|
835
|
+
'surprises': surprises,
|
|
836
|
+
}
|
|
837
|
+
Path('graphify-out/.graphify_analysis.json').write_text(json.dumps(analysis, indent=2))
|
|
838
|
+
print(f'Re-clustered: {len(communities)} communities')
|
|
839
|
+
"
|
|
840
|
+
```
|
|
841
|
+
|
|
842
|
+
Then run Steps 5–9 as normal (label communities, generate viz, benchmark, clean up, report).
|
|
843
|
+
|
|
844
|
+
---
|
|
845
|
+
|
|
846
|
+
## For /graphify query
|
|
847
|
+
|
|
848
|
+
Two traversal modes - choose based on the question:
|
|
849
|
+
|
|
850
|
+
| Mode | Flag | Best for |
|
|
851
|
+
|------|------|----------|
|
|
852
|
+
| BFS (default) | _(none)_ | "What is X connected to?" - broad context, nearest neighbors first |
|
|
853
|
+
| DFS | `--dfs` | "How does X reach Y?" - trace a specific chain or dependency path |
|
|
854
|
+
|
|
855
|
+
First check the graph exists:
|
|
856
|
+
```bash
|
|
857
|
+
$(cat .graphify_python) -c "
|
|
858
|
+
from pathlib import Path
|
|
859
|
+
if not Path('graphify-out/graph.json').exists():
|
|
860
|
+
print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
|
|
861
|
+
raise SystemExit(1)
|
|
862
|
+
"
|
|
863
|
+
```
|
|
864
|
+
If it fails, stop and tell the user to run `/graphify <path>` first.
|
|
865
|
+
|
|
866
|
+
Load `graphify-out/graph.json`, then:
|
|
867
|
+
|
|
868
|
+
1. Find the 1-3 nodes whose label best matches key terms in the question.
|
|
869
|
+
2. Run the appropriate traversal from each starting node.
|
|
870
|
+
3. Read the subgraph - node labels, edge relations, confidence tags, source locations.
|
|
871
|
+
4. Answer using **only** what the graph contains. Quote `source_location` when citing a specific fact.
|
|
872
|
+
5. If the graph lacks enough information, say so - do not hallucinate edges.
|
|
873
|
+
|
|
874
|
+
```bash
|
|
875
|
+
$(cat .graphify_python) -c "
|
|
876
|
+
import sys, json
|
|
877
|
+
from networkx.readwrite import json_graph
|
|
878
|
+
import networkx as nx
|
|
879
|
+
from pathlib import Path
|
|
880
|
+
|
|
881
|
+
data = json.loads(Path('graphify-out/graph.json').read_text())
|
|
882
|
+
G = json_graph.node_link_graph(data, edges='links')
|
|
883
|
+
|
|
884
|
+
question = 'QUESTION'
|
|
885
|
+
mode = 'MODE' # 'bfs' or 'dfs'
|
|
886
|
+
terms = [t.lower() for t in question.split() if len(t) > 3]
|
|
887
|
+
|
|
888
|
+
# Find best-matching start nodes
|
|
889
|
+
scored = []
|
|
890
|
+
for nid, ndata in G.nodes(data=True):
|
|
891
|
+
label = ndata.get('label', '').lower()
|
|
892
|
+
score = sum(1 for t in terms if t in label)
|
|
893
|
+
if score > 0:
|
|
894
|
+
scored.append((score, nid))
|
|
895
|
+
scored.sort(reverse=True)
|
|
896
|
+
start_nodes = [nid for _, nid in scored[:3]]
|
|
897
|
+
|
|
898
|
+
if not start_nodes:
|
|
899
|
+
print('No matching nodes found for query terms:', terms)
|
|
900
|
+
sys.exit(0)
|
|
901
|
+
|
|
902
|
+
subgraph_nodes = set()
|
|
903
|
+
subgraph_edges = []
|
|
904
|
+
|
|
905
|
+
if mode == 'dfs':
|
|
906
|
+
# DFS: follow one path as deep as possible before backtracking.
|
|
907
|
+
# Depth-limited to 6 to avoid traversing the whole graph.
|
|
908
|
+
visited = set()
|
|
909
|
+
stack = [(n, 0) for n in reversed(start_nodes)]
|
|
910
|
+
while stack:
|
|
911
|
+
node, depth = stack.pop()
|
|
912
|
+
if node in visited or depth > 6:
|
|
913
|
+
continue
|
|
914
|
+
visited.add(node)
|
|
915
|
+
subgraph_nodes.add(node)
|
|
916
|
+
for neighbor in G.neighbors(node):
|
|
917
|
+
if neighbor not in visited:
|
|
918
|
+
stack.append((neighbor, depth + 1))
|
|
919
|
+
subgraph_edges.append((node, neighbor))
|
|
920
|
+
else:
|
|
921
|
+
# BFS: explore all neighbors layer by layer up to depth 3.
|
|
922
|
+
frontier = set(start_nodes)
|
|
923
|
+
subgraph_nodes = set(start_nodes)
|
|
924
|
+
for _ in range(3):
|
|
925
|
+
next_frontier = set()
|
|
926
|
+
for n in frontier:
|
|
927
|
+
for neighbor in G.neighbors(n):
|
|
928
|
+
if neighbor not in subgraph_nodes:
|
|
929
|
+
next_frontier.add(neighbor)
|
|
930
|
+
subgraph_edges.append((n, neighbor))
|
|
931
|
+
subgraph_nodes.update(next_frontier)
|
|
932
|
+
frontier = next_frontier
|
|
933
|
+
|
|
934
|
+
# Token-budget aware output: rank by relevance, cut at budget (~4 chars/token)
|
|
935
|
+
token_budget = BUDGET # default 2000
|
|
936
|
+
char_budget = token_budget * 4
|
|
937
|
+
|
|
938
|
+
# Score each node by term overlap for ranked output
|
|
939
|
+
def relevance(nid):
|
|
940
|
+
label = G.nodes[nid].get('label', '').lower()
|
|
941
|
+
return sum(1 for t in terms if t in label)
|
|
942
|
+
|
|
943
|
+
ranked_nodes = sorted(subgraph_nodes, key=relevance, reverse=True)
|
|
944
|
+
|
|
945
|
+
lines = [f'Traversal: {mode.upper()} | Start: {[G.nodes[n].get(\"label\",n) for n in start_nodes]} | {len(subgraph_nodes)} nodes']
|
|
946
|
+
for nid in ranked_nodes:
|
|
947
|
+
d = G.nodes[nid]
|
|
948
|
+
lines.append(f' NODE {d.get(\"label\", nid)} [src={d.get(\"source_file\",\"\")} loc={d.get(\"source_location\",\"\")}]')
|
|
949
|
+
for u, v in subgraph_edges:
|
|
950
|
+
if u in subgraph_nodes and v in subgraph_nodes:
|
|
951
|
+
d = G.edges[u, v]
|
|
952
|
+
lines.append(f' EDGE {G.nodes[u].get(\"label\",u)} --{d.get(\"relation\",\"\")} [{d.get(\"confidence\",\"\")}]--> {G.nodes[v].get(\"label\",v)}')
|
|
953
|
+
|
|
954
|
+
output = '\n'.join(lines)
|
|
955
|
+
if len(output) > char_budget:
|
|
956
|
+
output = output[:char_budget] + f'\n... (truncated at ~{token_budget} token budget - use --budget N for more)'
|
|
957
|
+
print(output)
|
|
958
|
+
"
|
|
959
|
+
```
|
|
960
|
+
|
|
961
|
+
Replace `QUESTION` with the user's actual question, `MODE` with `bfs` or `dfs`, and `BUDGET` with the token budget (default `2000`, or whatever `--budget N` specifies). Then answer based on the subgraph output above.
|
|
962
|
+
|
|
963
|
+
After writing the answer, save it back into the graph so it improves future queries:
|
|
964
|
+
|
|
965
|
+
```bash
|
|
966
|
+
$(cat .graphify_python) -m graphify save-result --question "QUESTION" --answer "ANSWER" --type query --nodes NODE1 NODE2
|
|
967
|
+
```
|
|
968
|
+
|
|
969
|
+
Replace `QUESTION` with the question, `ANSWER` with your full answer text, and `NODE1 NODE2` (the values after `--nodes`) with the list of node labels you cited. This closes the feedback loop: the next `--update` will extract this Q&A as a node in the graph.
|
|
970
|
+
|
|
971
|
+
---
|
|
972
|
+
|
|
973
|
+
## For /graphify path
|
|
974
|
+
|
|
975
|
+
Find the shortest path between two named concepts in the graph.
|
|
976
|
+
|
|
977
|
+
First check the graph exists:
|
|
978
|
+
```bash
|
|
979
|
+
$(cat .graphify_python) -c "
|
|
980
|
+
from pathlib import Path
|
|
981
|
+
if not Path('graphify-out/graph.json').exists():
|
|
982
|
+
print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
|
|
983
|
+
raise SystemExit(1)
|
|
984
|
+
"
|
|
985
|
+
```
|
|
986
|
+
If it fails, stop and tell the user to run `/graphify <path>` first.
|
|
987
|
+
|
|
988
|
+
```bash
|
|
989
|
+
$(cat .graphify_python) -c "
|
|
990
|
+
import json, sys
|
|
991
|
+
import networkx as nx
|
|
992
|
+
from networkx.readwrite import json_graph
|
|
993
|
+
from pathlib import Path
|
|
994
|
+
|
|
995
|
+
data = json.loads(Path('graphify-out/graph.json').read_text())
|
|
996
|
+
G = json_graph.node_link_graph(data, edges='links')
|
|
997
|
+
|
|
998
|
+
a_term = 'NODE_A'
|
|
999
|
+
b_term = 'NODE_B'
|
|
1000
|
+
|
|
1001
|
+
def find_node(term):
|
|
1002
|
+
term = term.lower()
|
|
1003
|
+
scored = sorted(
|
|
1004
|
+
[(sum(1 for w in term.split() if w in G.nodes[n].get('label','').lower()), n)
|
|
1005
|
+
for n in G.nodes()],
|
|
1006
|
+
reverse=True
|
|
1007
|
+
)
|
|
1008
|
+
return scored[0][1] if scored and scored[0][0] > 0 else None
|
|
1009
|
+
|
|
1010
|
+
src = find_node(a_term)
|
|
1011
|
+
tgt = find_node(b_term)
|
|
1012
|
+
|
|
1013
|
+
if not src or not tgt:
|
|
1014
|
+
print(f'Could not find nodes matching: {a_term!r} or {b_term!r}')
|
|
1015
|
+
sys.exit(0)
|
|
1016
|
+
|
|
1017
|
+
try:
|
|
1018
|
+
path = nx.shortest_path(G, src, tgt)
|
|
1019
|
+
print(f'Shortest path ({len(path)-1} hops):')
|
|
1020
|
+
for i, nid in enumerate(path):
|
|
1021
|
+
label = G.nodes[nid].get('label', nid)
|
|
1022
|
+
if i < len(path) - 1:
|
|
1023
|
+
edge = G.edges[nid, path[i+1]]
|
|
1024
|
+
rel = edge.get('relation', '')
|
|
1025
|
+
conf = edge.get('confidence', '')
|
|
1026
|
+
print(f' {label} --{rel}--> [{conf}]')
|
|
1027
|
+
else:
|
|
1028
|
+
print(f' {label}')
|
|
1029
|
+
except nx.NetworkXNoPath:
|
|
1030
|
+
print(f'No path found between {a_term!r} and {b_term!r}')
|
|
1031
|
+
except nx.NodeNotFound as e:
|
|
1032
|
+
print(f'Node not found: {e}')
|
|
1033
|
+
"
|
|
1034
|
+
```
|
|
1035
|
+
|
|
1036
|
+
Replace `NODE_A` and `NODE_B` with the actual concept names from the user. Then explain the path in plain language - what each hop means, why it's significant.
|
|
1037
|
+
|
|
1038
|
+
After writing the explanation, save it back:
|
|
1039
|
+
|
|
1040
|
+
```bash
|
|
1041
|
+
$(cat .graphify_python) -m graphify save-result --question "Path from NODE_A to NODE_B" --answer "ANSWER" --type path_query --nodes NODE_A NODE_B
|
|
1042
|
+
```
|
|
1043
|
+
|
|
1044
|
+
---
|
|
1045
|
+
|
|
1046
|
+
## For /graphify explain
|
|
1047
|
+
|
|
1048
|
+
Give a plain-language explanation of a single node - everything connected to it.
|
|
1049
|
+
|
|
1050
|
+
First check the graph exists:
|
|
1051
|
+
```bash
|
|
1052
|
+
$(cat .graphify_python) -c "
|
|
1053
|
+
from pathlib import Path
|
|
1054
|
+
if not Path('graphify-out/graph.json').exists():
|
|
1055
|
+
print('ERROR: No graph found. Run /graphify <path> first to build the graph.')
|
|
1056
|
+
raise SystemExit(1)
|
|
1057
|
+
"
|
|
1058
|
+
```
|
|
1059
|
+
If it fails, stop and tell the user to run `/graphify <path>` first.
|
|
1060
|
+
|
|
1061
|
+
```bash
|
|
1062
|
+
$(cat .graphify_python) -c "
|
|
1063
|
+
import json, sys
|
|
1064
|
+
import networkx as nx
|
|
1065
|
+
from networkx.readwrite import json_graph
|
|
1066
|
+
from pathlib import Path
|
|
1067
|
+
|
|
1068
|
+
data = json.loads(Path('graphify-out/graph.json').read_text())
|
|
1069
|
+
G = json_graph.node_link_graph(data, edges='links')
|
|
1070
|
+
|
|
1071
|
+
term = 'NODE_NAME'
|
|
1072
|
+
term_lower = term.lower()
|
|
1073
|
+
|
|
1074
|
+
# Find best matching node
|
|
1075
|
+
scored = sorted(
|
|
1076
|
+
[(sum(1 for w in term_lower.split() if w in G.nodes[n].get('label','').lower()), n)
|
|
1077
|
+
for n in G.nodes()],
|
|
1078
|
+
reverse=True
|
|
1079
|
+
)
|
|
1080
|
+
if not scored or scored[0][0] == 0:
|
|
1081
|
+
print(f'No node matching {term!r}')
|
|
1082
|
+
sys.exit(0)
|
|
1083
|
+
|
|
1084
|
+
nid = scored[0][1]
|
|
1085
|
+
data_n = G.nodes[nid]
|
|
1086
|
+
print(f'NODE: {data_n.get(\"label\", nid)}')
|
|
1087
|
+
print(f' source: {data_n.get(\"source_file\",\"unknown\")}')
|
|
1088
|
+
print(f' type: {data_n.get(\"file_type\",\"unknown\")}')
|
|
1089
|
+
print(f' degree: {G.degree(nid)}')
|
|
1090
|
+
print()
|
|
1091
|
+
print('CONNECTIONS:')
|
|
1092
|
+
for neighbor in G.neighbors(nid):
|
|
1093
|
+
edge = G.edges[nid, neighbor]
|
|
1094
|
+
nlabel = G.nodes[neighbor].get('label', neighbor)
|
|
1095
|
+
rel = edge.get('relation', '')
|
|
1096
|
+
conf = edge.get('confidence', '')
|
|
1097
|
+
src_file = G.nodes[neighbor].get('source_file', '')
|
|
1098
|
+
print(f' --{rel}--> {nlabel} [{conf}] ({src_file})')
|
|
1099
|
+
"
|
|
1100
|
+
```
|
|
1101
|
+
|
|
1102
|
+
Replace `NODE_NAME` with the concept the user asked about. Then write a 3-5 sentence explanation of what this node is, what it connects to, and why those connections are significant. Use the source locations as citations.
|
|
1103
|
+
|
|
1104
|
+
After writing the explanation, save it back:
|
|
1105
|
+
|
|
1106
|
+
```bash
|
|
1107
|
+
$(cat .graphify_python) -m graphify save-result --question "Explain NODE_NAME" --answer "ANSWER" --type explain --nodes NODE_NAME
|
|
1108
|
+
```
|
|
1109
|
+
|
|
1110
|
+
---
|
|
1111
|
+
|
|
1112
|
+
## For /graphify add
|
|
1113
|
+
|
|
1114
|
+
Fetch a URL and add it to the corpus, then update the graph.
|
|
1115
|
+
|
|
1116
|
+
```bash
|
|
1117
|
+
$(cat .graphify_python) -c "
|
|
1118
|
+
import sys
|
|
1119
|
+
from graphify.ingest import ingest
|
|
1120
|
+
from pathlib import Path
|
|
1121
|
+
|
|
1122
|
+
try:
|
|
1123
|
+
out = ingest('URL', Path('./raw'), author='AUTHOR', contributor='CONTRIBUTOR')
|
|
1124
|
+
print(f'Saved to {out}')
|
|
1125
|
+
except ValueError as e:
|
|
1126
|
+
print(f'error: {e}', file=sys.stderr)
|
|
1127
|
+
sys.exit(1)
|
|
1128
|
+
except RuntimeError as e:
|
|
1129
|
+
print(f'error: {e}', file=sys.stderr)
|
|
1130
|
+
sys.exit(1)
|
|
1131
|
+
"
|
|
1132
|
+
```
|
|
1133
|
+
|
|
1134
|
+
Replace `URL` with the actual URL, `AUTHOR` with the user's name if provided, `CONTRIBUTOR` likewise. If the command exits with an error, tell the user what went wrong - do not silently continue. After a successful save, automatically run the `--update` pipeline on `./raw` to merge the new file into the existing graph.
|
|
1135
|
+
|
|
1136
|
+
Supported URL types (auto-detected):
|
|
1137
|
+
- Twitter/X → fetched via oEmbed, saved as `.md` with tweet text and author
|
|
1138
|
+
- arXiv → abstract + metadata saved as `.md`
|
|
1139
|
+
- PDF → downloaded as `.pdf`
|
|
1140
|
+
- Images (.png/.jpg/.webp) → downloaded, vision extraction runs on next build
|
|
1141
|
+
- Any webpage → converted to markdown via html2text
|
|
1142
|
+
|
|
1143
|
+
---
|
|
1144
|
+
|
|
1145
|
+
## For --watch
|
|
1146
|
+
|
|
1147
|
+
Start a background watcher that monitors a folder and auto-updates the graph when files change.
|
|
1148
|
+
|
|
1149
|
+
```bash
|
|
1150
|
+
python3 -m graphify.watch INPUT_PATH --debounce 3
|
|
1151
|
+
```
|
|
1152
|
+
|
|
1153
|
+
Replace INPUT_PATH with the folder to watch. Behavior depends on what changed:
|
|
1154
|
+
|
|
1155
|
+
- **Code files only (.py, .ts, .go, etc.):** re-runs AST extraction + rebuild + cluster immediately, no LLM needed. `graph.json` and `GRAPH_REPORT.md` are updated automatically.
|
|
1156
|
+
- **Docs, papers, or images:** writes a `graphify-out/.needs_update` flag and prints a notification to run `/graphify --update` (LLM semantic re-extraction required).
|
|
1157
|
+
|
|
1158
|
+
Debounce (default 3s): waits until file activity stops before triggering, so a wave of parallel agent writes doesn't trigger a rebuild per file.
|
|
1159
|
+
|
|
1160
|
+
Press Ctrl+C to stop.
|
|
1161
|
+
|
|
1162
|
+
For agentic workflows: run `--watch` in a background terminal. Code changes from agent waves are picked up automatically between waves. If agents are also writing docs or notes, you'll need a manual `/graphify --update` after those waves.
|
|
1163
|
+
|
|
1164
|
+
---
|
|
1165
|
+
|
|
1166
|
+
## For git commit hook
|
|
1167
|
+
|
|
1168
|
+
Install a post-commit hook that auto-rebuilds the graph after every commit. No background process needed - triggers once per commit, works with any editor.
|
|
1169
|
+
|
|
1170
|
+
```bash
|
|
1171
|
+
graphify hook install # install
|
|
1172
|
+
graphify hook uninstall # remove
|
|
1173
|
+
graphify hook status # check
|
|
1174
|
+
```
|
|
1175
|
+
|
|
1176
|
+
After every `git commit`, the hook detects which code files changed (via `git diff HEAD~1`), re-runs AST extraction on those files, and rebuilds `graph.json` and `GRAPH_REPORT.md`. Doc/image changes are ignored by the hook - run `/graphify --update` manually for those.
|
|
1177
|
+
|
|
1178
|
+
If a post-commit hook already exists, graphify appends to it rather than replacing it.
|
|
1179
|
+
|
|
1180
|
+
---
|
|
1181
|
+
|
|
1182
|
+
## For native CLAUDE.md integration
|
|
1183
|
+
|
|
1184
|
+
Run once per project to make graphify always-on in Claude Code sessions:
|
|
1185
|
+
|
|
1186
|
+
```bash
|
|
1187
|
+
graphify claude install
|
|
1188
|
+
```
|
|
1189
|
+
|
|
1190
|
+
This writes a `## graphify` section to the local `CLAUDE.md` that instructs Claude to check the graph before answering codebase questions and rebuild it after code changes. No manual `/graphify` needed in future sessions.
|
|
1191
|
+
|
|
1192
|
+
```bash
|
|
1193
|
+
graphify claude uninstall # remove the section
|
|
1194
|
+
```
|
|
1195
|
+
|
|
1196
|
+
---
|
|
1197
|
+
|
|
1198
|
+
## Honesty Rules
|
|
1199
|
+
|
|
1200
|
+
- Never invent an edge. If unsure, use AMBIGUOUS.
|
|
1201
|
+
- Never skip the corpus check warning.
|
|
1202
|
+
- Always show token cost in the report.
|
|
1203
|
+
- Never hide cohesion scores behind symbols - show the raw number.
|
|
1204
|
+
- Never run HTML viz on a graph with more than 5,000 nodes without warning the user.
|